1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import static org.apache.hadoop.hbase.SplitLogCounters.tot_mgr_wait_for_zk_delete;
22  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_final_transition_failed;
23  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_preempt_task;
24  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_acquired;
25  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_done;
26  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_err;
27  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_resigned;
28  import static org.junit.Assert.assertEquals;
29  import static org.junit.Assert.assertFalse;
30  import static org.junit.Assert.assertTrue;
31  import static org.junit.Assert.fail;
32  
33  import java.io.IOException;
34  import java.util.ArrayList;
35  import java.util.Arrays;
36  import java.util.HashSet;
37  import java.util.Iterator;
38  import java.util.List;
39  import java.util.NavigableSet;
40  import java.util.Set;
41  import java.util.TreeSet;
42  import java.util.concurrent.ExecutorService;
43  import java.util.concurrent.Executors;
44  import java.util.concurrent.Future;
45  import java.util.concurrent.TimeUnit;
46  import java.util.concurrent.TimeoutException;
47  import java.util.concurrent.atomic.AtomicLong;
48  
49  import org.apache.commons.logging.Log;
50  import org.apache.commons.logging.LogFactory;
51  import org.apache.hadoop.conf.Configuration;
52  import org.apache.hadoop.fs.FSDataOutputStream;
53  import org.apache.hadoop.fs.FileStatus;
54  import org.apache.hadoop.fs.FileSystem;
55  import org.apache.hadoop.fs.Path;
56  import org.apache.hadoop.hbase.TableName;
57  import org.apache.hadoop.hbase.HBaseConfiguration;
58  import org.apache.hadoop.hbase.HBaseTestingUtility;
59  import org.apache.hadoop.hbase.HConstants;
60  import org.apache.hadoop.hbase.HRegionInfo;
61  import org.apache.hadoop.hbase.HTableDescriptor;
62  import org.apache.hadoop.hbase.KeyValue;
63  import org.apache.hadoop.hbase.LargeTests;
64  import org.apache.hadoop.hbase.MiniHBaseCluster;
65  import org.apache.hadoop.hbase.NamespaceDescriptor;
66  import org.apache.hadoop.hbase.ServerName;
67  import org.apache.hadoop.hbase.SplitLogCounters;
68  import org.apache.hadoop.hbase.Waiter;
69  import org.apache.hadoop.hbase.client.Delete;
70  import org.apache.hadoop.hbase.client.HTable;
71  import org.apache.hadoop.hbase.client.Put;
72  import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
73  import org.apache.hadoop.hbase.exceptions.RegionInRecoveryException;
74  import org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch;
75  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
76  import org.apache.hadoop.hbase.regionserver.HRegion;
77  import org.apache.hadoop.hbase.regionserver.HRegionServer;
78  import org.apache.hadoop.hbase.regionserver.wal.HLog;
79  import org.apache.hadoop.hbase.regionserver.wal.HLogFactory;
80  import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
81  import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
82  import org.apache.hadoop.hbase.util.Bytes;
83  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
84  import org.apache.hadoop.hbase.util.FSUtils;
85  import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
86  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
87  import org.apache.hadoop.hbase.util.Threads;
88  import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
89  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
90  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
91  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
92  import org.apache.hadoop.hdfs.MiniDFSCluster;
93  import org.apache.log4j.Level;
94  import org.apache.log4j.Logger;
95  import org.apache.zookeeper.KeeperException;
96  import org.junit.After;
97  import org.junit.AfterClass;
98  import org.junit.Assert;
99  import org.junit.BeforeClass;
100 import org.junit.Test;
101 import org.junit.experimental.categories.Category;
102 
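/**
 * End-to-end tests of distributed log splitting and distributed log replay: each test
 * starts a mini HBase cluster on shared mini DFS and ZooKeeper clusters, writes WAL
 * edits, kills region servers and/or masters, and verifies that the edits are recovered.
 */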
103 @Category(LargeTests.class)
104 public class TestDistributedLogSplitting {
105   private static final Log LOG = LogFactory.getLog(TestDistributedLogSplitting.class);
106   static {
107     Logger.getLogger("org.apache.hadoop.hbase").setLevel(Level.DEBUG);
108 
109     // testThreeRSAbort fails under hadoop2 (2.0.2-alpha) if short-circuit read (SCR) is on;
110     // this turns it off for this test.  TODO: figure out why SCR breaks recovery.
111     System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
112 
113   }
114 
115   // Start a cluster with 2 masters and 6 regionservers
116   static final int NUM_MASTERS = 2;
117   static final int NUM_RS = 6;
118 
119   MiniHBaseCluster cluster;
120   HMaster master;
121   Configuration conf;
122   static HBaseTestingUtility TEST_UTIL;
123   static MiniDFSCluster dfsCluster;
124   static MiniZooKeeperCluster zkCluster;
125 
126   @BeforeClass
127   public static void setup() throws Exception {
128     TEST_UTIL = new HBaseTestingUtility(HBaseConfiguration.create());
129     dfsCluster = TEST_UTIL.startMiniDFSCluster(1);
130     zkCluster = TEST_UTIL.startMiniZKCluster();
131   }
132 
133   @AfterClass
134   public static void tearDown() throws IOException {
135     TEST_UTIL.shutdownMiniZKCluster();
136     TEST_UTIL.shutdownMiniDFSCluster();
137   }
138 
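  /**
   * Starts a mini HBase cluster with a default configuration and the given number of
   * region servers.
   */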
139   private void startCluster(int num_rs) throws Exception{
140     conf = HBaseConfiguration.create();
141     startCluster(num_rs, conf);
142   }
143 
144   private void startCluster(int num_rs, Configuration inConf) throws Exception {
145     SplitLogCounters.resetCounters();
146     LOG.info("Starting cluster");
147     this.conf = inConf;
148     conf.setLong("hbase.splitlog.max.resubmit", 0);
149     // Make the failure test faster
150     conf.setInt("zookeeper.recovery.retry", 0);
151     conf.setInt(HConstants.REGIONSERVER_INFO_PORT, -1);
152     conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, (float) 100.0); // no load balancing
153     TEST_UTIL = new HBaseTestingUtility(conf);
154     TEST_UTIL.setDFSCluster(dfsCluster);
155     TEST_UTIL.setZkCluster(zkCluster);
156     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, num_rs);
157     cluster = TEST_UTIL.getHBaseCluster();
158     LOG.info("Waiting for active/ready master");
159     cluster.waitForActiveAndReadyMaster();
160     master = cluster.getMaster();
161     while (cluster.getLiveRegionServerThreads().size() < num_rs) {
162       Threads.sleep(1);
163     }
164   }
165 
166   @After
167   public void after() throws Exception {
168     for (MasterThread mt : TEST_UTIL.getHBaseCluster().getLiveMasterThreads()) {
169       mt.getMaster().abort("closing...", new Exception("Trace info"));
170     }
171 
172     TEST_UTIL.shutdownMiniHBaseCluster();
173     TEST_UTIL.getTestFileSystem().delete(FSUtils.getRootDir(TEST_UTIL.getConfiguration()), true);
174     ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
175   }
176 
177   @Test (timeout=300000)
178   public void testRecoveredEdits() throws Exception {
179     LOG.info("testRecoveredEdits");
180     Configuration curConf = HBaseConfiguration.create();
181     curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
182     startCluster(NUM_RS, curConf);
183 
184     final int NUM_LOG_LINES = 1000;
185     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
186     // turn off load balancing to prevent regions from moving around; otherwise
187     // they will consume recovered.edits
188     master.balanceSwitch(false);
189     FileSystem fs = master.getMasterFileSystem().getFileSystem();
190 
191     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
192 
193     Path rootdir = FSUtils.getRootDir(conf);
194 
195     installTable(new ZooKeeperWatcher(conf, "table-creation", null),
196         "table", "family", 40);
197     TableName table = TableName.valueOf("table");
198     List<HRegionInfo> regions = null;
199     HRegionServer hrs = null;
200     for (int i = 0; i < NUM_RS; i++) {
201       boolean foundRs = false;
202       hrs = rsts.get(i).getRegionServer();
203       regions = ProtobufUtil.getOnlineRegions(hrs);
204       for (HRegionInfo region : regions) {
205         if (region.getTableName().getNameAsString().equalsIgnoreCase("table")) {
206           foundRs = true;
207           break;
208         }
209       }
210       if (foundRs) break;
211     }
212     final Path logDir = new Path(rootdir, HLogUtil.getHLogDirectoryName(hrs
213         .getServerName().toString()));
214 
215     LOG.info("#regions = " + regions.size());
216     Iterator<HRegionInfo> it = regions.iterator();
217     while (it.hasNext()) {
218       HRegionInfo region = it.next();
219       if (region.getTableName().getNamespaceAsString()
220           .equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
221         it.remove();
222       }
223     }
224     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
225 
226     slm.splitLogDistributed(logDir);
227 
228     int count = 0;
229     for (HRegionInfo hri : regions) {
230 
231       Path tdir = FSUtils.getTableDir(rootdir, table);
232       @SuppressWarnings("deprecation")
233       Path editsdir =
234         HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
235       LOG.debug("checking edits dir " + editsdir);
236       FileStatus[] files = fs.listStatus(editsdir);
237       assertEquals(1, files.length);
238       int c = countHLog(files[0].getPath(), fs, conf);
239       count += c;
240       LOG.info(c + " edits in " + files[0].getPath());
241     }
242     assertEquals(NUM_LOG_LINES, count);
243   }
244 
245   @Test(timeout = 300000)
246   public void testLogReplayWithNonMetaRSDown() throws Exception {
247     LOG.info("testLogReplayWithNonMetaRSDown");
248     Configuration curConf = HBaseConfiguration.create();
249     curConf.setLong("hbase.regionserver.hlog.blocksize", 100*1024);
250     curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
251     startCluster(NUM_RS, curConf);
252     final int NUM_REGIONS_TO_CREATE = 40;
253     final int NUM_LOG_LINES = 1000;
254     // turn off load balancing to prevent regions from moving around; otherwise
255     // they will consume recovered.edits
256     master.balanceSwitch(false);
257 
258     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
259     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
260     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
261 
262     HRegionServer hrs = findRSToKill(false, "table");
263     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
264     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
265 
266     // abort the RS, wait for recovery to complete, then verify the row count
267     this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
268     ht.close();
269     zkw.close();
270   }
271 
272   @Test(timeout = 300000)
273   public void testLogReplayWithMetaRSDown() throws Exception {
274     LOG.info("testLogReplayWithMetaRSDown");
275     Configuration curConf = HBaseConfiguration.create();
276     curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
277     startCluster(NUM_RS, curConf);
278     final int NUM_REGIONS_TO_CREATE = 40;
279     final int NUM_LOG_LINES = 1000;
280     // turn off load balancing to prevent regions from moving around; otherwise
281     // they will consume recovered.edits
282     master.balanceSwitch(false);
283 
284     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
285     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
286     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
287 
288     HRegionServer hrs = findRSToKill(true, "table");
289     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
290     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
291 
292     this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
293     ht.close();
294     zkw.close();
295   }
296 
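  /**
   * Aborts the given region server, waits for recovery to finish, then verifies that the
   * table still contains the expected number of rows.
   */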
297   private void abortRSAndVerifyRecovery(HRegionServer hrs, HTable ht, final ZooKeeperWatcher zkw,
298       final int numRegions, final int numofLines) throws Exception {
299 
300     abortRSAndWaitForRecovery(hrs, zkw, numRegions);
301     assertEquals(numofLines, TEST_UTIL.countRows(ht));
302   }
303 
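  /**
   * Aborts the given region server and blocks until the cluster has noticed the death,
   * the regions are back online, and no regions remain marked as recovering in ZooKeeper.
   */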
304   private void abortRSAndWaitForRecovery(HRegionServer hrs, final ZooKeeperWatcher zkw,
305       final int numRegions) throws Exception {
306     final MiniHBaseCluster tmpCluster = this.cluster;
307 
308     // abort RS
309     LOG.info("Aborting region server: " + hrs.getServerName());
310     hrs.abort("testing");
311 
312     // wait for the abort to complete
313     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
314       @Override
315       public boolean evaluate() throws Exception {
316         return (tmpCluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
317       }
318     });
319 
320     // wait for regions to come online
321     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
322       @Override
323       public boolean evaluate() throws Exception {
324         return (getAllOnlineRegions(tmpCluster).size() >= (numRegions + 1));
325       }
326     });
327 
328     // wait until all regions are fully recovered
329     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
330       @Override
331       public boolean evaluate() throws Exception {
332         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
333           zkw.recoveringRegionsZNode, false);
334         return (recoveringRegions != null && recoveringRegions.size() == 0);
335       }
336     });
337   }
338 
339   @Test(timeout = 300000)
340   public void testMasterStartsUpWithLogSplittingWork() throws Exception {
341     LOG.info("testMasterStartsUpWithLogSplittingWork");
342     Configuration curConf = HBaseConfiguration.create();
343     curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
344     curConf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
345     startCluster(NUM_RS, curConf);
346 
347     final int NUM_REGIONS_TO_CREATE = 40;
348     final int NUM_LOG_LINES = 1000;
349     // turn off load balancing to prevent regions from moving around; otherwise
350     // they will consume recovered.edits
351     master.balanceSwitch(false);
352 
353     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
354     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
355     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
356 
357     HRegionServer hrs = findRSToKill(false, "table");
358     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
359     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
360 
361     // abort master
362     abortMaster(cluster);
363 
364     // abort RS
365     LOG.info("Aborting region server: " + hrs.getServerName());
366     hrs.abort("testing");
367 
368     // wait for the abort to complete
369     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
370       @Override
371       public boolean evaluate() throws Exception {
372         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
373       }
374     });
375 
376     Thread.sleep(2000);
377     LOG.info("Current Open Regions:" + getAllOnlineRegions(cluster).size());
378     
379     startMasterAndWaitUntilLogSplit(cluster);
380     
381     // wait for regions to come back online
382     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
383       @Override
384       public boolean evaluate() throws Exception {
385         return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
386       }
387     });
388 
389     LOG.info("Current Open Regions After Master Node Starts Up:"
390         + getAllOnlineRegions(cluster).size());
391 
392     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
393 
394     ht.close();
395     zkw.close();
396   }
397   
398   @Test(timeout = 300000)
399   public void testMasterStartsUpWithLogReplayWork() throws Exception {
400     LOG.info("testMasterStartsUpWithLogReplayWork");
401     Configuration curConf = HBaseConfiguration.create();
402     curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
403     curConf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
404     startCluster(NUM_RS, curConf);
405 
406     final int NUM_REGIONS_TO_CREATE = 40;
407     final int NUM_LOG_LINES = 1000;
408     // turn off load balancing to prevent regions from moving around; otherwise
409     // they will consume recovered.edits
410     master.balanceSwitch(false);
411 
412     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
413     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
414     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
415 
416     HRegionServer hrs = findRSToKill(false, "table");
417     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
418     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
419 
420     // abort master
421     abortMaster(cluster);
422 
423     // abort RS
424     LOG.info("Aborting region server: " + hrs.getServerName());
425     hrs.abort("testing");
426 
427     // wait for the RS to die
428     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
429       @Override
430       public boolean evaluate() throws Exception {
431         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
432       }
433     });
434 
435     Thread.sleep(2000);
436     LOG.info("Current Open Regions:" + getAllOnlineRegions(cluster).size());
437 
438     startMasterAndWaitUntilLogSplit(cluster);
439 
440     // wait until all regions are fully recovered
441     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
442       @Override
443       public boolean evaluate() throws Exception {
444         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
445           zkw.recoveringRegionsZNode, false);
446         return (recoveringRegions != null && recoveringRegions.size() == 0);
447       }
448     });
449 
450     LOG.info("Current Open Regions After Master Node Starts Up:"
451         + getAllOnlineRegions(cluster).size());
452 
453     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
454 
455     ht.close();
456     zkw.close();
457   }
458 
459 
460   @Test(timeout = 300000)
461   public void testLogReplayTwoSequentialRSDown() throws Exception {
462     LOG.info("testLogReplayTwoSequentialRSDown");
463     Configuration curConf = HBaseConfiguration.create();
464     curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
465     startCluster(NUM_RS, curConf);
466     final int NUM_REGIONS_TO_CREATE = 40;
467     final int NUM_LOG_LINES = 1000;
468     // turn off load balancing to prevent regions from moving around; otherwise
469     // they will consume recovered.edits
470     master.balanceSwitch(false);
471 
472     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
473     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
474     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
475 
476     List<HRegionInfo> regions = null;
477     HRegionServer hrs1 = findRSToKill(false, "table");
478     regions = ProtobufUtil.getOnlineRegions(hrs1);
479 
480     makeHLog(hrs1.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
481 
482     // abort RS1
483     LOG.info("Aborting region server: " + hrs1.getServerName());
484     hrs1.abort("testing");
485 
486     // wait for the abort to complete
487     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
488       @Override
489       public boolean evaluate() throws Exception {
490         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
491       }
492     });
493 
494     // wait for regions to come online
495     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
496       @Override
497       public boolean evaluate() throws Exception {
498         return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
499       }
500     });
501 
502     // sleep a little so the second failure happens while recovery is still in progress
503     Thread.sleep(300);
504     // abort second region server
505     rsts = cluster.getLiveRegionServerThreads();
506     HRegionServer hrs2 = rsts.get(0).getRegionServer();
507     LOG.info("Aborting one more region server: " + hrs2.getServerName());
508     hrs2.abort("testing");
509 
510     // wait for the abort to complete
511     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
512       @Override
513       public boolean evaluate() throws Exception {
514         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 2));
515       }
516     });
517 
518     // wait for regions to come online
519     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
520       @Override
521       public boolean evaluate() throws Exception {
522         return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
523       }
524     });
525 
526     // wait until all regions are fully recovered
527     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
528       @Override
529       public boolean evaluate() throws Exception {
530         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
531           zkw.recoveringRegionsZNode, false);
532         return (recoveringRegions != null && recoveringRegions.size() == 0);
533       }
534     });
535 
536     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
537     ht.close();
538     zkw.close();
539   }
540 
541   @Test(timeout = 300000)
542   public void testMarkRegionsRecoveringInZK() throws Exception {
543     LOG.info("testMarkRegionsRecoveringInZK");
544     Configuration curConf = HBaseConfiguration.create();
545     curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
546     startCluster(NUM_RS, curConf);
547     master.balanceSwitch(false);
548     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
549     final ZooKeeperWatcher zkw = master.getZooKeeperWatcher();
550     HTable ht = installTable(zkw, "table", "family", 40);
551     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
552 
553     Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
554     HRegionInfo region = null;
555     HRegionServer hrs = null;
556     ServerName firstFailedServer = null;
557     ServerName secondFailedServer = null;
558     for (int i = 0; i < NUM_RS; i++) {
559       hrs = rsts.get(i).getRegionServer();
560       List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
561       if (regions.isEmpty()) continue;
562       region = regions.get(0);
563       regionSet.add(region);
564       firstFailedServer = hrs.getServerName();
565       secondFailedServer = rsts.get((i + 1) % NUM_RS).getRegionServer().getServerName();
566       break;
567     }
568 
569     slm.markRegionsRecoveringInZK(firstFailedServer, regionSet);
570     slm.markRegionsRecoveringInZK(secondFailedServer, regionSet);
571 
572     List<String> recoveringRegions = ZKUtil.listChildrenNoWatch(zkw,
573       ZKUtil.joinZNode(zkw.recoveringRegionsZNode, region.getEncodedName()));
574 
575     assertEquals(2, recoveringRegions.size());
576 
577     // wait for the SplitLogWorker to clear them since there are no WAL files recorded in ZK
578     final HRegionServer tmphrs = hrs;
579     TEST_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
580       @Override
581       public boolean evaluate() throws Exception {
582         return (tmphrs.getRecoveringRegions().size() == 0);
583       }
584     });
585     ht.close();
586     zkw.close();
587   }
588 
589   @Test(timeout = 300000)
590   public void testReplayCmd() throws Exception {
591     LOG.info("testReplayCmd");
592     Configuration curConf = HBaseConfiguration.create();
593     curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
594     startCluster(NUM_RS, curConf);
595     final int NUM_REGIONS_TO_CREATE = 40;
596     // turn off load balancing to prevent regions from moving around; otherwise
597     // they will consume recovered.edits
598     master.balanceSwitch(false);
599 
600     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
601     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
602     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
603 
604     List<HRegionInfo> regions = null;
605     HRegionServer hrs = null;
606     for (int i = 0; i < NUM_RS; i++) {
607       boolean isCarryingMeta = false;
608       hrs = rsts.get(i).getRegionServer();
609       regions = ProtobufUtil.getOnlineRegions(hrs);
610       for (HRegionInfo region : regions) {
611         if (region.isMetaRegion()) {
612           isCarryingMeta = true;
613           break;
614         }
615       }
616       if (isCarryingMeta) {
617         continue;
618       }
619       if (regions.size() > 0) break;
620     }
621 
622     this.prepareData(ht, Bytes.toBytes("family"), Bytes.toBytes("c1"));
623     String originalCheckSum = TEST_UTIL.checksumRows(ht);
624     
625     // abort the RS and trigger log replay
626     abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
627 
628     assertEquals("Data should remain after reopening of regions", originalCheckSum,
629       TEST_UTIL.checksumRows(ht));
630 
631     ht.close();
632     zkw.close();
633   }
634 
635   @Test(timeout = 300000)
636   public void testLogReplayForDisablingTable() throws Exception {
637     LOG.info("testLogReplayForDisablingTable");
638     Configuration curConf = HBaseConfiguration.create();
639     curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
640     startCluster(NUM_RS, curConf);
641     final int NUM_REGIONS_TO_CREATE = 40;
642     final int NUM_LOG_LINES = 1000;
643 
644     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
645     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
646     HTable disablingHT = installTable(zkw, "disableTable", "family", NUM_REGIONS_TO_CREATE);
647     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE, NUM_REGIONS_TO_CREATE);
648 
649     // turn off load balancing to prevent regions from moving around; otherwise
650     // they will consume recovered.edits
651     master.balanceSwitch(false);
652 
653     List<HRegionInfo> regions = null;
654     HRegionServer hrs = null;
655     boolean hasRegionsForBothTables = false;
656     String tableName = null;
657     for (int i = 0; i < NUM_RS; i++) {
658       tableName = null;
659       hasRegionsForBothTables = false;
660       boolean isCarryingMeta = false;
661       hrs = rsts.get(i).getRegionServer();
662       regions = ProtobufUtil.getOnlineRegions(hrs);
663       for (HRegionInfo region : regions) {
664         if (region.isMetaRegion()) {
665           isCarryingMeta = true;
666           break;
667         }
668         if (tableName != null &&
669             !tableName.equalsIgnoreCase(region.getTableName().getNameAsString())) {
670           // make sure that we find an RS that has online regions for both "table" and "disableTable"
671           hasRegionsForBothTables = true;
672           break;
673         } else if (tableName == null) {
674           tableName = region.getTableName().getNameAsString();
675         }
676       }
677       if (isCarryingMeta) {
678         continue;
679       }
680       if (hasRegionsForBothTables) {
681         break;
682       }
683     }
684 
685     // make sure we found a good RS
686     Assert.assertTrue(hasRegionsForBothTables);
687 
688     LOG.info("#regions = " + regions.size());
689     Iterator<HRegionInfo> it = regions.iterator();
690     while (it.hasNext()) {
691       HRegionInfo region = it.next();
692       if (region.isMetaTable()) {
693         it.remove();
694       }
695     }
696     makeHLog(hrs.getWAL(), regions, "disableTable", "family", NUM_LOG_LINES, 100, false);
697     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
698     
699     LOG.info("Disabling table\n");
700     TEST_UTIL.getHBaseAdmin().disableTable(Bytes.toBytes("disableTable"));
701     
702     // abort RS
703     LOG.info("Aborting region server: " + hrs.getServerName());
704     hrs.abort("testing");
705 
706     // wait for the abort to complete
707     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
708       @Override
709       public boolean evaluate() throws Exception {
710         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
711       }
712     });
713 
714     // wait for regions to come online
715     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
716       @Override
717       public boolean evaluate() throws Exception {
718         return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
719       }
720     });
721 
722     // wait until all regions are fully recovered
723     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
724       @Override
725       public boolean evaluate() throws Exception {
726         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
727           zkw.recoveringRegionsZNode, false);
728         return (recoveringRegions != null && recoveringRegions.size() == 0);
729       }
730     });
731 
732     int count = 0;
733     FileSystem fs = master.getMasterFileSystem().getFileSystem();
734     Path rootdir = FSUtils.getRootDir(conf);
735     Path tdir = FSUtils.getTableDir(rootdir, TableName.valueOf("disableTable"));
736     for (HRegionInfo hri : regions) {
737       @SuppressWarnings("deprecation")
738       Path editsdir =
739         HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
740       LOG.debug("checking edits dir " + editsdir);
741       if(!fs.exists(editsdir)) continue;
742       FileStatus[] files = fs.listStatus(editsdir);
743       if(files != null) {
744         for(FileStatus file : files) {
745           int c = countHLog(file.getPath(), fs, conf);
746           count += c;
747           LOG.info(c + " edits in " + file.getPath());
748         }
749       }
750     }
751 
752     LOG.info("Verify edits in recovered.edits files");
753     assertEquals(NUM_LOG_LINES, count);
754     LOG.info("Verify replayed edits");
755     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
756     
757     // clean up
758     for (HRegionInfo hri : regions) {
759       @SuppressWarnings("deprecation")
760       Path editsdir =
761         HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
762       fs.delete(editsdir, true);
763     }
764     disablingHT.close();
765     ht.close();
766     zkw.close();
767   }
768 
769   @Test(timeout = 300000)
770   public void testDisallowWritesInRecovering() throws Exception {
771     LOG.info("testDisallowWritesInRecovering");
772     Configuration curConf = HBaseConfiguration.create();
773     curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
774     curConf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
775     curConf.setBoolean(HConstants.DISALLOW_WRITES_IN_RECOVERING, true);
776     startCluster(NUM_RS, curConf);
777     final int NUM_REGIONS_TO_CREATE = 40;
778     final int NUM_LOG_LINES = 20000;
779     // turn off load balancing to prevent regions from moving around; otherwise
780     // they will consume recovered.edits
781     master.balanceSwitch(false);
782 
783     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
784     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
785     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
786 
787     HRegionServer hrs = findRSToKill(false, "table");
788     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
789     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
790     
791     // abort RS
792     LOG.info("Aborting region server: " + hrs.getServerName());
793     hrs.abort("testing");
794     
795     // wait for the abort to complete
796     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
797       @Override
798       public boolean evaluate() throws Exception {
799         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
800       }
801     });
802     
803     // wait for regions to come online
804     TEST_UTIL.waitFor(180000, 100, new Waiter.Predicate<Exception>() {
805       @Override
806       public boolean evaluate() throws Exception {
807         return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
808       }
809     });
810 
811     try {
812       HRegionInfo region = regions.get(0);
813       byte[] key = region.getStartKey();
814       if (key == null || key.length == 0) {
815         key = new byte[] { 0, 0, 0, 0, 1 };
816       }
817       ht.setAutoFlush(true);
818       Put put = new Put(key);
819       put.add(Bytes.toBytes("family"), Bytes.toBytes("c1"), new byte[]{'b'});
820       ht.put(put);
821     } catch (IOException ioe) {
822       Assert.assertTrue(ioe instanceof RetriesExhaustedWithDetailsException);
823       RetriesExhaustedWithDetailsException re = (RetriesExhaustedWithDetailsException) ioe;
824       Assert.assertTrue(re.getCause(0) instanceof RegionInRecoveryException);
825     }
826 
827     ht.close();
828     zkw.close();
829   }
830 
831   /**
832    * The original intention of this test was to force an abort of a region
833    * server and to make sure that the failure path in the region servers is
834    * properly evaluated. But it is difficult to ensure that the region server
835    * doesn't finish the log splitting before it aborts. In addition, there is
836    * now a code path where the master preempts the region server's task when it
837    * detects that the region server has aborted.
838    * @throws Exception
839    */
840   @Test (timeout=300000)
841   public void testWorkerAbort() throws Exception {
842     LOG.info("testWorkerAbort");
843     startCluster(3);
844     final int NUM_LOG_LINES = 10000;
845     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
846     FileSystem fs = master.getMasterFileSystem().getFileSystem();
847 
848     final List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
849     HRegionServer hrs = findRSToKill(false, "table");
850     Path rootdir = FSUtils.getRootDir(conf);
851     final Path logDir = new Path(rootdir,
852         HLogUtil.getHLogDirectoryName(hrs.getServerName().toString()));
853 
854     installTable(new ZooKeeperWatcher(conf, "table-creation", null),
855         "table", "family", 40);
856 
857     makeHLog(hrs.getWAL(), ProtobufUtil.getOnlineRegions(hrs), "table", "family", NUM_LOG_LINES,
858       100);
859 
860     new Thread() {
861       public void run() {
862         waitForCounter(tot_wkr_task_acquired, 0, 1, 1000);
863         for (RegionServerThread rst : rsts) {
864           rst.getRegionServer().abort("testing");
865           break;
866         }
867       }
868     }.start();
869     // slm.splitLogDistributed(logDir);
870     FileStatus[] logfiles = fs.listStatus(logDir);
871     TaskBatch batch = new TaskBatch();
872     slm.enqueueSplitTask(logfiles[0].getPath().toString(), batch);
873     // like waitForCounter, but wait for any one of the worker counters to go up
874     long curt = System.currentTimeMillis();
875     long waitTime = 80000;
876     long endt = curt + waitTime;
877     while (curt < endt) {
878       if ((tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
879           tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
880           tot_wkr_preempt_task.get()) == 0) {
881         Thread.yield();
882         curt = System.currentTimeMillis();
883       } else {
884         assertEquals(1, (tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
885             tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
886             tot_wkr_preempt_task.get()));
887         return;
888       }
889     }
890     fail("none of the following counters went up in " + waitTime +
891         " milliseconds - " +
892         "tot_wkr_task_resigned, tot_wkr_task_err, " +
893         "tot_wkr_final_transition_failed, tot_wkr_task_done, " +
894         "tot_wkr_preempt_task");
895   }
896 
897   @Test (timeout=300000)
898   public void testThreeRSAbort() throws Exception {
899     LOG.info("testThreeRSAbort");
900     final int NUM_REGIONS_TO_CREATE = 40;
901     final int NUM_ROWS_PER_REGION = 100;
902 
903     startCluster(NUM_RS); // NUM_RS=6.
904 
905     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf,
906         "distributed log splitting test", null);
907 
908     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
909     populateDataInTable(NUM_ROWS_PER_REGION, "family");
910 
911 
912     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
913     assertEquals(NUM_RS, rsts.size());
914     rsts.get(0).getRegionServer().abort("testing");
915     rsts.get(1).getRegionServer().abort("testing");
916     rsts.get(2).getRegionServer().abort("testing");
917 
918     long start = EnvironmentEdgeManager.currentTimeMillis();
919     while (cluster.getLiveRegionServerThreads().size() > (NUM_RS - 3)) {
920       if (EnvironmentEdgeManager.currentTimeMillis() - start > 60000) {
921         fail("Timed out waiting for the aborted region servers to go down");
922       }
923       Thread.sleep(200);
924     }
925 
926     start = EnvironmentEdgeManager.currentTimeMillis();
927     while (getAllOnlineRegions(cluster).size() < (NUM_REGIONS_TO_CREATE + 1)) {
928       if (EnvironmentEdgeManager.currentTimeMillis() - start > 60000) {
929         assertTrue("Timedout", false);
930       }
931       Thread.sleep(200);
932     }
933 
934     // wait until all regions are fully recovered
935     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
936       @Override
937       public boolean evaluate() throws Exception {
938         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
939           zkw.recoveringRegionsZNode, false);
940         return (recoveringRegions != null && recoveringRegions.size() == 0);
941       }
942     });
943 
944     assertEquals(NUM_REGIONS_TO_CREATE * NUM_ROWS_PER_REGION,
945         TEST_UTIL.countRows(ht));
946     ht.close();
947     zkw.close();
948   }
949 
950 
951 
952   @Test(timeout=30000)
953   public void testDelayedDeleteOnFailure() throws Exception {
954     LOG.info("testDelayedDeleteOnFailure");
955     startCluster(1);
956     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
957     final FileSystem fs = master.getMasterFileSystem().getFileSystem();
958     final Path logDir = new Path(FSUtils.getRootDir(conf), "x");
959     fs.mkdirs(logDir);
960     ExecutorService executor = null;
961     try {
962       final Path corruptedLogFile = new Path(logDir, "x");
963       FSDataOutputStream out;
964       out = fs.create(corruptedLogFile);
965       out.write(0);
966       out.write(Bytes.toBytes("corrupted bytes"));
967       out.close();
968       slm.ignoreZKDeleteForTesting = true;
969       executor = Executors.newSingleThreadExecutor();
970       Runnable runnable = new Runnable() {
971        @Override
972        public void run() {
973           try {
974             // since the logDir contains only a fake, corrupted log file, the split log worker
975             // will finish it quickly with an error, and this call will fail and throw
976             // an IOException.
977             slm.splitLogDistributed(logDir);
978           } catch (IOException ioe) {
979             try {
980               assertTrue(fs.exists(corruptedLogFile));
981               // this call will block waiting for the task to be removed from the
982               // tasks map which is not going to happen since ignoreZKDeleteForTesting
983               // is set to true, until it is interrupted.
984               slm.splitLogDistributed(logDir);
985             } catch (IOException e) {
986               assertTrue(Thread.currentThread().isInterrupted());
987               return;
988             }
989             fail("did not get the expected IOException from the 2nd call");
990           }
991           fail("did not get the expected IOException from the 1st call");
992         }
993       };
994       Future<?> result = executor.submit(runnable);
995       try {
996         result.get(2000, TimeUnit.MILLISECONDS);
997       } catch (TimeoutException te) {
998         // it is ok, expected.
999       }
1000       waitForCounter(tot_mgr_wait_for_zk_delete, 0, 1, 10000);
1001       executor.shutdownNow();
1002       executor = null;
1003 
1004       // make sure the runnable is finished with no exception thrown.
1005       result.get();
1006     } finally {
1007       if (executor != null) {
1008         // interrupt the thread in case the test fails in the middle.
1009         // it has no effect if the thread is already terminated.
1010         executor.shutdownNow();
1011       }
1012       fs.delete(logDir, true);
1013     }
1014   }
1015 
1016   @Test(timeout = 300000)
1017   public void testMetaRecoveryInZK() throws Exception {
1018     LOG.info("testMetaRecoveryInZK");
1019     Configuration curConf = HBaseConfiguration.create();
1020     curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
1021     startCluster(NUM_RS, curConf);
1022 
1023    // turn off load balancing to prevent regions from moving around; otherwise
1024     // they will consume recovered.edits
1025     master.balanceSwitch(false);
1026     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(curConf, "table-creation", null);
1027     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1028 
1029     // only testing meta recovery in ZK operation
1030     HRegionServer hrs = findRSToKill(true, null);
1031     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
1032 
1033     LOG.info("#regions = " + regions.size());
1034     Set<HRegionInfo> tmpRegions = new HashSet<HRegionInfo>();
1035     tmpRegions.add(HRegionInfo.FIRST_META_REGIONINFO);
1036     master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), tmpRegions);
1037     Set<HRegionInfo> userRegionSet = new HashSet<HRegionInfo>();
1038     userRegionSet.addAll(regions);
1039     master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), userRegionSet);
1040     boolean isMetaRegionInRecovery = false;
1041     List<String> recoveringRegions =
1042         zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
1043     for (String curEncodedRegionName : recoveringRegions) {
1044       if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1045         isMetaRegionInRecovery = true;
1046         break;
1047       }
1048     }
1049     assertTrue(isMetaRegionInRecovery);
1050 
1051     master.getMasterFileSystem().splitMetaLog(hrs.getServerName());
1052     
1053     isMetaRegionInRecovery = false;
1054     recoveringRegions =
1055         zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
1056     for (String curEncodedRegionName : recoveringRegions) {
1057       if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1058         isMetaRegionInRecovery = true;
1059         break;
1060       }
1061     }
1062     // meta region should be recovered
1063     assertFalse(isMetaRegionInRecovery);
1064     zkw.close();
1065   }
1066 
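  /**
   * Creates a table with the requested number of regions and cycles it through
   * disable/enable to clean up the extra regions left behind by createMultiRegions,
   * returning a handle to the table once all regions are assigned.
   */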
1067   HTable installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs) throws Exception {
1068     return installTable(zkw, tname, fname, nrs, 0);
1069   }
1070 
1071   HTable installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs, 
1072       int existingRegions) throws Exception {
1073     // Create a table with regions
1074     byte [] table = Bytes.toBytes(tname);
1075     byte [] family = Bytes.toBytes(fname);
1076     LOG.info("Creating table with " + nrs + " regions");
1077     HTable ht = TEST_UTIL.createTable(table, family);
1078     int numRegions = TEST_UTIL.createMultiRegions(conf, ht, family, nrs);
1079     assertEquals(nrs, numRegions);
1080    LOG.info("Waiting for no more RIT\n");
1081     blockUntilNoRIT(zkw, master);
1082     // disable-enable cycle to get rid of table's dead regions left behind
1083     // by createMultiRegions
1084     LOG.debug("Disabling table\n");
1085     TEST_UTIL.getHBaseAdmin().disableTable(table);
1086     LOG.debug("Waiting for no more RIT\n");
1087     blockUntilNoRIT(zkw, master);
1088     NavigableSet<String> regions = getAllOnlineRegions(cluster);
1089     LOG.debug("Verifying only catalog and namespace regions are assigned\n");
1090    if (regions.size() != 2 + existingRegions) {
1091       for (String oregion : regions)
1092         LOG.debug("Region still online: " + oregion);
1093     }
1094     assertEquals(2 + existingRegions, regions.size());
1095     LOG.debug("Enabling table\n");
1096     TEST_UTIL.getHBaseAdmin().enableTable(table);
1097     LOG.debug("Waiting for no more RIT\n");
1098     blockUntilNoRIT(zkw, master);
1099     LOG.debug("Verifying there are " + numRegions + " assigned on cluster\n");
1100     regions = getAllOnlineRegions(cluster);
1101     assertEquals(numRegions + 2 + existingRegions, regions.size());
1102     return ht;
1103   }
1104 
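  /**
   * Writes nrows rows into every online user region, going through each region server's
   * HRegion directly rather than the client API.
   */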
1105   void populateDataInTable(int nrows, String fname) throws Exception {
1106     byte [] family = Bytes.toBytes(fname);
1107 
1108     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1109     assertEquals(NUM_RS, rsts.size());
1110 
1111     for (RegionServerThread rst : rsts) {
1112       HRegionServer hrs = rst.getRegionServer();
1113       List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs);
1114       for (HRegionInfo hri : hris) {
1115         if (HTableDescriptor.isSystemTable(hri.getTableName())) {
1116           continue;
1117         }
1118         LOG.debug("adding data to rs = " + rst.getName() +
1119             " region = "+ hri.getRegionNameAsString());
1120         HRegion region = hrs.getOnlineRegion(hri.getRegionName());
1121         assertTrue(region != null);
1122         putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
1123       }
1124     }
1125   }
1126 
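  /**
   * Appends num_edits WAL entries of edit_size bytes each, spread round-robin across the
   * given table's regions, then syncs and closes the log.
   */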
1127   public void makeHLog(HLog log, List<HRegionInfo> regions, String tname, String fname,
1128       int num_edits, int edit_size) throws IOException {
1129     makeHLog(log, regions, tname, fname, num_edits, edit_size, true);
1130   }
1131 
1132   public void makeHLog(HLog log, List<HRegionInfo> regions, String tname, String fname,
1133       int num_edits, int edit_size, boolean closeLog) throws IOException {
1134     TableName fullTName = TableName.valueOf(tname);
1135     // remove the meta region
1136     regions.remove(HRegionInfo.FIRST_META_REGIONINFO);
1137 
1138     for(Iterator<HRegionInfo> iter = regions.iterator(); iter.hasNext(); ) {
1139       HRegionInfo regionInfo = iter.next();
1140       if(HTableDescriptor.isSystemTable(regionInfo.getTableName())) {
1141          iter.remove();
1142       }
1143     }
1144     HTableDescriptor htd = new HTableDescriptor(fullTName);
1145     byte[] value = new byte[edit_size];
1146 
1147     List<HRegionInfo> hris = new ArrayList<HRegionInfo>();
1148     for (HRegionInfo region : regions) {
1149       if (!region.getTableName().getNameAsString().equalsIgnoreCase(tname)) {
1150         continue;
1151       }
1152       hris.add(region);
1153     }
1154     LOG.info("Creating wal edits across " + hris.size() + " regions.");
1155     for (int i = 0; i < edit_size; i++) {
1156       value[i] = (byte) ('a' + (i % 26));
1157     }
1158     int n = hris.size();
1159     int[] counts = new int[n];
1160     if (n > 0) {
1161       for (int i = 0; i < num_edits; i += 1) {
1162         WALEdit e = new WALEdit();
1163         HRegionInfo curRegionInfo = hris.get(i % n);
1164         byte[] startRow = curRegionInfo.getStartKey();
1165         if (startRow == null || startRow.length == 0) {
1166           startRow = new byte[] { 0, 0, 0, 0, 1 };
1167         }
1168         byte[] row = Bytes.incrementBytes(startRow, counts[i % n]);
1169         row = Arrays.copyOfRange(row, 3, 8); // use the last 5 bytes because
1170                                              // HBaseTestingUtility.createMultiRegions uses
1171                                              // 5-byte keys
1172         byte[] family = Bytes.toBytes(fname);
1173         byte[] qualifier = Bytes.toBytes("c" + Integer.toString(i));
1174         e.add(new KeyValue(row, family, qualifier, System.currentTimeMillis(), value));
1175         log.append(curRegionInfo, fullTName, e, System.currentTimeMillis(), htd);
1176         counts[i % n] += 1;
1177       }
1178     }
1179     log.sync();
1180     if(closeLog) {
1181       log.close();
1182     }
1183     for (int i = 0; i < n; i++) {
1184       LOG.info("region " + hris.get(i).getRegionNameAsString() + " has " + counts[i] + " edits");
1185     }
1186     return;
1187   }
1188 
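  /**
   * Counts the number of entries in the given WAL file.
   */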
1189   private int countHLog(Path log, FileSystem fs, Configuration conf)
1190   throws IOException {
1191     int count = 0;
1192     HLog.Reader in = HLogFactory.createReader(fs, log, conf);
1193     while (in.next() != null) {
1194       count++;
1195     }
1196     return count;
1197   }
1198 
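  /**
   * Blocks until neither ZooKeeper nor the assignment manager reports any regions in
   * transition.
   */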
1199   private void blockUntilNoRIT(ZooKeeperWatcher zkw, HMaster master)
1200   throws KeeperException, InterruptedException {
1201     ZKAssign.blockUntilNoRIT(zkw);
1202     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
1203   }
1204 
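  /**
   * Puts numRows rows, keyed off the given start row, directly into the region.
   */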
1205   private void putData(HRegion region, byte[] startRow, int numRows, byte [] qf,
1206       byte [] ...families)
1207   throws IOException {
1208     for(int i = 0; i < numRows; i++) {
1209       Put put = new Put(Bytes.add(startRow, Bytes.toBytes(i)));
1210       for(byte [] family : families) {
1211         put.add(family, qf, null);
1212       }
1213       region.put(put);
1214     }
1215   }
1216 
1217   /**
1218    * Load table with puts and deletes with expected values so that we can verify later
1219    */
1220   private void prepareData(final HTable t, final byte[] f, final byte[] column) throws IOException {
1221     t.setAutoFlush(false);
1222     byte[] k = new byte[3];
1223 
1224     // add puts
1225     for (byte b1 = 'a'; b1 <= 'z'; b1++) {
1226       for (byte b2 = 'a'; b2 <= 'z'; b2++) {
1227         for (byte b3 = 'a'; b3 <= 'z'; b3++) {
1228           k[0] = b1;
1229           k[1] = b2;
1230           k[2] = b3;
1231           Put put = new Put(k);
1232           put.add(f, column, k);
1233           t.put(put);
1234         }
1235       }
1236     }
1237     t.flushCommits();
1238     // add deletes
1239     for (byte b3 = 'a'; b3 <= 'z'; b3++) {
1240       k[0] = 'a';
1241       k[1] = 'a';
1242       k[2] = b3;
1243       Delete del = new Delete(k);
1244       t.delete(del);
1245     }
1246     t.flushCommits();
1247   }
1248 
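  /**
   * Returns the names of all regions currently online on the cluster's live region
   * servers.
   */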
1249   private NavigableSet<String> getAllOnlineRegions(MiniHBaseCluster cluster)
1250       throws IOException {
1251     NavigableSet<String> online = new TreeSet<String>();
1252     for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
1253       for (HRegionInfo region : ProtobufUtil.getOnlineRegions(rst.getRegionServer())) {
1254         online.add(region.getRegionNameAsString());
1255       }
1256     }
1257     return online;
1258   }
1259 
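  /**
   * Waits for the counter to move off oldval, asserting that it then equals newval;
   * fails if it does not change within timems milliseconds.
   */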
1260   private void waitForCounter(AtomicLong ctr, long oldval, long newval,
1261       long timems) {
1262     long curt = System.currentTimeMillis();
1263     long endt = curt + timems;
1264     while (curt < endt) {
1265       if (ctr.get() == oldval) {
1266         Thread.yield();
1267         curt = System.currentTimeMillis();
1268       } else {
1269         assertEquals(newval, ctr.get());
1270         return;
1271       }
1272     }
1273     fail("Counter did not change from " + oldval + " within " + timems + " ms");
1274   }
1275 
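  /**
   * Aborts the active master and waits for its thread to exit.
   */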
1276   private void abortMaster(MiniHBaseCluster cluster) throws InterruptedException {
1277     for (MasterThread mt : cluster.getLiveMasterThreads()) {
1278       if (mt.getMaster().isActiveMaster()) {
1279         mt.getMaster().abort("Aborting for tests", new Exception("Trace info"));
1280         mt.join();
1281         break;
1282       }
1283     }
1284     LOG.debug("Master is aborted");
1285   }
1286 
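  /**
   * Starts a new master and waits until it has initialized and has finished processing
   * dead servers, i.e. until log splitting is done.
   */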
1287   private void startMasterAndWaitUntilLogSplit(MiniHBaseCluster cluster)
1288       throws IOException, InterruptedException {
1289     cluster.startMaster();
1290     HMaster master = cluster.getMaster();
1291     while (!master.isInitialized()) {
1292       Thread.sleep(100);
1293     }
1294     ServerManager serverManager = master.getServerManager();
1295     while (serverManager.areDeadServersInProgress()) {
1296       Thread.sleep(100);
1297     }
1298   }
1299 
1300   /**
1301    * Find an RS that has regions of a table.
1302    * @param hasMetaRegion when true, the returned RS also carries the META region
1303    * @param tableName name of the table whose regions the returned RS must be serving; null matches any table
1304    * @return a region server matching the criteria
1305    * @throws Exception
1306    */
1307   private HRegionServer findRSToKill(boolean hasMetaRegion, String tableName) throws Exception {
1308     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1309     int numOfRSs = rsts.size();
1310     List<HRegionInfo> regions = null;
1311     HRegionServer hrs = null;
1312 
1313     for (int i = 0; i < numOfRSs; i++) {
1314       boolean isCarryingMeta = false;
1315       boolean foundTableRegion = false;
1316       hrs = rsts.get(i).getRegionServer();
1317       regions = ProtobufUtil.getOnlineRegions(hrs);
1318       for (HRegionInfo region : regions) {
1319         if (region.isMetaRegion()) {
1320           isCarryingMeta = true;
1321         }
1322         if (tableName == null || region.getTableName().getNameAsString().equals(tableName)) {
1323           foundTableRegion = true;
1324         }
1325         if (foundTableRegion && (isCarryingMeta || !hasMetaRegion)) {
1326           break;
1327         }
1328       }
1329       if (isCarryingMeta && hasMetaRegion) {
1330         // the caller asked for an RS carrying META
1331         if (!foundTableRegion) {
1332           final HRegionServer destRS = hrs;
1333           // the RS doesn't have regions of the specified table, so we need to move one to this RS
1334           List<HRegionInfo> tableRegions =
1335               TEST_UTIL.getHBaseAdmin().getTableRegions(Bytes.toBytes(tableName));
1336           final HRegionInfo hri = tableRegions.get(0);
1337           TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
1338             Bytes.toBytes(destRS.getServerName().getServerName()));
1339           // wait for the region move to complete
1340           final RegionStates regionStates =
1341               TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
1342           TEST_UTIL.waitFor(45000, 200, new Waiter.Predicate<Exception>() {
1343             @Override
1344             public boolean evaluate() throws Exception {
1345               ServerName sn = regionStates.getRegionServerOfRegion(hri);
1346               return (sn != null && sn.equals(destRS.getServerName()));
1347             }
1348           });
1349         }
1350         return hrs;
1351       } else if (hasMetaRegion || isCarryingMeta) {
1352         continue;
1353       }
1354       if (foundTableRegion) break;
1355     }
1356 
1357     return hrs;
1358   }
1359 
1360 }