View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import static org.apache.hadoop.hbase.SplitLogCounters.tot_mgr_wait_for_zk_delete;
22  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_final_transition_failed;
23  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_preempt_task;
24  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_acquired;
25  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_done;
26  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_err;
27  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_resigned;
28  import static org.junit.Assert.assertEquals;
29  import static org.junit.Assert.assertFalse;
30  import static org.junit.Assert.assertTrue;
31  import static org.junit.Assert.fail;
32  
33  import java.io.IOException;
34  import java.util.ArrayList;
35  import java.util.Arrays;
36  import java.util.HashSet;
37  import java.util.Iterator;
38  import java.util.List;
39  import java.util.NavigableSet;
40  import java.util.Set;
41  import java.util.TreeSet;
42  import java.util.concurrent.ExecutorService;
43  import java.util.concurrent.Executors;
44  import java.util.concurrent.Future;
45  import java.util.concurrent.TimeUnit;
46  import java.util.concurrent.TimeoutException;
47  import java.util.concurrent.atomic.AtomicLong;
48  
49  import org.apache.commons.logging.Log;
50  import org.apache.commons.logging.LogFactory;
51  import org.apache.hadoop.conf.Configuration;
52  import org.apache.hadoop.fs.FSDataOutputStream;
53  import org.apache.hadoop.fs.FileStatus;
54  import org.apache.hadoop.fs.FileSystem;
55  import org.apache.hadoop.fs.Path;
56  import org.apache.hadoop.hbase.HColumnDescriptor;
57  import org.apache.hadoop.hbase.TableName;
58  import org.apache.hadoop.hbase.HBaseConfiguration;
59  import org.apache.hadoop.hbase.HBaseTestingUtility;
60  import org.apache.hadoop.hbase.HConstants;
61  import org.apache.hadoop.hbase.HRegionInfo;
62  import org.apache.hadoop.hbase.HTableDescriptor;
63  import org.apache.hadoop.hbase.KeyValue;
64  import org.apache.hadoop.hbase.LargeTests;
65  import org.apache.hadoop.hbase.MiniHBaseCluster;
66  import org.apache.hadoop.hbase.NamespaceDescriptor;
67  import org.apache.hadoop.hbase.ServerName;
68  import org.apache.hadoop.hbase.SplitLogCounters;
69  import org.apache.hadoop.hbase.Waiter;
70  import org.apache.hadoop.hbase.client.Delete;
71  import org.apache.hadoop.hbase.client.HTable;
72  import org.apache.hadoop.hbase.client.Put;
73  import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
74  import org.apache.hadoop.hbase.exceptions.RegionInRecoveryException;
75  import org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch;
76  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
77  import org.apache.hadoop.hbase.regionserver.HRegion;
78  import org.apache.hadoop.hbase.regionserver.HRegionServer;
79  import org.apache.hadoop.hbase.regionserver.wal.HLog;
80  import org.apache.hadoop.hbase.regionserver.wal.HLogFactory;
81  import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
82  import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
83  import org.apache.hadoop.hbase.util.Bytes;
84  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
85  import org.apache.hadoop.hbase.util.FSUtils;
86  import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
87  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
88  import org.apache.hadoop.hbase.util.Threads;
89  import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
90  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
91  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
92  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
93  import org.apache.hadoop.hdfs.MiniDFSCluster;
94  import org.apache.log4j.Level;
95  import org.apache.log4j.Logger;
96  import org.apache.zookeeper.KeeperException;
97  import org.junit.After;
98  import org.junit.AfterClass;
99  import org.junit.Assert;
100 import org.junit.Before;
101 import org.junit.BeforeClass;
102 import org.junit.Test;
103 import org.junit.experimental.categories.Category;
104 
105 @Category(LargeTests.class)
106 public class TestDistributedLogSplitting {
107   private static final Log LOG = LogFactory.getLog(TestSplitLogManager.class);
108   static {
109     Logger.getLogger("org.apache.hadoop.hbase").setLevel(Level.DEBUG);
110 
111     // test ThreeRSAbort fails under hadoop2 (2.0.2-alpha) if shortcircuit-read (scr) is on. this
112     // turns it off for this test.  TODO: Figure out why scr breaks recovery. 
113     System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
114 
115   }
116 
117   // Start a cluster with 2 masters and 6 regionservers
118   static final int NUM_MASTERS = 2;
119   static final int NUM_RS = 6;
120 
121   MiniHBaseCluster cluster;
122   HMaster master;
123   Configuration conf;
124   static Configuration originalConf;
125   static HBaseTestingUtility TEST_UTIL;
126   static MiniDFSCluster dfsCluster;
127   static MiniZooKeeperCluster zkCluster;
128 
129   @BeforeClass
130   public static void setup() throws Exception {
131     TEST_UTIL = new HBaseTestingUtility(HBaseConfiguration.create());
132     dfsCluster = TEST_UTIL.startMiniDFSCluster(1);
133     zkCluster = TEST_UTIL.startMiniZKCluster();
134     originalConf = TEST_UTIL.getConfiguration();
135   }
136 
137   @AfterClass
138   public static void tearDown() throws IOException {
139     TEST_UTIL.shutdownMiniZKCluster();
140     TEST_UTIL.shutdownMiniDFSCluster();
141   }
142 
143   private void startCluster(int num_rs) throws Exception {
144     SplitLogCounters.resetCounters();
145     LOG.info("Starting cluster");
146     conf.getLong("hbase.splitlog.max.resubmit", 0);
147     // Make the failure test faster
148     conf.setInt("zookeeper.recovery.retry", 0);
149     conf.setInt(HConstants.REGIONSERVER_INFO_PORT, -1);
150     conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, (float) 100.0); // no load balancing
151     conf.setInt("hbase.regionserver.wal.max.splitters", 3);
152     TEST_UTIL = new HBaseTestingUtility(conf);
153     TEST_UTIL.setDFSCluster(dfsCluster);
154     TEST_UTIL.setZkCluster(zkCluster);
155     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, num_rs);
156     cluster = TEST_UTIL.getHBaseCluster();
157     LOG.info("Waiting for active/ready master");
158     cluster.waitForActiveAndReadyMaster();
159     master = cluster.getMaster();
160     while (cluster.getLiveRegionServerThreads().size() < num_rs) {
161       Threads.sleep(1);
162     }
163   }
164 
165   @Before
166   public void before() throws Exception {
167     // refresh configuration
168     conf = HBaseConfiguration.create(originalConf);
169   }
170   
171   @After
172   public void after() throws Exception {
173     try {
174       if (TEST_UTIL.getHBaseCluster() != null) {
175         for (MasterThread mt : TEST_UTIL.getHBaseCluster().getLiveMasterThreads()) {
176           mt.getMaster().abort("closing...", new Exception("Trace info"));
177         }
178       }
179       TEST_UTIL.shutdownMiniHBaseCluster();
180     } finally {
181       TEST_UTIL.getTestFileSystem().delete(FSUtils.getRootDir(TEST_UTIL.getConfiguration()), true);
182       ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
183     }
184   }
185   
186   @Test (timeout=300000)
187   public void testRecoveredEdits() throws Exception {
188     LOG.info("testRecoveredEdits");
189     conf.setLong("hbase.regionserver.hlog.blocksize", 30 * 1024); // create more than one wal
190     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
191     startCluster(NUM_RS);
192 
193     final int NUM_LOG_LINES = 1000;
194     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
195     // turn off load balancing to prevent regions from moving around otherwise
196     // they will consume recovered.edits
197     master.balanceSwitch(false);
198     FileSystem fs = master.getMasterFileSystem().getFileSystem();
199 
200     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
201 
202     Path rootdir = FSUtils.getRootDir(conf);
203 
204     installTable(new ZooKeeperWatcher(conf, "table-creation", null),
205         "table", "family", 40);
206     TableName table = TableName.valueOf("table");
207     List<HRegionInfo> regions = null;
208     HRegionServer hrs = null;
209     for (int i = 0; i < NUM_RS; i++) {
210       boolean foundRs = false;
211       hrs = rsts.get(i).getRegionServer();
212       regions = ProtobufUtil.getOnlineRegions(hrs);
213       for (HRegionInfo region : regions) {
214         if (region.getTable().getNameAsString().equalsIgnoreCase("table")) {
215           foundRs = true;
216           break;
217         }
218       }
219       if (foundRs) break;
220     }
221     final Path logDir = new Path(rootdir, HLogUtil.getHLogDirectoryName(hrs
222         .getServerName().toString()));
223 
224     LOG.info("#regions = " + regions.size());
225     Iterator<HRegionInfo> it = regions.iterator();
226     while (it.hasNext()) {
227       HRegionInfo region = it.next();
228       if (region.getTable().getNamespaceAsString()
229           .equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
230         it.remove();
231       }
232     }
233     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
234 
235     slm.splitLogDistributed(logDir);
236 
237     int count = 0;
238     for (HRegionInfo hri : regions) {
239 
240       Path tdir = FSUtils.getTableDir(rootdir, table);
241       @SuppressWarnings("deprecation")
242       Path editsdir =
243         HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
244       LOG.debug("checking edits dir " + editsdir);
245       FileStatus[] files = fs.listStatus(editsdir);
246       assertTrue(files.length > 1);
247       for (int i = 0; i < files.length; i++) {
248         int c = countHLog(files[i].getPath(), fs, conf);
249         count += c;
250       }
251       LOG.info(count + " edits in " + files.length + " recovered edits files.");
252     }
253     assertEquals(NUM_LOG_LINES, count);
254   }
255 
256   @Test(timeout = 300000)
257   public void testLogReplayWithNonMetaRSDown() throws Exception {
258     LOG.info("testLogReplayWithNonMetaRSDown");
259     conf.setLong("hbase.regionserver.hlog.blocksize", 30 * 1024); // create more than one wal
260     conf.setLong("hbase.regionserver.hlog.blocksize", 100*1024);
261     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
262     startCluster(NUM_RS);
263     final int NUM_REGIONS_TO_CREATE = 40;
264     final int NUM_LOG_LINES = 1000;
265     // turn off load balancing to prevent regions from moving around otherwise
266     // they will consume recovered.edits
267     master.balanceSwitch(false);
268 
269     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
270     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
271 
272     HRegionServer hrs = findRSToKill(false, "table");
273     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
274     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
275 
276     // wait for abort completes
277     this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
278     ht.close();
279     zkw.close();
280   }
281 
282   @Test(timeout = 300000)
283   public void testLogReplayWithMetaRSDown() throws Exception {
284     LOG.info("testRecoveredEditsReplayWithMetaRSDown");
285     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
286     startCluster(NUM_RS);
287     final int NUM_REGIONS_TO_CREATE = 40;
288     final int NUM_LOG_LINES = 1000;
289     // turn off load balancing to prevent regions from moving around otherwise
290     // they will consume recovered.edits
291     master.balanceSwitch(false);
292 
293     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
294     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
295 
296     HRegionServer hrs = findRSToKill(true, "table");
297     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
298     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
299 
300     this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
301     ht.close();
302     zkw.close();
303   }
304 
305   private void abortRSAndVerifyRecovery(HRegionServer hrs, HTable ht, final ZooKeeperWatcher zkw,
306       final int numRegions, final int numofLines) throws Exception {
307 
308     abortRSAndWaitForRecovery(hrs, zkw, numRegions);
309     assertEquals(numofLines, TEST_UTIL.countRows(ht));
310   }
311 
312   private void abortRSAndWaitForRecovery(HRegionServer hrs, final ZooKeeperWatcher zkw,
313       final int numRegions) throws Exception {
314     final MiniHBaseCluster tmpCluster = this.cluster;
315 
316     // abort RS
317     LOG.info("Aborting region server: " + hrs.getServerName());
318     hrs.abort("testing");
319 
320     // wait for abort completes
321     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
322       @Override
323       public boolean evaluate() throws Exception {
324         return (tmpCluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
325       }
326     });
327 
328     // wait for regions come online
329     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
330       @Override
331       public boolean evaluate() throws Exception {
332         return (getAllOnlineRegions(tmpCluster).size() >= (numRegions + 1));
333       }
334     });
335 
336     // wait for all regions are fully recovered
337     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
338       @Override
339       public boolean evaluate() throws Exception {
340         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
341           zkw.recoveringRegionsZNode, false);
342         return (recoveringRegions != null && recoveringRegions.size() == 0);
343       }
344     });
345   }
346 
347   @Test(timeout = 300000)
348   public void testMasterStartsUpWithLogSplittingWork() throws Exception {
349     LOG.info("testMasterStartsUpWithLogSplittingWork");
350     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
351     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
352     startCluster(NUM_RS);
353 
354     final int NUM_REGIONS_TO_CREATE = 40;
355     final int NUM_LOG_LINES = 1000;
356     // turn off load balancing to prevent regions from moving around otherwise
357     // they will consume recovered.edits
358     master.balanceSwitch(false);
359 
360     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
361     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
362     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
363 
364     HRegionServer hrs = findRSToKill(false, "table");
365     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
366     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
367 
368     // abort master
369     abortMaster(cluster);
370 
371     // abort RS
372     LOG.info("Aborting region server: " + hrs.getServerName());
373     hrs.abort("testing");
374 
375     // wait for abort completes
376     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
377       @Override
378       public boolean evaluate() throws Exception {
379         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
380       }
381     });
382 
383     Thread.sleep(2000);
384     LOG.info("Current Open Regions:" + getAllOnlineRegions(cluster).size());
385     
386     startMasterAndWaitUntilLogSplit(cluster);
387     
388     // wait for abort completes
389     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
390       @Override
391       public boolean evaluate() throws Exception {
392         return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
393       }
394     });
395 
396     LOG.info("Current Open Regions After Master Node Starts Up:"
397         + getAllOnlineRegions(cluster).size());
398 
399     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
400 
401     ht.close();
402     zkw.close();
403   }
404   
405   @Test(timeout = 300000)
406   public void testMasterStartsUpWithLogReplayWork() throws Exception {
407     LOG.info("testMasterStartsUpWithLogReplayWork");
408     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
409     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
410     startCluster(NUM_RS);
411 
412     final int NUM_REGIONS_TO_CREATE = 40;
413     final int NUM_LOG_LINES = 1000;
414     // turn off load balancing to prevent regions from moving around otherwise
415     // they will consume recovered.edits
416     master.balanceSwitch(false);
417 
418     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
419     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
420     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
421 
422     HRegionServer hrs = findRSToKill(false, "table");
423     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
424     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
425 
426     // abort master
427     abortMaster(cluster);
428 
429     // abort RS
430     LOG.info("Aborting region server: " + hrs.getServerName());
431     hrs.abort("testing");
432 
433     // wait for the RS dies
434     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
435       @Override
436       public boolean evaluate() throws Exception {
437         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
438       }
439     });
440 
441     Thread.sleep(2000);
442     LOG.info("Current Open Regions:" + getAllOnlineRegions(cluster).size());
443 
444     startMasterAndWaitUntilLogSplit(cluster);
445 
446     // wait for all regions are fully recovered
447     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
448       @Override
449       public boolean evaluate() throws Exception {
450         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
451           zkw.recoveringRegionsZNode, false);
452         return (recoveringRegions != null && recoveringRegions.size() == 0);
453       }
454     });
455 
456     LOG.info("Current Open Regions After Master Node Starts Up:"
457         + getAllOnlineRegions(cluster).size());
458 
459     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
460 
461     ht.close();
462     zkw.close();
463   }
464 
465 
466   @Test(timeout = 300000)
467   public void testLogReplayTwoSequentialRSDown() throws Exception {
468     LOG.info("testRecoveredEditsReplayTwoSequentialRSDown");
469     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
470     startCluster(NUM_RS);
471     final int NUM_REGIONS_TO_CREATE = 40;
472     final int NUM_LOG_LINES = 1000;
473     // turn off load balancing to prevent regions from moving around otherwise
474     // they will consume recovered.edits
475     master.balanceSwitch(false);
476 
477     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
478     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
479     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
480 
481     List<HRegionInfo> regions = null;
482     HRegionServer hrs1 = findRSToKill(false, "table");
483     regions = ProtobufUtil.getOnlineRegions(hrs1);
484 
485     makeHLog(hrs1.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
486 
487     // abort RS1
488     LOG.info("Aborting region server: " + hrs1.getServerName());
489     hrs1.abort("testing");
490 
491     // wait for abort completes
492     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
493       @Override
494       public boolean evaluate() throws Exception {
495         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
496       }
497     });
498 
499     // wait for regions come online
500     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
501       @Override
502       public boolean evaluate() throws Exception {
503         return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
504       }
505     });
506 
507     // sleep a little bit in order to interrupt recovering in the middle
508     Thread.sleep(300);
509     // abort second region server
510     rsts = cluster.getLiveRegionServerThreads();
511     HRegionServer hrs2 = rsts.get(0).getRegionServer();
512     LOG.info("Aborting one more region server: " + hrs2.getServerName());
513     hrs2.abort("testing");
514 
515     // wait for abort completes
516     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
517       @Override
518       public boolean evaluate() throws Exception {
519         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 2));
520       }
521     });
522 
523     // wait for regions come online
524     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
525       @Override
526       public boolean evaluate() throws Exception {
527         return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
528       }
529     });
530 
531     // wait for all regions are fully recovered
532     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
533       @Override
534       public boolean evaluate() throws Exception {
535         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
536           zkw.recoveringRegionsZNode, false);
537         return (recoveringRegions != null && recoveringRegions.size() == 0);
538       }
539     });
540 
541     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
542     ht.close();
543     zkw.close();
544   }
545 
546   @Test(timeout = 300000)
547   public void testMarkRegionsRecoveringInZK() throws Exception {
548     LOG.info("testMarkRegionsRecoveringInZK");
549     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
550     startCluster(NUM_RS);
551     master.balanceSwitch(false);
552     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
553     final ZooKeeperWatcher zkw = master.getZooKeeperWatcher();
554     HTable ht = installTable(zkw, "table", "family", 40);
555     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
556 
557     Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
558     HRegionInfo region = null;
559     HRegionServer hrs = null;
560     ServerName firstFailedServer = null;
561     ServerName secondFailedServer = null;
562     for (int i = 0; i < NUM_RS; i++) {
563       hrs = rsts.get(i).getRegionServer();
564       List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
565       if (regions.isEmpty()) continue;
566       region = regions.get(0);
567       regionSet.add(region);
568       firstFailedServer = hrs.getServerName();
569       secondFailedServer = rsts.get((i + 1) % NUM_RS).getRegionServer().getServerName();
570       break;
571     }
572 
573     slm.markRegionsRecoveringInZK(firstFailedServer, regionSet);
574     slm.markRegionsRecoveringInZK(secondFailedServer, regionSet);
575 
576     List<String> recoveringRegions = ZKUtil.listChildrenNoWatch(zkw,
577       ZKUtil.joinZNode(zkw.recoveringRegionsZNode, region.getEncodedName()));
578 
579     assertEquals(recoveringRegions.size(), 2);
580 
581     // wait for splitLogWorker to mark them up because there is no WAL files recorded in ZK
582     final HRegionServer tmphrs = hrs;
583     TEST_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
584       @Override
585       public boolean evaluate() throws Exception {
586         return (tmphrs.getRecoveringRegions().size() == 0);
587       }
588     });
589     ht.close();
590     zkw.close();
591   }
592 
593   @Test(timeout = 300000)
594   public void testReplayCmd() throws Exception {
595     LOG.info("testReplayCmd");
596     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
597     startCluster(NUM_RS);
598     final int NUM_REGIONS_TO_CREATE = 40;
599     // turn off load balancing to prevent regions from moving around otherwise
600     // they will consume recovered.edits
601     master.balanceSwitch(false);
602 
603     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
604     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
605     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
606 
607     List<HRegionInfo> regions = null;
608     HRegionServer hrs = null;
609     for (int i = 0; i < NUM_RS; i++) {
610       boolean isCarryingMeta = false;
611       hrs = rsts.get(i).getRegionServer();
612       regions = ProtobufUtil.getOnlineRegions(hrs);
613       for (HRegionInfo region : regions) {
614         if (region.isMetaRegion()) {
615           isCarryingMeta = true;
616           break;
617         }
618       }
619       if (isCarryingMeta) {
620         continue;
621       }
622       if (regions.size() > 0) break;
623     }
624 
625     this.prepareData(ht, Bytes.toBytes("family"), Bytes.toBytes("c1"));
626     String originalCheckSum = TEST_UTIL.checksumRows(ht);
627     
628     // abort RA and trigger replay
629     abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
630 
631     assertEquals("Data should remain after reopening of regions", originalCheckSum,
632       TEST_UTIL.checksumRows(ht));
633 
634     ht.close();
635     zkw.close();
636   }
637 
638   @Test(timeout = 300000)
639   public void testLogReplayForDisablingTable() throws Exception {
640     LOG.info("testLogReplayForDisablingTable");
641     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
642     startCluster(NUM_RS);
643     final int NUM_REGIONS_TO_CREATE = 40;
644     final int NUM_LOG_LINES = 1000;
645 
646     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
647     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
648     HTable disablingHT = installTable(zkw, "disableTable", "family", NUM_REGIONS_TO_CREATE);
649     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE, NUM_REGIONS_TO_CREATE);
650 
651     // turn off load balancing to prevent regions from moving around otherwise
652     // they will consume recovered.edits
653     master.balanceSwitch(false);
654 
655     List<HRegionInfo> regions = null;
656     HRegionServer hrs = null;
657     boolean hasRegionsForBothTables = false;
658     String tableName = null;
659     for (int i = 0; i < NUM_RS; i++) {
660       tableName = null;
661       hasRegionsForBothTables = false;
662       boolean isCarryingMeta = false;
663       hrs = rsts.get(i).getRegionServer();
664       regions = ProtobufUtil.getOnlineRegions(hrs);
665       for (HRegionInfo region : regions) {
666         if (region.isMetaRegion()) {
667           isCarryingMeta = true;
668           break;
669         }
670         if (tableName != null &&
671             !tableName.equalsIgnoreCase(region.getTable().getNameAsString())) {
672           // make sure that we find a RS has online regions for both "table" and "disableTable"
673           hasRegionsForBothTables = true;
674           break;
675         } else if (tableName == null) {
676           tableName = region.getTable().getNameAsString();
677         }
678       }
679       if (isCarryingMeta) {
680         continue;
681       }
682       if (hasRegionsForBothTables) {
683         break;
684       }
685     }
686 
687     // make sure we found a good RS
688     Assert.assertTrue(hasRegionsForBothTables);
689 
690     LOG.info("#regions = " + regions.size());
691     Iterator<HRegionInfo> it = regions.iterator();
692     while (it.hasNext()) {
693       HRegionInfo region = it.next();
694       if (region.isMetaTable()) {
695         it.remove();
696       }
697     }
698     makeHLog(hrs.getWAL(), regions, "disableTable", "family", NUM_LOG_LINES, 100, false);
699     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
700     
701     LOG.info("Disabling table\n");
702     TEST_UTIL.getHBaseAdmin().disableTable(Bytes.toBytes("disableTable"));
703     
704     // abort RS
705     LOG.info("Aborting region server: " + hrs.getServerName());
706     hrs.abort("testing");
707 
708     // wait for abort completes
709     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
710       @Override
711       public boolean evaluate() throws Exception {
712         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
713       }
714     });
715 
716     // wait for regions come online
717     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
718       @Override
719       public boolean evaluate() throws Exception {
720         return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
721       }
722     });
723 
724     // wait for all regions are fully recovered
725     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
726       @Override
727       public boolean evaluate() throws Exception {
728         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
729           zkw.recoveringRegionsZNode, false);
730         return (recoveringRegions != null && recoveringRegions.size() == 0);
731       }
732     });
733 
734     int count = 0;
735     FileSystem fs = master.getMasterFileSystem().getFileSystem();
736     Path rootdir = FSUtils.getRootDir(conf);
737     Path tdir = FSUtils.getTableDir(rootdir, TableName.valueOf("disableTable"));
738     for (HRegionInfo hri : regions) {
739       @SuppressWarnings("deprecation")
740       Path editsdir =
741         HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
742       LOG.debug("checking edits dir " + editsdir);
743       if(!fs.exists(editsdir)) continue;
744       FileStatus[] files = fs.listStatus(editsdir);
745       if(files != null) {
746         for(FileStatus file : files) {
747           int c = countHLog(file.getPath(), fs, conf);
748           count += c;
749           LOG.info(c + " edits in " + file.getPath());
750         }
751       }
752     }
753 
754     LOG.info("Verify edits in recovered.edits files");
755     assertEquals(NUM_LOG_LINES, count);
756     LOG.info("Verify replayed edits");
757     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
758     
759     // clean up
760     for (HRegionInfo hri : regions) {
761       @SuppressWarnings("deprecation")
762       Path editsdir =
763         HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
764       fs.delete(editsdir, true);
765     }
766     disablingHT.close();
767     ht.close();
768     zkw.close();
769   }
770 
771   @Test(timeout = 300000)
772   public void testDisallowWritesInRecovering() throws Exception {
773     LOG.info("testDisallowWritesInRecovering");
774     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
775     conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
776     conf.setBoolean(HConstants.DISALLOW_WRITES_IN_RECOVERING, true);
777     startCluster(NUM_RS);
778     final int NUM_REGIONS_TO_CREATE = 40;
779     // turn off load balancing to prevent regions from moving around otherwise
780     // they will consume recovered.edits
781     master.balanceSwitch(false);
782 
783     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
784     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
785     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
786     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
787 
788     Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
789     HRegionInfo region = null;
790     HRegionServer hrs = null;
791     HRegionServer dstRS = null;
792     for (int i = 0; i < NUM_RS; i++) {
793       hrs = rsts.get(i).getRegionServer();
794       List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
795       if (regions.isEmpty()) continue;
796       region = regions.get(0);
797       regionSet.add(region);
798       dstRS = rsts.get((i+1) % NUM_RS).getRegionServer();
799       break;
800     }
801     
802     slm.markRegionsRecoveringInZK(hrs.getServerName(), regionSet);
803     // move region in order for the region opened in recovering state
804     final HRegionInfo hri = region;
805     final HRegionServer tmpRS = dstRS;
806     TEST_UTIL.getHBaseAdmin().move(region.getEncodedNameAsBytes(),
807       Bytes.toBytes(dstRS.getServerName().getServerName()));
808     // wait for region move completes
809     final RegionStates regionStates =
810         TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
811     TEST_UTIL.waitFor(45000, 200, new Waiter.Predicate<Exception>() {
812       @Override
813       public boolean evaluate() throws Exception {
814         ServerName sn = regionStates.getRegionServerOfRegion(hri);
815         return (sn != null && sn.equals(tmpRS.getServerName()));
816       }
817     });
818     
819     try {
820       byte[] key = region.getStartKey();
821       if (key == null || key.length == 0) {
822         key = new byte[] { 0, 0, 0, 0, 1 };
823       }
824       ht.setAutoFlush(true, true);
825       Put put = new Put(key);
826       put.add(Bytes.toBytes("family"), Bytes.toBytes("c1"), new byte[]{'b'});
827       ht.put(put);
828       ht.close();
829     } catch (IOException ioe) {
830       Assert.assertTrue(ioe instanceof RetriesExhaustedWithDetailsException);
831       RetriesExhaustedWithDetailsException re = (RetriesExhaustedWithDetailsException) ioe;
832       boolean foundRegionInRecoveryException = false;
833       for (Throwable t : re.getCauses()) {
834         if (t instanceof RegionInRecoveryException) {
835           foundRegionInRecoveryException = true;
836           break;
837         }
838       }
839       Assert.assertTrue(
840         "No RegionInRecoveryException. Following exceptions returned=" + re.getCauses(),
841         foundRegionInRecoveryException);
842     }
843 
844     zkw.close();
845   }
846 
847   /**
848    * The original intention of this test was to force an abort of a region
849    * server and to make sure that the failure path in the region servers is
850    * properly evaluated. But it is difficult to ensure that the region server
851    * doesn't finish the log splitting before it aborts. Also now, there is
852    * this code path where the master will preempt the region server when master
853    * detects that the region server has aborted.
854    * @throws Exception
855    */
856   @Test (timeout=300000)
857   public void testWorkerAbort() throws Exception {
858     LOG.info("testWorkerAbort");
859     startCluster(3);
860     final int NUM_LOG_LINES = 10000;
861     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
862     FileSystem fs = master.getMasterFileSystem().getFileSystem();
863 
864     final List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
865     HRegionServer hrs = findRSToKill(false, "table");
866     Path rootdir = FSUtils.getRootDir(conf);
867     final Path logDir = new Path(rootdir,
868         HLogUtil.getHLogDirectoryName(hrs.getServerName().toString()));
869 
870     installTable(new ZooKeeperWatcher(conf, "table-creation", null),
871         "table", "family", 40);
872 
873     makeHLog(hrs.getWAL(), ProtobufUtil.getOnlineRegions(hrs), "table", "family", NUM_LOG_LINES,
874       100);
875 
876     new Thread() {
877       public void run() {
878         waitForCounter(tot_wkr_task_acquired, 0, 1, 1000);
879         for (RegionServerThread rst : rsts) {
880           rst.getRegionServer().abort("testing");
881           break;
882         }
883       }
884     }.start();
885     // slm.splitLogDistributed(logDir);
886     FileStatus[] logfiles = fs.listStatus(logDir);
887     TaskBatch batch = new TaskBatch();
888     slm.enqueueSplitTask(logfiles[0].getPath().toString(), batch);
889     //waitForCounter but for one of the 2 counters
890     long curt = System.currentTimeMillis();
891     long waitTime = 80000;
892     long endt = curt + waitTime;
893     while (curt < endt) {
894       if ((tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
895           tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
896           tot_wkr_preempt_task.get()) == 0) {
897         Thread.yield();
898         curt = System.currentTimeMillis();
899       } else {
900         assertTrue(1 <= (tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
901             tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
902             tot_wkr_preempt_task.get()));
903         return;
904       }
905     }
906     fail("none of the following counters went up in " + waitTime +
907         " milliseconds - " +
908         "tot_wkr_task_resigned, tot_wkr_task_err, " +
909         "tot_wkr_final_transition_failed, tot_wkr_task_done, " +
910         "tot_wkr_preempt_task");
911   }
912 
913   @Test (timeout=300000)
914   public void testThreeRSAbort() throws Exception {
915     LOG.info("testThreeRSAbort");
916     final int NUM_REGIONS_TO_CREATE = 40;
917     final int NUM_ROWS_PER_REGION = 100;
918 
919     startCluster(NUM_RS); // NUM_RS=6.
920 
921     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf,
922         "distributed log splitting test", null);
923 
924     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
925     populateDataInTable(NUM_ROWS_PER_REGION, "family");
926 
927 
928     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
929     assertEquals(NUM_RS, rsts.size());
930     rsts.get(0).getRegionServer().abort("testing");
931     rsts.get(1).getRegionServer().abort("testing");
932     rsts.get(2).getRegionServer().abort("testing");
933 
934     long start = EnvironmentEdgeManager.currentTimeMillis();
935     while (cluster.getLiveRegionServerThreads().size() > (NUM_RS - 3)) {
936       if (EnvironmentEdgeManager.currentTimeMillis() - start > 60000) {
937         assertTrue(false);
938       }
939       Thread.sleep(200);
940     }
941 
942     start = EnvironmentEdgeManager.currentTimeMillis();
943     while (getAllOnlineRegions(cluster).size() < (NUM_REGIONS_TO_CREATE + 1)) {
944       if (EnvironmentEdgeManager.currentTimeMillis() - start > 60000) {
945         assertTrue("Timedout", false);
946       }
947       Thread.sleep(200);
948     }
949 
950     // wait for all regions are fully recovered
951     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
952       @Override
953       public boolean evaluate() throws Exception {
954         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
955           zkw.recoveringRegionsZNode, false);
956         return (recoveringRegions != null && recoveringRegions.size() == 0);
957       }
958     });
959 
960     assertEquals(NUM_REGIONS_TO_CREATE * NUM_ROWS_PER_REGION,
961         TEST_UTIL.countRows(ht));
962     ht.close();
963     zkw.close();
964   }
965 
966 
967 
968   @Test(timeout=30000)
969   public void testDelayedDeleteOnFailure() throws Exception {
970     LOG.info("testDelayedDeleteOnFailure");
971     startCluster(1);
972     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
973     final FileSystem fs = master.getMasterFileSystem().getFileSystem();
974     final Path logDir = new Path(FSUtils.getRootDir(conf), "x");
975     fs.mkdirs(logDir);
976     ExecutorService executor = null;
977     try {
978       final Path corruptedLogFile = new Path(logDir, "x");
979       FSDataOutputStream out;
980       out = fs.create(corruptedLogFile);
981       out.write(0);
982       out.write(Bytes.toBytes("corrupted bytes"));
983       out.close();
984       slm.ignoreZKDeleteForTesting = true;
985       executor = Executors.newSingleThreadExecutor();
986       Runnable runnable = new Runnable() {
987        @Override
988        public void run() {
989           try {
990             // since the logDir is a fake, corrupted one, so the split log worker
991             // will finish it quickly with error, and this call will fail and throw
992             // an IOException.
993             slm.splitLogDistributed(logDir);
994           } catch (IOException ioe) {
995             try {
996               assertTrue(fs.exists(corruptedLogFile));
997               // this call will block waiting for the task to be removed from the
998               // tasks map which is not going to happen since ignoreZKDeleteForTesting
999               // is set to true, until it is interrupted.
1000               slm.splitLogDistributed(logDir);
1001             } catch (IOException e) {
1002               assertTrue(Thread.currentThread().isInterrupted());
1003               return;
1004             }
1005             fail("did not get the expected IOException from the 2nd call");
1006           }
1007           fail("did not get the expected IOException from the 1st call");
1008         }
1009       };
1010       Future<?> result = executor.submit(runnable);
1011       try {
1012         result.get(2000, TimeUnit.MILLISECONDS);
1013       } catch (TimeoutException te) {
1014         // it is ok, expected.
1015       }
1016       waitForCounter(tot_mgr_wait_for_zk_delete, 0, 1, 10000);
1017       executor.shutdownNow();
1018       executor = null;
1019 
1020       // make sure the runnable is finished with no exception thrown.
1021       result.get();
1022     } finally {
1023       if (executor != null) {
1024         // interrupt the thread in case the test fails in the middle.
1025         // it has no effect if the thread is already terminated.
1026         executor.shutdownNow();
1027       }
1028       fs.delete(logDir, true);
1029     }
1030   }
1031 
1032   @Test(timeout = 300000)
1033   public void testMetaRecoveryInZK() throws Exception {
1034     LOG.info("testMetaRecoveryInZK");
1035     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
1036     startCluster(NUM_RS);
1037 
1038     // turn off load balancing to prevent regions from moving around otherwise
1039     // they will consume recovered.edits
1040     master.balanceSwitch(false);
1041     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
1042     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1043 
1044     // only testing meta recovery in ZK operation
1045     HRegionServer hrs = findRSToKill(true, null);
1046     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
1047 
1048     LOG.info("#regions = " + regions.size());
1049     Set<HRegionInfo> tmpRegions = new HashSet<HRegionInfo>();
1050     tmpRegions.add(HRegionInfo.FIRST_META_REGIONINFO);
1051     master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), tmpRegions);
1052     Set<HRegionInfo> userRegionSet = new HashSet<HRegionInfo>();
1053     userRegionSet.addAll(regions);
1054     master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), userRegionSet);
1055     boolean isMetaRegionInRecovery = false;
1056     List<String> recoveringRegions =
1057         zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
1058     for (String curEncodedRegionName : recoveringRegions) {
1059       if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1060         isMetaRegionInRecovery = true;
1061         break;
1062       }
1063     }
1064     assertTrue(isMetaRegionInRecovery);
1065 
1066     master.getMasterFileSystem().splitMetaLog(hrs.getServerName());
1067     
1068     isMetaRegionInRecovery = false;
1069     recoveringRegions =
1070         zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
1071     for (String curEncodedRegionName : recoveringRegions) {
1072       if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1073         isMetaRegionInRecovery = true;
1074         break;
1075       }
1076     }
1077     // meta region should be recovered
1078     assertFalse(isMetaRegionInRecovery);
1079     zkw.close();
1080   }
1081 
1082   HTable installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs) throws Exception {
1083     return installTable(zkw, tname, fname, nrs, 0);
1084   }
1085 
1086   HTable installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs, 
1087       int existingRegions) throws Exception {
1088     // Create a table with regions
1089     byte [] table = Bytes.toBytes(tname);
1090     byte [] family = Bytes.toBytes(fname);
1091     LOG.info("Creating table with " + nrs + " regions");
1092     HTable ht = TEST_UTIL.createTable(table, family);
1093     int numRegions = TEST_UTIL.createMultiRegions(conf, ht, family, nrs);
1094     assertEquals(nrs, numRegions);
1095       LOG.info("Waiting for no more RIT\n");
1096     blockUntilNoRIT(zkw, master);
1097     // disable-enable cycle to get rid of table's dead regions left behind
1098     // by createMultiRegions
1099     LOG.debug("Disabling table\n");
1100     TEST_UTIL.getHBaseAdmin().disableTable(table);
1101     LOG.debug("Waiting for no more RIT\n");
1102     blockUntilNoRIT(zkw, master);
1103     NavigableSet<String> regions = getAllOnlineRegions(cluster);
1104     LOG.debug("Verifying only catalog and namespace regions are assigned\n");
1105     if (regions.size() != 2) {
1106       for (String oregion : regions)
1107         LOG.debug("Region still online: " + oregion);
1108     }
1109     assertEquals(2 + existingRegions, regions.size());
1110     LOG.debug("Enabling table\n");
1111     TEST_UTIL.getHBaseAdmin().enableTable(table);
1112     LOG.debug("Waiting for no more RIT\n");
1113     blockUntilNoRIT(zkw, master);
1114     LOG.debug("Verifying there are " + numRegions + " assigned on cluster\n");
1115     regions = getAllOnlineRegions(cluster);
1116     assertEquals(numRegions + 2 + existingRegions, regions.size());
1117     return ht;
1118   }
1119 
1120   void populateDataInTable(int nrows, String fname) throws Exception {
1121     byte [] family = Bytes.toBytes(fname);
1122 
1123     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1124     assertEquals(NUM_RS, rsts.size());
1125 
1126     for (RegionServerThread rst : rsts) {
1127       HRegionServer hrs = rst.getRegionServer();
1128       List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs);
1129       for (HRegionInfo hri : hris) {
1130         if (hri.getTable().isSystemTable()) {
1131           continue;
1132         }
1133         LOG.debug("adding data to rs = " + rst.getName() +
1134             " region = "+ hri.getRegionNameAsString());
1135         HRegion region = hrs.getOnlineRegion(hri.getRegionName());
1136         assertTrue(region != null);
1137         putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
1138       }
1139     }
1140   }
1141 
1142   public void makeHLog(HLog log, List<HRegionInfo> regions, String tname, String fname,
1143       int num_edits, int edit_size) throws IOException {
1144     makeHLog(log, regions, tname, fname, num_edits, edit_size, true);
1145   }
1146 
1147   public void makeHLog(HLog log, List<HRegionInfo> regions, String tname, String fname,
1148       int num_edits, int edit_size, boolean closeLog) throws IOException {
1149     TableName fullTName = TableName.valueOf(tname);
1150     // remove root and meta region
1151     regions.remove(HRegionInfo.FIRST_META_REGIONINFO);
1152 
1153     for(Iterator<HRegionInfo> iter = regions.iterator(); iter.hasNext(); ) {
1154       HRegionInfo regionInfo = iter.next();
1155       if(regionInfo.getTable().isSystemTable()) {
1156          iter.remove();
1157       }
1158     }
1159     HTableDescriptor htd = new HTableDescriptor(fullTName);
1160     byte[] family = Bytes.toBytes(fname);
1161     htd.addFamily(new HColumnDescriptor(family));
1162     byte[] value = new byte[edit_size];
1163 
1164     List<HRegionInfo> hris = new ArrayList<HRegionInfo>();
1165     for (HRegionInfo region : regions) {
1166       if (!region.getTable().getNameAsString().equalsIgnoreCase(tname)) {
1167         continue;
1168       }
1169       hris.add(region);
1170     }
1171     LOG.info("Creating wal edits across " + hris.size() + " regions.");
1172     for (int i = 0; i < edit_size; i++) {
1173       value[i] = (byte) ('a' + (i % 26));
1174     }
1175     int n = hris.size();
1176     int[] counts = new int[n];
1177     if (n > 0) {
1178       for (int i = 0; i < num_edits; i += 1) {
1179         WALEdit e = new WALEdit();
1180         HRegionInfo curRegionInfo = hris.get(i % n);
1181         byte[] startRow = curRegionInfo.getStartKey();
1182         if (startRow == null || startRow.length == 0) {
1183           startRow = new byte[] { 0, 0, 0, 0, 1 };
1184         }
1185         byte[] row = Bytes.incrementBytes(startRow, counts[i % n]);
1186         row = Arrays.copyOfRange(row, 3, 8); // use last 5 bytes because
1187                                              // HBaseTestingUtility.createMultiRegions use 5 bytes
1188                                              // key
1189         byte[] qualifier = Bytes.toBytes("c" + Integer.toString(i));
1190         e.add(new KeyValue(row, family, qualifier, System.currentTimeMillis(), value));
1191         log.append(curRegionInfo, fullTName, e, System.currentTimeMillis(), htd);
1192         counts[i % n] += 1;
1193       }
1194     }
1195     log.sync();
1196     if(closeLog) {
1197       log.close();
1198     }
1199     for (int i = 0; i < n; i++) {
1200       LOG.info("region " + hris.get(i).getRegionNameAsString() + " has " + counts[i] + " edits");
1201     }
1202     return;
1203   }
1204 
1205   private int countHLog(Path log, FileSystem fs, Configuration conf)
1206   throws IOException {
1207     int count = 0;
1208     HLog.Reader in = HLogFactory.createReader(fs, log, conf);
1209     while (in.next() != null) {
1210       count++;
1211     }
1212     return count;
1213   }
1214 
1215   private void blockUntilNoRIT(ZooKeeperWatcher zkw, HMaster master)
1216   throws KeeperException, InterruptedException {
1217     ZKAssign.blockUntilNoRIT(zkw);
1218     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
1219   }
1220 
1221   private void putData(HRegion region, byte[] startRow, int numRows, byte [] qf,
1222       byte [] ...families)
1223   throws IOException {
1224     for(int i = 0; i < numRows; i++) {
1225       Put put = new Put(Bytes.add(startRow, Bytes.toBytes(i)));
1226       for(byte [] family : families) {
1227         put.add(family, qf, null);
1228       }
1229       region.put(put);
1230     }
1231   }
1232 
1233   /**
1234    * Load table with puts and deletes with expected values so that we can verify later
1235    */
1236   private void prepareData(final HTable t, final byte[] f, final byte[] column) throws IOException {
1237     t.setAutoFlush(false, true);
1238     byte[] k = new byte[3];
1239 
1240     // add puts
1241     for (byte b1 = 'a'; b1 <= 'z'; b1++) {
1242       for (byte b2 = 'a'; b2 <= 'z'; b2++) {
1243         for (byte b3 = 'a'; b3 <= 'z'; b3++) {
1244           k[0] = b1;
1245           k[1] = b2;
1246           k[2] = b3;
1247           Put put = new Put(k);
1248           put.add(f, column, k);
1249           t.put(put);
1250         }
1251       }
1252     }
1253     t.flushCommits();
1254     // add deletes
1255     for (byte b3 = 'a'; b3 <= 'z'; b3++) {
1256       k[0] = 'a';
1257       k[1] = 'a';
1258       k[2] = b3;
1259       Delete del = new Delete(k);
1260       t.delete(del);
1261     }
1262     t.flushCommits();
1263   }
1264 
1265   private NavigableSet<String> getAllOnlineRegions(MiniHBaseCluster cluster)
1266       throws IOException {
1267     NavigableSet<String> online = new TreeSet<String>();
1268     for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
1269       for (HRegionInfo region : ProtobufUtil.getOnlineRegions(rst.getRegionServer())) {
1270         online.add(region.getRegionNameAsString());
1271       }
1272     }
1273     return online;
1274   }
1275 
1276   private void waitForCounter(AtomicLong ctr, long oldval, long newval,
1277       long timems) {
1278     long curt = System.currentTimeMillis();
1279     long endt = curt + timems;
1280     while (curt < endt) {
1281       if (ctr.get() == oldval) {
1282         Thread.yield();
1283         curt = System.currentTimeMillis();
1284       } else {
1285         assertEquals(newval, ctr.get());
1286         return;
1287       }
1288     }
1289     assertTrue(false);
1290   }
1291 
1292   private void abortMaster(MiniHBaseCluster cluster) throws InterruptedException {
1293     for (MasterThread mt : cluster.getLiveMasterThreads()) {
1294       if (mt.getMaster().isActiveMaster()) {
1295         mt.getMaster().abort("Aborting for tests", new Exception("Trace info"));
1296         mt.join();
1297         break;
1298       }
1299     }
1300     LOG.debug("Master is aborted");
1301   }
1302 
1303   private void startMasterAndWaitUntilLogSplit(MiniHBaseCluster cluster)
1304       throws IOException, InterruptedException {
1305     cluster.startMaster();
1306     HMaster master = cluster.getMaster();
1307     while (!master.isInitialized()) {
1308       Thread.sleep(100);
1309     }
1310     ServerManager serverManager = master.getServerManager();
1311     while (serverManager.areDeadServersInProgress()) {
1312       Thread.sleep(100);
1313     }
1314   }
1315 
1316   /**
1317    * Find a RS that has regions of a table.
1318    * @param hasMetaRegion when true, the returned RS has hbase:meta region as well
1319    * @param tableName
1320    * @return
1321    * @throws Exception
1322    */
1323   private HRegionServer findRSToKill(boolean hasMetaRegion, String tableName) throws Exception {
1324     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1325     int numOfRSs = rsts.size();
1326     List<HRegionInfo> regions = null;
1327     HRegionServer hrs = null;
1328 
1329     for (int i = 0; i < numOfRSs; i++) {
1330       boolean isCarryingMeta = false;
1331       boolean foundTableRegion = false;
1332       hrs = rsts.get(i).getRegionServer();
1333       regions = ProtobufUtil.getOnlineRegions(hrs);
1334       for (HRegionInfo region : regions) {
1335         if (region.isMetaRegion()) {
1336           isCarryingMeta = true;
1337         }
1338         if (tableName == null || region.getTable().getNameAsString().equals(tableName)) {
1339           foundTableRegion = true;
1340         }
1341         if (foundTableRegion && (isCarryingMeta || !hasMetaRegion)) {
1342           break;
1343         }
1344       }
1345       if (isCarryingMeta && hasMetaRegion) {
1346         // clients ask for a RS with META
1347         if (!foundTableRegion) {
1348           final HRegionServer destRS = hrs;
1349           // the RS doesn't have regions of the specified table so we need move one to this RS
1350           List<HRegionInfo> tableRegions =
1351               TEST_UTIL.getHBaseAdmin().getTableRegions(Bytes.toBytes(tableName));
1352           final HRegionInfo hri = tableRegions.get(0);
1353           TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
1354             Bytes.toBytes(destRS.getServerName().getServerName()));
1355           // wait for region move completes
1356           final RegionStates regionStates =
1357               TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
1358           TEST_UTIL.waitFor(45000, 200, new Waiter.Predicate<Exception>() {
1359             @Override
1360             public boolean evaluate() throws Exception {
1361               ServerName sn = regionStates.getRegionServerOfRegion(hri);
1362               return (sn != null && sn.equals(destRS.getServerName()));
1363             }
1364           });
1365         }
1366         return hrs;
1367       } else if (hasMetaRegion || isCarryingMeta) {
1368         continue;
1369       }
1370       if (foundTableRegion) break;
1371     }
1372 
1373     return hrs;
1374   }
1375 
1376 }