View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import static org.apache.hadoop.hbase.SplitLogCounters.tot_mgr_wait_for_zk_delete;
22  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_final_transition_failed;
23  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_preempt_task;
24  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_acquired;
25  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_done;
26  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_err;
27  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_resigned;
28  import static org.junit.Assert.assertEquals;
29  import static org.junit.Assert.assertFalse;
30  import static org.junit.Assert.assertTrue;
31  import static org.junit.Assert.fail;
32  
33  import java.io.IOException;
34  import java.util.ArrayList;
35  import java.util.Arrays;
36  import java.util.HashSet;
37  import java.util.Iterator;
38  import java.util.LinkedList;
39  import java.util.List;
40  import java.util.NavigableSet;
41  import java.util.Set;
42  import java.util.concurrent.ExecutorService;
43  import java.util.concurrent.Executors;
44  import java.util.concurrent.Future;
45  import java.util.concurrent.TimeUnit;
46  import java.util.concurrent.TimeoutException;
47  import java.util.concurrent.atomic.AtomicLong;
48  
49  import org.apache.commons.logging.Log;
50  import org.apache.commons.logging.LogFactory;
51  import org.apache.hadoop.conf.Configuration;
52  import org.apache.hadoop.fs.FSDataOutputStream;
53  import org.apache.hadoop.fs.FileStatus;
54  import org.apache.hadoop.fs.FileSystem;
55  import org.apache.hadoop.fs.Path;
56  import org.apache.hadoop.fs.PathFilter;
57  import org.apache.hadoop.hbase.HBaseConfiguration;
58  import org.apache.hadoop.hbase.HBaseTestingUtility;
59  import org.apache.hadoop.hbase.HColumnDescriptor;
60  import org.apache.hadoop.hbase.HConstants;
61  import org.apache.hadoop.hbase.HRegionInfo;
62  import org.apache.hadoop.hbase.HTableDescriptor;
63  import org.apache.hadoop.hbase.KeyValue;
64  import org.apache.hadoop.hbase.testclassification.LargeTests;
65  import org.apache.hadoop.hbase.MiniHBaseCluster;
66  import org.apache.hadoop.hbase.NamespaceDescriptor;
67  import org.apache.hadoop.hbase.ServerName;
68  import org.apache.hadoop.hbase.SplitLogCounters;
69  import org.apache.hadoop.hbase.TableName;
70  import org.apache.hadoop.hbase.Waiter;
71  import org.apache.hadoop.hbase.client.ClusterConnection;
72  import org.apache.hadoop.hbase.client.ConnectionUtils;
73  import org.apache.hadoop.hbase.client.Delete;
74  import org.apache.hadoop.hbase.client.Get;
75  import org.apache.hadoop.hbase.client.HTable;
76  import org.apache.hadoop.hbase.client.Increment;
77  import org.apache.hadoop.hbase.client.NonceGenerator;
78  import org.apache.hadoop.hbase.client.PerClientRandomNonceGenerator;
79  import org.apache.hadoop.hbase.client.Put;
80  import org.apache.hadoop.hbase.client.Result;
81  import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
82  import org.apache.hadoop.hbase.client.Table;
83  import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
84  import org.apache.hadoop.hbase.coordination.ZKSplitLogManagerCoordination;
85  import org.apache.hadoop.hbase.exceptions.OperationConflictException;
86  import org.apache.hadoop.hbase.exceptions.RegionInRecoveryException;
87  import org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch;
88  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
89  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
90  import org.apache.hadoop.hbase.regionserver.HRegion;
91  import org.apache.hadoop.hbase.regionserver.HRegionServer;
92  import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
93  import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
94  import org.apache.hadoop.hbase.wal.DefaultWALProvider;
95  import org.apache.hadoop.hbase.wal.WAL;
96  import org.apache.hadoop.hbase.wal.WALFactory;
97  import org.apache.hadoop.hbase.wal.WALSplitter;
98  import org.apache.hadoop.hbase.util.Bytes;
99  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
100 import org.apache.hadoop.hbase.util.FSUtils;
101 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
102 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
103 import org.apache.hadoop.hbase.util.Threads;
104 import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
105 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
106 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
107 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
108 import org.apache.hadoop.hdfs.MiniDFSCluster;
109 import org.apache.zookeeper.KeeperException;
110 import org.junit.After;
111 import org.junit.AfterClass;
112 import org.junit.Assert;
113 import org.junit.Before;
114 import org.junit.BeforeClass;
115 import org.junit.Test;
116 import org.junit.experimental.categories.Category;
117 
118 @Category(LargeTests.class)
119 @SuppressWarnings("deprecation")
120 public class TestDistributedLogSplitting {
121   private static final Log LOG = LogFactory.getLog(TestSplitLogManager.class);
122   static {
123     // Uncomment the following line if more verbosity is needed for
124     // debugging (see HBASE-12285 for details).
125     //Logger.getLogger("org.apache.hadoop.hbase").setLevel(Level.DEBUG);
126 
127     // test ThreeRSAbort fails under hadoop2 (2.0.2-alpha) if shortcircuit-read (scr) is on. this
128     // turns it off for this test.  TODO: Figure out why scr breaks recovery.
129     System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
130 
131   }
132 
133   // Start a cluster with 2 masters and 6 regionservers
134   static final int NUM_MASTERS = 2;
135   static final int NUM_RS = 6;
136 
137   MiniHBaseCluster cluster;
138   HMaster master;
139   Configuration conf;
140   static Configuration originalConf;
141   static HBaseTestingUtility TEST_UTIL;
142   static MiniDFSCluster dfsCluster;
143   static MiniZooKeeperCluster zkCluster;
144 
145   @BeforeClass
146   public static void setup() throws Exception {
147     TEST_UTIL = new HBaseTestingUtility(HBaseConfiguration.create());
148     dfsCluster = TEST_UTIL.startMiniDFSCluster(1);
149     zkCluster = TEST_UTIL.startMiniZKCluster();
150     originalConf = TEST_UTIL.getConfiguration();
151   }
152 
153   @AfterClass
154   public static void tearDown() throws IOException {
155     TEST_UTIL.shutdownMiniZKCluster();
156     TEST_UTIL.shutdownMiniDFSCluster();
157     TEST_UTIL.shutdownMiniHBaseCluster();
158   }
159 
160   private void startCluster(int num_rs) throws Exception {
161     SplitLogCounters.resetCounters();
162     LOG.info("Starting cluster");
163     conf.getLong("hbase.splitlog.max.resubmit", 0);
164     // Make the failure test faster
165     conf.setInt("zookeeper.recovery.retry", 0);
166     conf.setInt(HConstants.REGIONSERVER_INFO_PORT, -1);
167     conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, (float) 100.0); // no load balancing
168     conf.setInt("hbase.regionserver.wal.max.splitters", 3);
169     TEST_UTIL.shutdownMiniHBaseCluster();
170     TEST_UTIL = new HBaseTestingUtility(conf);
171     TEST_UTIL.setDFSCluster(dfsCluster);
172     TEST_UTIL.setZkCluster(zkCluster);
173     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, num_rs);
174     cluster = TEST_UTIL.getHBaseCluster();
175     LOG.info("Waiting for active/ready master");
176     cluster.waitForActiveAndReadyMaster();
177     master = cluster.getMaster();
178     while (cluster.getLiveRegionServerThreads().size() < num_rs) {
179       Threads.sleep(1);
180     }
181   }
182 
183   @Before
184   public void before() throws Exception {
185     // refresh configuration
186     conf = HBaseConfiguration.create(originalConf);
187   }
188 
189   @After
190   public void after() throws Exception {
191     try {
192       if (TEST_UTIL.getHBaseCluster() != null) {
193         for (MasterThread mt : TEST_UTIL.getHBaseCluster().getLiveMasterThreads()) {
194           mt.getMaster().abort("closing...", null);
195         }
196       }
197       TEST_UTIL.shutdownMiniHBaseCluster();
198     } finally {
199       TEST_UTIL.getTestFileSystem().delete(FSUtils.getRootDir(TEST_UTIL.getConfiguration()), true);
200       ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
201     }
202   }
203 
204   @Test (timeout=300000)
205   public void testRecoveredEdits() throws Exception {
206     LOG.info("testRecoveredEdits");
207     conf.setLong("hbase.regionserver.hlog.blocksize", 30 * 1024); // create more than one wal
208     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
209     startCluster(NUM_RS);
210 
211     final int NUM_LOG_LINES = 1000;
212     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
213     // turn off load balancing to prevent regions from moving around otherwise
214     // they will consume recovered.edits
215     master.balanceSwitch(false);
216     FileSystem fs = master.getMasterFileSystem().getFileSystem();
217 
218     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
219 
220     Path rootdir = FSUtils.getRootDir(conf);
221 
222     installTable(new ZooKeeperWatcher(conf, "table-creation", null),
223         "table", "family", 40);
224     TableName table = TableName.valueOf("table");
225     List<HRegionInfo> regions = null;
226     HRegionServer hrs = null;
227     for (int i = 0; i < NUM_RS; i++) {
228       boolean foundRs = false;
229       hrs = rsts.get(i).getRegionServer();
230       regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
231       for (HRegionInfo region : regions) {
232         if (region.getTable().getNameAsString().equalsIgnoreCase("table")) {
233           foundRs = true;
234           break;
235         }
236       }
237       if (foundRs) break;
238     }
239     final Path logDir = new Path(rootdir, DefaultWALProvider.getWALDirectoryName(hrs
240         .getServerName().toString()));
241 
242     LOG.info("#regions = " + regions.size());
243     Iterator<HRegionInfo> it = regions.iterator();
244     while (it.hasNext()) {
245       HRegionInfo region = it.next();
246       if (region.getTable().getNamespaceAsString()
247           .equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
248         it.remove();
249       }
250     }
251     
252     makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
253 
254     slm.splitLogDistributed(logDir);
255 
256     int count = 0;
257     for (HRegionInfo hri : regions) {
258 
259       Path tdir = FSUtils.getTableDir(rootdir, table);
260       Path editsdir =
261         WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
262       LOG.debug("checking edits dir " + editsdir);
263       FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {
264         @Override
265         public boolean accept(Path p) {
266           if (WALSplitter.isSequenceIdFile(p)) {
267             return false;
268           }
269           return true;
270         }
271       });
272       assertTrue("edits dir should have more than a single file in it. instead has " + files.length,
273           files.length > 1);
274       for (int i = 0; i < files.length; i++) {
275         int c = countWAL(files[i].getPath(), fs, conf);
276         count += c;
277       }
278       LOG.info(count + " edits in " + files.length + " recovered edits files.");
279     }
280 
281     // check that the log file is moved
282     assertFalse(fs.exists(logDir));
283 
284     assertEquals(NUM_LOG_LINES, count);
285   }
286 
287   @Test(timeout = 300000)
288   public void testLogReplayWithNonMetaRSDown() throws Exception {
289     LOG.info("testLogReplayWithNonMetaRSDown");
290     conf.setLong("hbase.regionserver.hlog.blocksize", 30 * 1024); // create more than one wal
291     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
292     startCluster(NUM_RS);
293     final int NUM_REGIONS_TO_CREATE = 40;
294     final int NUM_LOG_LINES = 1000;
295     // turn off load balancing to prevent regions from moving around otherwise
296     // they will consume recovered.edits
297     master.balanceSwitch(false);
298 
299     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
300     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
301 
302     HRegionServer hrs = findRSToKill(false, "table");
303     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
304     makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
305 
306     // wait for abort completes
307     this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
308     ht.close();
309     zkw.close();
310   }
311 
312   private static class NonceGeneratorWithDups extends PerClientRandomNonceGenerator {
313     private boolean isDups = false;
314     private LinkedList<Long> nonces = new LinkedList<Long>();
315 
316     public void startDups() {
317       isDups = true;
318     }
319 
320     @Override
321     public long newNonce() {
322       long nonce = isDups ? nonces.removeFirst() : super.newNonce();
323       if (!isDups) {
324         nonces.add(nonce);
325       }
326       return nonce;
327     }
328   }
329 
330   @Test(timeout = 300000)
331   public void testNonceRecovery() throws Exception {
332     LOG.info("testNonceRecovery");
333     final String TABLE_NAME = "table";
334     final String FAMILY_NAME = "family";
335     final int NUM_REGIONS_TO_CREATE = 40;
336 
337     conf.setLong("hbase.regionserver.hlog.blocksize", 100*1024);
338     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
339     startCluster(NUM_RS);
340     master.balanceSwitch(false);
341 
342     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
343     HTable ht = installTable(zkw, TABLE_NAME, FAMILY_NAME, NUM_REGIONS_TO_CREATE);
344     NonceGeneratorWithDups ng = new NonceGeneratorWithDups();
345     NonceGenerator oldNg =
346         ConnectionUtils.injectNonceGeneratorForTesting((ClusterConnection)ht.getConnection(), ng);
347 
348     try {
349       List<Increment> reqs = new ArrayList<Increment>();
350       for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
351         HRegionServer hrs = rst.getRegionServer();
352         List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
353         for (HRegionInfo hri : hris) {
354           if (TABLE_NAME.equalsIgnoreCase(hri.getTable().getNameAsString())) {
355             byte[] key = hri.getStartKey();
356             if (key == null || key.length == 0) {
357               key = Bytes.copy(hri.getEndKey());
358               --(key[key.length - 1]);
359             }
360             Increment incr = new Increment(key);
361             incr.addColumn(Bytes.toBytes(FAMILY_NAME), Bytes.toBytes("q"), 1);
362             ht.increment(incr);
363             reqs.add(incr);
364           }
365         }
366       }
367 
368       HRegionServer hrs = findRSToKill(false, "table");
369       abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
370       ng.startDups();
371       for (Increment incr : reqs) {
372         try {
373           ht.increment(incr);
374           fail("should have thrown");
375         } catch (OperationConflictException ope) {
376           LOG.debug("Caught as expected: " + ope.getMessage());
377         }
378       }
379     } finally {
380       ConnectionUtils.injectNonceGeneratorForTesting((ClusterConnection) ht.getConnection(), oldNg);
381       ht.close();
382       zkw.close();
383     }
384   }
385 
386   @Test(timeout = 300000)
387   public void testLogReplayWithMetaRSDown() throws Exception {
388     LOG.info("testRecoveredEditsReplayWithMetaRSDown");
389     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
390     startCluster(NUM_RS);
391     final int NUM_REGIONS_TO_CREATE = 40;
392     final int NUM_LOG_LINES = 1000;
393     // turn off load balancing to prevent regions from moving around otherwise
394     // they will consume recovered.edits
395     master.balanceSwitch(false);
396 
397     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
398     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
399 
400     HRegionServer hrs = findRSToKill(true, "table");
401     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
402     makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
403 
404     this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
405     ht.close();
406     zkw.close();
407   }
408 
409   private void abortRSAndVerifyRecovery(HRegionServer hrs, Table ht, final ZooKeeperWatcher zkw,
410       final int numRegions, final int numofLines) throws Exception {
411 
412     abortRSAndWaitForRecovery(hrs, zkw, numRegions);
413     assertEquals(numofLines, TEST_UTIL.countRows(ht));
414   }
415 
416   private void abortRSAndWaitForRecovery(HRegionServer hrs, final ZooKeeperWatcher zkw,
417       final int numRegions) throws Exception {
418     final MiniHBaseCluster tmpCluster = this.cluster;
419 
420     // abort RS
421     LOG.info("Aborting region server: " + hrs.getServerName());
422     hrs.abort("testing");
423 
424     // wait for abort completes
425     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
426       @Override
427       public boolean evaluate() throws Exception {
428         return (tmpCluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
429       }
430     });
431 
432     // wait for regions come online
433     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
434       @Override
435       public boolean evaluate() throws Exception {
436         return (HBaseTestingUtility.getAllOnlineRegions(tmpCluster).size()
437             >= (numRegions + 1));
438       }
439     });
440 
441     // wait for all regions are fully recovered
442     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
443       @Override
444       public boolean evaluate() throws Exception {
445         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
446           zkw.recoveringRegionsZNode, false);
447         return (recoveringRegions != null && recoveringRegions.size() == 0);
448       }
449     });
450   }
451 
452   @Test(timeout = 300000)
453   public void testMasterStartsUpWithLogSplittingWork() throws Exception {
454     LOG.info("testMasterStartsUpWithLogSplittingWork");
455     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
456     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
457     startCluster(NUM_RS);
458 
459     final int NUM_REGIONS_TO_CREATE = 40;
460     final int NUM_LOG_LINES = 1000;
461     // turn off load balancing to prevent regions from moving around otherwise
462     // they will consume recovered.edits
463     master.balanceSwitch(false);
464 
465     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
466     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
467 
468     HRegionServer hrs = findRSToKill(false, "table");
469     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
470     makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
471 
472     // abort master
473     abortMaster(cluster);
474 
475     // abort RS
476     LOG.info("Aborting region server: " + hrs.getServerName());
477     hrs.abort("testing");
478 
479     // wait for abort completes
480     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
481       @Override
482       public boolean evaluate() throws Exception {
483         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
484       }
485     });
486 
487     Thread.sleep(2000);
488     LOG.info("Current Open Regions:"
489         + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
490 
491     // wait for abort completes
492     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
493       @Override
494       public boolean evaluate() throws Exception {
495         return (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
496           >= (NUM_REGIONS_TO_CREATE + 1));
497       }
498     });
499 
500     LOG.info("Current Open Regions After Master Node Starts Up:"
501         + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
502 
503     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
504 
505     ht.close();
506     zkw.close();
507   }
508 
509   @Test(timeout = 300000)
510   public void testMasterStartsUpWithLogReplayWork() throws Exception {
511     LOG.info("testMasterStartsUpWithLogReplayWork");
512     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
513     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
514     startCluster(NUM_RS);
515 
516     final int NUM_REGIONS_TO_CREATE = 40;
517     final int NUM_LOG_LINES = 1000;
518     // turn off load balancing to prevent regions from moving around otherwise
519     // they will consume recovered.edits
520     master.balanceSwitch(false);
521 
522     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
523     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
524 
525     HRegionServer hrs = findRSToKill(false, "table");
526     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
527     makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
528 
529     // abort master
530     abortMaster(cluster);
531 
532     // abort RS
533     LOG.info("Aborting region server: " + hrs.getServerName());
534     hrs.abort("testing");
535 
536     // wait for the RS dies
537     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
538       @Override
539       public boolean evaluate() throws Exception {
540         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
541       }
542     });
543 
544     Thread.sleep(2000);
545     LOG.info("Current Open Regions:" + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
546 
547     // wait for all regions are fully recovered
548     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
549       @Override
550       public boolean evaluate() throws Exception {
551         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
552           zkw.recoveringRegionsZNode, false);
553         boolean done = recoveringRegions != null && recoveringRegions.size() == 0;
554         if (!done) {
555           LOG.info("Recovering regions: " + recoveringRegions);
556         }
557         return done;
558       }
559     });
560 
561     LOG.info("Current Open Regions After Master Node Starts Up:"
562         + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
563 
564     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
565 
566     ht.close();
567     zkw.close();
568   }
569 
570 
571   @Test(timeout = 300000)
572   public void testLogReplayTwoSequentialRSDown() throws Exception {
573     LOG.info("testRecoveredEditsReplayTwoSequentialRSDown");
574     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
575     startCluster(NUM_RS);
576     final int NUM_REGIONS_TO_CREATE = 40;
577     final int NUM_LOG_LINES = 1000;
578     // turn off load balancing to prevent regions from moving around otherwise
579     // they will consume recovered.edits
580     master.balanceSwitch(false);
581 
582     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
583     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
584     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
585 
586     List<HRegionInfo> regions = null;
587     HRegionServer hrs1 = findRSToKill(false, "table");
588     regions = ProtobufUtil.getOnlineRegions(hrs1.getRSRpcServices());
589 
590     makeWAL(hrs1, regions, "table", "family", NUM_LOG_LINES, 100);
591 
592     // abort RS1
593     LOG.info("Aborting region server: " + hrs1.getServerName());
594     hrs1.abort("testing");
595 
596     // wait for abort completes
597     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
598       @Override
599       public boolean evaluate() throws Exception {
600         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
601       }
602     });
603 
604     // wait for regions come online
605     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
606       @Override
607       public boolean evaluate() throws Exception {
608         return (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
609             >= (NUM_REGIONS_TO_CREATE + 1));
610       }
611     });
612 
613     // sleep a little bit in order to interrupt recovering in the middle
614     Thread.sleep(300);
615     // abort second region server
616     rsts = cluster.getLiveRegionServerThreads();
617     HRegionServer hrs2 = rsts.get(0).getRegionServer();
618     LOG.info("Aborting one more region server: " + hrs2.getServerName());
619     hrs2.abort("testing");
620 
621     // wait for abort completes
622     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
623       @Override
624       public boolean evaluate() throws Exception {
625         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 2));
626       }
627     });
628 
629     // wait for regions come online
630     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
631       @Override
632       public boolean evaluate() throws Exception {
633         return (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
634             >= (NUM_REGIONS_TO_CREATE + 1));
635       }
636     });
637 
638     // wait for all regions are fully recovered
639     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
640       @Override
641       public boolean evaluate() throws Exception {
642         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
643           zkw.recoveringRegionsZNode, false);
644         return (recoveringRegions != null && recoveringRegions.size() == 0);
645       }
646     });
647 
648     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
649     ht.close();
650     zkw.close();
651   }
652 
653   @Test(timeout = 300000)
654   public void testMarkRegionsRecoveringInZK() throws Exception {
655     LOG.info("testMarkRegionsRecoveringInZK");
656     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
657     startCluster(NUM_RS);
658     master.balanceSwitch(false);
659     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
660     final ZooKeeperWatcher zkw = master.getZooKeeper();
661     Table ht = installTable(zkw, "table", "family", 40);
662     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
663 
664     Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
665     HRegionInfo region = null;
666     HRegionServer hrs = null;
667     ServerName firstFailedServer = null;
668     ServerName secondFailedServer = null;
669     for (int i = 0; i < NUM_RS; i++) {
670       hrs = rsts.get(i).getRegionServer();
671       List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
672       if (regions.isEmpty()) continue;
673       region = regions.get(0);
674       regionSet.add(region);
675       firstFailedServer = hrs.getServerName();
676       secondFailedServer = rsts.get((i + 1) % NUM_RS).getRegionServer().getServerName();
677       break;
678     }
679 
680     slm.markRegionsRecovering(firstFailedServer, regionSet);
681     slm.markRegionsRecovering(secondFailedServer, regionSet);
682 
683     List<String> recoveringRegions = ZKUtil.listChildrenNoWatch(zkw,
684       ZKUtil.joinZNode(zkw.recoveringRegionsZNode, region.getEncodedName()));
685 
686     assertEquals(recoveringRegions.size(), 2);
687 
688     // wait for splitLogWorker to mark them up because there is no WAL files recorded in ZK
689     final HRegionServer tmphrs = hrs;
690     TEST_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
691       @Override
692       public boolean evaluate() throws Exception {
693         return (tmphrs.getRecoveringRegions().size() == 0);
694       }
695     });
696     ht.close();
697   }
698 
699   @Test(timeout = 300000)
700   public void testReplayCmd() throws Exception {
701     LOG.info("testReplayCmd");
702     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
703     startCluster(NUM_RS);
704     final int NUM_REGIONS_TO_CREATE = 40;
705     // turn off load balancing to prevent regions from moving around otherwise
706     // they will consume recovered.edits
707     master.balanceSwitch(false);
708 
709     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
710     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
711     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
712 
713     List<HRegionInfo> regions = null;
714     HRegionServer hrs = null;
715     for (int i = 0; i < NUM_RS; i++) {
716       boolean isCarryingMeta = false;
717       hrs = rsts.get(i).getRegionServer();
718       regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
719       for (HRegionInfo region : regions) {
720         if (region.isMetaRegion()) {
721           isCarryingMeta = true;
722           break;
723         }
724       }
725       if (isCarryingMeta) {
726         continue;
727       }
728       if (regions.size() > 0) break;
729     }
730 
731     this.prepareData(ht, Bytes.toBytes("family"), Bytes.toBytes("c1"));
732     String originalCheckSum = TEST_UTIL.checksumRows(ht);
733 
734     // abort RA and trigger replay
735     abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
736 
737     assertEquals("Data should remain after reopening of regions", originalCheckSum,
738       TEST_UTIL.checksumRows(ht));
739 
740     ht.close();
741     zkw.close();
742   }
743 
744   @Test(timeout = 300000)
745   public void testLogReplayForDisablingTable() throws Exception {
746     LOG.info("testLogReplayForDisablingTable");
747     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
748     startCluster(NUM_RS);
749     final int NUM_REGIONS_TO_CREATE = 40;
750     final int NUM_LOG_LINES = 1000;
751 
752     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
753     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
754     Table disablingHT = installTable(zkw, "disableTable", "family", NUM_REGIONS_TO_CREATE);
755     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE, NUM_REGIONS_TO_CREATE);
756 
757     // turn off load balancing to prevent regions from moving around otherwise
758     // they will consume recovered.edits
759     master.balanceSwitch(false);
760 
761     List<HRegionInfo> regions = null;
762     HRegionServer hrs = null;
763     boolean hasRegionsForBothTables = false;
764     String tableName = null;
765     for (int i = 0; i < NUM_RS; i++) {
766       tableName = null;
767       hasRegionsForBothTables = false;
768       boolean isCarryingSystem = false;
769       hrs = rsts.get(i).getRegionServer();
770       regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
771       for (HRegionInfo region : regions) {
772         if (region.getTable().isSystemTable()) {
773           isCarryingSystem = true;
774           break;
775         }
776         if (tableName != null &&
777             !tableName.equalsIgnoreCase(region.getTable().getNameAsString())) {
778           // make sure that we find a RS has online regions for both "table" and "disableTable"
779           hasRegionsForBothTables = true;
780           break;
781         } else if (tableName == null) {
782           tableName = region.getTable().getNameAsString();
783         }
784       }
785       if (isCarryingSystem) {
786         continue;
787       }
788       if (hasRegionsForBothTables) {
789         break;
790       }
791     }
792 
793     // make sure we found a good RS
794     Assert.assertTrue(hasRegionsForBothTables);
795 
796     LOG.info("#regions = " + regions.size());
797     Iterator<HRegionInfo> it = regions.iterator();
798     while (it.hasNext()) {
799       HRegionInfo region = it.next();
800       if (region.isMetaTable()) {
801         it.remove();
802       }
803     }
804     makeWAL(hrs, regions, "disableTable", "family", NUM_LOG_LINES, 100, false);
805     makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
806 
807     LOG.info("Disabling table\n");
808     TEST_UTIL.getHBaseAdmin().disableTable(TableName.valueOf("disableTable"));
809 
810     // abort RS
811     LOG.info("Aborting region server: " + hrs.getServerName());
812     hrs.abort("testing");
813 
814     // wait for abort completes
815     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
816       @Override
817       public boolean evaluate() throws Exception {
818         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
819       }
820     });
821 
822     // wait for regions come online
823     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
824       @Override
825       public boolean evaluate() throws Exception {
826         return (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
827             >= (NUM_REGIONS_TO_CREATE + 1));
828       }
829     });
830 
831     // wait for all regions are fully recovered
832     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
833       @Override
834       public boolean evaluate() throws Exception {
835         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
836           zkw.recoveringRegionsZNode, false);
837         ServerManager serverManager = master.getServerManager();
838         return (!serverManager.areDeadServersInProgress() &&
839             recoveringRegions != null && recoveringRegions.size() == 0);
840       }
841     });
842 
843     int count = 0;
844     FileSystem fs = master.getMasterFileSystem().getFileSystem();
845     Path rootdir = FSUtils.getRootDir(conf);
846     Path tdir = FSUtils.getTableDir(rootdir, TableName.valueOf("disableTable"));
847     for (HRegionInfo hri : regions) {
848       Path editsdir =
849         WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
850       LOG.debug("checking edits dir " + editsdir);
851       if(!fs.exists(editsdir)) continue;
852       FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {
853         @Override
854         public boolean accept(Path p) {
855           if (WALSplitter.isSequenceIdFile(p)) {
856             return false;
857           }
858           return true;
859         }
860       });
861       if(files != null) {
862         for(FileStatus file : files) {
863           int c = countWAL(file.getPath(), fs, conf);
864           count += c;
865           LOG.info(c + " edits in " + file.getPath());
866         }
867       }
868     }
869 
870     LOG.info("Verify edits in recovered.edits files");
871     assertEquals(NUM_LOG_LINES, count);
872     LOG.info("Verify replayed edits");
873     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
874 
875     // clean up
876     for (HRegionInfo hri : regions) {
877       Path editsdir =
878         WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
879       fs.delete(editsdir, true);
880     }
881     disablingHT.close();
882     ht.close();
883     zkw.close();
884   }
885 
886   @Test(timeout = 300000)
887   public void testDisallowWritesInRecovering() throws Exception {
888     LOG.info("testDisallowWritesInRecovering");
889     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
890     conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 3);
891     conf.setBoolean(HConstants.DISALLOW_WRITES_IN_RECOVERING, true);
892     startCluster(NUM_RS);
893     final int NUM_REGIONS_TO_CREATE = 40;
894     // turn off load balancing to prevent regions from moving around otherwise
895     // they will consume recovered.edits
896     master.balanceSwitch(false);
897 
898     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
899     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
900     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
901     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
902 
903     Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
904     HRegionInfo region = null;
905     HRegionServer hrs = null;
906     HRegionServer dstRS = null;
907     for (int i = 0; i < NUM_RS; i++) {
908       hrs = rsts.get(i).getRegionServer();
909       List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
910       if (regions.isEmpty()) continue;
911       region = regions.get(0);
912       if (region.isMetaRegion()) continue;
913       regionSet.add(region);
914       dstRS = rsts.get((i+1) % NUM_RS).getRegionServer();
915       break;
916     }
917 
918     slm.markRegionsRecovering(hrs.getServerName(), regionSet);
919     // move region in order for the region opened in recovering state
920     final HRegionInfo hri = region;
921     final HRegionServer tmpRS = dstRS;
922     TEST_UTIL.getHBaseAdmin().move(region.getEncodedNameAsBytes(),
923       Bytes.toBytes(dstRS.getServerName().getServerName()));
924     // wait for region move completes
925     final RegionStates regionStates =
926         TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
927     TEST_UTIL.waitFor(45000, 200, new Waiter.Predicate<Exception>() {
928       @Override
929       public boolean evaluate() throws Exception {
930         ServerName sn = regionStates.getRegionServerOfRegion(hri);
931         return (sn != null && sn.equals(tmpRS.getServerName()));
932       }
933     });
934 
935     try {
936       byte[] key = region.getStartKey();
937       if (key == null || key.length == 0) {
938         key = new byte[] { 0, 0, 0, 0, 1 };
939       }
940       Put put = new Put(key);
941       put.add(Bytes.toBytes("family"), Bytes.toBytes("c1"), new byte[]{'b'});
942       ht.put(put);
943     } catch (IOException ioe) {
944       Assert.assertTrue(ioe instanceof RetriesExhaustedWithDetailsException);
945       RetriesExhaustedWithDetailsException re = (RetriesExhaustedWithDetailsException) ioe;
946       boolean foundRegionInRecoveryException = false;
947       for (Throwable t : re.getCauses()) {
948         if (t instanceof RegionInRecoveryException) {
949           foundRegionInRecoveryException = true;
950           break;
951         }
952       }
953       Assert.assertTrue(
954         "No RegionInRecoveryException. Following exceptions returned=" + re.getCauses(),
955         foundRegionInRecoveryException);
956     }
957 
958     ht.close();
959     zkw.close();
960   }
961 
962   /**
963    * The original intention of this test was to force an abort of a region
964    * server and to make sure that the failure path in the region servers is
965    * properly evaluated. But it is difficult to ensure that the region server
966    * doesn't finish the log splitting before it aborts. Also now, there is
967    * this code path where the master will preempt the region server when master
968    * detects that the region server has aborted.
969    * @throws Exception
970    */
971   @Test (timeout=300000)
972   public void testWorkerAbort() throws Exception {
973     LOG.info("testWorkerAbort");
974     startCluster(3);
975     final int NUM_LOG_LINES = 10000;
976     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
977     FileSystem fs = master.getMasterFileSystem().getFileSystem();
978 
979     final List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
980     HRegionServer hrs = findRSToKill(false, "table");
981     Path rootdir = FSUtils.getRootDir(conf);
982     final Path logDir = new Path(rootdir,
983         DefaultWALProvider.getWALDirectoryName(hrs.getServerName().toString()));
984 
985     installTable(new ZooKeeperWatcher(conf, "table-creation", null),
986         "table", "family", 40);
987 
988     makeWAL(hrs, ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()),
989       "table", "family", NUM_LOG_LINES, 100);
990 
991     new Thread() {
992       @Override
993       public void run() {
994         waitForCounter(tot_wkr_task_acquired, 0, 1, 1000);
995         for (RegionServerThread rst : rsts) {
996           rst.getRegionServer().abort("testing");
997           break;
998         }
999       }
1000     }.start();
1001     // slm.splitLogDistributed(logDir);
1002     FileStatus[] logfiles = fs.listStatus(logDir);
1003     TaskBatch batch = new TaskBatch();
1004     slm.enqueueSplitTask(logfiles[0].getPath().toString(), batch);
1005     //waitForCounter but for one of the 2 counters
1006     long curt = System.currentTimeMillis();
1007     long waitTime = 80000;
1008     long endt = curt + waitTime;
1009     while (curt < endt) {
1010       if ((tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
1011           tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
1012           tot_wkr_preempt_task.get()) == 0) {
1013         Thread.yield();
1014         curt = System.currentTimeMillis();
1015       } else {
1016         assertTrue(1 <= (tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
1017             tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
1018             tot_wkr_preempt_task.get()));
1019         return;
1020       }
1021     }
1022     fail("none of the following counters went up in " + waitTime +
1023         " milliseconds - " +
1024         "tot_wkr_task_resigned, tot_wkr_task_err, " +
1025         "tot_wkr_final_transition_failed, tot_wkr_task_done, " +
1026         "tot_wkr_preempt_task");
1027   }
1028 
1029   @Test (timeout=300000)
1030   public void testThreeRSAbort() throws Exception {
1031     LOG.info("testThreeRSAbort");
1032     final int NUM_REGIONS_TO_CREATE = 40;
1033     final int NUM_ROWS_PER_REGION = 100;
1034 
1035     startCluster(NUM_RS); // NUM_RS=6.
1036 
1037     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf,
1038         "distributed log splitting test", null);
1039 
1040     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
1041     populateDataInTable(NUM_ROWS_PER_REGION, "family");
1042 
1043 
1044     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1045     assertEquals(NUM_RS, rsts.size());
1046     rsts.get(0).getRegionServer().abort("testing");
1047     rsts.get(1).getRegionServer().abort("testing");
1048     rsts.get(2).getRegionServer().abort("testing");
1049 
1050     long start = EnvironmentEdgeManager.currentTime();
1051     while (cluster.getLiveRegionServerThreads().size() > (NUM_RS - 3)) {
1052       if (EnvironmentEdgeManager.currentTime() - start > 60000) {
1053         assertTrue(false);
1054       }
1055       Thread.sleep(200);
1056     }
1057 
1058     start = EnvironmentEdgeManager.currentTime();
1059     while (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
1060         < (NUM_REGIONS_TO_CREATE + 1)) {
1061       if (EnvironmentEdgeManager.currentTime() - start > 60000) {
1062         assertTrue("Timedout", false);
1063       }
1064       Thread.sleep(200);
1065     }
1066 
1067     // wait for all regions are fully recovered
1068     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
1069       @Override
1070       public boolean evaluate() throws Exception {
1071         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
1072           zkw.recoveringRegionsZNode, false);
1073         return (recoveringRegions != null && recoveringRegions.size() == 0);
1074       }
1075     });
1076 
1077     assertEquals(NUM_REGIONS_TO_CREATE * NUM_ROWS_PER_REGION,
1078         TEST_UTIL.countRows(ht));
1079     ht.close();
1080     zkw.close();
1081   }
1082 
1083 
1084 
1085   @Test(timeout=30000)
1086   public void testDelayedDeleteOnFailure() throws Exception {
1087     LOG.info("testDelayedDeleteOnFailure");
1088     startCluster(1);
1089     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
1090     final FileSystem fs = master.getMasterFileSystem().getFileSystem();
1091     final Path logDir = new Path(FSUtils.getRootDir(conf), "x");
1092     fs.mkdirs(logDir);
1093     ExecutorService executor = null;
1094     try {
1095       final Path corruptedLogFile = new Path(logDir, "x");
1096       FSDataOutputStream out;
1097       out = fs.create(corruptedLogFile);
1098       out.write(0);
1099       out.write(Bytes.toBytes("corrupted bytes"));
1100       out.close();
1101       ZKSplitLogManagerCoordination coordination =
1102           (ZKSplitLogManagerCoordination) ((BaseCoordinatedStateManager) master
1103               .getCoordinatedStateManager()).getSplitLogManagerCoordination();
1104       coordination.setIgnoreDeleteForTesting(true);
1105       executor = Executors.newSingleThreadExecutor();
1106       Runnable runnable = new Runnable() {
1107        @Override
1108        public void run() {
1109           try {
1110             // since the logDir is a fake, corrupted one, so the split log worker
1111             // will finish it quickly with error, and this call will fail and throw
1112             // an IOException.
1113             slm.splitLogDistributed(logDir);
1114           } catch (IOException ioe) {
1115             try {
1116               assertTrue(fs.exists(corruptedLogFile));
1117               // this call will block waiting for the task to be removed from the
1118               // tasks map which is not going to happen since ignoreZKDeleteForTesting
1119               // is set to true, until it is interrupted.
1120               slm.splitLogDistributed(logDir);
1121             } catch (IOException e) {
1122               assertTrue(Thread.currentThread().isInterrupted());
1123               return;
1124             }
1125             fail("did not get the expected IOException from the 2nd call");
1126           }
1127           fail("did not get the expected IOException from the 1st call");
1128         }
1129       };
1130       Future<?> result = executor.submit(runnable);
1131       try {
1132         result.get(2000, TimeUnit.MILLISECONDS);
1133       } catch (TimeoutException te) {
1134         // it is ok, expected.
1135       }
1136       waitForCounter(tot_mgr_wait_for_zk_delete, 0, 1, 10000);
1137       executor.shutdownNow();
1138       executor = null;
1139 
1140       // make sure the runnable is finished with no exception thrown.
1141       result.get();
1142     } finally {
1143       if (executor != null) {
1144         // interrupt the thread in case the test fails in the middle.
1145         // it has no effect if the thread is already terminated.
1146         executor.shutdownNow();
1147       }
1148       fs.delete(logDir, true);
1149     }
1150   }
1151 
1152   @Test(timeout = 300000)
1153   public void testMetaRecoveryInZK() throws Exception {
1154     LOG.info("testMetaRecoveryInZK");
1155     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
1156     startCluster(NUM_RS);
1157 
1158     // turn off load balancing to prevent regions from moving around otherwise
1159     // they will consume recovered.edits
1160     master.balanceSwitch(false);
1161     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
1162 
1163     // only testing meta recovery in ZK operation
1164     HRegionServer hrs = findRSToKill(true, null);
1165     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1166 
1167     LOG.info("#regions = " + regions.size());
1168     Set<HRegionInfo> tmpRegions = new HashSet<HRegionInfo>();
1169     tmpRegions.add(HRegionInfo.FIRST_META_REGIONINFO);
1170     master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), tmpRegions);
1171     Set<HRegionInfo> userRegionSet = new HashSet<HRegionInfo>();
1172     userRegionSet.addAll(regions);
1173     master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), userRegionSet);
1174     boolean isMetaRegionInRecovery = false;
1175     List<String> recoveringRegions =
1176         zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
1177     for (String curEncodedRegionName : recoveringRegions) {
1178       if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1179         isMetaRegionInRecovery = true;
1180         break;
1181       }
1182     }
1183     assertTrue(isMetaRegionInRecovery);
1184 
1185     master.getMasterFileSystem().splitMetaLog(hrs.getServerName());
1186 
1187     isMetaRegionInRecovery = false;
1188     recoveringRegions =
1189         zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
1190     for (String curEncodedRegionName : recoveringRegions) {
1191       if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1192         isMetaRegionInRecovery = true;
1193         break;
1194       }
1195     }
1196     // meta region should be recovered
1197     assertFalse(isMetaRegionInRecovery);
1198     zkw.close();
1199   }
1200 
1201   @Test(timeout = 300000)
1202   public void testSameVersionUpdatesRecovery() throws Exception {
1203     LOG.info("testSameVersionUpdatesRecovery");
1204     conf.setLong("hbase.regionserver.hlog.blocksize", 15 * 1024);
1205     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
1206     startCluster(NUM_RS);
1207     final AtomicLong sequenceId = new AtomicLong(100);
1208     final int NUM_REGIONS_TO_CREATE = 40;
1209     final int NUM_LOG_LINES = 1000;
1210     // turn off load balancing to prevent regions from moving around otherwise
1211     // they will consume recovered.edits
1212     master.balanceSwitch(false);
1213 
1214     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1215     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
1216     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
1217 
1218     List<HRegionInfo> regions = null;
1219     HRegionServer hrs = null;
1220     for (int i = 0; i < NUM_RS; i++) {
1221       boolean isCarryingMeta = false;
1222       hrs = rsts.get(i).getRegionServer();
1223       regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1224       for (HRegionInfo region : regions) {
1225         if (region.isMetaRegion()) {
1226           isCarryingMeta = true;
1227           break;
1228         }
1229       }
1230       if (isCarryingMeta) {
1231         continue;
1232       }
1233       break;
1234     }
1235 
1236     LOG.info("#regions = " + regions.size());
1237     Iterator<HRegionInfo> it = regions.iterator();
1238     while (it.hasNext()) {
1239       HRegionInfo region = it.next();
1240       if (region.isMetaTable()
1241           || region.getEncodedName().equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1242         it.remove();
1243       }
1244     }
1245     if (regions.size() == 0) return;
1246     HRegionInfo curRegionInfo = regions.get(0);
1247     byte[] startRow = curRegionInfo.getStartKey();
1248     if (startRow == null || startRow.length == 0) {
1249       startRow = new byte[] { 0, 0, 0, 0, 1 };
1250     }
1251     byte[] row = Bytes.incrementBytes(startRow, 1);
1252     // use last 5 bytes because HBaseTestingUtility.createMultiRegions use 5 bytes key
1253     row = Arrays.copyOfRange(row, 3, 8);
1254     long value = 0;
1255     TableName tableName = TableName.valueOf("table");
1256     byte[] family = Bytes.toBytes("family");
1257     byte[] qualifier = Bytes.toBytes("c1");
1258     long timeStamp = System.currentTimeMillis();
1259     HTableDescriptor htd = new HTableDescriptor();
1260     htd.addFamily(new HColumnDescriptor(family));
1261     final WAL wal = hrs.getWAL(curRegionInfo);
1262     for (int i = 0; i < NUM_LOG_LINES; i += 1) {
1263       WALEdit e = new WALEdit();
1264       value++;
1265       e.add(new KeyValue(row, family, qualifier, timeStamp, Bytes.toBytes(value)));
1266       wal.append(htd, curRegionInfo,
1267           new HLogKey(curRegionInfo.getEncodedNameAsBytes(), tableName, System.currentTimeMillis()),
1268           e, sequenceId, true, null);
1269     }
1270     wal.sync();
1271     wal.shutdown();
1272 
1273     // wait for abort completes
1274     this.abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
1275 
1276     // verify we got the last value
1277     LOG.info("Verification Starts...");
1278     Get g = new Get(row);
1279     Result r = ht.get(g);
1280     long theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
1281     assertEquals(value, theStoredVal);
1282 
1283     // after flush
1284     LOG.info("Verification after flush...");
1285     TEST_UTIL.getHBaseAdmin().flush(tableName);
1286     r = ht.get(g);
1287     theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
1288     assertEquals(value, theStoredVal);
1289     ht.close();
1290   }
1291 
1292   @Test(timeout = 300000)
1293   public void testSameVersionUpdatesRecoveryWithCompaction() throws Exception {
1294     LOG.info("testSameVersionUpdatesRecoveryWithWrites");
1295     conf.setLong("hbase.regionserver.hlog.blocksize", 15 * 1024);
1296     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
1297     conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 30 * 1024);
1298     conf.setInt("hbase.hstore.compactionThreshold", 3);
1299     startCluster(NUM_RS);
1300     final AtomicLong sequenceId = new AtomicLong(100);
1301     final int NUM_REGIONS_TO_CREATE = 40;
1302     final int NUM_LOG_LINES = 2000;
1303     // turn off load balancing to prevent regions from moving around otherwise
1304     // they will consume recovered.edits
1305     master.balanceSwitch(false);
1306 
1307     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1308     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
1309     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
1310 
1311     List<HRegionInfo> regions = null;
1312     HRegionServer hrs = null;
1313     for (int i = 0; i < NUM_RS; i++) {
1314       boolean isCarryingMeta = false;
1315       hrs = rsts.get(i).getRegionServer();
1316       regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1317       for (HRegionInfo region : regions) {
1318         if (region.isMetaRegion()) {
1319           isCarryingMeta = true;
1320           break;
1321         }
1322       }
1323       if (isCarryingMeta) {
1324         continue;
1325       }
1326       break;
1327     }
1328 
1329     LOG.info("#regions = " + regions.size());
1330     Iterator<HRegionInfo> it = regions.iterator();
1331     while (it.hasNext()) {
1332       HRegionInfo region = it.next();
1333       if (region.isMetaTable()
1334           || region.getEncodedName().equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1335         it.remove();
1336       }
1337     }
1338     if (regions.size() == 0) return;
1339     HRegionInfo curRegionInfo = regions.get(0);
1340     byte[] startRow = curRegionInfo.getStartKey();
1341     if (startRow == null || startRow.length == 0) {
1342       startRow = new byte[] { 0, 0, 0, 0, 1 };
1343     }
1344     byte[] row = Bytes.incrementBytes(startRow, 1);
1345     // use last 5 bytes because HBaseTestingUtility.createMultiRegions use 5 bytes key
1346     row = Arrays.copyOfRange(row, 3, 8);
1347     long value = 0;
1348     final TableName tableName = TableName.valueOf("table");
1349     byte[] family = Bytes.toBytes("family");
1350     byte[] qualifier = Bytes.toBytes("c1");
1351     long timeStamp = System.currentTimeMillis();
1352     HTableDescriptor htd = new HTableDescriptor(tableName);
1353     htd.addFamily(new HColumnDescriptor(family));
1354     final WAL wal = hrs.getWAL(curRegionInfo);
1355     for (int i = 0; i < NUM_LOG_LINES; i += 1) {
1356       WALEdit e = new WALEdit();
1357       value++;
1358       e.add(new KeyValue(row, family, qualifier, timeStamp, Bytes.toBytes(value)));
1359       wal.append(htd, curRegionInfo, new HLogKey(curRegionInfo.getEncodedNameAsBytes(),
1360           tableName, System.currentTimeMillis()), e, sequenceId, true, null);
1361     }
1362     wal.sync();
1363     wal.shutdown();
1364 
1365     // wait for abort completes
1366     this.abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
1367 
1368     // verify we got the last value
1369     LOG.info("Verification Starts...");
1370     Get g = new Get(row);
1371     Result r = ht.get(g);
1372     long theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
1373     assertEquals(value, theStoredVal);
1374 
1375     // after flush & compaction
1376     LOG.info("Verification after flush...");
1377     TEST_UTIL.getHBaseAdmin().flush(tableName);
1378     TEST_UTIL.getHBaseAdmin().compact(tableName);
1379 
1380     // wait for compaction completes
1381     TEST_UTIL.waitFor(30000, 200, new Waiter.Predicate<Exception>() {
1382       @Override
1383       public boolean evaluate() throws Exception {
1384         return (TEST_UTIL.getHBaseAdmin().getCompactionState(tableName) == CompactionState.NONE);
1385       }
1386     });
1387 
1388     r = ht.get(g);
1389     theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
1390     assertEquals(value, theStoredVal);
1391     ht.close();
1392   }
1393 
1394   @Test(timeout = 300000)
1395   public void testReadWriteSeqIdFiles() throws Exception {
1396     LOG.info("testReadWriteSeqIdFiles");
1397     startCluster(2);
1398     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
1399     HTable ht = installTable(zkw, "table", "family", 10);
1400     FileSystem fs = master.getMasterFileSystem().getFileSystem();
1401     Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), TableName.valueOf("table"));
1402     List<Path> regionDirs = FSUtils.getRegionDirs(fs, tableDir);
1403     long newSeqId = WALSplitter.writeRegionSequenceIdFile(fs, regionDirs.get(0), 1L, 1000L);
1404     WALSplitter.writeRegionSequenceIdFile(fs, regionDirs.get(0) , 1L, 1000L);
1405     assertEquals(newSeqId + 2000,
1406       WALSplitter.writeRegionSequenceIdFile(fs, regionDirs.get(0), 3L, 1000L));
1407     
1408     Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(regionDirs.get(0));
1409     FileStatus[] files = FSUtils.listStatus(fs, editsdir, new PathFilter() {
1410       @Override
1411       public boolean accept(Path p) {
1412         return WALSplitter.isSequenceIdFile(p);
1413       }
1414     });
1415     // only one seqid file should exist
1416     assertEquals(1, files.length);
1417 
1418     // verify all seqId files aren't treated as recovered.edits files
1419     NavigableSet<Path> recoveredEdits = WALSplitter.getSplitEditFilesSorted(fs, regionDirs.get(0));
1420     assertEquals(0, recoveredEdits.size());
1421 
1422     ht.close();
1423   }
1424 
1425   HTable installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs) throws Exception {
1426     return installTable(zkw, tname, fname, nrs, 0);
1427   }
1428 
1429   HTable installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs,
1430       int existingRegions) throws Exception {
1431     // Create a table with regions
1432     TableName table = TableName.valueOf(tname);
1433     byte [] family = Bytes.toBytes(fname);
1434     LOG.info("Creating table with " + nrs + " regions");
1435     HTable ht = TEST_UTIL.createTable(table, family);
1436     int numRegions = TEST_UTIL.createMultiRegions(conf, ht, family, nrs);
1437     assertEquals(nrs, numRegions);
1438       LOG.info("Waiting for no more RIT\n");
1439     blockUntilNoRIT(zkw, master);
1440     // disable-enable cycle to get rid of table's dead regions left behind
1441     // by createMultiRegions
1442     LOG.debug("Disabling table\n");
1443     TEST_UTIL.getHBaseAdmin().disableTable(table);
1444     LOG.debug("Waiting for no more RIT\n");
1445     blockUntilNoRIT(zkw, master);
1446     NavigableSet<String> regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
1447     LOG.debug("Verifying only catalog and namespace regions are assigned\n");
1448     if (regions.size() != 2) {
1449       for (String oregion : regions)
1450         LOG.debug("Region still online: " + oregion);
1451     }
1452     assertEquals(2 + existingRegions, regions.size());
1453     LOG.debug("Enabling table\n");
1454     TEST_UTIL.getHBaseAdmin().enableTable(table);
1455     LOG.debug("Waiting for no more RIT\n");
1456     blockUntilNoRIT(zkw, master);
1457     LOG.debug("Verifying there are " + numRegions + " assigned on cluster\n");
1458     regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
1459     assertEquals(numRegions + 2 + existingRegions, regions.size());
1460     return ht;
1461   }
1462 
1463   void populateDataInTable(int nrows, String fname) throws Exception {
1464     byte [] family = Bytes.toBytes(fname);
1465 
1466     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1467     assertEquals(NUM_RS, rsts.size());
1468 
1469     for (RegionServerThread rst : rsts) {
1470       HRegionServer hrs = rst.getRegionServer();
1471       List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1472       for (HRegionInfo hri : hris) {
1473         if (hri.getTable().isSystemTable()) {
1474           continue;
1475         }
1476         LOG.debug("adding data to rs = " + rst.getName() +
1477             " region = "+ hri.getRegionNameAsString());
1478         HRegion region = hrs.getOnlineRegion(hri.getRegionName());
1479         assertTrue(region != null);
1480         putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
1481       }
1482     }
1483   }
1484 
1485   public void makeWAL(HRegionServer hrs, List<HRegionInfo> regions, String tname, String fname,
1486       int num_edits, int edit_size) throws IOException {
1487     makeWAL(hrs, regions, tname, fname, num_edits, edit_size, true);
1488   }
1489 
1490   public void makeWAL(HRegionServer hrs, List<HRegionInfo> regions, String tname, String fname,
1491       int num_edits, int edit_size, boolean cleanShutdown) throws IOException {
1492     TableName fullTName = TableName.valueOf(tname);
1493     // remove root and meta region
1494     regions.remove(HRegionInfo.FIRST_META_REGIONINFO);
1495     // using one sequenceId for edits across all regions is ok.
1496     final AtomicLong sequenceId = new AtomicLong(10);
1497 
1498 
1499     for(Iterator<HRegionInfo> iter = regions.iterator(); iter.hasNext(); ) {
1500       HRegionInfo regionInfo = iter.next();
1501       if(regionInfo.getTable().isSystemTable()) {
1502          iter.remove();
1503       }
1504     }
1505     HTableDescriptor htd = new HTableDescriptor(fullTName);
1506     byte[] family = Bytes.toBytes(fname);
1507     htd.addFamily(new HColumnDescriptor(family));
1508     byte[] value = new byte[edit_size];
1509 
1510     List<HRegionInfo> hris = new ArrayList<HRegionInfo>();
1511     for (HRegionInfo region : regions) {
1512       if (!region.getTable().getNameAsString().equalsIgnoreCase(tname)) {
1513         continue;
1514       }
1515       hris.add(region);
1516     }
1517     LOG.info("Creating wal edits across " + hris.size() + " regions.");
1518     for (int i = 0; i < edit_size; i++) {
1519       value[i] = (byte) ('a' + (i % 26));
1520     }
1521     int n = hris.size();
1522     int[] counts = new int[n];
1523     // sync every ~30k to line up with desired wal rolls
1524     final int syncEvery = 30 * 1024 / edit_size;
1525     if (n > 0) {
1526       for (int i = 0; i < num_edits; i += 1) {
1527         WALEdit e = new WALEdit();
1528         HRegionInfo curRegionInfo = hris.get(i % n);
1529         final WAL log = hrs.getWAL(curRegionInfo);
1530         byte[] startRow = curRegionInfo.getStartKey();
1531         if (startRow == null || startRow.length == 0) {
1532           startRow = new byte[] { 0, 0, 0, 0, 1 };
1533         }
1534         byte[] row = Bytes.incrementBytes(startRow, counts[i % n]);
1535         row = Arrays.copyOfRange(row, 3, 8); // use last 5 bytes because
1536                                              // HBaseTestingUtility.createMultiRegions use 5 bytes
1537                                              // key
1538         byte[] qualifier = Bytes.toBytes("c" + Integer.toString(i));
1539         e.add(new KeyValue(row, family, qualifier, System.currentTimeMillis(), value));
1540         log.append(htd, curRegionInfo, new HLogKey(curRegionInfo.getEncodedNameAsBytes(), fullTName,
1541             System.currentTimeMillis()), e, sequenceId, true, null);
1542         if (0 == i % syncEvery) {
1543           log.sync();
1544         }
1545         counts[i % n] += 1;
1546       }
1547     }
1548     // done as two passes because the regions might share logs. shutdown is idempotent, but sync
1549     // will cause errors if done after.
1550     for (HRegionInfo info : hris) {
1551       final WAL log = hrs.getWAL(info);
1552       log.sync();
1553     }
1554     if (cleanShutdown) {
1555       for (HRegionInfo info : hris) {
1556         final WAL log = hrs.getWAL(info);
1557         log.shutdown();
1558       }
1559     }
1560     for (int i = 0; i < n; i++) {
1561       LOG.info("region " + hris.get(i).getRegionNameAsString() + " has " + counts[i] + " edits");
1562     }
1563     return;
1564   }
1565 
1566   private int countWAL(Path log, FileSystem fs, Configuration conf)
1567   throws IOException {
1568     int count = 0;
1569     WAL.Reader in = WALFactory.createReader(fs, log, conf);
1570     try {
1571       WAL.Entry e;
1572       while ((e = in.next()) != null) {
1573         if (!WALEdit.isMetaEditFamily(e.getEdit().getCells().get(0))) {
1574           count++;
1575         }
1576       }
1577     } finally {
1578       try {
1579         in.close();
1580       } catch (IOException exception) {
1581         LOG.warn("Problem closing wal: " + exception.getMessage());
1582         LOG.debug("exception details.", exception);
1583       }
1584     }
1585     return count;
1586   }
1587 
1588   private void blockUntilNoRIT(ZooKeeperWatcher zkw, HMaster master)
1589   throws KeeperException, InterruptedException {
1590     ZKAssign.blockUntilNoRIT(zkw);
1591     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
1592   }
1593 
1594   private void putData(HRegion region, byte[] startRow, int numRows, byte [] qf,
1595       byte [] ...families)
1596   throws IOException {
1597     for(int i = 0; i < numRows; i++) {
1598       Put put = new Put(Bytes.add(startRow, Bytes.toBytes(i)));
1599       for(byte [] family : families) {
1600         put.add(family, qf, null);
1601       }
1602       region.put(put);
1603     }
1604   }
1605 
1606   /**
1607    * Load table with puts and deletes with expected values so that we can verify later
1608    */
1609   private void prepareData(final Table t, final byte[] f, final byte[] column) throws IOException {
1610     byte[] k = new byte[3];
1611 
1612     // add puts
1613     List<Put> puts = new ArrayList<>();
1614     for (byte b1 = 'a'; b1 <= 'z'; b1++) {
1615       for (byte b2 = 'a'; b2 <= 'z'; b2++) {
1616         for (byte b3 = 'a'; b3 <= 'z'; b3++) {
1617           k[0] = b1;
1618           k[1] = b2;
1619           k[2] = b3;
1620           Put put = new Put(k);
1621           put.add(f, column, k);
1622           puts.add(put);
1623         }
1624       }
1625     }
1626     t.put(puts);
1627     // add deletes
1628     for (byte b3 = 'a'; b3 <= 'z'; b3++) {
1629       k[0] = 'a';
1630       k[1] = 'a';
1631       k[2] = b3;
1632       Delete del = new Delete(k);
1633       t.delete(del);
1634     }
1635   }
1636 
1637   private void waitForCounter(AtomicLong ctr, long oldval, long newval,
1638       long timems) {
1639     long curt = System.currentTimeMillis();
1640     long endt = curt + timems;
1641     while (curt < endt) {
1642       if (ctr.get() == oldval) {
1643         Thread.yield();
1644         curt = System.currentTimeMillis();
1645       } else {
1646         assertEquals(newval, ctr.get());
1647         return;
1648       }
1649     }
1650     assertTrue(false);
1651   }
1652 
1653   private void abortMaster(MiniHBaseCluster cluster) throws InterruptedException {
1654     for (MasterThread mt : cluster.getLiveMasterThreads()) {
1655       if (mt.getMaster().isActiveMaster()) {
1656         mt.getMaster().abort("Aborting for tests", new Exception("Trace info"));
1657         mt.join();
1658         break;
1659       }
1660     }
1661     LOG.debug("Master is aborted");
1662   }
1663 
1664   /**
1665    * Find a RS that has regions of a table.
1666    * @param hasMetaRegion when true, the returned RS has hbase:meta region as well
1667    * @param tableName
1668    * @return
1669    * @throws Exception
1670    */
1671   private HRegionServer findRSToKill(boolean hasMetaRegion, String tableName) throws Exception {
1672     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1673     int numOfRSs = rsts.size();
1674     List<HRegionInfo> regions = null;
1675     HRegionServer hrs = null;
1676 
1677     for (int i = 0; i < numOfRSs; i++) {
1678       boolean isCarryingMeta = false;
1679       boolean foundTableRegion = false;
1680       hrs = rsts.get(i).getRegionServer();
1681       regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1682       for (HRegionInfo region : regions) {
1683         if (region.isMetaRegion()) {
1684           isCarryingMeta = true;
1685         }
1686         if (tableName == null || region.getTable().getNameAsString().equals(tableName)) {
1687           foundTableRegion = true;
1688         }
1689         if (foundTableRegion && (isCarryingMeta || !hasMetaRegion)) {
1690           break;
1691         }
1692       }
1693       if (isCarryingMeta && hasMetaRegion) {
1694         // clients ask for a RS with META
1695         if (!foundTableRegion) {
1696           final HRegionServer destRS = hrs;
1697           // the RS doesn't have regions of the specified table so we need move one to this RS
1698           List<HRegionInfo> tableRegions =
1699               TEST_UTIL.getHBaseAdmin().getTableRegions(TableName.valueOf(tableName));
1700           final HRegionInfo hri = tableRegions.get(0);
1701           TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
1702             Bytes.toBytes(destRS.getServerName().getServerName()));
1703           // wait for region move completes
1704           final RegionStates regionStates =
1705               TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
1706           TEST_UTIL.waitFor(45000, 200, new Waiter.Predicate<Exception>() {
1707             @Override
1708             public boolean evaluate() throws Exception {
1709               ServerName sn = regionStates.getRegionServerOfRegion(hri);
1710               return (sn != null && sn.equals(destRS.getServerName()));
1711             }
1712           });
1713         }
1714         return hrs;
1715       } else if (hasMetaRegion || isCarryingMeta) {
1716         continue;
1717       }
1718       if (foundTableRegion) break;
1719     }
1720 
1721     return hrs;
1722   }
1723 
1724 }