View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import static org.apache.hadoop.hbase.SplitLogCounters.tot_mgr_wait_for_zk_delete;
22  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_final_transition_failed;
23  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_preempt_task;
24  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_acquired;
25  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_done;
26  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_err;
27  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_resigned;
28  import static org.junit.Assert.assertEquals;
29  import static org.junit.Assert.assertFalse;
30  import static org.junit.Assert.assertTrue;
31  import static org.junit.Assert.fail;
32  
33  import java.io.IOException;
34  import java.util.ArrayList;
35  import java.util.Arrays;
36  import java.util.HashSet;
37  import java.util.Iterator;
38  import java.util.LinkedList;
39  import java.util.List;
40  import java.util.NavigableSet;
41  import java.util.Set;
42  import java.util.concurrent.ExecutorService;
43  import java.util.concurrent.Executors;
44  import java.util.concurrent.Future;
45  import java.util.concurrent.TimeUnit;
46  import java.util.concurrent.TimeoutException;
47  import java.util.concurrent.atomic.AtomicLong;
48  
49  import org.apache.commons.logging.Log;
50  import org.apache.commons.logging.LogFactory;
51  import org.apache.hadoop.conf.Configuration;
52  import org.apache.hadoop.fs.FSDataOutputStream;
53  import org.apache.hadoop.fs.FileStatus;
54  import org.apache.hadoop.fs.FileSystem;
55  import org.apache.hadoop.fs.Path;
56  import org.apache.hadoop.fs.PathFilter;
57  import org.apache.hadoop.hbase.HBaseConfiguration;
58  import org.apache.hadoop.hbase.HBaseTestingUtility;
59  import org.apache.hadoop.hbase.HColumnDescriptor;
60  import org.apache.hadoop.hbase.HConstants;
61  import org.apache.hadoop.hbase.HRegionInfo;
62  import org.apache.hadoop.hbase.HTableDescriptor;
63  import org.apache.hadoop.hbase.KeyValue;
64  import org.apache.hadoop.hbase.testclassification.LargeTests;
65  import org.apache.hadoop.hbase.MiniHBaseCluster;
66  import org.apache.hadoop.hbase.NamespaceDescriptor;
67  import org.apache.hadoop.hbase.ServerName;
68  import org.apache.hadoop.hbase.SplitLogCounters;
69  import org.apache.hadoop.hbase.TableName;
70  import org.apache.hadoop.hbase.Waiter;
71  import org.apache.hadoop.hbase.client.ClusterConnection;
72  import org.apache.hadoop.hbase.client.ConnectionUtils;
73  import org.apache.hadoop.hbase.client.Delete;
74  import org.apache.hadoop.hbase.client.Get;
75  import org.apache.hadoop.hbase.client.HTable;
76  import org.apache.hadoop.hbase.client.Increment;
77  import org.apache.hadoop.hbase.client.NonceGenerator;
78  import org.apache.hadoop.hbase.client.PerClientRandomNonceGenerator;
79  import org.apache.hadoop.hbase.client.Put;
80  import org.apache.hadoop.hbase.client.Result;
81  import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
82  import org.apache.hadoop.hbase.client.Table;
83  import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
84  import org.apache.hadoop.hbase.coordination.ZKSplitLogManagerCoordination;
85  import org.apache.hadoop.hbase.exceptions.OperationConflictException;
86  import org.apache.hadoop.hbase.exceptions.RegionInRecoveryException;
87  import org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch;
88  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
89  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
90  import org.apache.hadoop.hbase.regionserver.HRegion;
91  import org.apache.hadoop.hbase.regionserver.HRegionServer;
92  import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
93  import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
94  import org.apache.hadoop.hbase.wal.DefaultWALProvider;
95  import org.apache.hadoop.hbase.wal.WAL;
96  import org.apache.hadoop.hbase.wal.WALFactory;
97  import org.apache.hadoop.hbase.wal.WALSplitter;
98  import org.apache.hadoop.hbase.util.Bytes;
99  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
100 import org.apache.hadoop.hbase.util.FSUtils;
101 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
102 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
103 import org.apache.hadoop.hbase.util.Threads;
104 import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
105 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
106 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
107 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
108 import org.apache.hadoop.hdfs.MiniDFSCluster;
109 import org.apache.zookeeper.KeeperException;
110 import org.junit.After;
111 import org.junit.AfterClass;
112 import org.junit.Assert;
113 import org.junit.Before;
114 import org.junit.BeforeClass;
115 import org.junit.Test;
116 import org.junit.experimental.categories.Category;
117 
118 @Category(LargeTests.class)
119 @SuppressWarnings("deprecation")
120 public class TestDistributedLogSplitting {
// Log under this test's own class so its output is not attributed to TestSplitLogManager.
private static final Log LOG = LogFactory.getLog(TestDistributedLogSplitting.class);
static {
  // Uncomment the following line if more verbosity is needed for
  // debugging (see HBASE-12285 for details).
  //Logger.getLogger("org.apache.hadoop.hbase").setLevel(Level.DEBUG);

  // test ThreeRSAbort fails under hadoop2 (2.0.2-alpha) if shortcircuit-read (scr) is on. this
  // turns it off for this test.  TODO: Figure out why scr breaks recovery.
  System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
}
132 
133   // Start a cluster with 2 masters and 6 regionservers
134   static final int NUM_MASTERS = 2;
135   static final int NUM_RS = 6;
136 
137   MiniHBaseCluster cluster;
138   HMaster master;
139   Configuration conf;
140   static Configuration originalConf;
141   static HBaseTestingUtility TEST_UTIL;
142   static MiniDFSCluster dfsCluster;
143   static MiniZooKeeperCluster zkCluster;
144 
145   @BeforeClass
146   public static void setup() throws Exception {
147     TEST_UTIL = new HBaseTestingUtility(HBaseConfiguration.create());
148     dfsCluster = TEST_UTIL.startMiniDFSCluster(1);
149     zkCluster = TEST_UTIL.startMiniZKCluster();
150     originalConf = TEST_UTIL.getConfiguration();
151   }
152 
153   @AfterClass
154   public static void tearDown() throws IOException {
155     TEST_UTIL.shutdownMiniZKCluster();
156     TEST_UTIL.shutdownMiniDFSCluster();
157     TEST_UTIL.shutdownMiniHBaseCluster();
158   }
159 
160   private void startCluster(int num_rs) throws Exception {
161     SplitLogCounters.resetCounters();
162     LOG.info("Starting cluster");
163     conf.getLong("hbase.splitlog.max.resubmit", 0);
164     // Make the failure test faster
165     conf.setInt("zookeeper.recovery.retry", 0);
166     conf.setInt(HConstants.REGIONSERVER_INFO_PORT, -1);
167     conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, (float) 100.0); // no load balancing
168     conf.setInt("hbase.regionserver.wal.max.splitters", 3);
169     conf.setInt("hbase.master.maximum.ping.server.attempts", 3);
170     TEST_UTIL.shutdownMiniHBaseCluster();
171     TEST_UTIL = new HBaseTestingUtility(conf);
172     TEST_UTIL.setDFSCluster(dfsCluster);
173     TEST_UTIL.setZkCluster(zkCluster);
174     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, num_rs);
175     cluster = TEST_UTIL.getHBaseCluster();
176     LOG.info("Waiting for active/ready master");
177     cluster.waitForActiveAndReadyMaster();
178     master = cluster.getMaster();
179     while (cluster.getLiveRegionServerThreads().size() < num_rs) {
180       Threads.sleep(1);
181     }
182   }
183 
184   @Before
185   public void before() throws Exception {
186     // refresh configuration
187     conf = HBaseConfiguration.create(originalConf);
188   }
189 
190   @After
191   public void after() throws Exception {
192     try {
193       if (TEST_UTIL.getHBaseCluster() != null) {
194         for (MasterThread mt : TEST_UTIL.getHBaseCluster().getLiveMasterThreads()) {
195           mt.getMaster().abort("closing...", null);
196         }
197       }
198       TEST_UTIL.shutdownMiniHBaseCluster();
199     } finally {
200       TEST_UTIL.getTestFileSystem().delete(FSUtils.getRootDir(TEST_UTIL.getConfiguration()), true);
201       ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
202     }
203   }
204 
205   @Test (timeout=300000)
206   public void testRecoveredEdits() throws Exception {
207     LOG.info("testRecoveredEdits");
208     conf.setLong("hbase.regionserver.hlog.blocksize", 30 * 1024); // create more than one wal
209     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
210     startCluster(NUM_RS);
211 
212     final int NUM_LOG_LINES = 1000;
213     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
214     // turn off load balancing to prevent regions from moving around otherwise
215     // they will consume recovered.edits
216     master.balanceSwitch(false);
217     FileSystem fs = master.getMasterFileSystem().getFileSystem();
218 
219     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
220 
221     Path rootdir = FSUtils.getRootDir(conf);
222 
223     installTable(new ZooKeeperWatcher(conf, "table-creation", null),
224         "table", "family", 40);
225     TableName table = TableName.valueOf("table");
226     List<HRegionInfo> regions = null;
227     HRegionServer hrs = null;
228     for (int i = 0; i < NUM_RS; i++) {
229       boolean foundRs = false;
230       hrs = rsts.get(i).getRegionServer();
231       regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
232       for (HRegionInfo region : regions) {
233         if (region.getTable().getNameAsString().equalsIgnoreCase("table")) {
234           foundRs = true;
235           break;
236         }
237       }
238       if (foundRs) break;
239     }
240     final Path logDir = new Path(rootdir, DefaultWALProvider.getWALDirectoryName(hrs
241         .getServerName().toString()));
242 
243     LOG.info("#regions = " + regions.size());
244     Iterator<HRegionInfo> it = regions.iterator();
245     while (it.hasNext()) {
246       HRegionInfo region = it.next();
247       if (region.getTable().getNamespaceAsString()
248           .equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
249         it.remove();
250       }
251     }
252     
253     makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
254 
255     slm.splitLogDistributed(logDir);
256 
257     int count = 0;
258     for (HRegionInfo hri : regions) {
259 
260       Path tdir = FSUtils.getTableDir(rootdir, table);
261       Path editsdir =
262         WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
263       LOG.debug("checking edits dir " + editsdir);
264       FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {
265         @Override
266         public boolean accept(Path p) {
267           if (WALSplitter.isSequenceIdFile(p)) {
268             return false;
269           }
270           return true;
271         }
272       });
273       assertTrue("edits dir should have more than a single file in it. instead has " + files.length,
274           files.length > 1);
275       for (int i = 0; i < files.length; i++) {
276         int c = countWAL(files[i].getPath(), fs, conf);
277         count += c;
278       }
279       LOG.info(count + " edits in " + files.length + " recovered edits files.");
280     }
281 
282     // check that the log file is moved
283     assertFalse(fs.exists(logDir));
284 
285     assertEquals(NUM_LOG_LINES, count);
286   }
287 
288   @Test(timeout = 300000)
289   public void testLogReplayWithNonMetaRSDown() throws Exception {
290     LOG.info("testLogReplayWithNonMetaRSDown");
291     conf.setLong("hbase.regionserver.hlog.blocksize", 30 * 1024); // create more than one wal
292     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
293     startCluster(NUM_RS);
294     final int NUM_REGIONS_TO_CREATE = 40;
295     final int NUM_LOG_LINES = 1000;
296     // turn off load balancing to prevent regions from moving around otherwise
297     // they will consume recovered.edits
298     master.balanceSwitch(false);
299 
300     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
301     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
302 
303     HRegionServer hrs = findRSToKill(false, "table");
304     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
305     makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
306 
307     // wait for abort completes
308     this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
309     ht.close();
310     zkw.close();
311   }
312 
313   private static class NonceGeneratorWithDups extends PerClientRandomNonceGenerator {
314     private boolean isDups = false;
315     private LinkedList<Long> nonces = new LinkedList<Long>();
316 
317     public void startDups() {
318       isDups = true;
319     }
320 
321     @Override
322     public long newNonce() {
323       long nonce = isDups ? nonces.removeFirst() : super.newNonce();
324       if (!isDups) {
325         nonces.add(nonce);
326       }
327       return nonce;
328     }
329   }
330 
331   @Test(timeout = 300000)
332   public void testNonceRecovery() throws Exception {
333     LOG.info("testNonceRecovery");
334     final String TABLE_NAME = "table";
335     final String FAMILY_NAME = "family";
336     final int NUM_REGIONS_TO_CREATE = 40;
337 
338     conf.setLong("hbase.regionserver.hlog.blocksize", 100*1024);
339     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
340     startCluster(NUM_RS);
341     master.balanceSwitch(false);
342 
343     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
344     HTable ht = installTable(zkw, TABLE_NAME, FAMILY_NAME, NUM_REGIONS_TO_CREATE);
345     NonceGeneratorWithDups ng = new NonceGeneratorWithDups();
346     NonceGenerator oldNg =
347         ConnectionUtils.injectNonceGeneratorForTesting((ClusterConnection)ht.getConnection(), ng);
348 
349     try {
350       List<Increment> reqs = new ArrayList<Increment>();
351       for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
352         HRegionServer hrs = rst.getRegionServer();
353         List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
354         for (HRegionInfo hri : hris) {
355           if (TABLE_NAME.equalsIgnoreCase(hri.getTable().getNameAsString())) {
356             byte[] key = hri.getStartKey();
357             if (key == null || key.length == 0) {
358               key = Bytes.copy(hri.getEndKey());
359               --(key[key.length - 1]);
360             }
361             Increment incr = new Increment(key);
362             incr.addColumn(Bytes.toBytes(FAMILY_NAME), Bytes.toBytes("q"), 1);
363             ht.increment(incr);
364             reqs.add(incr);
365           }
366         }
367       }
368 
369       HRegionServer hrs = findRSToKill(false, "table");
370       abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
371       ng.startDups();
372       for (Increment incr : reqs) {
373         try {
374           ht.increment(incr);
375           fail("should have thrown");
376         } catch (OperationConflictException ope) {
377           LOG.debug("Caught as expected: " + ope.getMessage());
378         }
379       }
380     } finally {
381       ConnectionUtils.injectNonceGeneratorForTesting((ClusterConnection) ht.getConnection(), oldNg);
382       ht.close();
383       zkw.close();
384     }
385   }
386 
387   @Test(timeout = 300000)
388   public void testLogReplayWithMetaRSDown() throws Exception {
389     LOG.info("testRecoveredEditsReplayWithMetaRSDown");
390     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
391     startCluster(NUM_RS);
392     final int NUM_REGIONS_TO_CREATE = 40;
393     final int NUM_LOG_LINES = 1000;
394     // turn off load balancing to prevent regions from moving around otherwise
395     // they will consume recovered.edits
396     master.balanceSwitch(false);
397 
398     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
399     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
400 
401     HRegionServer hrs = findRSToKill(true, "table");
402     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
403     makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
404 
405     this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
406     ht.close();
407     zkw.close();
408   }
409 
410   private void abortRSAndVerifyRecovery(HRegionServer hrs, Table ht, final ZooKeeperWatcher zkw,
411       final int numRegions, final int numofLines) throws Exception {
412 
413     abortRSAndWaitForRecovery(hrs, zkw, numRegions);
414     assertEquals(numofLines, TEST_UTIL.countRows(ht));
415   }
416 
417   private void abortRSAndWaitForRecovery(HRegionServer hrs, final ZooKeeperWatcher zkw,
418       final int numRegions) throws Exception {
419     final MiniHBaseCluster tmpCluster = this.cluster;
420 
421     // abort RS
422     LOG.info("Aborting region server: " + hrs.getServerName());
423     hrs.abort("testing");
424 
425     // wait for abort completes
426     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
427       @Override
428       public boolean evaluate() throws Exception {
429         return (tmpCluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
430       }
431     });
432 
433     // wait for regions come online
434     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
435       @Override
436       public boolean evaluate() throws Exception {
437         return (HBaseTestingUtility.getAllOnlineRegions(tmpCluster).size()
438             >= (numRegions + 1));
439       }
440     });
441 
442     // wait for all regions are fully recovered
443     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
444       @Override
445       public boolean evaluate() throws Exception {
446         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
447           zkw.recoveringRegionsZNode, false);
448         return (recoveringRegions != null && recoveringRegions.size() == 0);
449       }
450     });
451   }
452 
453   @Test(timeout = 300000)
454   public void testMasterStartsUpWithLogSplittingWork() throws Exception {
455     LOG.info("testMasterStartsUpWithLogSplittingWork");
456     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
457     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
458     startCluster(NUM_RS);
459 
460     final int NUM_REGIONS_TO_CREATE = 40;
461     final int NUM_LOG_LINES = 1000;
462     // turn off load balancing to prevent regions from moving around otherwise
463     // they will consume recovered.edits
464     master.balanceSwitch(false);
465 
466     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
467     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
468 
469     HRegionServer hrs = findRSToKill(false, "table");
470     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
471     makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
472 
473     // abort master
474     abortMaster(cluster);
475 
476     // abort RS
477     LOG.info("Aborting region server: " + hrs.getServerName());
478     hrs.abort("testing");
479 
480     // wait for abort completes
481     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
482       @Override
483       public boolean evaluate() throws Exception {
484         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
485       }
486     });
487 
488     Thread.sleep(2000);
489     LOG.info("Current Open Regions:"
490         + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
491 
492     // wait for abort completes
493     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
494       @Override
495       public boolean evaluate() throws Exception {
496         return (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
497           >= (NUM_REGIONS_TO_CREATE + 1));
498       }
499     });
500 
501     LOG.info("Current Open Regions After Master Node Starts Up:"
502         + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
503 
504     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
505 
506     ht.close();
507     zkw.close();
508   }
509 
510   @Test(timeout = 300000)
511   public void testMasterStartsUpWithLogReplayWork() throws Exception {
512     LOG.info("testMasterStartsUpWithLogReplayWork");
513     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
514     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
515     startCluster(NUM_RS);
516 
517     final int NUM_REGIONS_TO_CREATE = 40;
518     final int NUM_LOG_LINES = 1000;
519     // turn off load balancing to prevent regions from moving around otherwise
520     // they will consume recovered.edits
521     master.balanceSwitch(false);
522 
523     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
524     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
525 
526     HRegionServer hrs = findRSToKill(false, "table");
527     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
528     makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
529 
530     // abort master
531     abortMaster(cluster);
532 
533     // abort RS
534     LOG.info("Aborting region server: " + hrs.getServerName());
535     hrs.abort("testing");
536 
537     // wait for the RS dies
538     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
539       @Override
540       public boolean evaluate() throws Exception {
541         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
542       }
543     });
544 
545     Thread.sleep(2000);
546     LOG.info("Current Open Regions:" + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
547 
548     // wait for all regions are fully recovered
549     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
550       @Override
551       public boolean evaluate() throws Exception {
552         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
553           zkw.recoveringRegionsZNode, false);
554         boolean done = recoveringRegions != null && recoveringRegions.size() == 0;
555         if (!done) {
556           LOG.info("Recovering regions: " + recoveringRegions);
557         }
558         return done;
559       }
560     });
561 
562     LOG.info("Current Open Regions After Master Node Starts Up:"
563         + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
564 
565     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
566 
567     ht.close();
568     zkw.close();
569   }
570 
571 
572   @Test(timeout = 300000)
573   public void testLogReplayTwoSequentialRSDown() throws Exception {
574     LOG.info("testRecoveredEditsReplayTwoSequentialRSDown");
575     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
576     startCluster(NUM_RS);
577     final int NUM_REGIONS_TO_CREATE = 40;
578     final int NUM_LOG_LINES = 1000;
579     // turn off load balancing to prevent regions from moving around otherwise
580     // they will consume recovered.edits
581     master.balanceSwitch(false);
582 
583     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
584     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
585     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
586 
587     List<HRegionInfo> regions = null;
588     HRegionServer hrs1 = findRSToKill(false, "table");
589     regions = ProtobufUtil.getOnlineRegions(hrs1.getRSRpcServices());
590 
591     makeWAL(hrs1, regions, "table", "family", NUM_LOG_LINES, 100);
592 
593     // abort RS1
594     LOG.info("Aborting region server: " + hrs1.getServerName());
595     hrs1.abort("testing");
596 
597     // wait for abort completes
598     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
599       @Override
600       public boolean evaluate() throws Exception {
601         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
602       }
603     });
604 
605     // wait for regions come online
606     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
607       @Override
608       public boolean evaluate() throws Exception {
609         return (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
610             >= (NUM_REGIONS_TO_CREATE + 1));
611       }
612     });
613 
614     // sleep a little bit in order to interrupt recovering in the middle
615     Thread.sleep(300);
616     // abort second region server
617     rsts = cluster.getLiveRegionServerThreads();
618     HRegionServer hrs2 = rsts.get(0).getRegionServer();
619     LOG.info("Aborting one more region server: " + hrs2.getServerName());
620     hrs2.abort("testing");
621 
622     // wait for abort completes
623     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
624       @Override
625       public boolean evaluate() throws Exception {
626         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 2));
627       }
628     });
629 
630     // wait for regions come online
631     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
632       @Override
633       public boolean evaluate() throws Exception {
634         return (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
635             >= (NUM_REGIONS_TO_CREATE + 1));
636       }
637     });
638 
639     // wait for all regions are fully recovered
640     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
641       @Override
642       public boolean evaluate() throws Exception {
643         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
644           zkw.recoveringRegionsZNode, false);
645         return (recoveringRegions != null && recoveringRegions.size() == 0);
646       }
647     });
648 
649     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
650     ht.close();
651     zkw.close();
652   }
653 
654   @Test(timeout = 300000)
655   public void testMarkRegionsRecoveringInZK() throws Exception {
656     LOG.info("testMarkRegionsRecoveringInZK");
657     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
658     startCluster(NUM_RS);
659     master.balanceSwitch(false);
660     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
661     final ZooKeeperWatcher zkw = master.getZooKeeper();
662     Table ht = installTable(zkw, "table", "family", 40);
663     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
664 
665     Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
666     HRegionInfo region = null;
667     HRegionServer hrs = null;
668     ServerName firstFailedServer = null;
669     ServerName secondFailedServer = null;
670     for (int i = 0; i < NUM_RS; i++) {
671       hrs = rsts.get(i).getRegionServer();
672       List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
673       if (regions.isEmpty()) continue;
674       region = regions.get(0);
675       regionSet.add(region);
676       firstFailedServer = hrs.getServerName();
677       secondFailedServer = rsts.get((i + 1) % NUM_RS).getRegionServer().getServerName();
678       break;
679     }
680 
681     slm.markRegionsRecovering(firstFailedServer, regionSet);
682     slm.markRegionsRecovering(secondFailedServer, regionSet);
683 
684     List<String> recoveringRegions = ZKUtil.listChildrenNoWatch(zkw,
685       ZKUtil.joinZNode(zkw.recoveringRegionsZNode, region.getEncodedName()));
686 
687     assertEquals(recoveringRegions.size(), 2);
688 
689     // wait for splitLogWorker to mark them up because there is no WAL files recorded in ZK
690     final HRegionServer tmphrs = hrs;
691     TEST_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
692       @Override
693       public boolean evaluate() throws Exception {
694         return (tmphrs.getRecoveringRegions().size() == 0);
695       }
696     });
697     ht.close();
698   }
699 
700   @Test(timeout = 300000)
701   public void testReplayCmd() throws Exception {
702     LOG.info("testReplayCmd");
703     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
704     startCluster(NUM_RS);
705     final int NUM_REGIONS_TO_CREATE = 40;
706     // turn off load balancing to prevent regions from moving around otherwise
707     // they will consume recovered.edits
708     master.balanceSwitch(false);
709 
710     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
711     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
712     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
713 
714     List<HRegionInfo> regions = null;
715     HRegionServer hrs = null;
716     for (int i = 0; i < NUM_RS; i++) {
717       boolean isCarryingMeta = false;
718       hrs = rsts.get(i).getRegionServer();
719       regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
720       for (HRegionInfo region : regions) {
721         if (region.isMetaRegion()) {
722           isCarryingMeta = true;
723           break;
724         }
725       }
726       if (isCarryingMeta) {
727         continue;
728       }
729       if (regions.size() > 0) break;
730     }
731 
732     this.prepareData(ht, Bytes.toBytes("family"), Bytes.toBytes("c1"));
733     String originalCheckSum = TEST_UTIL.checksumRows(ht);
734 
735     // abort RA and trigger replay
736     abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
737 
738     assertEquals("Data should remain after reopening of regions", originalCheckSum,
739       TEST_UTIL.checksumRows(ht));
740 
741     ht.close();
742     zkw.close();
743   }
744 
745   @Test(timeout = 300000)
746   public void testLogReplayForDisablingTable() throws Exception {
747     LOG.info("testLogReplayForDisablingTable");
748     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
749     startCluster(NUM_RS);
750     final int NUM_REGIONS_TO_CREATE = 40;
751     final int NUM_LOG_LINES = 1000;
752 
753     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
754     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
755     Table disablingHT = installTable(zkw, "disableTable", "family", NUM_REGIONS_TO_CREATE);
756     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE, NUM_REGIONS_TO_CREATE);
757 
758     // turn off load balancing to prevent regions from moving around otherwise
759     // they will consume recovered.edits
760     master.balanceSwitch(false);
761 
762     List<HRegionInfo> regions = null;
763     HRegionServer hrs = null;
764     boolean hasRegionsForBothTables = false;
765     String tableName = null;
766     for (int i = 0; i < NUM_RS; i++) {
767       tableName = null;
768       hasRegionsForBothTables = false;
769       boolean isCarryingSystem = false;
770       hrs = rsts.get(i).getRegionServer();
771       regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
772       for (HRegionInfo region : regions) {
773         if (region.getTable().isSystemTable()) {
774           isCarryingSystem = true;
775           break;
776         }
777         if (tableName != null &&
778             !tableName.equalsIgnoreCase(region.getTable().getNameAsString())) {
779           // make sure that we find a RS has online regions for both "table" and "disableTable"
780           hasRegionsForBothTables = true;
781           break;
782         } else if (tableName == null) {
783           tableName = region.getTable().getNameAsString();
784         }
785       }
786       if (isCarryingSystem) {
787         continue;
788       }
789       if (hasRegionsForBothTables) {
790         break;
791       }
792     }
793 
794     // make sure we found a good RS
795     Assert.assertTrue(hasRegionsForBothTables);
796 
797     LOG.info("#regions = " + regions.size());
798     Iterator<HRegionInfo> it = regions.iterator();
799     while (it.hasNext()) {
800       HRegionInfo region = it.next();
801       if (region.isMetaTable()) {
802         it.remove();
803       }
804     }
805     makeWAL(hrs, regions, "disableTable", "family", NUM_LOG_LINES, 100, false);
806     makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
807 
808     LOG.info("Disabling table\n");
809     TEST_UTIL.getHBaseAdmin().disableTable(TableName.valueOf("disableTable"));
810 
811     // abort RS
812     LOG.info("Aborting region server: " + hrs.getServerName());
813     hrs.abort("testing");
814 
815     // wait for abort completes
816     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
817       @Override
818       public boolean evaluate() throws Exception {
819         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
820       }
821     });
822 
823     // wait for regions come online
824     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
825       @Override
826       public boolean evaluate() throws Exception {
827         return (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
828             >= (NUM_REGIONS_TO_CREATE + 1));
829       }
830     });
831 
832     // wait for all regions are fully recovered
833     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
834       @Override
835       public boolean evaluate() throws Exception {
836         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
837           zkw.recoveringRegionsZNode, false);
838         ServerManager serverManager = master.getServerManager();
839         return (!serverManager.areDeadServersInProgress() &&
840             recoveringRegions != null && recoveringRegions.size() == 0);
841       }
842     });
843 
844     int count = 0;
845     FileSystem fs = master.getMasterFileSystem().getFileSystem();
846     Path rootdir = FSUtils.getRootDir(conf);
847     Path tdir = FSUtils.getTableDir(rootdir, TableName.valueOf("disableTable"));
848     for (HRegionInfo hri : regions) {
849       Path editsdir =
850         WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
851       LOG.debug("checking edits dir " + editsdir);
852       if(!fs.exists(editsdir)) continue;
853       FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {
854         @Override
855         public boolean accept(Path p) {
856           if (WALSplitter.isSequenceIdFile(p)) {
857             return false;
858           }
859           return true;
860         }
861       });
862       if(files != null) {
863         for(FileStatus file : files) {
864           int c = countWAL(file.getPath(), fs, conf);
865           count += c;
866           LOG.info(c + " edits in " + file.getPath());
867         }
868       }
869     }
870 
871     LOG.info("Verify edits in recovered.edits files");
872     assertEquals(NUM_LOG_LINES, count);
873     LOG.info("Verify replayed edits");
874     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
875 
876     // clean up
877     for (HRegionInfo hri : regions) {
878       Path editsdir =
879         WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
880       fs.delete(editsdir, true);
881     }
882     disablingHT.close();
883     ht.close();
884     zkw.close();
885   }
886 
887   @Test(timeout = 300000)
888   public void testDisallowWritesInRecovering() throws Exception {
889     LOG.info("testDisallowWritesInRecovering");
890     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
891     conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 3);
892     conf.setBoolean(HConstants.DISALLOW_WRITES_IN_RECOVERING, true);
893     startCluster(NUM_RS);
894     final int NUM_REGIONS_TO_CREATE = 40;
895     // turn off load balancing to prevent regions from moving around otherwise
896     // they will consume recovered.edits
897     master.balanceSwitch(false);
898 
899     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
900     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
901     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
902     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
903 
904     Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
905     HRegionInfo region = null;
906     HRegionServer hrs = null;
907     HRegionServer dstRS = null;
908     for (int i = 0; i < NUM_RS; i++) {
909       hrs = rsts.get(i).getRegionServer();
910       List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
911       if (regions.isEmpty()) continue;
912       region = regions.get(0);
913       if (region.isMetaRegion()) continue;
914       regionSet.add(region);
915       dstRS = rsts.get((i+1) % NUM_RS).getRegionServer();
916       break;
917     }
918 
919     slm.markRegionsRecovering(hrs.getServerName(), regionSet);
920     // move region in order for the region opened in recovering state
921     final HRegionInfo hri = region;
922     final HRegionServer tmpRS = dstRS;
923     TEST_UTIL.getHBaseAdmin().move(region.getEncodedNameAsBytes(),
924       Bytes.toBytes(dstRS.getServerName().getServerName()));
925     // wait for region move completes
926     final RegionStates regionStates =
927         TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
928     TEST_UTIL.waitFor(45000, 200, new Waiter.Predicate<Exception>() {
929       @Override
930       public boolean evaluate() throws Exception {
931         ServerName sn = regionStates.getRegionServerOfRegion(hri);
932         return (sn != null && sn.equals(tmpRS.getServerName()));
933       }
934     });
935 
936     try {
937       byte[] key = region.getStartKey();
938       if (key == null || key.length == 0) {
939         key = new byte[] { 0, 0, 0, 0, 1 };
940       }
941       Put put = new Put(key);
942       put.add(Bytes.toBytes("family"), Bytes.toBytes("c1"), new byte[]{'b'});
943       ht.put(put);
944     } catch (IOException ioe) {
945       Assert.assertTrue(ioe instanceof RetriesExhaustedWithDetailsException);
946       RetriesExhaustedWithDetailsException re = (RetriesExhaustedWithDetailsException) ioe;
947       boolean foundRegionInRecoveryException = false;
948       for (Throwable t : re.getCauses()) {
949         if (t instanceof RegionInRecoveryException) {
950           foundRegionInRecoveryException = true;
951           break;
952         }
953       }
954       Assert.assertTrue(
955         "No RegionInRecoveryException. Following exceptions returned=" + re.getCauses(),
956         foundRegionInRecoveryException);
957     }
958 
959     ht.close();
960     zkw.close();
961   }
962 
963   /**
964    * The original intention of this test was to force an abort of a region
965    * server and to make sure that the failure path in the region servers is
966    * properly evaluated. But it is difficult to ensure that the region server
967    * doesn't finish the log splitting before it aborts. Also now, there is
968    * this code path where the master will preempt the region server when master
969    * detects that the region server has aborted.
970    * @throws Exception
971    */
972   @Test (timeout=300000)
973   public void testWorkerAbort() throws Exception {
974     LOG.info("testWorkerAbort");
975     startCluster(3);
976     final int NUM_LOG_LINES = 10000;
977     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
978     FileSystem fs = master.getMasterFileSystem().getFileSystem();
979 
980     final List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
981     HRegionServer hrs = findRSToKill(false, "table");
982     Path rootdir = FSUtils.getRootDir(conf);
983     final Path logDir = new Path(rootdir,
984         DefaultWALProvider.getWALDirectoryName(hrs.getServerName().toString()));
985 
986     installTable(new ZooKeeperWatcher(conf, "table-creation", null),
987         "table", "family", 40);
988 
989     makeWAL(hrs, ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()),
990       "table", "family", NUM_LOG_LINES, 100);
991 
992     new Thread() {
993       @Override
994       public void run() {
995         waitForCounter(tot_wkr_task_acquired, 0, 1, 1000);
996         for (RegionServerThread rst : rsts) {
997           rst.getRegionServer().abort("testing");
998           break;
999         }
1000       }
1001     }.start();
1002     // slm.splitLogDistributed(logDir);
1003     FileStatus[] logfiles = fs.listStatus(logDir);
1004     TaskBatch batch = new TaskBatch();
1005     slm.enqueueSplitTask(logfiles[0].getPath().toString(), batch);
1006     //waitForCounter but for one of the 2 counters
1007     long curt = System.currentTimeMillis();
1008     long waitTime = 80000;
1009     long endt = curt + waitTime;
1010     while (curt < endt) {
1011       if ((tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
1012           tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
1013           tot_wkr_preempt_task.get()) == 0) {
1014         Thread.yield();
1015         curt = System.currentTimeMillis();
1016       } else {
1017         assertTrue(1 <= (tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
1018             tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
1019             tot_wkr_preempt_task.get()));
1020         return;
1021       }
1022     }
1023     fail("none of the following counters went up in " + waitTime +
1024         " milliseconds - " +
1025         "tot_wkr_task_resigned, tot_wkr_task_err, " +
1026         "tot_wkr_final_transition_failed, tot_wkr_task_done, " +
1027         "tot_wkr_preempt_task");
1028   }
1029 
1030   @Test (timeout=300000)
1031   public void testThreeRSAbort() throws Exception {
1032     LOG.info("testThreeRSAbort");
1033     final int NUM_REGIONS_TO_CREATE = 40;
1034     final int NUM_ROWS_PER_REGION = 100;
1035 
1036     startCluster(NUM_RS); // NUM_RS=6.
1037 
1038     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf,
1039         "distributed log splitting test", null);
1040 
1041     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
1042     populateDataInTable(NUM_ROWS_PER_REGION, "family");
1043 
1044 
1045     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1046     assertEquals(NUM_RS, rsts.size());
1047     rsts.get(0).getRegionServer().abort("testing");
1048     rsts.get(1).getRegionServer().abort("testing");
1049     rsts.get(2).getRegionServer().abort("testing");
1050 
1051     long start = EnvironmentEdgeManager.currentTime();
1052     while (cluster.getLiveRegionServerThreads().size() > (NUM_RS - 3)) {
1053       if (EnvironmentEdgeManager.currentTime() - start > 60000) {
1054         assertTrue(false);
1055       }
1056       Thread.sleep(200);
1057     }
1058 
1059     start = EnvironmentEdgeManager.currentTime();
1060     while (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
1061         < (NUM_REGIONS_TO_CREATE + 1)) {
1062       if (EnvironmentEdgeManager.currentTime() - start > 60000) {
1063         assertTrue("Timedout", false);
1064       }
1065       Thread.sleep(200);
1066     }
1067 
1068     // wait for all regions are fully recovered
1069     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
1070       @Override
1071       public boolean evaluate() throws Exception {
1072         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
1073           zkw.recoveringRegionsZNode, false);
1074         return (recoveringRegions != null && recoveringRegions.size() == 0);
1075       }
1076     });
1077 
1078     assertEquals(NUM_REGIONS_TO_CREATE * NUM_ROWS_PER_REGION,
1079         TEST_UTIL.countRows(ht));
1080     ht.close();
1081     zkw.close();
1082   }
1083 
1084 
1085 
1086   @Test(timeout=30000)
1087   public void testDelayedDeleteOnFailure() throws Exception {
1088     LOG.info("testDelayedDeleteOnFailure");
1089     startCluster(1);
1090     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
1091     final FileSystem fs = master.getMasterFileSystem().getFileSystem();
1092     final Path logDir = new Path(FSUtils.getRootDir(conf), "x");
1093     fs.mkdirs(logDir);
1094     ExecutorService executor = null;
1095     try {
1096       final Path corruptedLogFile = new Path(logDir, "x");
1097       FSDataOutputStream out;
1098       out = fs.create(corruptedLogFile);
1099       out.write(0);
1100       out.write(Bytes.toBytes("corrupted bytes"));
1101       out.close();
1102       ZKSplitLogManagerCoordination coordination =
1103           (ZKSplitLogManagerCoordination) ((BaseCoordinatedStateManager) master
1104               .getCoordinatedStateManager()).getSplitLogManagerCoordination();
1105       coordination.setIgnoreDeleteForTesting(true);
1106       executor = Executors.newSingleThreadExecutor();
1107       Runnable runnable = new Runnable() {
1108        @Override
1109        public void run() {
1110           try {
1111             // since the logDir is a fake, corrupted one, so the split log worker
1112             // will finish it quickly with error, and this call will fail and throw
1113             // an IOException.
1114             slm.splitLogDistributed(logDir);
1115           } catch (IOException ioe) {
1116             try {
1117               assertTrue(fs.exists(corruptedLogFile));
1118               // this call will block waiting for the task to be removed from the
1119               // tasks map which is not going to happen since ignoreZKDeleteForTesting
1120               // is set to true, until it is interrupted.
1121               slm.splitLogDistributed(logDir);
1122             } catch (IOException e) {
1123               assertTrue(Thread.currentThread().isInterrupted());
1124               return;
1125             }
1126             fail("did not get the expected IOException from the 2nd call");
1127           }
1128           fail("did not get the expected IOException from the 1st call");
1129         }
1130       };
1131       Future<?> result = executor.submit(runnable);
1132       try {
1133         result.get(2000, TimeUnit.MILLISECONDS);
1134       } catch (TimeoutException te) {
1135         // it is ok, expected.
1136       }
1137       waitForCounter(tot_mgr_wait_for_zk_delete, 0, 1, 10000);
1138       executor.shutdownNow();
1139       executor = null;
1140 
1141       // make sure the runnable is finished with no exception thrown.
1142       result.get();
1143     } finally {
1144       if (executor != null) {
1145         // interrupt the thread in case the test fails in the middle.
1146         // it has no effect if the thread is already terminated.
1147         executor.shutdownNow();
1148       }
1149       fs.delete(logDir, true);
1150     }
1151   }
1152 
1153   @Test(timeout = 300000)
1154   public void testMetaRecoveryInZK() throws Exception {
1155     LOG.info("testMetaRecoveryInZK");
1156     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
1157     startCluster(NUM_RS);
1158 
1159     // turn off load balancing to prevent regions from moving around otherwise
1160     // they will consume recovered.edits
1161     master.balanceSwitch(false);
1162     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
1163 
1164     // only testing meta recovery in ZK operation
1165     HRegionServer hrs = findRSToKill(true, null);
1166     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1167 
1168     LOG.info("#regions = " + regions.size());
1169     Set<HRegionInfo> tmpRegions = new HashSet<HRegionInfo>();
1170     tmpRegions.add(HRegionInfo.FIRST_META_REGIONINFO);
1171     master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), tmpRegions);
1172     Set<HRegionInfo> userRegionSet = new HashSet<HRegionInfo>();
1173     userRegionSet.addAll(regions);
1174     master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), userRegionSet);
1175     boolean isMetaRegionInRecovery = false;
1176     List<String> recoveringRegions =
1177         zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
1178     for (String curEncodedRegionName : recoveringRegions) {
1179       if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1180         isMetaRegionInRecovery = true;
1181         break;
1182       }
1183     }
1184     assertTrue(isMetaRegionInRecovery);
1185 
1186     master.getMasterFileSystem().splitMetaLog(hrs.getServerName());
1187 
1188     isMetaRegionInRecovery = false;
1189     recoveringRegions =
1190         zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
1191     for (String curEncodedRegionName : recoveringRegions) {
1192       if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1193         isMetaRegionInRecovery = true;
1194         break;
1195       }
1196     }
1197     // meta region should be recovered
1198     assertFalse(isMetaRegionInRecovery);
1199     zkw.close();
1200   }
1201 
1202   @Test(timeout = 300000)
1203   public void testSameVersionUpdatesRecovery() throws Exception {
1204     LOG.info("testSameVersionUpdatesRecovery");
1205     conf.setLong("hbase.regionserver.hlog.blocksize", 15 * 1024);
1206     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
1207     startCluster(NUM_RS);
1208     final AtomicLong sequenceId = new AtomicLong(100);
1209     final int NUM_REGIONS_TO_CREATE = 40;
1210     final int NUM_LOG_LINES = 1000;
1211     // turn off load balancing to prevent regions from moving around otherwise
1212     // they will consume recovered.edits
1213     master.balanceSwitch(false);
1214 
1215     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1216     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
1217     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
1218 
1219     List<HRegionInfo> regions = null;
1220     HRegionServer hrs = null;
1221     for (int i = 0; i < NUM_RS; i++) {
1222       boolean isCarryingMeta = false;
1223       hrs = rsts.get(i).getRegionServer();
1224       regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1225       for (HRegionInfo region : regions) {
1226         if (region.isMetaRegion()) {
1227           isCarryingMeta = true;
1228           break;
1229         }
1230       }
1231       if (isCarryingMeta) {
1232         continue;
1233       }
1234       break;
1235     }
1236 
1237     LOG.info("#regions = " + regions.size());
1238     Iterator<HRegionInfo> it = regions.iterator();
1239     while (it.hasNext()) {
1240       HRegionInfo region = it.next();
1241       if (region.isMetaTable()
1242           || region.getEncodedName().equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1243         it.remove();
1244       }
1245     }
1246     if (regions.size() == 0) return;
1247     HRegionInfo curRegionInfo = regions.get(0);
1248     byte[] startRow = curRegionInfo.getStartKey();
1249     if (startRow == null || startRow.length == 0) {
1250       startRow = new byte[] { 0, 0, 0, 0, 1 };
1251     }
1252     byte[] row = Bytes.incrementBytes(startRow, 1);
1253     // use last 5 bytes because HBaseTestingUtility.createMultiRegions use 5 bytes key
1254     row = Arrays.copyOfRange(row, 3, 8);
1255     long value = 0;
1256     TableName tableName = TableName.valueOf("table");
1257     byte[] family = Bytes.toBytes("family");
1258     byte[] qualifier = Bytes.toBytes("c1");
1259     long timeStamp = System.currentTimeMillis();
1260     HTableDescriptor htd = new HTableDescriptor();
1261     htd.addFamily(new HColumnDescriptor(family));
1262     final WAL wal = hrs.getWAL(curRegionInfo);
1263     for (int i = 0; i < NUM_LOG_LINES; i += 1) {
1264       WALEdit e = new WALEdit();
1265       value++;
1266       e.add(new KeyValue(row, family, qualifier, timeStamp, Bytes.toBytes(value)));
1267       wal.append(htd, curRegionInfo,
1268           new HLogKey(curRegionInfo.getEncodedNameAsBytes(), tableName, System.currentTimeMillis()),
1269           e, sequenceId, true, null);
1270     }
1271     wal.sync();
1272     wal.shutdown();
1273 
1274     // wait for abort completes
1275     this.abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
1276 
1277     // verify we got the last value
1278     LOG.info("Verification Starts...");
1279     Get g = new Get(row);
1280     Result r = ht.get(g);
1281     long theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
1282     assertEquals(value, theStoredVal);
1283 
1284     // after flush
1285     LOG.info("Verification after flush...");
1286     TEST_UTIL.getHBaseAdmin().flush(tableName);
1287     r = ht.get(g);
1288     theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
1289     assertEquals(value, theStoredVal);
1290     ht.close();
1291   }
1292 
1293   @Test(timeout = 300000)
1294   public void testSameVersionUpdatesRecoveryWithCompaction() throws Exception {
1295     LOG.info("testSameVersionUpdatesRecoveryWithWrites");
1296     conf.setLong("hbase.regionserver.hlog.blocksize", 15 * 1024);
1297     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
1298     conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 30 * 1024);
1299     conf.setInt("hbase.hstore.compactionThreshold", 3);
1300     startCluster(NUM_RS);
1301     final AtomicLong sequenceId = new AtomicLong(100);
1302     final int NUM_REGIONS_TO_CREATE = 40;
1303     final int NUM_LOG_LINES = 2000;
1304     // turn off load balancing to prevent regions from moving around otherwise
1305     // they will consume recovered.edits
1306     master.balanceSwitch(false);
1307 
1308     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1309     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
1310     Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
1311 
1312     List<HRegionInfo> regions = null;
1313     HRegionServer hrs = null;
1314     for (int i = 0; i < NUM_RS; i++) {
1315       boolean isCarryingMeta = false;
1316       hrs = rsts.get(i).getRegionServer();
1317       regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1318       for (HRegionInfo region : regions) {
1319         if (region.isMetaRegion()) {
1320           isCarryingMeta = true;
1321           break;
1322         }
1323       }
1324       if (isCarryingMeta) {
1325         continue;
1326       }
1327       break;
1328     }
1329 
1330     LOG.info("#regions = " + regions.size());
1331     Iterator<HRegionInfo> it = regions.iterator();
1332     while (it.hasNext()) {
1333       HRegionInfo region = it.next();
1334       if (region.isMetaTable()
1335           || region.getEncodedName().equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1336         it.remove();
1337       }
1338     }
1339     if (regions.size() == 0) return;
1340     HRegionInfo curRegionInfo = regions.get(0);
1341     byte[] startRow = curRegionInfo.getStartKey();
1342     if (startRow == null || startRow.length == 0) {
1343       startRow = new byte[] { 0, 0, 0, 0, 1 };
1344     }
1345     byte[] row = Bytes.incrementBytes(startRow, 1);
1346     // use last 5 bytes because HBaseTestingUtility.createMultiRegions use 5 bytes key
1347     row = Arrays.copyOfRange(row, 3, 8);
1348     long value = 0;
1349     final TableName tableName = TableName.valueOf("table");
1350     byte[] family = Bytes.toBytes("family");
1351     byte[] qualifier = Bytes.toBytes("c1");
1352     long timeStamp = System.currentTimeMillis();
1353     HTableDescriptor htd = new HTableDescriptor(tableName);
1354     htd.addFamily(new HColumnDescriptor(family));
1355     final WAL wal = hrs.getWAL(curRegionInfo);
1356     for (int i = 0; i < NUM_LOG_LINES; i += 1) {
1357       WALEdit e = new WALEdit();
1358       value++;
1359       e.add(new KeyValue(row, family, qualifier, timeStamp, Bytes.toBytes(value)));
1360       wal.append(htd, curRegionInfo, new HLogKey(curRegionInfo.getEncodedNameAsBytes(),
1361           tableName, System.currentTimeMillis()), e, sequenceId, true, null);
1362     }
1363     wal.sync();
1364     wal.shutdown();
1365 
1366     // wait for abort completes
1367     this.abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
1368 
1369     // verify we got the last value
1370     LOG.info("Verification Starts...");
1371     Get g = new Get(row);
1372     Result r = ht.get(g);
1373     long theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
1374     assertEquals(value, theStoredVal);
1375 
1376     // after flush & compaction
1377     LOG.info("Verification after flush...");
1378     TEST_UTIL.getHBaseAdmin().flush(tableName);
1379     TEST_UTIL.getHBaseAdmin().compact(tableName);
1380 
1381     // wait for compaction completes
1382     TEST_UTIL.waitFor(30000, 200, new Waiter.Predicate<Exception>() {
1383       @Override
1384       public boolean evaluate() throws Exception {
1385         return (TEST_UTIL.getHBaseAdmin().getCompactionState(tableName) == CompactionState.NONE);
1386       }
1387     });
1388 
1389     r = ht.get(g);
1390     theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
1391     assertEquals(value, theStoredVal);
1392     ht.close();
1393   }
1394 
1395   @Test(timeout = 300000)
1396   public void testReadWriteSeqIdFiles() throws Exception {
1397     LOG.info("testReadWriteSeqIdFiles");
1398     startCluster(2);
1399     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
1400     HTable ht = installTable(zkw, "table", "family", 10);
1401     FileSystem fs = master.getMasterFileSystem().getFileSystem();
1402     Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), TableName.valueOf("table"));
1403     List<Path> regionDirs = FSUtils.getRegionDirs(fs, tableDir);
1404     long newSeqId = WALSplitter.writeRegionSequenceIdFile(fs, regionDirs.get(0), 1L, 1000L);
1405     WALSplitter.writeRegionSequenceIdFile(fs, regionDirs.get(0) , 1L, 1000L);
1406     assertEquals(newSeqId + 2000,
1407       WALSplitter.writeRegionSequenceIdFile(fs, regionDirs.get(0), 3L, 1000L));
1408     
1409     Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(regionDirs.get(0));
1410     FileStatus[] files = FSUtils.listStatus(fs, editsdir, new PathFilter() {
1411       @Override
1412       public boolean accept(Path p) {
1413         return WALSplitter.isSequenceIdFile(p);
1414       }
1415     });
1416     // only one seqid file should exist
1417     assertEquals(1, files.length);
1418 
1419     // verify all seqId files aren't treated as recovered.edits files
1420     NavigableSet<Path> recoveredEdits = WALSplitter.getSplitEditFilesSorted(fs, regionDirs.get(0));
1421     assertEquals(0, recoveredEdits.size());
1422 
1423     ht.close();
1424   }
1425 
1426   HTable installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs) throws Exception {
1427     return installTable(zkw, tname, fname, nrs, 0);
1428   }
1429 
1430   HTable installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs,
1431       int existingRegions) throws Exception {
1432     // Create a table with regions
1433     TableName table = TableName.valueOf(tname);
1434     byte [] family = Bytes.toBytes(fname);
1435     LOG.info("Creating table with " + nrs + " regions");
1436     HTable ht = TEST_UTIL.createTable(table, family);
1437     int numRegions = TEST_UTIL.createMultiRegions(conf, ht, family, nrs);
1438     assertEquals(nrs, numRegions);
1439       LOG.info("Waiting for no more RIT\n");
1440     blockUntilNoRIT(zkw, master);
1441     // disable-enable cycle to get rid of table's dead regions left behind
1442     // by createMultiRegions
1443     LOG.debug("Disabling table\n");
1444     TEST_UTIL.getHBaseAdmin().disableTable(table);
1445     LOG.debug("Waiting for no more RIT\n");
1446     blockUntilNoRIT(zkw, master);
1447     NavigableSet<String> regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
1448     LOG.debug("Verifying only catalog and namespace regions are assigned\n");
1449     if (regions.size() != 2) {
1450       for (String oregion : regions)
1451         LOG.debug("Region still online: " + oregion);
1452     }
1453     assertEquals(2 + existingRegions, regions.size());
1454     LOG.debug("Enabling table\n");
1455     TEST_UTIL.getHBaseAdmin().enableTable(table);
1456     LOG.debug("Waiting for no more RIT\n");
1457     blockUntilNoRIT(zkw, master);
1458     LOG.debug("Verifying there are " + numRegions + " assigned on cluster\n");
1459     regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
1460     assertEquals(numRegions + 2 + existingRegions, regions.size());
1461     return ht;
1462   }
1463 
1464   void populateDataInTable(int nrows, String fname) throws Exception {
1465     byte [] family = Bytes.toBytes(fname);
1466 
1467     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1468     assertEquals(NUM_RS, rsts.size());
1469 
1470     for (RegionServerThread rst : rsts) {
1471       HRegionServer hrs = rst.getRegionServer();
1472       List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1473       for (HRegionInfo hri : hris) {
1474         if (hri.getTable().isSystemTable()) {
1475           continue;
1476         }
1477         LOG.debug("adding data to rs = " + rst.getName() +
1478             " region = "+ hri.getRegionNameAsString());
1479         HRegion region = hrs.getOnlineRegion(hri.getRegionName());
1480         assertTrue(region != null);
1481         putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
1482       }
1483     }
1484   }
1485 
1486   public void makeWAL(HRegionServer hrs, List<HRegionInfo> regions, String tname, String fname,
1487       int num_edits, int edit_size) throws IOException {
1488     makeWAL(hrs, regions, tname, fname, num_edits, edit_size, true);
1489   }
1490 
1491   public void makeWAL(HRegionServer hrs, List<HRegionInfo> regions, String tname, String fname,
1492       int num_edits, int edit_size, boolean cleanShutdown) throws IOException {
1493     TableName fullTName = TableName.valueOf(tname);
1494     // remove root and meta region
1495     regions.remove(HRegionInfo.FIRST_META_REGIONINFO);
1496     // using one sequenceId for edits across all regions is ok.
1497     final AtomicLong sequenceId = new AtomicLong(10);
1498 
1499 
1500     for(Iterator<HRegionInfo> iter = regions.iterator(); iter.hasNext(); ) {
1501       HRegionInfo regionInfo = iter.next();
1502       if(regionInfo.getTable().isSystemTable()) {
1503          iter.remove();
1504       }
1505     }
1506     HTableDescriptor htd = new HTableDescriptor(fullTName);
1507     byte[] family = Bytes.toBytes(fname);
1508     htd.addFamily(new HColumnDescriptor(family));
1509     byte[] value = new byte[edit_size];
1510 
1511     List<HRegionInfo> hris = new ArrayList<HRegionInfo>();
1512     for (HRegionInfo region : regions) {
1513       if (!region.getTable().getNameAsString().equalsIgnoreCase(tname)) {
1514         continue;
1515       }
1516       hris.add(region);
1517     }
1518     LOG.info("Creating wal edits across " + hris.size() + " regions.");
1519     for (int i = 0; i < edit_size; i++) {
1520       value[i] = (byte) ('a' + (i % 26));
1521     }
1522     int n = hris.size();
1523     int[] counts = new int[n];
1524     // sync every ~30k to line up with desired wal rolls
1525     final int syncEvery = 30 * 1024 / edit_size;
1526     if (n > 0) {
1527       for (int i = 0; i < num_edits; i += 1) {
1528         WALEdit e = new WALEdit();
1529         HRegionInfo curRegionInfo = hris.get(i % n);
1530         final WAL log = hrs.getWAL(curRegionInfo);
1531         byte[] startRow = curRegionInfo.getStartKey();
1532         if (startRow == null || startRow.length == 0) {
1533           startRow = new byte[] { 0, 0, 0, 0, 1 };
1534         }
1535         byte[] row = Bytes.incrementBytes(startRow, counts[i % n]);
1536         row = Arrays.copyOfRange(row, 3, 8); // use last 5 bytes because
1537                                              // HBaseTestingUtility.createMultiRegions use 5 bytes
1538                                              // key
1539         byte[] qualifier = Bytes.toBytes("c" + Integer.toString(i));
1540         e.add(new KeyValue(row, family, qualifier, System.currentTimeMillis(), value));
1541         log.append(htd, curRegionInfo, new HLogKey(curRegionInfo.getEncodedNameAsBytes(), fullTName,
1542             System.currentTimeMillis()), e, sequenceId, true, null);
1543         if (0 == i % syncEvery) {
1544           log.sync();
1545         }
1546         counts[i % n] += 1;
1547       }
1548     }
1549     // done as two passes because the regions might share logs. shutdown is idempotent, but sync
1550     // will cause errors if done after.
1551     for (HRegionInfo info : hris) {
1552       final WAL log = hrs.getWAL(info);
1553       log.sync();
1554     }
1555     if (cleanShutdown) {
1556       for (HRegionInfo info : hris) {
1557         final WAL log = hrs.getWAL(info);
1558         log.shutdown();
1559       }
1560     }
1561     for (int i = 0; i < n; i++) {
1562       LOG.info("region " + hris.get(i).getRegionNameAsString() + " has " + counts[i] + " edits");
1563     }
1564     return;
1565   }
1566 
1567   private int countWAL(Path log, FileSystem fs, Configuration conf)
1568   throws IOException {
1569     int count = 0;
1570     WAL.Reader in = WALFactory.createReader(fs, log, conf);
1571     try {
1572       WAL.Entry e;
1573       while ((e = in.next()) != null) {
1574         if (!WALEdit.isMetaEditFamily(e.getEdit().getCells().get(0))) {
1575           count++;
1576         }
1577       }
1578     } finally {
1579       try {
1580         in.close();
1581       } catch (IOException exception) {
1582         LOG.warn("Problem closing wal: " + exception.getMessage());
1583         LOG.debug("exception details.", exception);
1584       }
1585     }
1586     return count;
1587   }
1588 
1589   private void blockUntilNoRIT(ZooKeeperWatcher zkw, HMaster master)
1590   throws KeeperException, InterruptedException {
1591     ZKAssign.blockUntilNoRIT(zkw);
1592     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
1593   }
1594 
1595   private void putData(HRegion region, byte[] startRow, int numRows, byte [] qf,
1596       byte [] ...families)
1597   throws IOException {
1598     for(int i = 0; i < numRows; i++) {
1599       Put put = new Put(Bytes.add(startRow, Bytes.toBytes(i)));
1600       for(byte [] family : families) {
1601         put.add(family, qf, null);
1602       }
1603       region.put(put);
1604     }
1605   }
1606 
1607   /**
1608    * Load table with puts and deletes with expected values so that we can verify later
1609    */
1610   private void prepareData(final Table t, final byte[] f, final byte[] column) throws IOException {
1611     byte[] k = new byte[3];
1612 
1613     // add puts
1614     List<Put> puts = new ArrayList<>();
1615     for (byte b1 = 'a'; b1 <= 'z'; b1++) {
1616       for (byte b2 = 'a'; b2 <= 'z'; b2++) {
1617         for (byte b3 = 'a'; b3 <= 'z'; b3++) {
1618           k[0] = b1;
1619           k[1] = b2;
1620           k[2] = b3;
1621           Put put = new Put(k);
1622           put.add(f, column, k);
1623           puts.add(put);
1624         }
1625       }
1626     }
1627     t.put(puts);
1628     // add deletes
1629     for (byte b3 = 'a'; b3 <= 'z'; b3++) {
1630       k[0] = 'a';
1631       k[1] = 'a';
1632       k[2] = b3;
1633       Delete del = new Delete(k);
1634       t.delete(del);
1635     }
1636   }
1637 
1638   private void waitForCounter(AtomicLong ctr, long oldval, long newval,
1639       long timems) {
1640     long curt = System.currentTimeMillis();
1641     long endt = curt + timems;
1642     while (curt < endt) {
1643       if (ctr.get() == oldval) {
1644         Thread.yield();
1645         curt = System.currentTimeMillis();
1646       } else {
1647         assertEquals(newval, ctr.get());
1648         return;
1649       }
1650     }
1651     assertTrue(false);
1652   }
1653 
1654   private void abortMaster(MiniHBaseCluster cluster) throws InterruptedException {
1655     for (MasterThread mt : cluster.getLiveMasterThreads()) {
1656       if (mt.getMaster().isActiveMaster()) {
1657         mt.getMaster().abort("Aborting for tests", new Exception("Trace info"));
1658         mt.join();
1659         break;
1660       }
1661     }
1662     LOG.debug("Master is aborted");
1663   }
1664 
1665   /**
1666    * Find a RS that has regions of a table.
1667    * @param hasMetaRegion when true, the returned RS has hbase:meta region as well
1668    * @param tableName
1669    * @return
1670    * @throws Exception
1671    */
1672   private HRegionServer findRSToKill(boolean hasMetaRegion, String tableName) throws Exception {
1673     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1674     int numOfRSs = rsts.size();
1675     List<HRegionInfo> regions = null;
1676     HRegionServer hrs = null;
1677 
1678     for (int i = 0; i < numOfRSs; i++) {
1679       boolean isCarryingMeta = false;
1680       boolean foundTableRegion = false;
1681       hrs = rsts.get(i).getRegionServer();
1682       regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1683       for (HRegionInfo region : regions) {
1684         if (region.isMetaRegion()) {
1685           isCarryingMeta = true;
1686         }
1687         if (tableName == null || region.getTable().getNameAsString().equals(tableName)) {
1688           foundTableRegion = true;
1689         }
1690         if (foundTableRegion && (isCarryingMeta || !hasMetaRegion)) {
1691           break;
1692         }
1693       }
1694       if (isCarryingMeta && hasMetaRegion) {
1695         // clients ask for a RS with META
1696         if (!foundTableRegion) {
1697           final HRegionServer destRS = hrs;
1698           // the RS doesn't have regions of the specified table so we need move one to this RS
1699           List<HRegionInfo> tableRegions =
1700               TEST_UTIL.getHBaseAdmin().getTableRegions(TableName.valueOf(tableName));
1701           final HRegionInfo hri = tableRegions.get(0);
1702           TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
1703             Bytes.toBytes(destRS.getServerName().getServerName()));
1704           // wait for region move completes
1705           final RegionStates regionStates =
1706               TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
1707           TEST_UTIL.waitFor(45000, 200, new Waiter.Predicate<Exception>() {
1708             @Override
1709             public boolean evaluate() throws Exception {
1710               ServerName sn = regionStates.getRegionServerOfRegion(hri);
1711               return (sn != null && sn.equals(destRS.getServerName()));
1712             }
1713           });
1714         }
1715         return hrs;
1716       } else if (hasMetaRegion || isCarryingMeta) {
1717         continue;
1718       }
1719       if (foundTableRegion) break;
1720     }
1721 
1722     return hrs;
1723   }
1724 
1725 }