View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import static org.apache.hadoop.hbase.SplitLogCounters.tot_mgr_wait_for_zk_delete;
22  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_final_transition_failed;
23  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_preempt_task;
24  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_acquired;
25  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_done;
26  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_err;
27  import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_resigned;
28  import static org.junit.Assert.*;
29  import static org.junit.Assert.assertEquals;
30  import static org.junit.Assert.assertFalse;
31  import static org.junit.Assert.assertTrue;
32  import static org.junit.Assert.fail;
33  
34  import java.io.IOException;
35  import java.util.ArrayList;
36  import java.util.Arrays;
37  import java.util.HashSet;
38  import java.util.Iterator;
39  import java.util.LinkedList;
40  import java.util.List;
41  import java.util.NavigableSet;
42  import java.util.Set;
43  import java.util.TreeSet;
44  import java.util.concurrent.ExecutorService;
45  import java.util.concurrent.Executors;
46  import java.util.concurrent.Future;
47  import java.util.concurrent.TimeUnit;
48  import java.util.concurrent.TimeoutException;
49  import java.util.concurrent.atomic.AtomicLong;
50  
51  import org.apache.commons.logging.Log;
52  import org.apache.commons.logging.LogFactory;
53  import org.apache.hadoop.conf.Configuration;
54  import org.apache.hadoop.fs.FSDataOutputStream;
55  import org.apache.hadoop.fs.FileStatus;
56  import org.apache.hadoop.fs.FileSystem;
57  import org.apache.hadoop.fs.Path;
58  import org.apache.hadoop.hbase.HColumnDescriptor;
59  import org.apache.hadoop.hbase.TableName;
60  import org.apache.hadoop.hbase.HBaseConfiguration;
61  import org.apache.hadoop.hbase.HBaseTestingUtility;
62  import org.apache.hadoop.hbase.HConstants;
63  import org.apache.hadoop.hbase.HRegionInfo;
64  import org.apache.hadoop.hbase.HTableDescriptor;
65  import org.apache.hadoop.hbase.KeyValue;
66  import org.apache.hadoop.hbase.LargeTests;
67  import org.apache.hadoop.hbase.MiniHBaseCluster;
68  import org.apache.hadoop.hbase.NamespaceDescriptor;
69  import org.apache.hadoop.hbase.ServerName;
70  import org.apache.hadoop.hbase.SplitLogCounters;
71  import org.apache.hadoop.hbase.Waiter;
72  import org.apache.hadoop.hbase.client.Delete;
73  import org.apache.hadoop.hbase.client.Get;
74  import org.apache.hadoop.hbase.client.HConnectionManager;
75  import org.apache.hadoop.hbase.client.HTable;
76  import org.apache.hadoop.hbase.client.Increment;
77  import org.apache.hadoop.hbase.client.NonceGenerator;
78  import org.apache.hadoop.hbase.client.PerClientRandomNonceGenerator;
79  import org.apache.hadoop.hbase.client.Put;
80  import org.apache.hadoop.hbase.client.Result;
81  import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
82  import org.apache.hadoop.hbase.exceptions.OperationConflictException;
83  import org.apache.hadoop.hbase.exceptions.RegionInRecoveryException;
84  import org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch;
85  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
86  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
87  import org.apache.hadoop.hbase.regionserver.HRegion;
88  import org.apache.hadoop.hbase.regionserver.HRegionServer;
89  import org.apache.hadoop.hbase.regionserver.wal.HLog;
90  import org.apache.hadoop.hbase.regionserver.wal.HLogFactory;
91  import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
92  import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
93  import org.apache.hadoop.hbase.util.Bytes;
94  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
95  import org.apache.hadoop.hbase.util.FSUtils;
96  import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
97  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
98  import org.apache.hadoop.hbase.util.Threads;
99  import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
100 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
101 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
102 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
103 import org.apache.hadoop.hdfs.MiniDFSCluster;
104 import org.apache.log4j.Level;
105 import org.apache.log4j.Logger;
106 import org.apache.zookeeper.KeeperException;
107 import org.junit.After;
108 import org.junit.AfterClass;
109 import org.junit.Assert;
110 import org.junit.Before;
111 import org.junit.BeforeClass;
112 import org.junit.Test;
113 import org.junit.experimental.categories.Category;
114 
115 @Category(LargeTests.class)
116 public class TestDistributedLogSplitting {
/** Logger for this test class. */
private static final Log LOG = LogFactory.getLog(TestDistributedLogSplitting.class);
static {
  Logger.getLogger("org.apache.hadoop.hbase").setLevel(Level.DEBUG);

  // test ThreeRSAbort fails under hadoop2 (2.0.2-alpha) if shortcircuit-read (scr) is on. this
  // turns it off for this test.  TODO: Figure out why scr breaks recovery.
  System.setProperty("hbase.tests.use.shortcircuit.reads", "false");

}

// Start a cluster with 2 masters and 6 regionservers
static final int NUM_MASTERS = 2;
static final int NUM_RS = 6;

/** Mini HBase cluster of the current test; assigned in startCluster(). */
MiniHBaseCluster cluster;
/** Master obtained from the cluster in startCluster(). */
HMaster master;
/** Per-test configuration; recreated from originalConf in before(). */
Configuration conf;
/** Configuration captured in setup() and used as the template for every test. */
static Configuration originalConf;
/** Testing utility; created in setup() and replaced in startCluster(). */
static HBaseTestingUtility TEST_UTIL;
/** DFS mini cluster started in setup() and handed to each new TEST_UTIL. */
static MiniDFSCluster dfsCluster;
/** ZooKeeper mini cluster started in setup() and handed to each new TEST_UTIL. */
static MiniZooKeeperCluster zkCluster;
138 
139   @BeforeClass
140   public static void setup() throws Exception {
141     TEST_UTIL = new HBaseTestingUtility(HBaseConfiguration.create());
142     dfsCluster = TEST_UTIL.startMiniDFSCluster(1);
143     zkCluster = TEST_UTIL.startMiniZKCluster();
144     originalConf = TEST_UTIL.getConfiguration();
145   }
146 
147   @AfterClass
148   public static void tearDown() throws IOException {
149     TEST_UTIL.shutdownMiniZKCluster();
150     TEST_UTIL.shutdownMiniDFSCluster();
151   }
152 
153   private void startCluster(int num_rs) throws Exception {
154     SplitLogCounters.resetCounters();
155     LOG.info("Starting cluster");
156     conf.getLong("hbase.splitlog.max.resubmit", 0);
157     // Make the failure test faster
158     conf.setInt("zookeeper.recovery.retry", 0);
159     conf.setInt(HConstants.REGIONSERVER_INFO_PORT, -1);
160     conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, (float) 100.0); // no load balancing
161     conf.setInt("hbase.regionserver.wal.max.splitters", 3);
162     TEST_UTIL = new HBaseTestingUtility(conf);
163     TEST_UTIL.setDFSCluster(dfsCluster);
164     TEST_UTIL.setZkCluster(zkCluster);
165     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, num_rs);
166     cluster = TEST_UTIL.getHBaseCluster();
167     LOG.info("Waiting for active/ready master");
168     cluster.waitForActiveAndReadyMaster();
169     master = cluster.getMaster();
170     while (cluster.getLiveRegionServerThreads().size() < num_rs) {
171       Threads.sleep(1);
172     }
173   }
174 
175   @Before
176   public void before() throws Exception {
177     // refresh configuration
178     conf = HBaseConfiguration.create(originalConf);
179   }
180   
181   @After
182   public void after() throws Exception {
183     try {
184       if (TEST_UTIL.getHBaseCluster() != null) {
185         for (MasterThread mt : TEST_UTIL.getHBaseCluster().getLiveMasterThreads()) {
186           mt.getMaster().abort("closing...", null);
187         }
188       }
189       TEST_UTIL.shutdownMiniHBaseCluster();
190     } finally {
191       TEST_UTIL.getTestFileSystem().delete(FSUtils.getRootDir(TEST_UTIL.getConfiguration()), true);
192       ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
193     }
194   }
195   
196   @Test (timeout=300000)
197   public void testRecoveredEdits() throws Exception {
198     LOG.info("testRecoveredEdits");
199     conf.setLong("hbase.regionserver.hlog.blocksize", 30 * 1024); // create more than one wal
200     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
201     startCluster(NUM_RS);
202 
203     final int NUM_LOG_LINES = 1000;
204     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
205     // turn off load balancing to prevent regions from moving around otherwise
206     // they will consume recovered.edits
207     master.balanceSwitch(false);
208     FileSystem fs = master.getMasterFileSystem().getFileSystem();
209 
210     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
211 
212     Path rootdir = FSUtils.getRootDir(conf);
213 
214     installTable(new ZooKeeperWatcher(conf, "table-creation", null),
215         "table", "family", 40);
216     TableName table = TableName.valueOf("table");
217     List<HRegionInfo> regions = null;
218     HRegionServer hrs = null;
219     for (int i = 0; i < NUM_RS; i++) {
220       boolean foundRs = false;
221       hrs = rsts.get(i).getRegionServer();
222       regions = ProtobufUtil.getOnlineRegions(hrs);
223       for (HRegionInfo region : regions) {
224         if (region.getTable().getNameAsString().equalsIgnoreCase("table")) {
225           foundRs = true;
226           break;
227         }
228       }
229       if (foundRs) break;
230     }
231     final Path logDir = new Path(rootdir, HLogUtil.getHLogDirectoryName(hrs
232         .getServerName().toString()));
233 
234     LOG.info("#regions = " + regions.size());
235     Iterator<HRegionInfo> it = regions.iterator();
236     while (it.hasNext()) {
237       HRegionInfo region = it.next();
238       if (region.getTable().getNamespaceAsString()
239           .equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
240         it.remove();
241       }
242     }
243     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
244 
245     slm.splitLogDistributed(logDir);
246 
247     int count = 0;
248     for (HRegionInfo hri : regions) {
249 
250       Path tdir = FSUtils.getTableDir(rootdir, table);
251       @SuppressWarnings("deprecation")
252       Path editsdir =
253         HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
254       LOG.debug("checking edits dir " + editsdir);
255       FileStatus[] files = fs.listStatus(editsdir);
256       assertTrue(files.length > 1);
257       for (int i = 0; i < files.length; i++) {
258         int c = countHLog(files[i].getPath(), fs, conf);
259         count += c;
260       }
261       LOG.info(count + " edits in " + files.length + " recovered edits files.");
262     }
263     assertEquals(NUM_LOG_LINES, count);
264   }
265 
266   @Test(timeout = 300000)
267   public void testLogReplayWithNonMetaRSDown() throws Exception {
268     LOG.info("testLogReplayWithNonMetaRSDown");
269     conf.setLong("hbase.regionserver.hlog.blocksize", 30 * 1024); // create more than one wal
270     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
271     startCluster(NUM_RS);
272     final int NUM_REGIONS_TO_CREATE = 40;
273     final int NUM_LOG_LINES = 1000;
274     // turn off load balancing to prevent regions from moving around otherwise
275     // they will consume recovered.edits
276     master.balanceSwitch(false);
277 
278     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
279     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
280 
281     HRegionServer hrs = findRSToKill(false, "table");
282     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
283     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
284 
285     // wait for abort completes
286     this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
287     ht.close();
288     zkw.close();
289   }
290 
291   private static class NonceGeneratorWithDups extends PerClientRandomNonceGenerator {
292     private boolean isDups = false;
293     private LinkedList<Long> nonces = new LinkedList<Long>();
294 
295     public void startDups() {
296       isDups = true;
297     }
298 
299     @Override
300     public long newNonce() {
301       long nonce = isDups ? nonces.removeFirst() : super.newNonce();
302       if (!isDups) {
303         nonces.add(nonce);
304       }
305       return nonce;
306     }
307   }
308 
309   @Test(timeout = 300000)
310   public void testNonceRecovery() throws Exception {
311     LOG.info("testNonceRecovery");
312     final String TABLE_NAME = "table";
313     final String FAMILY_NAME = "family";
314     final int NUM_REGIONS_TO_CREATE = 40;
315 
316     conf.setLong("hbase.regionserver.hlog.blocksize", 100*1024);
317     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
318     startCluster(NUM_RS);
319     master.balanceSwitch(false);
320 
321     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
322     HTable ht = installTable(zkw, TABLE_NAME, FAMILY_NAME, NUM_REGIONS_TO_CREATE);
323     NonceGeneratorWithDups ng = new NonceGeneratorWithDups();
324     NonceGenerator oldNg =
325         HConnectionManager.injectNonceGeneratorForTesting(ht.getConnection(), ng);
326 
327     try {
328       List<Increment> reqs = new ArrayList<Increment>();
329       for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
330         HRegionServer hrs = rst.getRegionServer();
331         List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs);
332         for (HRegionInfo hri : hris) {
333           if (TABLE_NAME.equalsIgnoreCase(hri.getTable().getNameAsString())) {
334             byte[] key = hri.getStartKey();
335             if (key == null || key.length == 0) {
336               key = Bytes.copy(hri.getEndKey());
337               --(key[key.length - 1]);
338             }
339             Increment incr = new Increment(key);
340             incr.addColumn(Bytes.toBytes(FAMILY_NAME), Bytes.toBytes("q"), 1);
341             ht.increment(incr);
342             reqs.add(incr);
343           }
344         }
345       }
346 
347       HRegionServer hrs = findRSToKill(false, "table");
348       abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
349       ng.startDups();
350       for (Increment incr : reqs) {
351         try {
352           ht.increment(incr);
353           fail("should have thrown");
354         } catch (OperationConflictException ope) {
355           LOG.debug("Caught as expected: " + ope.getMessage());
356         }
357       }
358     } finally {
359       HConnectionManager.injectNonceGeneratorForTesting(ht.getConnection(), oldNg);
360       ht.close();
361       zkw.close();
362     }
363   }
364 
365   @Test(timeout = 300000)
366   public void testLogReplayWithMetaRSDown() throws Exception {
367     LOG.info("testRecoveredEditsReplayWithMetaRSDown");
368     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
369     startCluster(NUM_RS);
370     final int NUM_REGIONS_TO_CREATE = 40;
371     final int NUM_LOG_LINES = 1000;
372     // turn off load balancing to prevent regions from moving around otherwise
373     // they will consume recovered.edits
374     master.balanceSwitch(false);
375 
376     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
377     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
378 
379     HRegionServer hrs = findRSToKill(true, "table");
380     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
381     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
382 
383     this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
384     ht.close();
385     zkw.close();
386   }
387 
388   private void abortRSAndVerifyRecovery(HRegionServer hrs, HTable ht, final ZooKeeperWatcher zkw,
389       final int numRegions, final int numofLines) throws Exception {
390 
391     abortRSAndWaitForRecovery(hrs, zkw, numRegions);
392     assertEquals(numofLines, TEST_UTIL.countRows(ht));
393   }
394 
395   private void abortRSAndWaitForRecovery(HRegionServer hrs, final ZooKeeperWatcher zkw,
396       final int numRegions) throws Exception {
397     final MiniHBaseCluster tmpCluster = this.cluster;
398 
399     // abort RS
400     LOG.info("Aborting region server: " + hrs.getServerName());
401     hrs.abort("testing");
402 
403     // wait for abort completes
404     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
405       @Override
406       public boolean evaluate() throws Exception {
407         return (tmpCluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
408       }
409     });
410 
411     // wait for regions come online
412     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
413       @Override
414       public boolean evaluate() throws Exception {
415         return (getAllOnlineRegions(tmpCluster).size() >= (numRegions + 1));
416       }
417     });
418 
419     // wait for all regions are fully recovered
420     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
421       @Override
422       public boolean evaluate() throws Exception {
423         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
424           zkw.recoveringRegionsZNode, false);
425         return (recoveringRegions != null && recoveringRegions.size() == 0);
426       }
427     });
428   }
429 
430   @Test(timeout = 300000)
431   public void testMasterStartsUpWithLogSplittingWork() throws Exception {
432     LOG.info("testMasterStartsUpWithLogSplittingWork");
433     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
434     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
435     startCluster(NUM_RS);
436 
437     final int NUM_REGIONS_TO_CREATE = 40;
438     final int NUM_LOG_LINES = 1000;
439     // turn off load balancing to prevent regions from moving around otherwise
440     // they will consume recovered.edits
441     master.balanceSwitch(false);
442 
443     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
444     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
445     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
446 
447     HRegionServer hrs = findRSToKill(false, "table");
448     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
449     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
450 
451     // abort master
452     abortMaster(cluster);
453 
454     // abort RS
455     LOG.info("Aborting region server: " + hrs.getServerName());
456     hrs.abort("testing");
457 
458     // wait for abort completes
459     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
460       @Override
461       public boolean evaluate() throws Exception {
462         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
463       }
464     });
465 
466     Thread.sleep(2000);
467     LOG.info("Current Open Regions:" + getAllOnlineRegions(cluster).size());
468     
469     startMasterAndWaitUntilLogSplit(cluster);
470     
471     // wait for abort completes
472     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
473       @Override
474       public boolean evaluate() throws Exception {
475         return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
476       }
477     });
478 
479     LOG.info("Current Open Regions After Master Node Starts Up:"
480         + getAllOnlineRegions(cluster).size());
481 
482     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
483 
484     ht.close();
485     zkw.close();
486   }
487   
488   @Test(timeout = 300000)
489   public void testMasterStartsUpWithLogReplayWork() throws Exception {
490     LOG.info("testMasterStartsUpWithLogReplayWork");
491     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
492     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
493     startCluster(NUM_RS);
494 
495     final int NUM_REGIONS_TO_CREATE = 40;
496     final int NUM_LOG_LINES = 1000;
497     // turn off load balancing to prevent regions from moving around otherwise
498     // they will consume recovered.edits
499     master.balanceSwitch(false);
500 
501     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
502     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
503     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
504 
505     HRegionServer hrs = findRSToKill(false, "table");
506     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
507     makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
508 
509     // abort master
510     abortMaster(cluster);
511 
512     // abort RS
513     LOG.info("Aborting region server: " + hrs.getServerName());
514     hrs.abort("testing");
515 
516     // wait for the RS dies
517     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
518       @Override
519       public boolean evaluate() throws Exception {
520         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
521       }
522     });
523 
524     Thread.sleep(2000);
525     LOG.info("Current Open Regions:" + getAllOnlineRegions(cluster).size());
526 
527     startMasterAndWaitUntilLogSplit(cluster);
528 
529     // wait for all regions are fully recovered
530     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
531       @Override
532       public boolean evaluate() throws Exception {
533         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
534           zkw.recoveringRegionsZNode, false);
535         return (recoveringRegions != null && recoveringRegions.size() == 0);
536       }
537     });
538 
539     LOG.info("Current Open Regions After Master Node Starts Up:"
540         + getAllOnlineRegions(cluster).size());
541 
542     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
543 
544     ht.close();
545     zkw.close();
546   }
547 
548 
549   @Test(timeout = 300000)
550   public void testLogReplayTwoSequentialRSDown() throws Exception {
551     LOG.info("testRecoveredEditsReplayTwoSequentialRSDown");
552     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
553     startCluster(NUM_RS);
554     final int NUM_REGIONS_TO_CREATE = 40;
555     final int NUM_LOG_LINES = 1000;
556     // turn off load balancing to prevent regions from moving around otherwise
557     // they will consume recovered.edits
558     master.balanceSwitch(false);
559 
560     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
561     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
562     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
563 
564     List<HRegionInfo> regions = null;
565     HRegionServer hrs1 = findRSToKill(false, "table");
566     regions = ProtobufUtil.getOnlineRegions(hrs1);
567 
568     makeHLog(hrs1.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
569 
570     // abort RS1
571     LOG.info("Aborting region server: " + hrs1.getServerName());
572     hrs1.abort("testing");
573 
574     // wait for abort completes
575     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
576       @Override
577       public boolean evaluate() throws Exception {
578         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
579       }
580     });
581 
582     // wait for regions come online
583     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
584       @Override
585       public boolean evaluate() throws Exception {
586         return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
587       }
588     });
589 
590     // sleep a little bit in order to interrupt recovering in the middle
591     Thread.sleep(300);
592     // abort second region server
593     rsts = cluster.getLiveRegionServerThreads();
594     HRegionServer hrs2 = rsts.get(0).getRegionServer();
595     LOG.info("Aborting one more region server: " + hrs2.getServerName());
596     hrs2.abort("testing");
597 
598     // wait for abort completes
599     TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
600       @Override
601       public boolean evaluate() throws Exception {
602         return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 2));
603       }
604     });
605 
606     // wait for regions come online
607     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
608       @Override
609       public boolean evaluate() throws Exception {
610         return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
611       }
612     });
613 
614     // wait for all regions are fully recovered
615     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
616       @Override
617       public boolean evaluate() throws Exception {
618         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
619           zkw.recoveringRegionsZNode, false);
620         return (recoveringRegions != null && recoveringRegions.size() == 0);
621       }
622     });
623 
624     assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
625     ht.close();
626     zkw.close();
627   }
628 
629   @Test(timeout = 300000)
630   public void testMarkRegionsRecoveringInZK() throws Exception {
631     LOG.info("testMarkRegionsRecoveringInZK");
632     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
633     startCluster(NUM_RS);
634     master.balanceSwitch(false);
635     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
636     final ZooKeeperWatcher zkw = master.getZooKeeperWatcher();
637     HTable ht = installTable(zkw, "table", "family", 40);
638     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
639 
640     Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
641     HRegionInfo region = null;
642     HRegionServer hrs = null;
643     ServerName firstFailedServer = null;
644     ServerName secondFailedServer = null;
645     for (int i = 0; i < NUM_RS; i++) {
646       hrs = rsts.get(i).getRegionServer();
647       List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
648       if (regions.isEmpty()) continue;
649       region = regions.get(0);
650       regionSet.add(region);
651       firstFailedServer = hrs.getServerName();
652       secondFailedServer = rsts.get((i + 1) % NUM_RS).getRegionServer().getServerName();
653       break;
654     }
655 
656     slm.markRegionsRecoveringInZK(firstFailedServer, regionSet);
657     slm.markRegionsRecoveringInZK(secondFailedServer, regionSet);
658 
659     List<String> recoveringRegions = ZKUtil.listChildrenNoWatch(zkw,
660       ZKUtil.joinZNode(zkw.recoveringRegionsZNode, region.getEncodedName()));
661 
662     assertEquals(recoveringRegions.size(), 2);
663 
664     // wait for splitLogWorker to mark them up because there is no WAL files recorded in ZK
665     final HRegionServer tmphrs = hrs;
666     TEST_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
667       @Override
668       public boolean evaluate() throws Exception {
669         return (tmphrs.getRecoveringRegions().size() == 0);
670       }
671     });
672     ht.close();
673     zkw.close();
674   }
675 
676   @Test(timeout = 300000)
677   public void testReplayCmd() throws Exception {
678     LOG.info("testReplayCmd");
679     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
680     startCluster(NUM_RS);
681     final int NUM_REGIONS_TO_CREATE = 40;
682     // turn off load balancing to prevent regions from moving around otherwise
683     // they will consume recovered.edits
684     master.balanceSwitch(false);
685 
686     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
687     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
688     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
689 
690     List<HRegionInfo> regions = null;
691     HRegionServer hrs = null;
692     for (int i = 0; i < NUM_RS; i++) {
693       boolean isCarryingMeta = false;
694       hrs = rsts.get(i).getRegionServer();
695       regions = ProtobufUtil.getOnlineRegions(hrs);
696       for (HRegionInfo region : regions) {
697         if (region.isMetaRegion()) {
698           isCarryingMeta = true;
699           break;
700         }
701       }
702       if (isCarryingMeta) {
703         continue;
704       }
705       if (regions.size() > 0) break;
706     }
707 
708     this.prepareData(ht, Bytes.toBytes("family"), Bytes.toBytes("c1"));
709     String originalCheckSum = TEST_UTIL.checksumRows(ht);
710     
711     // abort RA and trigger replay
712     abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
713 
714     assertEquals("Data should remain after reopening of regions", originalCheckSum,
715       TEST_UTIL.checksumRows(ht));
716 
717     ht.close();
718     zkw.close();
719   }
720 
  /**
   * Exercises distributed log replay when one of the affected tables is disabled before the
   * hosting region server is aborted.
   * <p>
   * Two tables ("disableTable" and "table") are installed, {@code NUM_LOG_LINES} edits are
   * appended for each of them to the WAL of a region server that hosts regions of both,
   * "disableTable" is disabled and the server is aborted. Once recovery has finished the test
   * asserts that {@code NUM_LOG_LINES} edits are present in the recovered.edits directories under
   * "disableTable" and that {@code NUM_LOG_LINES} rows can be counted in "table".
   *
   * @throws Exception if cluster setup, recovery, or any of the waits fails
   */
  @Test(timeout = 300000)
  public void testLogReplayForDisablingTable() throws Exception {
    LOG.info("testLogReplayForDisablingTable");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;

    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    HTable disablingHT = installTable(zkw, "disableTable", "family", NUM_REGIONS_TO_CREATE);
    // The regions of "disableTable" are already online at this point, so they are passed as
    // existingRegions for installTable's region-count assertions.
    HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE, NUM_REGIONS_TO_CREATE);

    // turn off load balancing to prevent regions from moving around otherwise
    // they will consume recovered.edits
    master.balanceSwitch(false);

    // Pick a region server that hosts regions of two different tables and for which no system
    // table region was seen while scanning its online regions.
    List<HRegionInfo> regions = null;
    HRegionServer hrs = null;
    boolean hasRegionsForBothTables = false;
    String tableName = null;
    for (int i = 0; i < NUM_RS; i++) {
      // reset the per-server state
      tableName = null;
      hasRegionsForBothTables = false;
      boolean isCarryingSystem = false;
      hrs = rsts.get(i).getRegionServer();
      regions = ProtobufUtil.getOnlineRegions(hrs);
      for (HRegionInfo region : regions) {
        if (region.getTable().isSystemTable()) {
          isCarryingSystem = true;
          break;
        }
        // tableName holds the first table name seen on this server; a region with a different
        // table name means the server hosts regions of two tables.
        if (tableName != null &&
            !tableName.equalsIgnoreCase(region.getTable().getNameAsString())) {
          // make sure that we find a RS has online regions for both "table" and "disableTable"
          hasRegionsForBothTables = true;
          break;
        } else if (tableName == null) {
          tableName = region.getTable().getNameAsString();
        }
      }
      if (isCarryingSystem) {
        continue;
      }
      if (hasRegionsForBothTables) {
        break;
      }
    }

    // make sure we found a good RS
    Assert.assertTrue(hasRegionsForBothTables);

    LOG.info("#regions = " + regions.size());
    // drop any meta-table regions from the list before generating edits
    Iterator<HRegionInfo> it = regions.iterator();
    while (it.hasNext()) {
      HRegionInfo region = it.next();
      if (region.isMetaTable()) {
        it.remove();
      }
    }
    // closeLog=false keeps the WAL open for the second call, which appends to the same WAL and
    // closes it (the six-argument overload passes closeLog=true).
    makeHLog(hrs.getWAL(), regions, "disableTable", "family", NUM_LOG_LINES, 100, false);
    makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);

    LOG.info("Disabling table\n");
    TEST_UTIL.getHBaseAdmin().disableTable(Bytes.toBytes("disableTable"));

    // abort RS
    LOG.info("Aborting region server: " + hrs.getServerName());
    hrs.abort("testing");

    // wait for the abort to complete: at most NUM_RS - 1 live region server threads remain
    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
      }
    });

    // wait for regions to come online
    // NOTE(review): the "+ 1" presumably accounts for a system region next to the 40 regions of
    // the still-enabled table -- confirm.
    TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
      }
    });

    // wait until all regions are fully recovered: no dead server is still being processed and
    // the recovering-regions znode has no children
    TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
          zkw.recoveringRegionsZNode, false);
        ServerManager serverManager = master.getServerManager();
        return (!serverManager.areDeadServersInProgress() &&
            recoveringRegions != null && recoveringRegions.size() == 0);
      }
    });

    // Sum the edits found in the recovered.edits directory of every candidate region, resolved
    // against the table directory of "disableTable".
    int count = 0;
    FileSystem fs = master.getMasterFileSystem().getFileSystem();
    Path rootdir = FSUtils.getRootDir(conf);
    Path tdir = FSUtils.getTableDir(rootdir, TableName.valueOf("disableTable"));
    for (HRegionInfo hri : regions) {
      @SuppressWarnings("deprecation")
      Path editsdir =
        HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
      LOG.debug("checking edits dir " + editsdir);
      // regions without a recovered.edits directory contribute nothing
      if(!fs.exists(editsdir)) continue;
      FileStatus[] files = fs.listStatus(editsdir);
      if(files != null) {
        for(FileStatus file : files) {
          int c = countHLog(file.getPath(), fs, conf);
          count += c;
          LOG.info(c + " edits in " + file.getPath());
        }
      }
    }

    LOG.info("Verify edits in recovered.edits files");
    assertEquals(NUM_LOG_LINES, count);
    LOG.info("Verify replayed edits");
    assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));

    // clean up: remove the recovered.edits directories inspected above
    for (HRegionInfo hri : regions) {
      @SuppressWarnings("deprecation")
      Path editsdir =
        HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
      fs.delete(editsdir, true);
    }
    disablingHT.close();
    ht.close();
    zkw.close();
  }
855 
856   @Test(timeout = 300000)
857   public void testDisallowWritesInRecovering() throws Exception {
858     LOG.info("testDisallowWritesInRecovering");
859     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
860     conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 3);
861     conf.setBoolean(HConstants.DISALLOW_WRITES_IN_RECOVERING, true);
862     startCluster(NUM_RS);
863     final int NUM_REGIONS_TO_CREATE = 40;
864     // turn off load balancing to prevent regions from moving around otherwise
865     // they will consume recovered.edits
866     master.balanceSwitch(false);
867 
868     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
869     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
870     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
871     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
872 
873     Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
874     HRegionInfo region = null;
875     HRegionServer hrs = null;
876     HRegionServer dstRS = null;
877     for (int i = 0; i < NUM_RS; i++) {
878       hrs = rsts.get(i).getRegionServer();
879       List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
880       if (regions.isEmpty()) continue;
881       region = regions.get(0);
882       regionSet.add(region);
883       dstRS = rsts.get((i+1) % NUM_RS).getRegionServer();
884       break;
885     }
886     
887     slm.markRegionsRecoveringInZK(hrs.getServerName(), regionSet);
888     // move region in order for the region opened in recovering state
889     final HRegionInfo hri = region;
890     final HRegionServer tmpRS = dstRS;
891     TEST_UTIL.getHBaseAdmin().move(region.getEncodedNameAsBytes(),
892       Bytes.toBytes(dstRS.getServerName().getServerName()));
893     // wait for region move completes
894     final RegionStates regionStates =
895         TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
896     TEST_UTIL.waitFor(45000, 200, new Waiter.Predicate<Exception>() {
897       @Override
898       public boolean evaluate() throws Exception {
899         ServerName sn = regionStates.getRegionServerOfRegion(hri);
900         return (sn != null && sn.equals(tmpRS.getServerName()));
901       }
902     });
903     
904     try {
905       byte[] key = region.getStartKey();
906       if (key == null || key.length == 0) {
907         key = new byte[] { 0, 0, 0, 0, 1 };
908       }
909       ht.setAutoFlush(true, true);
910       Put put = new Put(key);
911       put.add(Bytes.toBytes("family"), Bytes.toBytes("c1"), new byte[]{'b'});
912       ht.put(put);
913       ht.close();
914     } catch (IOException ioe) {
915       Assert.assertTrue(ioe instanceof RetriesExhaustedWithDetailsException);
916       RetriesExhaustedWithDetailsException re = (RetriesExhaustedWithDetailsException) ioe;
917       boolean foundRegionInRecoveryException = false;
918       for (Throwable t : re.getCauses()) {
919         if (t instanceof RegionInRecoveryException) {
920           foundRegionInRecoveryException = true;
921           break;
922         }
923       }
924       Assert.assertTrue(
925         "No RegionInRecoveryException. Following exceptions returned=" + re.getCauses(),
926         foundRegionInRecoveryException);
927     }
928 
929     zkw.close();
930   }
931 
932   /**
933    * The original intention of this test was to force an abort of a region
934    * server and to make sure that the failure path in the region servers is
935    * properly evaluated. But it is difficult to ensure that the region server
936    * doesn't finish the log splitting before it aborts. Also now, there is
937    * this code path where the master will preempt the region server when master
938    * detects that the region server has aborted.
939    * @throws Exception
940    */
941   @Test (timeout=300000)
942   public void testWorkerAbort() throws Exception {
943     LOG.info("testWorkerAbort");
944     startCluster(3);
945     final int NUM_LOG_LINES = 10000;
946     final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
947     FileSystem fs = master.getMasterFileSystem().getFileSystem();
948 
949     final List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
950     HRegionServer hrs = findRSToKill(false, "table");
951     Path rootdir = FSUtils.getRootDir(conf);
952     final Path logDir = new Path(rootdir,
953         HLogUtil.getHLogDirectoryName(hrs.getServerName().toString()));
954 
955     installTable(new ZooKeeperWatcher(conf, "table-creation", null),
956         "table", "family", 40);
957 
958     makeHLog(hrs.getWAL(), ProtobufUtil.getOnlineRegions(hrs), "table", "family", NUM_LOG_LINES,
959       100);
960 
961     new Thread() {
962       public void run() {
963         waitForCounter(tot_wkr_task_acquired, 0, 1, 1000);
964         for (RegionServerThread rst : rsts) {
965           rst.getRegionServer().abort("testing");
966           break;
967         }
968       }
969     }.start();
970     // slm.splitLogDistributed(logDir);
971     FileStatus[] logfiles = fs.listStatus(logDir);
972     TaskBatch batch = new TaskBatch();
973     slm.enqueueSplitTask(logfiles[0].getPath().toString(), batch);
974     //waitForCounter but for one of the 2 counters
975     long curt = System.currentTimeMillis();
976     long waitTime = 80000;
977     long endt = curt + waitTime;
978     while (curt < endt) {
979       if ((tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
980           tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
981           tot_wkr_preempt_task.get()) == 0) {
982         Thread.yield();
983         curt = System.currentTimeMillis();
984       } else {
985         assertTrue(1 <= (tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
986             tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
987             tot_wkr_preempt_task.get()));
988         return;
989       }
990     }
991     fail("none of the following counters went up in " + waitTime +
992         " milliseconds - " +
993         "tot_wkr_task_resigned, tot_wkr_task_err, " +
994         "tot_wkr_final_transition_failed, tot_wkr_task_done, " +
995         "tot_wkr_preempt_task");
996   }
997 
998   @Test (timeout=300000)
999   public void testThreeRSAbort() throws Exception {
1000     LOG.info("testThreeRSAbort");
1001     final int NUM_REGIONS_TO_CREATE = 40;
1002     final int NUM_ROWS_PER_REGION = 100;
1003 
1004     startCluster(NUM_RS); // NUM_RS=6.
1005 
1006     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf,
1007         "distributed log splitting test", null);
1008 
1009     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
1010     populateDataInTable(NUM_ROWS_PER_REGION, "family");
1011 
1012 
1013     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1014     assertEquals(NUM_RS, rsts.size());
1015     rsts.get(0).getRegionServer().abort("testing");
1016     rsts.get(1).getRegionServer().abort("testing");
1017     rsts.get(2).getRegionServer().abort("testing");
1018 
1019     long start = EnvironmentEdgeManager.currentTimeMillis();
1020     while (cluster.getLiveRegionServerThreads().size() > (NUM_RS - 3)) {
1021       if (EnvironmentEdgeManager.currentTimeMillis() - start > 60000) {
1022         assertTrue(false);
1023       }
1024       Thread.sleep(200);
1025     }
1026 
1027     start = EnvironmentEdgeManager.currentTimeMillis();
1028     while (getAllOnlineRegions(cluster).size() < (NUM_REGIONS_TO_CREATE + 1)) {
1029       if (EnvironmentEdgeManager.currentTimeMillis() - start > 60000) {
1030         assertTrue("Timedout", false);
1031       }
1032       Thread.sleep(200);
1033     }
1034 
1035     // wait for all regions are fully recovered
1036     TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
1037       @Override
1038       public boolean evaluate() throws Exception {
1039         List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
1040           zkw.recoveringRegionsZNode, false);
1041         return (recoveringRegions != null && recoveringRegions.size() == 0);
1042       }
1043     });
1044 
1045     assertEquals(NUM_REGIONS_TO_CREATE * NUM_ROWS_PER_REGION,
1046         TEST_UTIL.countRows(ht));
1047     ht.close();
1048     zkw.close();
1049   }
1050 
1051 
1052 
  /**
   * Checks the behaviour of {@code splitLogDistributed} on a directory holding a corrupted log
   * file while ZooKeeper deletes are ignored ({@code ignoreZKDeleteForTesting}).
   * <p>
   * A worker thread calls {@code splitLogDistributed} twice: the first call is expected to throw
   * an {@link IOException} and leave the corrupted file in place; the second call is expected to
   * block until the thread is interrupted and then throw an {@link IOException}. The test thread
   * waits for the manager counter {@code tot_mgr_wait_for_zk_delete} to move from 0 to 1,
   * interrupts the worker through {@code shutdownNow()} and finally calls {@code result.get()}
   * to make sure the worker finished without a failure.
   *
   * @throws Exception if setup fails or the worker's runnable failed
   */
  @Test(timeout=30000)
  public void testDelayedDeleteOnFailure() throws Exception {
    LOG.info("testDelayedDeleteOnFailure");
    startCluster(1);
    final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
    final FileSystem fs = master.getMasterFileSystem().getFileSystem();
    // fake log directory "x" under the root dir, holding a single garbage file also named "x"
    final Path logDir = new Path(FSUtils.getRootDir(conf), "x");
    fs.mkdirs(logDir);
    ExecutorService executor = null;
    try {
      final Path corruptedLogFile = new Path(logDir, "x");
      FSDataOutputStream out;
      out = fs.create(corruptedLogFile);
      out.write(0);
      out.write(Bytes.toBytes("corrupted bytes"));
      out.close();
      slm.ignoreZKDeleteForTesting = true;
      executor = Executors.newSingleThreadExecutor();
      Runnable runnable = new Runnable() {
       @Override
       public void run() {
          try {
            // since the logDir is a fake, corrupted one, so the split log worker
            // will finish it quickly with error, and this call will fail and throw
            // an IOException.
            slm.splitLogDistributed(logDir);
          } catch (IOException ioe) {
            try {
              // the failed split must not have removed the corrupted file
              assertTrue(fs.exists(corruptedLogFile));
              // this call will block waiting for the task to be removed from the
              // tasks map which is not going to happen since ignoreZKDeleteForTesting
              // is set to true, until it is interrupted.
              slm.splitLogDistributed(logDir);
            } catch (IOException e) {
              // expected exit: the second call ended because this thread was interrupted
              assertTrue(Thread.currentThread().isInterrupted());
              return;
            }
            fail("did not get the expected IOException from the 2nd call");
          }
          fail("did not get the expected IOException from the 1st call");
        }
      };
      Future<?> result = executor.submit(runnable);
      try {
        // the worker is expected to still be blocked in the second call after two seconds
        result.get(2000, TimeUnit.MILLISECONDS);
      } catch (TimeoutException te) {
        // it is ok, expected.
      }
      waitForCounter(tot_mgr_wait_for_zk_delete, 0, 1, 10000);
      // interrupts the worker, which unblocks the second splitLogDistributed call
      executor.shutdownNow();
      // cleared so that the finally block does not shut the executor down a second time
      executor = null;

      // make sure the runnable is finished with no exception thrown.
      result.get();
    } finally {
      if (executor != null) {
        // interrupt the thread in case the test fails in the middle.
        // it has no effect if the thread is already terminated.
        executor.shutdownNow();
      }
      fs.delete(logDir, true);
    }
  }
1116 
1117   @Test(timeout = 300000)
1118   public void testMetaRecoveryInZK() throws Exception {
1119     LOG.info("testMetaRecoveryInZK");
1120     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
1121     startCluster(NUM_RS);
1122 
1123     // turn off load balancing to prevent regions from moving around otherwise
1124     // they will consume recovered.edits
1125     master.balanceSwitch(false);
1126     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
1127     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1128 
1129     // only testing meta recovery in ZK operation
1130     HRegionServer hrs = findRSToKill(true, null);
1131     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
1132 
1133     LOG.info("#regions = " + regions.size());
1134     Set<HRegionInfo> tmpRegions = new HashSet<HRegionInfo>();
1135     tmpRegions.add(HRegionInfo.FIRST_META_REGIONINFO);
1136     master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), tmpRegions);
1137     Set<HRegionInfo> userRegionSet = new HashSet<HRegionInfo>();
1138     userRegionSet.addAll(regions);
1139     master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), userRegionSet);
1140     boolean isMetaRegionInRecovery = false;
1141     List<String> recoveringRegions =
1142         zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
1143     for (String curEncodedRegionName : recoveringRegions) {
1144       if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1145         isMetaRegionInRecovery = true;
1146         break;
1147       }
1148     }
1149     assertTrue(isMetaRegionInRecovery);
1150 
1151     master.getMasterFileSystem().splitMetaLog(hrs.getServerName());
1152     
1153     isMetaRegionInRecovery = false;
1154     recoveringRegions =
1155         zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
1156     for (String curEncodedRegionName : recoveringRegions) {
1157       if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1158         isMetaRegionInRecovery = true;
1159         break;
1160       }
1161     }
1162     // meta region should be recovered
1163     assertFalse(isMetaRegionInRecovery);
1164     zkw.close();
1165   }
1166 
1167   @Test(timeout = 300000)
1168   public void testSameVersionUpdatesRecovery() throws Exception {
1169     LOG.info("testSameVersionUpdatesRecovery");
1170     conf.setLong("hbase.regionserver.hlog.blocksize", 15 * 1024);
1171     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
1172     conf.setInt("hfile.format.version", 3);
1173     startCluster(NUM_RS);
1174     final AtomicLong sequenceId = new AtomicLong(100);
1175     final int NUM_REGIONS_TO_CREATE = 40;
1176     final int NUM_LOG_LINES = 1000;
1177     // turn off load balancing to prevent regions from moving around otherwise
1178     // they will consume recovered.edits
1179     master.balanceSwitch(false);
1180 
1181     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1182     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
1183     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
1184 
1185     List<HRegionInfo> regions = null;
1186     HRegionServer hrs = null;
1187     for (int i = 0; i < NUM_RS; i++) {
1188       boolean isCarryingMeta = false;
1189       hrs = rsts.get(i).getRegionServer();
1190       regions = ProtobufUtil.getOnlineRegions(hrs);
1191       for (HRegionInfo region : regions) {
1192         if (region.isMetaRegion()) {
1193           isCarryingMeta = true;
1194           break;
1195         }
1196       }
1197       if (isCarryingMeta) {
1198         continue;
1199       }
1200       break;
1201     }
1202 
1203     LOG.info("#regions = " + regions.size());
1204     Iterator<HRegionInfo> it = regions.iterator();
1205     while (it.hasNext()) {
1206       HRegionInfo region = it.next();
1207       if (region.isMetaTable()
1208           || region.getEncodedName().equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1209         it.remove();
1210       }
1211     }
1212     if (regions.size() == 0) return;
1213     HRegionInfo curRegionInfo = regions.get(0);
1214     byte[] startRow = curRegionInfo.getStartKey();
1215     if (startRow == null || startRow.length == 0) {
1216       startRow = new byte[] { 0, 0, 0, 0, 1 };
1217     }
1218     byte[] row = Bytes.incrementBytes(startRow, 1);
1219     // use last 5 bytes because HBaseTestingUtility.createMultiRegions use 5 bytes key
1220     row = Arrays.copyOfRange(row, 3, 8);
1221     long value = 0;
1222     byte[] tableName = Bytes.toBytes("table");
1223     byte[] family = Bytes.toBytes("family");
1224     byte[] qualifier = Bytes.toBytes("c1");
1225     long timeStamp = System.currentTimeMillis();
1226     HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
1227     htd.addFamily(new HColumnDescriptor(family));
1228     for (int i = 0; i < NUM_LOG_LINES; i += 1) {
1229       WALEdit e = new WALEdit();
1230       value++;
1231       e.add(new KeyValue(row, family, qualifier, timeStamp, Bytes.toBytes(value)));
1232       hrs.getWAL().append(curRegionInfo, TableName.valueOf(tableName), e, 
1233         System.currentTimeMillis(), htd, sequenceId);
1234     }
1235     hrs.getWAL().sync();
1236     hrs.getWAL().close();
1237 
1238     // wait for abort completes
1239     this.abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
1240 
1241     // verify we got the last value
1242     LOG.info("Verification Starts...");
1243     Get g = new Get(row);
1244     Result r = ht.get(g);
1245     long theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
1246     assertEquals(value, theStoredVal);
1247 
1248     // after flush
1249     LOG.info("Verification after flush...");
1250     TEST_UTIL.getHBaseAdmin().flush(tableName);
1251     r = ht.get(g);
1252     theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
1253     assertEquals(value, theStoredVal);
1254     ht.close();
1255   }
1256 
1257   @Test(timeout = 300000)
1258   public void testSameVersionUpdatesRecoveryWithCompaction() throws Exception {
1259     LOG.info("testSameVersionUpdatesRecoveryWithWrites");
1260     conf.setLong("hbase.regionserver.hlog.blocksize", 15 * 1024);
1261     conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
1262     conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 30 * 1024);
1263     conf.setInt("hbase.hstore.compactionThreshold", 3);
1264     conf.setInt("hfile.format.version", 3);
1265     startCluster(NUM_RS);
1266     final AtomicLong sequenceId = new AtomicLong(100);
1267     final int NUM_REGIONS_TO_CREATE = 40;
1268     final int NUM_LOG_LINES = 1000;
1269     // turn off load balancing to prevent regions from moving around otherwise
1270     // they will consume recovered.edits
1271     master.balanceSwitch(false);
1272 
1273     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1274     final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
1275     HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
1276 
1277     List<HRegionInfo> regions = null;
1278     HRegionServer hrs = null;
1279     for (int i = 0; i < NUM_RS; i++) {
1280       boolean isCarryingMeta = false;
1281       hrs = rsts.get(i).getRegionServer();
1282       regions = ProtobufUtil.getOnlineRegions(hrs);
1283       for (HRegionInfo region : regions) {
1284         if (region.isMetaRegion()) {
1285           isCarryingMeta = true;
1286           break;
1287         }
1288       }
1289       if (isCarryingMeta) {
1290         continue;
1291       }
1292       break;
1293     }
1294 
1295     LOG.info("#regions = " + regions.size());
1296     Iterator<HRegionInfo> it = regions.iterator();
1297     while (it.hasNext()) {
1298       HRegionInfo region = it.next();
1299       if (region.isMetaTable()
1300           || region.getEncodedName().equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1301         it.remove();
1302       }
1303     }
1304     if (regions.size() == 0) return;
1305     HRegionInfo curRegionInfo = regions.get(0);
1306     byte[] startRow = curRegionInfo.getStartKey();
1307     if (startRow == null || startRow.length == 0) {
1308       startRow = new byte[] { 0, 0, 0, 0, 1 };
1309     }
1310     byte[] row = Bytes.incrementBytes(startRow, 1);
1311     // use last 5 bytes because HBaseTestingUtility.createMultiRegions use 5 bytes key
1312     row = Arrays.copyOfRange(row, 3, 8);
1313     long value = 0;
1314     final byte[] tableName = Bytes.toBytes("table");
1315     byte[] family = Bytes.toBytes("family");
1316     byte[] qualifier = Bytes.toBytes("c1");
1317     long timeStamp = System.currentTimeMillis();
1318     HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
1319     htd.addFamily(new HColumnDescriptor(family));
1320     for (int i = 0; i < NUM_LOG_LINES; i += 1) {
1321       WALEdit e = new WALEdit();
1322       value++;
1323       e.add(new KeyValue(row, family, qualifier, timeStamp, Bytes.toBytes(value)));
1324       hrs.getWAL().append(curRegionInfo, TableName.valueOf(tableName), e, 
1325         System.currentTimeMillis(), htd, sequenceId);
1326     }
1327     hrs.getWAL().sync();
1328     hrs.getWAL().close();
1329 
1330     // wait for abort completes
1331     this.abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
1332  
1333     // verify we got the last value
1334     LOG.info("Verification Starts...");
1335     Get g = new Get(row);
1336     Result r = ht.get(g);
1337     long theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
1338     assertEquals(value, theStoredVal);
1339 
1340     // after flush & compaction
1341     LOG.info("Verification after flush...");
1342     TEST_UTIL.getHBaseAdmin().flush(tableName);
1343     TEST_UTIL.getHBaseAdmin().compact(tableName);
1344     
1345     // wait for compaction completes
1346     TEST_UTIL.waitFor(30000, 200, new Waiter.Predicate<Exception>() {
1347       @Override
1348       public boolean evaluate() throws Exception {
1349         return (TEST_UTIL.getHBaseAdmin().getCompactionState(tableName) == CompactionState.NONE);
1350       }
1351     });
1352 
1353     r = ht.get(g);
1354     theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
1355     assertEquals(value, theStoredVal);
1356     ht.close();
1357   }
1358 
1359   HTable installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs) throws Exception {
1360     return installTable(zkw, tname, fname, nrs, 0);
1361   }
1362 
1363   HTable installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs, 
1364       int existingRegions) throws Exception {
1365     // Create a table with regions
1366     byte [] table = Bytes.toBytes(tname);
1367     byte [] family = Bytes.toBytes(fname);
1368     LOG.info("Creating table with " + nrs + " regions");
1369     HTable ht = TEST_UTIL.createTable(table, family);
1370     int numRegions = TEST_UTIL.createMultiRegions(conf, ht, family, nrs);
1371     assertEquals(nrs, numRegions);
1372       LOG.info("Waiting for no more RIT\n");
1373     blockUntilNoRIT(zkw, master);
1374     // disable-enable cycle to get rid of table's dead regions left behind
1375     // by createMultiRegions
1376     LOG.debug("Disabling table\n");
1377     TEST_UTIL.getHBaseAdmin().disableTable(table);
1378     LOG.debug("Waiting for no more RIT\n");
1379     blockUntilNoRIT(zkw, master);
1380     NavigableSet<String> regions = getAllOnlineRegions(cluster);
1381     LOG.debug("Verifying only catalog and namespace regions are assigned\n");
1382     if (regions.size() != 2) {
1383       for (String oregion : regions)
1384         LOG.debug("Region still online: " + oregion);
1385     }
1386     assertEquals(2 + existingRegions, regions.size());
1387     LOG.debug("Enabling table\n");
1388     TEST_UTIL.getHBaseAdmin().enableTable(table);
1389     LOG.debug("Waiting for no more RIT\n");
1390     blockUntilNoRIT(zkw, master);
1391     LOG.debug("Verifying there are " + numRegions + " assigned on cluster\n");
1392     regions = getAllOnlineRegions(cluster);
1393     assertEquals(numRegions + 2 + existingRegions, regions.size());
1394     return ht;
1395   }
1396 
1397   void populateDataInTable(int nrows, String fname) throws Exception {
1398     byte [] family = Bytes.toBytes(fname);
1399 
1400     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1401     assertEquals(NUM_RS, rsts.size());
1402 
1403     for (RegionServerThread rst : rsts) {
1404       HRegionServer hrs = rst.getRegionServer();
1405       List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs);
1406       for (HRegionInfo hri : hris) {
1407         if (hri.getTable().isSystemTable()) {
1408           continue;
1409         }
1410         LOG.debug("adding data to rs = " + rst.getName() +
1411             " region = "+ hri.getRegionNameAsString());
1412         HRegion region = hrs.getOnlineRegion(hri.getRegionName());
1413         assertTrue(region != null);
1414         putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
1415       }
1416     }
1417   }
1418 
1419   public void makeHLog(HLog log, List<HRegionInfo> regions, String tname, String fname,
1420       int num_edits, int edit_size) throws IOException {
1421     makeHLog(log, regions, tname, fname, num_edits, edit_size, true);
1422   }
1423 
1424   public void makeHLog(HLog log, List<HRegionInfo> regions, String tname, String fname,
1425       int num_edits, int edit_size, boolean closeLog) throws IOException {
1426     TableName fullTName = TableName.valueOf(tname);
1427     // remove root and meta region
1428     regions.remove(HRegionInfo.FIRST_META_REGIONINFO);
1429     // using one sequenceId for edits across all regions is ok.
1430     final AtomicLong sequenceId = new AtomicLong(10);
1431 
1432 
1433     for(Iterator<HRegionInfo> iter = regions.iterator(); iter.hasNext(); ) {
1434       HRegionInfo regionInfo = iter.next();
1435       if(regionInfo.getTable().isSystemTable()) {
1436          iter.remove();
1437       }
1438     }
1439     HTableDescriptor htd = new HTableDescriptor(fullTName);
1440     byte[] family = Bytes.toBytes(fname);
1441     htd.addFamily(new HColumnDescriptor(family));
1442     byte[] value = new byte[edit_size];
1443 
1444     List<HRegionInfo> hris = new ArrayList<HRegionInfo>();
1445     for (HRegionInfo region : regions) {
1446       if (!region.getTable().getNameAsString().equalsIgnoreCase(tname)) {
1447         continue;
1448       }
1449       hris.add(region);
1450     }
1451     LOG.info("Creating wal edits across " + hris.size() + " regions.");
1452     for (int i = 0; i < edit_size; i++) {
1453       value[i] = (byte) ('a' + (i % 26));
1454     }
1455     int n = hris.size();
1456     int[] counts = new int[n];
1457     if (n > 0) {
1458       for (int i = 0; i < num_edits; i += 1) {
1459         WALEdit e = new WALEdit();
1460         HRegionInfo curRegionInfo = hris.get(i % n);
1461         byte[] startRow = curRegionInfo.getStartKey();
1462         if (startRow == null || startRow.length == 0) {
1463           startRow = new byte[] { 0, 0, 0, 0, 1 };
1464         }
1465         byte[] row = Bytes.incrementBytes(startRow, counts[i % n]);
1466         row = Arrays.copyOfRange(row, 3, 8); // use last 5 bytes because
1467                                              // HBaseTestingUtility.createMultiRegions use 5 bytes
1468                                              // key
1469         byte[] qualifier = Bytes.toBytes("c" + Integer.toString(i));
1470         e.add(new KeyValue(row, family, qualifier, System.currentTimeMillis(), value));
1471         log.append(curRegionInfo, fullTName, e, System.currentTimeMillis(), htd, sequenceId);
1472         counts[i % n] += 1;
1473       }
1474     }
1475     log.sync();
1476     if(closeLog) {
1477       log.close();
1478     }
1479     for (int i = 0; i < n; i++) {
1480       LOG.info("region " + hris.get(i).getRegionNameAsString() + " has " + counts[i] + " edits");
1481     }
1482     return;
1483   }
1484 
1485   private int countHLog(Path log, FileSystem fs, Configuration conf)
1486   throws IOException {
1487     int count = 0;
1488     HLog.Reader in = HLogFactory.createReader(fs, log, conf);
1489     while (in.next() != null) {
1490       count++;
1491     }
1492     return count;
1493   }
1494 
1495   private void blockUntilNoRIT(ZooKeeperWatcher zkw, HMaster master)
1496   throws KeeperException, InterruptedException {
1497     ZKAssign.blockUntilNoRIT(zkw);
1498     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
1499   }
1500 
1501   private void putData(HRegion region, byte[] startRow, int numRows, byte [] qf,
1502       byte [] ...families)
1503   throws IOException {
1504     for(int i = 0; i < numRows; i++) {
1505       Put put = new Put(Bytes.add(startRow, Bytes.toBytes(i)));
1506       for(byte [] family : families) {
1507         put.add(family, qf, null);
1508       }
1509       region.put(put);
1510     }
1511   }
1512 
1513   /**
1514    * Load table with puts and deletes with expected values so that we can verify later
1515    */
1516   private void prepareData(final HTable t, final byte[] f, final byte[] column) throws IOException {
1517     t.setAutoFlush(false, true);
1518     byte[] k = new byte[3];
1519 
1520     // add puts
1521     for (byte b1 = 'a'; b1 <= 'z'; b1++) {
1522       for (byte b2 = 'a'; b2 <= 'z'; b2++) {
1523         for (byte b3 = 'a'; b3 <= 'z'; b3++) {
1524           k[0] = b1;
1525           k[1] = b2;
1526           k[2] = b3;
1527           Put put = new Put(k);
1528           put.add(f, column, k);
1529           t.put(put);
1530         }
1531       }
1532     }
1533     t.flushCommits();
1534     // add deletes
1535     for (byte b3 = 'a'; b3 <= 'z'; b3++) {
1536       k[0] = 'a';
1537       k[1] = 'a';
1538       k[2] = b3;
1539       Delete del = new Delete(k);
1540       t.delete(del);
1541     }
1542     t.flushCommits();
1543   }
1544 
1545   private NavigableSet<String> getAllOnlineRegions(MiniHBaseCluster cluster)
1546       throws IOException {
1547     NavigableSet<String> online = new TreeSet<String>();
1548     for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
1549       for (HRegionInfo region : ProtobufUtil.getOnlineRegions(rst.getRegionServer())) {
1550         online.add(region.getRegionNameAsString());
1551       }
1552     }
1553     return online;
1554   }
1555 
1556   private void waitForCounter(AtomicLong ctr, long oldval, long newval,
1557       long timems) {
1558     long curt = System.currentTimeMillis();
1559     long endt = curt + timems;
1560     while (curt < endt) {
1561       if (ctr.get() == oldval) {
1562         Thread.yield();
1563         curt = System.currentTimeMillis();
1564       } else {
1565         assertEquals(newval, ctr.get());
1566         return;
1567       }
1568     }
1569     assertTrue(false);
1570   }
1571 
1572   private void abortMaster(MiniHBaseCluster cluster) throws InterruptedException {
1573     for (MasterThread mt : cluster.getLiveMasterThreads()) {
1574       if (mt.getMaster().isActiveMaster()) {
1575         mt.getMaster().abort("Aborting for tests", new Exception("Trace info"));
1576         mt.join();
1577         break;
1578       }
1579     }
1580     LOG.debug("Master is aborted");
1581   }
1582 
1583   private void startMasterAndWaitUntilLogSplit(MiniHBaseCluster cluster)
1584       throws IOException, InterruptedException {
1585     cluster.startMaster();
1586     HMaster master = cluster.getMaster();
1587     while (!master.isInitialized()) {
1588       Thread.sleep(100);
1589     }
1590     ServerManager serverManager = master.getServerManager();
1591     while (serverManager.areDeadServersInProgress()) {
1592       Thread.sleep(100);
1593     }
1594   }
1595 
1596   /**
1597    * Find a RS that has regions of a table.
1598    * @param hasMetaRegion when true, the returned RS has hbase:meta region as well
1599    * @param tableName
1600    * @return
1601    * @throws Exception
1602    */
1603   private HRegionServer findRSToKill(boolean hasMetaRegion, String tableName) throws Exception {
1604     List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1605     int numOfRSs = rsts.size();
1606     List<HRegionInfo> regions = null;
1607     HRegionServer hrs = null;
1608 
1609     for (int i = 0; i < numOfRSs; i++) {
1610       boolean isCarryingMeta = false;
1611       boolean foundTableRegion = false;
1612       hrs = rsts.get(i).getRegionServer();
1613       regions = ProtobufUtil.getOnlineRegions(hrs);
1614       for (HRegionInfo region : regions) {
1615         if (region.isMetaRegion()) {
1616           isCarryingMeta = true;
1617         }
1618         if (tableName == null || region.getTable().getNameAsString().equals(tableName)) {
1619           foundTableRegion = true;
1620         }
1621         if (foundTableRegion && (isCarryingMeta || !hasMetaRegion)) {
1622           break;
1623         }
1624       }
1625       if (isCarryingMeta && hasMetaRegion) {
1626         // clients ask for a RS with META
1627         if (!foundTableRegion) {
1628           final HRegionServer destRS = hrs;
1629           // the RS doesn't have regions of the specified table so we need move one to this RS
1630           List<HRegionInfo> tableRegions =
1631               TEST_UTIL.getHBaseAdmin().getTableRegions(Bytes.toBytes(tableName));
1632           final HRegionInfo hri = tableRegions.get(0);
1633           TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
1634             Bytes.toBytes(destRS.getServerName().getServerName()));
1635           // wait for region move completes
1636           final RegionStates regionStates =
1637               TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
1638           TEST_UTIL.waitFor(45000, 200, new Waiter.Predicate<Exception>() {
1639             @Override
1640             public boolean evaluate() throws Exception {
1641               ServerName sn = regionStates.getRegionServerOfRegion(hri);
1642               return (sn != null && sn.equals(destRS.getServerName()));
1643             }
1644           });
1645         }
1646         return hrs;
1647       } else if (hasMetaRegion || isCarryingMeta) {
1648         continue;
1649       }
1650       if (foundTableRegion) break;
1651     }
1652 
1653     return hrs;
1654   }
1655 
1656 }