/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.master;

import static org.apache.hadoop.hbase.SplitLogCounters.tot_mgr_wait_for_zk_delete;
import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_final_transition_failed;
import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_preempt_task;
import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_acquired;
import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_done;
import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_err;
import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_resigned;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.NavigableSet;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.SplitLogCounters;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
import org.apache.hadoop.hbase.exceptions.RegionInRecoveryException;
import org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.regionserver.wal.HLogFactory;
import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.zookeeper.KeeperException;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

@Category(LargeTests.class)
public class TestDistributedLogSplitting {
  private static final Log LOG = LogFactory.getLog(TestDistributedLogSplitting.class);
  static {
    // Bump HBase logging to DEBUG so log-splitting failures are easier to diagnose.
    Logger.getLogger("org.apache.hadoop.hbase").setLevel(Level.DEBUG);

    // Short-circuit reads are turned off for this test.
    System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
  }

  static final int NUM_MASTERS = 2;
  static final int NUM_RS = 6;

  MiniHBaseCluster cluster;
  HMaster master;
  Configuration conf;
  static Configuration originalConf;
  static HBaseTestingUtility TEST_UTIL;
  static MiniDFSCluster dfsCluster;
  static MiniZooKeeperCluster zkCluster;

  @BeforeClass
  public static void setup() throws Exception {
    // The DFS and ZK mini clusters are shared by every test; each test then starts
    // its own HBase mini cluster on top of them.
    TEST_UTIL = new HBaseTestingUtility(HBaseConfiguration.create());
    dfsCluster = TEST_UTIL.startMiniDFSCluster(1);
    zkCluster = TEST_UTIL.startMiniZKCluster();
    originalConf = TEST_UTIL.getConfiguration();
  }

  @AfterClass
  public static void tearDown() throws IOException {
    TEST_UTIL.shutdownMiniZKCluster();
    TEST_UTIL.shutdownMiniDFSCluster();
  }

  private void startCluster(int num_rs) throws Exception {
    SplitLogCounters.resetCounters();
    LOG.info("Starting cluster");
    conf.getLong("hbase.splitlog.max.resubmit", 0);
    // Do not retry ZK operations so server failures are detected quickly.
    conf.setInt("zookeeper.recovery.retry", 0);
    conf.setInt(HConstants.REGIONSERVER_INFO_PORT, -1);
    conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, (float) 100.0);
    conf.setInt("hbase.regionserver.wal.max.splitters", 3);
    TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.setDFSCluster(dfsCluster);
    TEST_UTIL.setZkCluster(zkCluster);
    TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, num_rs);
    cluster = TEST_UTIL.getHBaseCluster();
    LOG.info("Waiting for active/ready master");
    cluster.waitForActiveAndReadyMaster();
    master = cluster.getMaster();
    while (cluster.getLiveRegionServerThreads().size() < num_rs) {
      Threads.sleep(1);
    }
  }

  @Before
  public void before() throws Exception {
    // Each test starts from a fresh copy of the shared configuration.
    conf = HBaseConfiguration.create(originalConf);
  }

  @After
  public void after() throws Exception {
    try {
      if (TEST_UTIL.getHBaseCluster() != null) {
        for (MasterThread mt : TEST_UTIL.getHBaseCluster().getLiveMasterThreads()) {
          mt.getMaster().abort("closing...", new Exception("Trace info"));
        }
      }
      TEST_UTIL.shutdownMiniHBaseCluster();
    } finally {
      TEST_UTIL.getTestFileSystem().delete(FSUtils.getRootDir(TEST_UTIL.getConfiguration()), true);
      ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
    }
  }

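  /**
   * Writes edits for several regions of one table into a single region server's WAL,
   * runs distributed log splitting on that WAL directory, and verifies that the total
   * number of entries in the resulting recovered.edits files matches what was written.
   */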
  @Test (timeout=300000)
  public void testRecoveredEdits() throws Exception {
    LOG.info("testRecoveredEdits");
    // Use a small block size so the data written below spills into several log files.
    conf.setLong("hbase.regionserver.hlog.blocksize", 30 * 1024);
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
    startCluster(NUM_RS);

    final int NUM_LOG_LINES = 1000;
    final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;

    // Turn off the balancer so regions stay where the test put them.
    master.balanceSwitch(false);
    FileSystem fs = master.getMasterFileSystem().getFileSystem();

    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();

    Path rootdir = FSUtils.getRootDir(conf);

    installTable(new ZooKeeperWatcher(conf, "table-creation", null),
        "table", "family", 40);
    TableName table = TableName.valueOf("table");
    List<HRegionInfo> regions = null;
    HRegionServer hrs = null;
    // Find a region server that hosts at least one region of the test table.
    for (int i = 0; i < NUM_RS; i++) {
      boolean foundRs = false;
      hrs = rsts.get(i).getRegionServer();
      regions = ProtobufUtil.getOnlineRegions(hrs);
      for (HRegionInfo region : regions) {
        if (region.getTable().getNameAsString().equalsIgnoreCase("table")) {
          foundRs = true;
          break;
        }
      }
      if (foundRs) break;
    }
    final Path logDir = new Path(rootdir, HLogUtil.getHLogDirectoryName(hrs
        .getServerName().toString()));

    LOG.info("#regions = " + regions.size());
    // Drop system-table regions; only user regions receive the generated edits.
    Iterator<HRegionInfo> it = regions.iterator();
    while (it.hasNext()) {
      HRegionInfo region = it.next();
      if (region.getTable().getNamespaceAsString()
          .equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
        it.remove();
      }
    }
    makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);

    slm.splitLogDistributed(logDir);

    int count = 0;
    for (HRegionInfo hri : regions) {

      Path tdir = FSUtils.getTableDir(rootdir, table);
      @SuppressWarnings("deprecation")
      Path editsdir =
        HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
      LOG.debug("checking edits dir " + editsdir);
      FileStatus[] files = fs.listStatus(editsdir);
      assertTrue("expected more than one recovered.edits file in " + editsdir
          + " but found " + files.length, files.length > 1);
      for (int i = 0; i < files.length; i++) {
        int c = countHLog(files[i].getPath(), fs, conf);
        count += c;
      }
      LOG.info(count + " edits in " + files.length + " recovered edits files.");
    }
    assertEquals(NUM_LOG_LINES, count);
  }

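  /**
   * With distributed log replay enabled, kills a region server that does not host
   * hbase:meta after writing edits to its WAL, then verifies that every edit is
   * visible once recovery completes.
   */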
  @Test(timeout = 300000)
  public void testLogReplayWithNonMetaRSDown() throws Exception {
    LOG.info("testLogReplayWithNonMetaRSDown");
    conf.setLong("hbase.regionserver.hlog.blocksize", 100 * 1024);
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;

    // Turn off the balancer so regions stay put during recovery.
    master.balanceSwitch(false);

    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);

    HRegionServer hrs = findRSToKill(false, "table");
    List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
    makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);

    this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
    ht.close();
    zkw.close();
  }

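  /**
   * Same as {@link #testLogReplayWithNonMetaRSDown()}, except that the region server
   * chosen to be killed is the one carrying hbase:meta.
   */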
  @Test(timeout = 300000)
  public void testLogReplayWithMetaRSDown() throws Exception {
    LOG.info("testLogReplayWithMetaRSDown");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;

    master.balanceSwitch(false);

    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);

    HRegionServer hrs = findRSToKill(true, "table");
    List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
    makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);

    this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
    ht.close();
    zkw.close();
  }

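  /**
   * Aborts the given region server, waits for the cluster to finish recovery, and then
   * asserts that the table contains exactly the number of rows that were written.
   */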
  private void abortRSAndVerifyRecovery(HRegionServer hrs, HTable ht, final ZooKeeperWatcher zkw,
      final int numRegions, final int numofLines) throws Exception {

    abortRSAndWaitForRecovery(hrs, zkw, numRegions);
    assertEquals(numofLines, TEST_UTIL.countRows(ht));
  }

  private void abortRSAndWaitForRecovery(HRegionServer hrs, final ZooKeeperWatcher zkw,
      final int numRegions) throws Exception {
    final MiniHBaseCluster tmpCluster = this.cluster;

    // Abort the region server.
    LOG.info("Aborting region server: " + hrs.getServerName());
    hrs.abort("testing");

    // Wait for the abort to be noticed: one fewer live region server.
    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (tmpCluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
      }
    });

    // Wait for the regions to come back online after the server death.
    TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (getAllOnlineRegions(tmpCluster).size() >= (numRegions + 1));
      }
    });

    // Wait until the recovering-regions znodes are cleared, i.e. log replay is done.
    TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
            zkw.recoveringRegionsZNode, false);
        return (recoveringRegions != null && recoveringRegions.size() == 0);
      }
    });
  }

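  /**
   * Aborts the active master together with a region server carrying regions of the test
   * table, then brings a master back up and verifies that the outstanding log-splitting
   * work is completed and every edit recovered. Distributed log replay is disabled, so
   * recovery goes through recovered.edits files.
   */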
  @Test(timeout = 300000)
  public void testMasterStartsUpWithLogSplittingWork() throws Exception {
    LOG.info("testMasterStartsUpWithLogSplittingWork");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
    conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
    startCluster(NUM_RS);

    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;

    master.balanceSwitch(false);

    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);

    HRegionServer hrs = findRSToKill(false, "table");
    List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
    makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);

    // Abort the active master so the log-splitting work is still pending when a master comes up.
    abortMaster(cluster);

    // Abort the region server whose WAL holds the edits.
    LOG.info("Aborting region server: " + hrs.getServerName());
    hrs.abort("testing");

    // Wait for the dead region server to drop out of the live list.
    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
      }
    });

    Thread.sleep(2000);
    LOG.info("Current Open Regions:" + getAllOnlineRegions(cluster).size());

    startMasterAndWaitUntilLogSplit(cluster);

    // Wait until all regions are assigned again.
    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
      }
    });

    LOG.info("Current Open Regions After Master Node Starts Up:"
        + getAllOnlineRegions(cluster).size());

    assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));

    ht.close();
    zkw.close();
  }

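  /**
   * Same scenario as {@link #testMasterStartsUpWithLogSplittingWork()}, but with
   * distributed log replay enabled, so completion is detected by watching the
   * recovering-regions znodes rather than the assignment count alone.
   */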
  @Test(timeout = 300000)
  public void testMasterStartsUpWithLogReplayWork() throws Exception {
    LOG.info("testMasterStartsUpWithLogReplayWork");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
    startCluster(NUM_RS);

    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;

    master.balanceSwitch(false);

    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);

    HRegionServer hrs = findRSToKill(false, "table");
    List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
    makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);

    // Abort the active master so the log replay work is still pending at master startup.
    abortMaster(cluster);

    // Abort the region server whose WAL holds the edits.
    LOG.info("Aborting region server: " + hrs.getServerName());
    hrs.abort("testing");

    // Wait for the dead region server to drop out of the live list.
    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
      }
    });

    Thread.sleep(2000);
    LOG.info("Current Open Regions:" + getAllOnlineRegions(cluster).size());

    startMasterAndWaitUntilLogSplit(cluster);

    // Wait until the recovering-regions znodes are cleared, i.e. log replay is done.
    TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
            zkw.recoveringRegionsZNode, false);
        return (recoveringRegions != null && recoveringRegions.size() == 0);
      }
    });

    LOG.info("Current Open Regions After Master Node Starts Up:"
        + getAllOnlineRegions(cluster).size());

    assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));

    ht.close();
    zkw.close();
  }

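  /**
   * With distributed log replay enabled, aborts the region server carrying the test
   * regions and then, once the regions are reassigned, aborts a second region server,
   * verifying that all edits survive two back-to-back failures.
   */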
  @Test(timeout = 300000)
  public void testLogReplayTwoSequentialRSDown() throws Exception {
    LOG.info("testLogReplayTwoSequentialRSDown");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;

    master.balanceSwitch(false);

    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);

    List<HRegionInfo> regions = null;
    HRegionServer hrs1 = findRSToKill(false, "table");
    regions = ProtobufUtil.getOnlineRegions(hrs1);

    makeHLog(hrs1.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);

    // Abort the first region server.
    LOG.info("Aborting region server: " + hrs1.getServerName());
    hrs1.abort("testing");

    // Wait for the abort to be noticed.
    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
      }
    });

    // Wait for the regions to come back online on the remaining servers.
    TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
      }
    });

    // Short pause before taking down a second region server.
    Thread.sleep(300);

    rsts = cluster.getLiveRegionServerThreads();
    HRegionServer hrs2 = rsts.get(0).getRegionServer();
    LOG.info("Aborting one more region server: " + hrs2.getServerName());
    hrs2.abort("testing");

    // Wait for the second abort to be noticed.
    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 2));
      }
    });

    // Wait for the regions to come back online again.
    TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
      }
    });

    // Wait until recovery of both servers is complete.
    TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
            zkw.recoveringRegionsZNode, false);
        return (recoveringRegions != null && recoveringRegions.size() == 0);
      }
    });

    assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
    ht.close();
    zkw.close();
  }

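  /**
   * Marks one online region as recovering on behalf of two different server names,
   * checks that two entries appear under the region's recovering-regions znode, and
   * then waits for the hosting region server to clear its recovering-regions map.
   */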
  @Test(timeout = 300000)
  public void testMarkRegionsRecoveringInZK() throws Exception {
    LOG.info("testMarkRegionsRecoveringInZK");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    master.balanceSwitch(false);
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = master.getZooKeeperWatcher();
    HTable ht = installTable(zkw, "table", "family", 40);
    final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;

    // Pick one online region and two server names to mark it as recovering for.
    Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
    HRegionInfo region = null;
    HRegionServer hrs = null;
    ServerName firstFailedServer = null;
    ServerName secondFailedServer = null;
    for (int i = 0; i < NUM_RS; i++) {
      hrs = rsts.get(i).getRegionServer();
      List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
      if (regions.isEmpty()) continue;
      region = regions.get(0);
      regionSet.add(region);
      firstFailedServer = hrs.getServerName();
      secondFailedServer = rsts.get((i + 1) % NUM_RS).getRegionServer().getServerName();
      break;
    }

    slm.markRegionsRecoveringInZK(firstFailedServer, regionSet);
    slm.markRegionsRecoveringInZK(secondFailedServer, regionSet);

    List<String> recoveringRegions = ZKUtil.listChildrenNoWatch(zkw,
        ZKUtil.joinZNode(zkw.recoveringRegionsZNode, region.getEncodedName()));

    assertEquals(2, recoveringRegions.size());

    // Wait for the hosting region server to notice and clear its recovering-regions map.
    final HRegionServer tmphrs = hrs;
    TEST_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (tmphrs.getRecoveringRegions().size() == 0);
      }
    });
    ht.close();
    zkw.close();
  }

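  /**
   * Loads a table with a known pattern of puts and deletes, kills a region server that
   * is not carrying hbase:meta, and verifies that the table checksum is unchanged once
   * the regions have been recovered via log replay.
   */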
  @Test(timeout = 300000)
  public void testReplayCmd() throws Exception {
    LOG.info("testReplayCmd");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;

    master.balanceSwitch(false);

    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);

    // Pick a region server that has user regions but is not carrying hbase:meta.
    List<HRegionInfo> regions = null;
    HRegionServer hrs = null;
    for (int i = 0; i < NUM_RS; i++) {
      boolean isCarryingMeta = false;
      hrs = rsts.get(i).getRegionServer();
      regions = ProtobufUtil.getOnlineRegions(hrs);
      for (HRegionInfo region : regions) {
        if (region.isMetaRegion()) {
          isCarryingMeta = true;
          break;
        }
      }
      if (isCarryingMeta) {
        continue;
      }
      if (regions.size() > 0) break;
    }

    this.prepareData(ht, Bytes.toBytes("family"), Bytes.toBytes("c1"));
    String originalCheckSum = TEST_UTIL.checksumRows(ht);

    // Abort the chosen region server and wait for its regions to be recovered.
    abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);

    assertEquals("Data should remain after reopening of regions", originalCheckSum,
      TEST_UTIL.checksumRows(ht));

    ht.close();
    zkw.close();
  }

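  /**
   * Writes edits for both an enabled table and a table that is subsequently disabled,
   * kills the hosting region server, and verifies that edits for the disabled table end
   * up in recovered.edits files (not replayed) while edits for the enabled table are
   * replayed into the regions.
   */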
  @Test(timeout = 300000)
  public void testLogReplayForDisablingTable() throws Exception {
    LOG.info("testLogReplayForDisablingTable");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;

    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    HTable disablingHT = installTable(zkw, "disableTable", "family", NUM_REGIONS_TO_CREATE);
    HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE, NUM_REGIONS_TO_CREATE);

    master.balanceSwitch(false);

    // Find a region server that hosts regions of both tables but not hbase:meta.
    List<HRegionInfo> regions = null;
    HRegionServer hrs = null;
    boolean hasRegionsForBothTables = false;
    String tableName = null;
    for (int i = 0; i < NUM_RS; i++) {
      tableName = null;
      hasRegionsForBothTables = false;
      boolean isCarryingMeta = false;
      hrs = rsts.get(i).getRegionServer();
      regions = ProtobufUtil.getOnlineRegions(hrs);
      for (HRegionInfo region : regions) {
        if (region.isMetaRegion()) {
          isCarryingMeta = true;
          break;
        }
        if (tableName != null &&
            !tableName.equalsIgnoreCase(region.getTable().getNameAsString())) {
          // This server hosts regions of a second table as well.
          hasRegionsForBothTables = true;
          break;
        } else if (tableName == null) {
          tableName = region.getTable().getNameAsString();
        }
      }
      if (isCarryingMeta) {
        continue;
      }
      if (hasRegionsForBothTables) {
        break;
      }
    }

    // The loop above must have found a server hosting regions of both tables.
    Assert.assertTrue(hasRegionsForBothTables);

    LOG.info("#regions = " + regions.size());
    Iterator<HRegionInfo> it = regions.iterator();
    while (it.hasNext()) {
      HRegionInfo region = it.next();
      if (region.isMetaTable()) {
        it.remove();
      }
    }
    makeHLog(hrs.getWAL(), regions, "disableTable", "family", NUM_LOG_LINES, 100, false);
    makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);

    LOG.info("Disabling table\n");
    TEST_UTIL.getHBaseAdmin().disableTable(Bytes.toBytes("disableTable"));

    // Abort the region server hosting both sets of regions.
    LOG.info("Aborting region server: " + hrs.getServerName());
    hrs.abort("testing");

    // Wait for the abort to be noticed.
    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
      }
    });

    // Wait for the enabled table's regions to come back online.
    TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
      }
    });

    // Wait until log replay is finished.
    TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
            zkw.recoveringRegionsZNode, false);
        return (recoveringRegions != null && recoveringRegions.size() == 0);
      }
    });

    // Edits of the disabled table must have gone to recovered.edits files
    // instead of being replayed into regions.
    int count = 0;
    FileSystem fs = master.getMasterFileSystem().getFileSystem();
    Path rootdir = FSUtils.getRootDir(conf);
    Path tdir = FSUtils.getTableDir(rootdir, TableName.valueOf("disableTable"));
    for (HRegionInfo hri : regions) {
      @SuppressWarnings("deprecation")
      Path editsdir =
        HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
      LOG.debug("checking edits dir " + editsdir);
      if (!fs.exists(editsdir)) continue;
      FileStatus[] files = fs.listStatus(editsdir);
      if (files != null) {
        for (FileStatus file : files) {
          int c = countHLog(file.getPath(), fs, conf);
          count += c;
          LOG.info(c + " edits in " + file.getPath());
        }
      }
    }

    LOG.info("Verify edits in recovered.edits files");
    assertEquals(NUM_LOG_LINES, count);
    LOG.info("Verify replayed edits");
    assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));

    // Clean up the recovered.edits directories created for the disabled table.
    for (HRegionInfo hri : regions) {
      @SuppressWarnings("deprecation")
      Path editsdir =
        HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
      fs.delete(editsdir, true);
    }
    disablingHT.close();
    ht.close();
    zkw.close();
  }

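  /**
   * With {@code HConstants.DISALLOW_WRITES_IN_RECOVERING} enabled, marks a region as
   * recovering, moves it to another server, and verifies that a put against it fails
   * with a {@link RegionInRecoveryException}.
   */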
  @Test(timeout = 300000)
  public void testDisallowWritesInRecovering() throws Exception {
    LOG.info("testDisallowWritesInRecovering");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
    conf.setBoolean(HConstants.DISALLOW_WRITES_IN_RECOVERING, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;

    master.balanceSwitch(false);

    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
    final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;

    // Pick one online region, remember its server, and choose a destination server.
    Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
    HRegionInfo region = null;
    HRegionServer hrs = null;
    HRegionServer dstRS = null;
    for (int i = 0; i < NUM_RS; i++) {
      hrs = rsts.get(i).getRegionServer();
      List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
      if (regions.isEmpty()) continue;
      region = regions.get(0);
      regionSet.add(region);
      dstRS = rsts.get((i+1) % NUM_RS).getRegionServer();
      break;
    }

    // Mark the region as recovering, then move it to the destination server.
    slm.markRegionsRecoveringInZK(hrs.getServerName(), regionSet);

    final HRegionInfo hri = region;
    final HRegionServer tmpRS = dstRS;
    TEST_UTIL.getHBaseAdmin().move(region.getEncodedNameAsBytes(),
        Bytes.toBytes(dstRS.getServerName().getServerName()));

    // Wait for the move to complete.
    final RegionStates regionStates =
        TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
    TEST_UTIL.waitFor(45000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        ServerName sn = regionStates.getRegionServerOfRegion(hri);
        return (sn != null && sn.equals(tmpRS.getServerName()));
      }
    });

    try {
      byte[] key = region.getStartKey();
      if (key == null || key.length == 0) {
        key = new byte[] { 0, 0, 0, 0, 1 };
      }
      ht.setAutoFlush(true, true);
      Put put = new Put(key);
      put.add(Bytes.toBytes("family"), Bytes.toBytes("c1"), new byte[]{'b'});
      ht.put(put);
      ht.close();
      fail("The put should have failed while the region is still recovering");
    } catch (IOException ioe) {
      Assert.assertTrue(ioe instanceof RetriesExhaustedWithDetailsException);
      RetriesExhaustedWithDetailsException re = (RetriesExhaustedWithDetailsException) ioe;
      boolean foundRegionInRecoveryException = false;
      for (Throwable t : re.getCauses()) {
        if (t instanceof RegionInRecoveryException) {
          foundRegionInRecoveryException = true;
          break;
        }
      }
      Assert.assertTrue(
        "No RegionInRecoveryException. Following exceptions returned=" + re.getCauses(),
        foundRegionInRecoveryException);
    }

    zkw.close();
  }

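  /**
   * Enqueues a split-log task and aborts a region server once a worker has acquired a
   * task, then verifies that at least one of the worker counters (resigned, err,
   * final_transition_failed, done, preempt) moves within the wait period.
   */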
  @Test (timeout=300000)
  public void testWorkerAbort() throws Exception {
    LOG.info("testWorkerAbort");
    startCluster(3);
    final int NUM_LOG_LINES = 10000;
    final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
    FileSystem fs = master.getMasterFileSystem().getFileSystem();

    final List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    HRegionServer hrs = findRSToKill(false, "table");
    Path rootdir = FSUtils.getRootDir(conf);
    final Path logDir = new Path(rootdir,
        HLogUtil.getHLogDirectoryName(hrs.getServerName().toString()));

    installTable(new ZooKeeperWatcher(conf, "table-creation", null),
        "table", "family", 40);

    makeHLog(hrs.getWAL(), ProtobufUtil.getOnlineRegions(hrs), "table", "family", NUM_LOG_LINES,
        100);

    // As soon as a worker has acquired a split task, abort a region server.
    new Thread() {
      @Override
      public void run() {
        waitForCounter(tot_wkr_task_acquired, 0, 1, 1000);
        for (RegionServerThread rst : rsts) {
          rst.getRegionServer().abort("testing");
          break;
        }
      }
    }.start();

    FileStatus[] logfiles = fs.listStatus(logDir);
    TaskBatch batch = new TaskBatch();
    slm.enqueueSplitTask(logfiles[0].getPath().toString(), batch);

    // Wait for at least one of the worker counters to move.
    long curt = System.currentTimeMillis();
    long waitTime = 80000;
    long endt = curt + waitTime;
    while (curt < endt) {
      if ((tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
          tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
          tot_wkr_preempt_task.get()) == 0) {
        Thread.yield();
        curt = System.currentTimeMillis();
      } else {
        assertTrue(1 <= (tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
            tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
            tot_wkr_preempt_task.get()));
        return;
      }
    }
    fail("none of the following counters went up in " + waitTime +
        " milliseconds - " +
        "tot_wkr_task_resigned, tot_wkr_task_err, " +
        "tot_wkr_final_transition_failed, tot_wkr_task_done, " +
        "tot_wkr_preempt_task");
  }

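  /**
   * Populates the test table, aborts three of the six region servers at once, and
   * verifies that all regions come back online and no rows are lost.
   */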
  @Test (timeout=300000)
  public void testThreeRSAbort() throws Exception {
    LOG.info("testThreeRSAbort");
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_ROWS_PER_REGION = 100;

    startCluster(NUM_RS);

    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf,
        "distributed log splitting test", null);

    HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
    populateDataInTable(NUM_ROWS_PER_REGION, "family");

    // Abort three of the six region servers at once.
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    assertEquals(NUM_RS, rsts.size());
    rsts.get(0).getRegionServer().abort("testing");
    rsts.get(1).getRegionServer().abort("testing");
    rsts.get(2).getRegionServer().abort("testing");

    long start = EnvironmentEdgeManager.currentTimeMillis();
    while (cluster.getLiveRegionServerThreads().size() > (NUM_RS - 3)) {
      if (EnvironmentEdgeManager.currentTimeMillis() - start > 60000) {
        fail("Timed out waiting for the aborted region servers to die");
      }
      Thread.sleep(200);
    }

    start = EnvironmentEdgeManager.currentTimeMillis();
    while (getAllOnlineRegions(cluster).size() < (NUM_REGIONS_TO_CREATE + 1)) {
      if (EnvironmentEdgeManager.currentTimeMillis() - start > 60000) {
        fail("Timed out waiting for regions to come back online");
      }
      Thread.sleep(200);
    }

    // Wait until all recovery work has finished.
    TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
            zkw.recoveringRegionsZNode, false);
        return (recoveringRegions != null && recoveringRegions.size() == 0);
      }
    });

    assertEquals(NUM_REGIONS_TO_CREATE * NUM_ROWS_PER_REGION,
        TEST_UTIL.countRows(ht));
    ht.close();
    zkw.close();
  }

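  /**
   * Submits a split request for a directory containing a corrupted log file while ZK
   * task-node deletion is artificially suppressed; the first split fails, the retry
   * blocks waiting for the stale task node, and the test verifies the manager reports
   * that wait before the worker thread is interrupted.
   */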
  @Test(timeout=30000)
  public void testDelayedDeleteOnFailure() throws Exception {
    LOG.info("testDelayedDeleteOnFailure");
    startCluster(1);
    final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
    final FileSystem fs = master.getMasterFileSystem().getFileSystem();
    final Path logDir = new Path(FSUtils.getRootDir(conf), "x");
    fs.mkdirs(logDir);
    ExecutorService executor = null;
    try {
      final Path corruptedLogFile = new Path(logDir, "x");
      FSDataOutputStream out;
      out = fs.create(corruptedLogFile);
      out.write(0);
      out.write(Bytes.toBytes("corrupted bytes"));
      out.close();
      slm.ignoreZKDeleteForTesting = true;
      executor = Executors.newSingleThreadExecutor();
      Runnable runnable = new Runnable() {
        @Override
        public void run() {
          try {
            // The first call fails because the corrupted log file cannot be split.
            slm.splitLogDistributed(logDir);
          } catch (IOException ioe) {
            try {
              assertTrue(fs.exists(corruptedLogFile));
              // The retry blocks while the manager waits for the ZK node of the failed
              // task to be deleted; it only surfaces an IOException once this thread
              // is interrupted below.
              slm.splitLogDistributed(logDir);
            } catch (IOException e) {
              assertTrue(Thread.currentThread().isInterrupted());
              return;
            }
            fail("did not get the expected IOException from the 2nd call");
          }
          fail("did not get the expected IOException from the 1st call");
        }
      };
      Future<?> result = executor.submit(runnable);
      try {
        result.get(2000, TimeUnit.MILLISECONDS);
      } catch (TimeoutException te) {
        // Expected: the runnable should still be blocked in the second split call.
      }
      waitForCounter(tot_mgr_wait_for_zk_delete, 0, 1, 10000);
      executor.shutdownNow();
      executor = null;

      // The runnable should exit via the interrupted path without any assertion failure.
      result.get();
    } finally {
      if (executor != null) {
        // The task did not terminate as expected; make sure the thread is stopped.
        executor.shutdownNow();
      }
      fs.delete(logDir, true);
    }
  }

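  /**
   * Marks hbase:meta (and a set of user regions) as recovering for a failed server and
   * verifies that the meta entry disappears from the recovering-regions znodes once the
   * meta log has been split.
   */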
  @Test(timeout = 300000)
  public void testMetaRecoveryInZK() throws Exception {
    LOG.info("testMetaRecoveryInZK");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);

    master.balanceSwitch(false);
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();

    // Pick the region server that is carrying hbase:meta.
    HRegionServer hrs = findRSToKill(true, null);
    List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);

    LOG.info("#regions = " + regions.size());
    Set<HRegionInfo> tmpRegions = new HashSet<HRegionInfo>();
    tmpRegions.add(HRegionInfo.FIRST_META_REGIONINFO);
    master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), tmpRegions);
    Set<HRegionInfo> userRegionSet = new HashSet<HRegionInfo>();
    userRegionSet.addAll(regions);
    master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), userRegionSet);
    boolean isMetaRegionInRecovery = false;
    List<String> recoveringRegions =
        zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
    for (String curEncodedRegionName : recoveringRegions) {
      if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
        isMetaRegionInRecovery = true;
        break;
      }
    }
    assertTrue(isMetaRegionInRecovery);

    master.getMasterFileSystem().splitMetaLog(hrs.getServerName());

    isMetaRegionInRecovery = false;
    recoveringRegions =
        zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
    for (String curEncodedRegionName : recoveringRegions) {
      if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
        isMetaRegionInRecovery = true;
        break;
      }
    }
    // The meta region should no longer be marked as recovering once its log is split.
    assertFalse(isMetaRegionInRecovery);
    zkw.close();
  }

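  /**
   * Creates a table with the given number of regions, cycles it through disable/enable,
   * and verifies the expected number of online regions at each step.
   */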
  HTable installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs) throws Exception {
    return installTable(zkw, tname, fname, nrs, 0);
  }

  HTable installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs,
      int existingRegions) throws Exception {
    // Create a table with the requested number of regions.
    byte [] table = Bytes.toBytes(tname);
    byte [] family = Bytes.toBytes(fname);
    LOG.info("Creating table with " + nrs + " regions");
    HTable ht = TEST_UTIL.createTable(table, family);
    int numRegions = TEST_UTIL.createMultiRegions(conf, ht, family, nrs);
    assertEquals(nrs, numRegions);
    LOG.info("Waiting for no more RIT\n");
    blockUntilNoRIT(zkw, master);

    // Cycle the table through disable/enable and verify the region counts along the way.
    LOG.debug("Disabling table\n");
    TEST_UTIL.getHBaseAdmin().disableTable(table);
    LOG.debug("Waiting for no more RIT\n");
    blockUntilNoRIT(zkw, master);
    NavigableSet<String> regions = getAllOnlineRegions(cluster);
    LOG.debug("Verifying only catalog and namespace regions are assigned\n");
    if (regions.size() != 2) {
      for (String oregion : regions)
        LOG.debug("Region still online: " + oregion);
    }
    assertEquals(2 + existingRegions, regions.size());
    LOG.debug("Enabling table\n");
    TEST_UTIL.getHBaseAdmin().enableTable(table);
    LOG.debug("Waiting for no more RIT\n");
    blockUntilNoRIT(zkw, master);
    LOG.debug("Verifying there are " + numRegions + " assigned on cluster\n");
    regions = getAllOnlineRegions(cluster);
    assertEquals(numRegions + 2 + existingRegions, regions.size());
    return ht;
  }

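  /**
   * Writes {@code nrows} rows into every non-system region currently online, starting
   * at each region's start key.
   */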
  void populateDataInTable(int nrows, String fname) throws Exception {
    byte [] family = Bytes.toBytes(fname);

    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    assertEquals(NUM_RS, rsts.size());

    for (RegionServerThread rst : rsts) {
      HRegionServer hrs = rst.getRegionServer();
      List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs);
      for (HRegionInfo hri : hris) {
        if (hri.getTable().isSystemTable()) {
          continue;
        }
        LOG.debug("adding data to rs = " + rst.getName() +
            " region = "+ hri.getRegionNameAsString());
        HRegion region = hrs.getOnlineRegion(hri.getRegionName());
        assertTrue(region != null);
        putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
      }
    }
  }

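  /**
   * Appends {@code num_edits} WAL entries of {@code edit_size}-byte values, spread
   * round-robin across the given regions that belong to the named table.
   */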
  public void makeHLog(HLog log, List<HRegionInfo> regions, String tname, String fname,
      int num_edits, int edit_size) throws IOException {
    makeHLog(log, regions, tname, fname, num_edits, edit_size, true);
  }

  public void makeHLog(HLog log, List<HRegionInfo> regions, String tname, String fname,
      int num_edits, int edit_size, boolean closeLog) throws IOException {
    TableName fullTName = TableName.valueOf(tname);
    // Remove the meta region and other system regions; only user regions get edits.
    regions.remove(HRegionInfo.FIRST_META_REGIONINFO);

    for (Iterator<HRegionInfo> iter = regions.iterator(); iter.hasNext(); ) {
      HRegionInfo regionInfo = iter.next();
      if (regionInfo.getTable().isSystemTable()) {
        iter.remove();
      }
    }
    HTableDescriptor htd = new HTableDescriptor(fullTName);
    byte[] family = Bytes.toBytes(fname);
    htd.addFamily(new HColumnDescriptor(family));
    byte[] value = new byte[edit_size];

    List<HRegionInfo> hris = new ArrayList<HRegionInfo>();
    for (HRegionInfo region : regions) {
      if (!region.getTable().getNameAsString().equalsIgnoreCase(tname)) {
        continue;
      }
      hris.add(region);
    }
    LOG.info("Creating wal edits across " + hris.size() + " regions.");
    for (int i = 0; i < edit_size; i++) {
      value[i] = (byte) ('a' + (i % 26));
    }
    int n = hris.size();
    int[] counts = new int[n];
    if (n > 0) {
      for (int i = 0; i < num_edits; i += 1) {
        WALEdit e = new WALEdit();
        HRegionInfo curRegionInfo = hris.get(i % n);
        byte[] startRow = curRegionInfo.getStartKey();
        if (startRow == null || startRow.length == 0) {
          startRow = new byte[] { 0, 0, 0, 0, 1 };
        }
        byte[] row = Bytes.incrementBytes(startRow, counts[i % n]);
        // Use a 5-byte row key to match the split keys created by createMultiRegions.
        row = Arrays.copyOfRange(row, 3, 8);
        byte[] qualifier = Bytes.toBytes("c" + Integer.toString(i));
        e.add(new KeyValue(row, family, qualifier, System.currentTimeMillis(), value));
        log.append(curRegionInfo, fullTName, e, System.currentTimeMillis(), htd);
        counts[i % n] += 1;
      }
    }
    log.sync();
    if (closeLog) {
      log.close();
    }
    for (int i = 0; i < n; i++) {
      LOG.info("region " + hris.get(i).getRegionNameAsString() + " has " + counts[i] + " edits");
    }
  }

  private int countHLog(Path log, FileSystem fs, Configuration conf)
      throws IOException {
    int count = 0;
    HLog.Reader in = HLogFactory.createReader(fs, log, conf);
    while (in.next() != null) {
      count++;
    }
    return count;
  }

  private void blockUntilNoRIT(ZooKeeperWatcher zkw, HMaster master)
      throws KeeperException, InterruptedException {
    ZKAssign.blockUntilNoRIT(zkw);
    master.assignmentManager.waitUntilNoRegionsInTransition(60000);
  }

  private void putData(HRegion region, byte[] startRow, int numRows, byte [] qf,
      byte [] ...families)
      throws IOException {
    for (int i = 0; i < numRows; i++) {
      Put put = new Put(Bytes.add(startRow, Bytes.toBytes(i)));
      for (byte [] family : families) {
        put.add(family, qf, null);
      }
      region.put(put);
    }
  }

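  /**
   * Loads the table with a deterministic set of three-letter row keys and then deletes
   * the 'aa*' rows, so the expected contents can be checksummed before and after
   * recovery.
   */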
  private void prepareData(final HTable t, final byte[] f, final byte[] column) throws IOException {
    t.setAutoFlush(false, true);
    byte[] k = new byte[3];

    // Write a put for every three-letter row key from 'aaa' to 'zzz'.
    for (byte b1 = 'a'; b1 <= 'z'; b1++) {
      for (byte b2 = 'a'; b2 <= 'z'; b2++) {
        for (byte b3 = 'a'; b3 <= 'z'; b3++) {
          k[0] = b1;
          k[1] = b2;
          k[2] = b3;
          Put put = new Put(k);
          put.add(f, column, k);
          t.put(put);
        }
      }
    }
    t.flushCommits();
    // Delete the 'aa?' rows so the table's final contents are well defined.
    for (byte b3 = 'a'; b3 <= 'z'; b3++) {
      k[0] = 'a';
      k[1] = 'a';
      k[2] = b3;
      Delete del = new Delete(k);
      t.delete(del);
    }
    t.flushCommits();
  }

  private NavigableSet<String> getAllOnlineRegions(MiniHBaseCluster cluster)
      throws IOException {
    NavigableSet<String> online = new TreeSet<String>();
    for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
      for (HRegionInfo region : ProtobufUtil.getOnlineRegions(rst.getRegionServer())) {
        online.add(region.getRegionNameAsString());
      }
    }
    return online;
  }

  private void waitForCounter(AtomicLong ctr, long oldval, long newval,
      long timems) {
    long curt = System.currentTimeMillis();
    long endt = curt + timems;
    while (curt < endt) {
      if (ctr.get() == oldval) {
        Thread.yield();
        curt = System.currentTimeMillis();
      } else {
        assertEquals(newval, ctr.get());
        return;
      }
    }
    fail("Timed out waiting for counter to move from " + oldval + " to " + newval);
  }

  private void abortMaster(MiniHBaseCluster cluster) throws InterruptedException {
    for (MasterThread mt : cluster.getLiveMasterThreads()) {
      if (mt.getMaster().isActiveMaster()) {
        mt.getMaster().abort("Aborting for tests", new Exception("Trace info"));
        mt.join();
        break;
      }
    }
    LOG.debug("Master is aborted");
  }

  private void startMasterAndWaitUntilLogSplit(MiniHBaseCluster cluster)
      throws IOException, InterruptedException {
    cluster.startMaster();
    HMaster master = cluster.getMaster();
    while (!master.isInitialized()) {
      Thread.sleep(100);
    }
    ServerManager serverManager = master.getServerManager();
    while (serverManager.areDeadServersInProgress()) {
      Thread.sleep(100);
    }
  }

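  /**
   * Finds a region server suitable for killing in the tests above.
   *
   * @param hasMetaRegion when true, return the server carrying hbase:meta (moving a
   *          region of {@code tableName} onto it first if necessary); when false,
   *          return a server that carries regions of {@code tableName} but not hbase:meta.
   * @param tableName the table whose regions the server should host, or null for any region.
   */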
  private HRegionServer findRSToKill(boolean hasMetaRegion, String tableName) throws Exception {
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    int numOfRSs = rsts.size();
    List<HRegionInfo> regions = null;
    HRegionServer hrs = null;

    for (int i = 0; i < numOfRSs; i++) {
      boolean isCarryingMeta = false;
      boolean foundTableRegion = false;
      hrs = rsts.get(i).getRegionServer();
      regions = ProtobufUtil.getOnlineRegions(hrs);
      for (HRegionInfo region : regions) {
        if (region.isMetaRegion()) {
          isCarryingMeta = true;
        }
        if (tableName == null || region.getTable().getNameAsString().equals(tableName)) {
          foundTableRegion = true;
        }
        if (foundTableRegion && (isCarryingMeta || !hasMetaRegion)) {
          break;
        }
      }
      if (isCarryingMeta && hasMetaRegion) {
        // This is the server carrying hbase:meta.
        if (!foundTableRegion) {
          // It has no region of the requested table yet, so move one onto it.
          final HRegionServer destRS = hrs;
          List<HRegionInfo> tableRegions =
              TEST_UTIL.getHBaseAdmin().getTableRegions(Bytes.toBytes(tableName));
          final HRegionInfo hri = tableRegions.get(0);
          TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
              Bytes.toBytes(destRS.getServerName().getServerName()));
          // Wait for the move to complete.
          final RegionStates regionStates =
              TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
          TEST_UTIL.waitFor(45000, 200, new Waiter.Predicate<Exception>() {
            @Override
            public boolean evaluate() throws Exception {
              ServerName sn = regionStates.getRegionServerOfRegion(hri);
              return (sn != null && sn.equals(destRS.getServerName()));
            }
          });
        }
        return hrs;
      } else if (hasMetaRegion || isCarryingMeta) {
        continue;
      }
      if (foundTableRegion) break;
    }

    return hrs;
  }

}