1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master;
20
21 import static org.apache.hadoop.hbase.SplitLogCounters.tot_mgr_wait_for_zk_delete;
22 import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_final_transition_failed;
23 import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_preempt_task;
24 import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_acquired;
25 import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_done;
26 import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_err;
27 import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_resigned;
28 import static org.junit.Assert.assertEquals;
29 import static org.junit.Assert.assertFalse;
30 import static org.junit.Assert.assertTrue;
31 import static org.junit.Assert.fail;
32
33 import java.io.IOException;
34 import java.util.ArrayList;
35 import java.util.Arrays;
36 import java.util.HashSet;
37 import java.util.Iterator;
38 import java.util.List;
39 import java.util.NavigableSet;
40 import java.util.Set;
41 import java.util.TreeSet;
42 import java.util.concurrent.ExecutorService;
43 import java.util.concurrent.Executors;
44 import java.util.concurrent.Future;
45 import java.util.concurrent.TimeUnit;
46 import java.util.concurrent.TimeoutException;
47 import java.util.concurrent.atomic.AtomicLong;
48
49 import org.apache.commons.logging.Log;
50 import org.apache.commons.logging.LogFactory;
51 import org.apache.hadoop.conf.Configuration;
52 import org.apache.hadoop.fs.FSDataOutputStream;
53 import org.apache.hadoop.fs.FileStatus;
54 import org.apache.hadoop.fs.FileSystem;
55 import org.apache.hadoop.fs.Path;
56 import org.apache.hadoop.hbase.TableName;
57 import org.apache.hadoop.hbase.HBaseConfiguration;
58 import org.apache.hadoop.hbase.HBaseTestingUtility;
59 import org.apache.hadoop.hbase.HConstants;
60 import org.apache.hadoop.hbase.HRegionInfo;
61 import org.apache.hadoop.hbase.HTableDescriptor;
62 import org.apache.hadoop.hbase.KeyValue;
63 import org.apache.hadoop.hbase.LargeTests;
64 import org.apache.hadoop.hbase.MiniHBaseCluster;
65 import org.apache.hadoop.hbase.NamespaceDescriptor;
66 import org.apache.hadoop.hbase.ServerName;
67 import org.apache.hadoop.hbase.SplitLogCounters;
68 import org.apache.hadoop.hbase.Waiter;
69 import org.apache.hadoop.hbase.client.Delete;
70 import org.apache.hadoop.hbase.client.HTable;
71 import org.apache.hadoop.hbase.client.Put;
72 import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
73 import org.apache.hadoop.hbase.exceptions.RegionInRecoveryException;
74 import org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch;
75 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
76 import org.apache.hadoop.hbase.regionserver.HRegion;
77 import org.apache.hadoop.hbase.regionserver.HRegionServer;
78 import org.apache.hadoop.hbase.regionserver.wal.HLog;
79 import org.apache.hadoop.hbase.regionserver.wal.HLogFactory;
80 import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
81 import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
82 import org.apache.hadoop.hbase.util.Bytes;
83 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
84 import org.apache.hadoop.hbase.util.FSUtils;
85 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
86 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
87 import org.apache.hadoop.hbase.util.Threads;
88 import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
89 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
90 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
91 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
92 import org.apache.hadoop.hdfs.MiniDFSCluster;
93 import org.apache.log4j.Level;
94 import org.apache.log4j.Logger;
95 import org.apache.zookeeper.KeeperException;
96 import org.junit.After;
97 import org.junit.AfterClass;
98 import org.junit.Assert;
99 import org.junit.BeforeClass;
100 import org.junit.Test;
101 import org.junit.experimental.categories.Category;
102
103 @Category(LargeTests.class)
104 public class TestDistributedLogSplitting {
// Logger for this test class (previously created for TestSplitLogManager by mistake, which
// attributed this test's output to the wrong class).
private static final Log LOG = LogFactory.getLog(TestDistributedLogSplitting.class);
static {
  // Emit DEBUG-level output for every logger under the HBase package.
  Logger.getLogger("org.apache.hadoop.hbase").setLevel(Level.DEBUG);

  // Sets the "hbase.tests.use.shortcircuit.reads" system property to "false" before any
  // cluster is started. NOTE(review): the reason is not visible in this file -- confirm.
  System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
}
114
115
116 static final int NUM_MASTERS = 2;
117 static final int NUM_RS = 6;
118
119 MiniHBaseCluster cluster;
120 HMaster master;
121 Configuration conf;
122 static HBaseTestingUtility TEST_UTIL;
123 static MiniDFSCluster dfsCluster;
124 static MiniZooKeeperCluster zkCluster;
125
126 @BeforeClass
127 public static void setup() throws Exception {
128 TEST_UTIL = new HBaseTestingUtility(HBaseConfiguration.create());
129 dfsCluster = TEST_UTIL.startMiniDFSCluster(1);
130 zkCluster = TEST_UTIL.startMiniZKCluster();
131 }
132
133 @AfterClass
134 public static void tearDown() throws IOException {
135 TEST_UTIL.shutdownMiniZKCluster();
136 TEST_UTIL.shutdownMiniDFSCluster();
137 }
138
139 private void startCluster(int num_rs) throws Exception{
140 conf = HBaseConfiguration.create();
141 startCluster(num_rs, conf);
142 }
143
144 private void startCluster(int num_rs, Configuration inConf) throws Exception {
145 SplitLogCounters.resetCounters();
146 LOG.info("Starting cluster");
147 this.conf = inConf;
148 conf.getLong("hbase.splitlog.max.resubmit", 0);
149
150 conf.setInt("zookeeper.recovery.retry", 0);
151 conf.setInt(HConstants.REGIONSERVER_INFO_PORT, -1);
152 conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, (float) 100.0);
153 TEST_UTIL = new HBaseTestingUtility(conf);
154 TEST_UTIL.setDFSCluster(dfsCluster);
155 TEST_UTIL.setZkCluster(zkCluster);
156 TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, num_rs);
157 cluster = TEST_UTIL.getHBaseCluster();
158 LOG.info("Waiting for active/ready master");
159 cluster.waitForActiveAndReadyMaster();
160 master = cluster.getMaster();
161 while (cluster.getLiveRegionServerThreads().size() < num_rs) {
162 Threads.sleep(1);
163 }
164 }
165
166 @After
167 public void after() throws Exception {
168 for (MasterThread mt : TEST_UTIL.getHBaseCluster().getLiveMasterThreads()) {
169 mt.getMaster().abort("closing...", new Exception("Trace info"));
170 }
171
172 TEST_UTIL.shutdownMiniHBaseCluster();
173 TEST_UTIL.getTestFileSystem().delete(FSUtils.getRootDir(TEST_UTIL.getConfiguration()), true);
174 ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
175 }
176
177 @Test (timeout=300000)
178 public void testRecoveredEdits() throws Exception {
179 LOG.info("testRecoveredEdits");
180 Configuration curConf = HBaseConfiguration.create();
181 curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
182 startCluster(NUM_RS, curConf);
183
184 final int NUM_LOG_LINES = 1000;
185 final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
186
187
188 master.balanceSwitch(false);
189 FileSystem fs = master.getMasterFileSystem().getFileSystem();
190
191 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
192
193 Path rootdir = FSUtils.getRootDir(conf);
194
195 installTable(new ZooKeeperWatcher(conf, "table-creation", null),
196 "table", "family", 40);
197 TableName table = TableName.valueOf("table");
198 List<HRegionInfo> regions = null;
199 HRegionServer hrs = null;
200 for (int i = 0; i < NUM_RS; i++) {
201 boolean foundRs = false;
202 hrs = rsts.get(i).getRegionServer();
203 regions = ProtobufUtil.getOnlineRegions(hrs);
204 for (HRegionInfo region : regions) {
205 if (region.getTableName().getNameAsString().equalsIgnoreCase("table")) {
206 foundRs = true;
207 break;
208 }
209 }
210 if (foundRs) break;
211 }
212 final Path logDir = new Path(rootdir, HLogUtil.getHLogDirectoryName(hrs
213 .getServerName().toString()));
214
215 LOG.info("#regions = " + regions.size());
216 Iterator<HRegionInfo> it = regions.iterator();
217 while (it.hasNext()) {
218 HRegionInfo region = it.next();
219 if (region.getTableName().getNamespaceAsString()
220 .equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
221 it.remove();
222 }
223 }
224 makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
225
226 slm.splitLogDistributed(logDir);
227
228 int count = 0;
229 for (HRegionInfo hri : regions) {
230
231 Path tdir = FSUtils.getTableDir(rootdir, table);
232 @SuppressWarnings("deprecation")
233 Path editsdir =
234 HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
235 LOG.debug("checking edits dir " + editsdir);
236 FileStatus[] files = fs.listStatus(editsdir);
237 assertEquals(1, files.length);
238 int c = countHLog(files[0].getPath(), fs, conf);
239 count += c;
240 LOG.info(c + " edits in " + files[0].getPath());
241 }
242 assertEquals(NUM_LOG_LINES, count);
243 }
244
245 @Test(timeout = 300000)
246 public void testLogReplayWithNonMetaRSDown() throws Exception {
247 LOG.info("testLogReplayWithNonMetaRSDown");
248 Configuration curConf = HBaseConfiguration.create();
249 curConf.setLong("hbase.regionserver.hlog.blocksize", 100*1024);
250 curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
251 startCluster(NUM_RS, curConf);
252 final int NUM_REGIONS_TO_CREATE = 40;
253 final int NUM_LOG_LINES = 1000;
254
255
256 master.balanceSwitch(false);
257
258 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
259 final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
260 HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
261
262 HRegionServer hrs = findRSToKill(false, "table");
263 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
264 makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
265
266
267 this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
268 ht.close();
269 zkw.close();
270 }
271
272 @Test(timeout = 300000)
273 public void testLogReplayWithMetaRSDown() throws Exception {
274 LOG.info("testRecoveredEditsReplayWithMetaRSDown");
275 Configuration curConf = HBaseConfiguration.create();
276 curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
277 startCluster(NUM_RS, curConf);
278 final int NUM_REGIONS_TO_CREATE = 40;
279 final int NUM_LOG_LINES = 1000;
280
281
282 master.balanceSwitch(false);
283
284 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
285 final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
286 HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
287
288 HRegionServer hrs = findRSToKill(true, "table");
289 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
290 makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
291
292 this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
293 ht.close();
294 zkw.close();
295 }
296
297 private void abortRSAndVerifyRecovery(HRegionServer hrs, HTable ht, final ZooKeeperWatcher zkw,
298 final int numRegions, final int numofLines) throws Exception {
299
300 abortRSAndWaitForRecovery(hrs, zkw, numRegions);
301 assertEquals(numofLines, TEST_UTIL.countRows(ht));
302 }
303
304 private void abortRSAndWaitForRecovery(HRegionServer hrs, final ZooKeeperWatcher zkw,
305 final int numRegions) throws Exception {
306 final MiniHBaseCluster tmpCluster = this.cluster;
307
308
309 LOG.info("Aborting region server: " + hrs.getServerName());
310 hrs.abort("testing");
311
312
313 TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
314 @Override
315 public boolean evaluate() throws Exception {
316 return (tmpCluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
317 }
318 });
319
320
321 TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
322 @Override
323 public boolean evaluate() throws Exception {
324 return (getAllOnlineRegions(tmpCluster).size() >= (numRegions + 1));
325 }
326 });
327
328
329 TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
330 @Override
331 public boolean evaluate() throws Exception {
332 List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
333 zkw.recoveringRegionsZNode, false);
334 return (recoveringRegions != null && recoveringRegions.size() == 0);
335 }
336 });
337 }
338
339 @Test(timeout = 300000)
340 public void testMasterStartsUpWithLogSplittingWork() throws Exception {
341 LOG.info("testMasterStartsUpWithLogSplittingWork");
342 Configuration curConf = HBaseConfiguration.create();
343 curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
344 curConf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
345 startCluster(NUM_RS, curConf);
346
347 final int NUM_REGIONS_TO_CREATE = 40;
348 final int NUM_LOG_LINES = 1000;
349
350
351 master.balanceSwitch(false);
352
353 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
354 final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
355 HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
356
357 HRegionServer hrs = findRSToKill(false, "table");
358 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
359 makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
360
361
362 abortMaster(cluster);
363
364
365 LOG.info("Aborting region server: " + hrs.getServerName());
366 hrs.abort("testing");
367
368
369 TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
370 @Override
371 public boolean evaluate() throws Exception {
372 return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
373 }
374 });
375
376 Thread.sleep(2000);
377 LOG.info("Current Open Regions:" + getAllOnlineRegions(cluster).size());
378
379 startMasterAndWaitUntilLogSplit(cluster);
380
381
382 TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
383 @Override
384 public boolean evaluate() throws Exception {
385 return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
386 }
387 });
388
389 LOG.info("Current Open Regions After Master Node Starts Up:"
390 + getAllOnlineRegions(cluster).size());
391
392 assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
393
394 ht.close();
395 zkw.close();
396 }
397
398 @Test(timeout = 300000)
399 public void testMasterStartsUpWithLogReplayWork() throws Exception {
400 LOG.info("testMasterStartsUpWithLogReplayWork");
401 Configuration curConf = HBaseConfiguration.create();
402 curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
403 curConf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
404 startCluster(NUM_RS, curConf);
405
406 final int NUM_REGIONS_TO_CREATE = 40;
407 final int NUM_LOG_LINES = 1000;
408
409
410 master.balanceSwitch(false);
411
412 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
413 final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
414 HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
415
416 HRegionServer hrs = findRSToKill(false, "table");
417 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
418 makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
419
420
421 abortMaster(cluster);
422
423
424 LOG.info("Aborting region server: " + hrs.getServerName());
425 hrs.abort("testing");
426
427
428 TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
429 @Override
430 public boolean evaluate() throws Exception {
431 return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
432 }
433 });
434
435 Thread.sleep(2000);
436 LOG.info("Current Open Regions:" + getAllOnlineRegions(cluster).size());
437
438 startMasterAndWaitUntilLogSplit(cluster);
439
440
441 TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
442 @Override
443 public boolean evaluate() throws Exception {
444 List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
445 zkw.recoveringRegionsZNode, false);
446 return (recoveringRegions != null && recoveringRegions.size() == 0);
447 }
448 });
449
450 LOG.info("Current Open Regions After Master Node Starts Up:"
451 + getAllOnlineRegions(cluster).size());
452
453 assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
454
455 ht.close();
456 zkw.close();
457 }
458
459
460 @Test(timeout = 300000)
461 public void testLogReplayTwoSequentialRSDown() throws Exception {
462 LOG.info("testRecoveredEditsReplayTwoSequentialRSDown");
463 Configuration curConf = HBaseConfiguration.create();
464 curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
465 startCluster(NUM_RS, curConf);
466 final int NUM_REGIONS_TO_CREATE = 40;
467 final int NUM_LOG_LINES = 1000;
468
469
470 master.balanceSwitch(false);
471
472 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
473 final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
474 HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
475
476 List<HRegionInfo> regions = null;
477 HRegionServer hrs1 = findRSToKill(false, "table");
478 regions = ProtobufUtil.getOnlineRegions(hrs1);
479
480 makeHLog(hrs1.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
481
482
483 LOG.info("Aborting region server: " + hrs1.getServerName());
484 hrs1.abort("testing");
485
486
487 TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
488 @Override
489 public boolean evaluate() throws Exception {
490 return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
491 }
492 });
493
494
495 TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
496 @Override
497 public boolean evaluate() throws Exception {
498 return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
499 }
500 });
501
502
503 Thread.sleep(300);
504
505 rsts = cluster.getLiveRegionServerThreads();
506 HRegionServer hrs2 = rsts.get(0).getRegionServer();
507 LOG.info("Aborting one more region server: " + hrs2.getServerName());
508 hrs2.abort("testing");
509
510
511 TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
512 @Override
513 public boolean evaluate() throws Exception {
514 return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 2));
515 }
516 });
517
518
519 TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
520 @Override
521 public boolean evaluate() throws Exception {
522 return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
523 }
524 });
525
526
527 TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
528 @Override
529 public boolean evaluate() throws Exception {
530 List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
531 zkw.recoveringRegionsZNode, false);
532 return (recoveringRegions != null && recoveringRegions.size() == 0);
533 }
534 });
535
536 assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
537 ht.close();
538 zkw.close();
539 }
540
541 @Test(timeout = 300000)
542 public void testMarkRegionsRecoveringInZK() throws Exception {
543 LOG.info("testMarkRegionsRecoveringInZK");
544 Configuration curConf = HBaseConfiguration.create();
545 curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
546 startCluster(NUM_RS, curConf);
547 master.balanceSwitch(false);
548 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
549 final ZooKeeperWatcher zkw = master.getZooKeeperWatcher();
550 HTable ht = installTable(zkw, "table", "family", 40);
551 final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
552
553 Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
554 HRegionInfo region = null;
555 HRegionServer hrs = null;
556 ServerName firstFailedServer = null;
557 ServerName secondFailedServer = null;
558 for (int i = 0; i < NUM_RS; i++) {
559 hrs = rsts.get(i).getRegionServer();
560 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
561 if (regions.isEmpty()) continue;
562 region = regions.get(0);
563 regionSet.add(region);
564 firstFailedServer = hrs.getServerName();
565 secondFailedServer = rsts.get((i + 1) % NUM_RS).getRegionServer().getServerName();
566 break;
567 }
568
569 slm.markRegionsRecoveringInZK(firstFailedServer, regionSet);
570 slm.markRegionsRecoveringInZK(secondFailedServer, regionSet);
571
572 List<String> recoveringRegions = ZKUtil.listChildrenNoWatch(zkw,
573 ZKUtil.joinZNode(zkw.recoveringRegionsZNode, region.getEncodedName()));
574
575 assertEquals(recoveringRegions.size(), 2);
576
577
578 final HRegionServer tmphrs = hrs;
579 TEST_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
580 @Override
581 public boolean evaluate() throws Exception {
582 return (tmphrs.getRecoveringRegions().size() == 0);
583 }
584 });
585 ht.close();
586 zkw.close();
587 }
588
589 @Test(timeout = 300000)
590 public void testReplayCmd() throws Exception {
591 LOG.info("testReplayCmd");
592 Configuration curConf = HBaseConfiguration.create();
593 curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
594 startCluster(NUM_RS, curConf);
595 final int NUM_REGIONS_TO_CREATE = 40;
596
597
598 master.balanceSwitch(false);
599
600 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
601 final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
602 HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
603
604 List<HRegionInfo> regions = null;
605 HRegionServer hrs = null;
606 for (int i = 0; i < NUM_RS; i++) {
607 boolean isCarryingMeta = false;
608 hrs = rsts.get(i).getRegionServer();
609 regions = ProtobufUtil.getOnlineRegions(hrs);
610 for (HRegionInfo region : regions) {
611 if (region.isMetaRegion()) {
612 isCarryingMeta = true;
613 break;
614 }
615 }
616 if (isCarryingMeta) {
617 continue;
618 }
619 if (regions.size() > 0) break;
620 }
621
622 this.prepareData(ht, Bytes.toBytes("family"), Bytes.toBytes("c1"));
623 String originalCheckSum = TEST_UTIL.checksumRows(ht);
624
625
626 abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
627
628 assertEquals("Data should remain after reopening of regions", originalCheckSum,
629 TEST_UTIL.checksumRows(ht));
630
631 ht.close();
632 zkw.close();
633 }
634
635 @Test(timeout = 300000)
636 public void testLogReplayForDisablingTable() throws Exception {
637 LOG.info("testLogReplayForDisablingTable");
638 Configuration curConf = HBaseConfiguration.create();
639 curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
640 startCluster(NUM_RS, curConf);
641 final int NUM_REGIONS_TO_CREATE = 40;
642 final int NUM_LOG_LINES = 1000;
643
644 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
645 final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
646 HTable disablingHT = installTable(zkw, "disableTable", "family", NUM_REGIONS_TO_CREATE);
647 HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE, NUM_REGIONS_TO_CREATE);
648
649
650
651 master.balanceSwitch(false);
652
653 List<HRegionInfo> regions = null;
654 HRegionServer hrs = null;
655 boolean hasRegionsForBothTables = false;
656 String tableName = null;
657 for (int i = 0; i < NUM_RS; i++) {
658 tableName = null;
659 hasRegionsForBothTables = false;
660 boolean isCarryingMeta = false;
661 hrs = rsts.get(i).getRegionServer();
662 regions = ProtobufUtil.getOnlineRegions(hrs);
663 for (HRegionInfo region : regions) {
664 if (region.isMetaRegion()) {
665 isCarryingMeta = true;
666 break;
667 }
668 if (tableName != null &&
669 !tableName.equalsIgnoreCase(region.getTableName().getNameAsString())) {
670
671 hasRegionsForBothTables = true;
672 break;
673 } else if (tableName == null) {
674 tableName = region.getTableName().getNameAsString();
675 }
676 }
677 if (isCarryingMeta) {
678 continue;
679 }
680 if (hasRegionsForBothTables) {
681 break;
682 }
683 }
684
685
686 Assert.assertTrue(hasRegionsForBothTables);
687
688 LOG.info("#regions = " + regions.size());
689 Iterator<HRegionInfo> it = regions.iterator();
690 while (it.hasNext()) {
691 HRegionInfo region = it.next();
692 if (region.isMetaTable()) {
693 it.remove();
694 }
695 }
696 makeHLog(hrs.getWAL(), regions, "disableTable", "family", NUM_LOG_LINES, 100, false);
697 makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
698
699 LOG.info("Disabling table\n");
700 TEST_UTIL.getHBaseAdmin().disableTable(Bytes.toBytes("disableTable"));
701
702
703 LOG.info("Aborting region server: " + hrs.getServerName());
704 hrs.abort("testing");
705
706
707 TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
708 @Override
709 public boolean evaluate() throws Exception {
710 return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
711 }
712 });
713
714
715 TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
716 @Override
717 public boolean evaluate() throws Exception {
718 return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
719 }
720 });
721
722
723 TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
724 @Override
725 public boolean evaluate() throws Exception {
726 List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
727 zkw.recoveringRegionsZNode, false);
728 return (recoveringRegions != null && recoveringRegions.size() == 0);
729 }
730 });
731
732 int count = 0;
733 FileSystem fs = master.getMasterFileSystem().getFileSystem();
734 Path rootdir = FSUtils.getRootDir(conf);
735 Path tdir = FSUtils.getTableDir(rootdir, TableName.valueOf("disableTable"));
736 for (HRegionInfo hri : regions) {
737 @SuppressWarnings("deprecation")
738 Path editsdir =
739 HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
740 LOG.debug("checking edits dir " + editsdir);
741 if(!fs.exists(editsdir)) continue;
742 FileStatus[] files = fs.listStatus(editsdir);
743 if(files != null) {
744 for(FileStatus file : files) {
745 int c = countHLog(file.getPath(), fs, conf);
746 count += c;
747 LOG.info(c + " edits in " + file.getPath());
748 }
749 }
750 }
751
752 LOG.info("Verify edits in recovered.edits files");
753 assertEquals(NUM_LOG_LINES, count);
754 LOG.info("Verify replayed edits");
755 assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
756
757
758 for (HRegionInfo hri : regions) {
759 @SuppressWarnings("deprecation")
760 Path editsdir =
761 HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
762 fs.delete(editsdir, true);
763 }
764 disablingHT.close();
765 ht.close();
766 zkw.close();
767 }
768
769 @Test(timeout = 300000)
770 public void testDisallowWritesInRecovering() throws Exception {
771 LOG.info("testDisallowWritesInRecovering");
772 Configuration curConf = HBaseConfiguration.create();
773 curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
774 curConf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
775 curConf.setBoolean(HConstants.DISALLOW_WRITES_IN_RECOVERING, true);
776 startCluster(NUM_RS, curConf);
777 final int NUM_REGIONS_TO_CREATE = 40;
778 final int NUM_LOG_LINES = 20000;
779
780
781 master.balanceSwitch(false);
782
783 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
784 final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
785 HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
786
787 HRegionServer hrs = findRSToKill(false, "table");
788 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
789 makeHLog(hrs.getWAL(), regions, "table", "family", NUM_LOG_LINES, 100);
790
791
792 LOG.info("Aborting region server: " + hrs.getServerName());
793 hrs.abort("testing");
794
795
796 TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
797 @Override
798 public boolean evaluate() throws Exception {
799 return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
800 }
801 });
802
803
804 TEST_UTIL.waitFor(180000, 100, new Waiter.Predicate<Exception>() {
805 @Override
806 public boolean evaluate() throws Exception {
807 return (getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
808 }
809 });
810
811 try {
812 HRegionInfo region = regions.get(0);
813 byte[] key = region.getStartKey();
814 if (key == null || key.length == 0) {
815 key = new byte[] { 0, 0, 0, 0, 1 };
816 }
817 ht.setAutoFlush(true);
818 Put put = new Put(key);
819 put.add(Bytes.toBytes("family"), Bytes.toBytes("c1"), new byte[]{'b'});
820 ht.put(put);
821 } catch (IOException ioe) {
822 Assert.assertTrue(ioe instanceof RetriesExhaustedWithDetailsException);
823 RetriesExhaustedWithDetailsException re = (RetriesExhaustedWithDetailsException) ioe;
824 Assert.assertTrue(re.getCause(0) instanceof RegionInRecoveryException);
825 }
826
827 ht.close();
828 zkw.close();
829 }
830
831
832
833
834
835
836
837
838
839
840 @Test (timeout=300000)
841 public void testWorkerAbort() throws Exception {
842 LOG.info("testWorkerAbort");
843 startCluster(3);
844 final int NUM_LOG_LINES = 10000;
845 final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
846 FileSystem fs = master.getMasterFileSystem().getFileSystem();
847
848 final List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
849 HRegionServer hrs = findRSToKill(false, "table");
850 Path rootdir = FSUtils.getRootDir(conf);
851 final Path logDir = new Path(rootdir,
852 HLogUtil.getHLogDirectoryName(hrs.getServerName().toString()));
853
854 installTable(new ZooKeeperWatcher(conf, "table-creation", null),
855 "table", "family", 40);
856
857 makeHLog(hrs.getWAL(), ProtobufUtil.getOnlineRegions(hrs), "table", "family", NUM_LOG_LINES,
858 100);
859
860 new Thread() {
861 public void run() {
862 waitForCounter(tot_wkr_task_acquired, 0, 1, 1000);
863 for (RegionServerThread rst : rsts) {
864 rst.getRegionServer().abort("testing");
865 break;
866 }
867 }
868 }.start();
869
870 FileStatus[] logfiles = fs.listStatus(logDir);
871 TaskBatch batch = new TaskBatch();
872 slm.enqueueSplitTask(logfiles[0].getPath().toString(), batch);
873
874 long curt = System.currentTimeMillis();
875 long waitTime = 80000;
876 long endt = curt + waitTime;
877 while (curt < endt) {
878 if ((tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
879 tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
880 tot_wkr_preempt_task.get()) == 0) {
881 Thread.yield();
882 curt = System.currentTimeMillis();
883 } else {
884 assertEquals(1, (tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
885 tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
886 tot_wkr_preempt_task.get()));
887 return;
888 }
889 }
890 fail("none of the following counters went up in " + waitTime +
891 " milliseconds - " +
892 "tot_wkr_task_resigned, tot_wkr_task_err, " +
893 "tot_wkr_final_transition_failed, tot_wkr_task_done, " +
894 "tot_wkr_preempt_task");
895 }
896
897 @Test (timeout=300000)
898 public void testThreeRSAbort() throws Exception {
899 LOG.info("testThreeRSAbort");
900 final int NUM_REGIONS_TO_CREATE = 40;
901 final int NUM_ROWS_PER_REGION = 100;
902
903 startCluster(NUM_RS);
904
905 final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf,
906 "distributed log splitting test", null);
907
908 HTable ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
909 populateDataInTable(NUM_ROWS_PER_REGION, "family");
910
911
912 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
913 assertEquals(NUM_RS, rsts.size());
914 rsts.get(0).getRegionServer().abort("testing");
915 rsts.get(1).getRegionServer().abort("testing");
916 rsts.get(2).getRegionServer().abort("testing");
917
918 long start = EnvironmentEdgeManager.currentTimeMillis();
919 while (cluster.getLiveRegionServerThreads().size() > (NUM_RS - 3)) {
920 if (EnvironmentEdgeManager.currentTimeMillis() - start > 60000) {
921 assertTrue(false);
922 }
923 Thread.sleep(200);
924 }
925
926 start = EnvironmentEdgeManager.currentTimeMillis();
927 while (getAllOnlineRegions(cluster).size() < (NUM_REGIONS_TO_CREATE + 1)) {
928 if (EnvironmentEdgeManager.currentTimeMillis() - start > 60000) {
929 assertTrue("Timedout", false);
930 }
931 Thread.sleep(200);
932 }
933
934
935 TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
936 @Override
937 public boolean evaluate() throws Exception {
938 List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
939 zkw.recoveringRegionsZNode, false);
940 return (recoveringRegions != null && recoveringRegions.size() == 0);
941 }
942 });
943
944 assertEquals(NUM_REGIONS_TO_CREATE * NUM_ROWS_PER_REGION,
945 TEST_UTIL.countRows(ht));
946 ht.close();
947 zkw.close();
948 }
949
950
951
952 @Test(timeout=30000)
953 public void testDelayedDeleteOnFailure() throws Exception {
954 LOG.info("testDelayedDeleteOnFailure");
955 startCluster(1);
956 final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
957 final FileSystem fs = master.getMasterFileSystem().getFileSystem();
958 final Path logDir = new Path(FSUtils.getRootDir(conf), "x");
959 fs.mkdirs(logDir);
960 ExecutorService executor = null;
961 try {
962 final Path corruptedLogFile = new Path(logDir, "x");
963 FSDataOutputStream out;
964 out = fs.create(corruptedLogFile);
965 out.write(0);
966 out.write(Bytes.toBytes("corrupted bytes"));
967 out.close();
968 slm.ignoreZKDeleteForTesting = true;
969 executor = Executors.newSingleThreadExecutor();
970 Runnable runnable = new Runnable() {
971 @Override
972 public void run() {
973 try {
974
975
976
977 slm.splitLogDistributed(logDir);
978 } catch (IOException ioe) {
979 try {
980 assertTrue(fs.exists(corruptedLogFile));
981
982
983
984 slm.splitLogDistributed(logDir);
985 } catch (IOException e) {
986 assertTrue(Thread.currentThread().isInterrupted());
987 return;
988 }
989 fail("did not get the expected IOException from the 2nd call");
990 }
991 fail("did not get the expected IOException from the 1st call");
992 }
993 };
994 Future<?> result = executor.submit(runnable);
995 try {
996 result.get(2000, TimeUnit.MILLISECONDS);
997 } catch (TimeoutException te) {
998
999 }
1000 waitForCounter(tot_mgr_wait_for_zk_delete, 0, 1, 10000);
1001 executor.shutdownNow();
1002 executor = null;
1003
1004
1005 result.get();
1006 } finally {
1007 if (executor != null) {
1008
1009
1010 executor.shutdownNow();
1011 }
1012 fs.delete(logDir, true);
1013 }
1014 }
1015
1016 @Test(timeout = 300000)
1017 public void testMetaRecoveryInZK() throws Exception {
1018 LOG.info("testMetaRecoveryInZK");
1019 Configuration curConf = HBaseConfiguration.create();
1020 curConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
1021 startCluster(NUM_RS, curConf);
1022
1023
1024
1025 master.balanceSwitch(false);
1026 final ZooKeeperWatcher zkw = new ZooKeeperWatcher(curConf, "table-creation", null);
1027 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1028
1029
1030 HRegionServer hrs = findRSToKill(true, null);
1031 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs);
1032
1033 LOG.info("#regions = " + regions.size());
1034 Set<HRegionInfo> tmpRegions = new HashSet<HRegionInfo>();
1035 tmpRegions.add(HRegionInfo.FIRST_META_REGIONINFO);
1036 master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), tmpRegions);
1037 Set<HRegionInfo> userRegionSet = new HashSet<HRegionInfo>();
1038 userRegionSet.addAll(regions);
1039 master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), userRegionSet);
1040 boolean isMetaRegionInRecovery = false;
1041 List<String> recoveringRegions =
1042 zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
1043 for (String curEncodedRegionName : recoveringRegions) {
1044 if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1045 isMetaRegionInRecovery = true;
1046 break;
1047 }
1048 }
1049 assertTrue(isMetaRegionInRecovery);
1050
1051 master.getMasterFileSystem().splitMetaLog(hrs.getServerName());
1052
1053 isMetaRegionInRecovery = false;
1054 recoveringRegions =
1055 zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
1056 for (String curEncodedRegionName : recoveringRegions) {
1057 if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1058 isMetaRegionInRecovery = true;
1059 break;
1060 }
1061 }
1062
1063 assertFalse(isMetaRegionInRecovery);
1064 zkw.close();
1065 }
1066
1067 HTable installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs) throws Exception {
1068 return installTable(zkw, tname, fname, nrs, 0);
1069 }
1070
1071 HTable installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs,
1072 int existingRegions) throws Exception {
1073
1074 byte [] table = Bytes.toBytes(tname);
1075 byte [] family = Bytes.toBytes(fname);
1076 LOG.info("Creating table with " + nrs + " regions");
1077 HTable ht = TEST_UTIL.createTable(table, family);
1078 int numRegions = TEST_UTIL.createMultiRegions(conf, ht, family, nrs);
1079 assertEquals(nrs, numRegions);
1080 LOG.info("Waiting for no more RIT\n");
1081 blockUntilNoRIT(zkw, master);
1082
1083
1084 LOG.debug("Disabling table\n");
1085 TEST_UTIL.getHBaseAdmin().disableTable(table);
1086 LOG.debug("Waiting for no more RIT\n");
1087 blockUntilNoRIT(zkw, master);
1088 NavigableSet<String> regions = getAllOnlineRegions(cluster);
1089 LOG.debug("Verifying only catalog and namespace regions are assigned\n");
1090 if (regions.size() != 2) {
1091 for (String oregion : regions)
1092 LOG.debug("Region still online: " + oregion);
1093 }
1094 assertEquals(2 + existingRegions, regions.size());
1095 LOG.debug("Enabling table\n");
1096 TEST_UTIL.getHBaseAdmin().enableTable(table);
1097 LOG.debug("Waiting for no more RIT\n");
1098 blockUntilNoRIT(zkw, master);
1099 LOG.debug("Verifying there are " + numRegions + " assigned on cluster\n");
1100 regions = getAllOnlineRegions(cluster);
1101 assertEquals(numRegions + 2 + existingRegions, regions.size());
1102 return ht;
1103 }
1104
1105 void populateDataInTable(int nrows, String fname) throws Exception {
1106 byte [] family = Bytes.toBytes(fname);
1107
1108 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1109 assertEquals(NUM_RS, rsts.size());
1110
1111 for (RegionServerThread rst : rsts) {
1112 HRegionServer hrs = rst.getRegionServer();
1113 List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs);
1114 for (HRegionInfo hri : hris) {
1115 if (HTableDescriptor.isSystemTable(hri.getTableName())) {
1116 continue;
1117 }
1118 LOG.debug("adding data to rs = " + rst.getName() +
1119 " region = "+ hri.getRegionNameAsString());
1120 HRegion region = hrs.getOnlineRegion(hri.getRegionName());
1121 assertTrue(region != null);
1122 putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
1123 }
1124 }
1125 }
1126
1127 public void makeHLog(HLog log, List<HRegionInfo> regions, String tname, String fname,
1128 int num_edits, int edit_size) throws IOException {
1129 makeHLog(log, regions, tname, fname, num_edits, edit_size, true);
1130 }
1131
1132 public void makeHLog(HLog log, List<HRegionInfo> regions, String tname, String fname,
1133 int num_edits, int edit_size, boolean closeLog) throws IOException {
1134 TableName fullTName = TableName.valueOf(tname);
1135
1136 regions.remove(HRegionInfo.FIRST_META_REGIONINFO);
1137
1138 for(Iterator<HRegionInfo> iter = regions.iterator(); iter.hasNext(); ) {
1139 HRegionInfo regionInfo = iter.next();
1140 if(HTableDescriptor.isSystemTable(regionInfo.getTableName())) {
1141 iter.remove();
1142 }
1143 }
1144 HTableDescriptor htd = new HTableDescriptor(fullTName);
1145 byte[] value = new byte[edit_size];
1146
1147 List<HRegionInfo> hris = new ArrayList<HRegionInfo>();
1148 for (HRegionInfo region : regions) {
1149 if (!region.getTableName().getNameAsString().equalsIgnoreCase(tname)) {
1150 continue;
1151 }
1152 hris.add(region);
1153 }
1154 LOG.info("Creating wal edits across " + hris.size() + " regions.");
1155 for (int i = 0; i < edit_size; i++) {
1156 value[i] = (byte) ('a' + (i % 26));
1157 }
1158 int n = hris.size();
1159 int[] counts = new int[n];
1160 if (n > 0) {
1161 for (int i = 0; i < num_edits; i += 1) {
1162 WALEdit e = new WALEdit();
1163 HRegionInfo curRegionInfo = hris.get(i % n);
1164 byte[] startRow = curRegionInfo.getStartKey();
1165 if (startRow == null || startRow.length == 0) {
1166 startRow = new byte[] { 0, 0, 0, 0, 1 };
1167 }
1168 byte[] row = Bytes.incrementBytes(startRow, counts[i % n]);
1169 row = Arrays.copyOfRange(row, 3, 8);
1170
1171
1172 byte[] family = Bytes.toBytes(fname);
1173 byte[] qualifier = Bytes.toBytes("c" + Integer.toString(i));
1174 e.add(new KeyValue(row, family, qualifier, System.currentTimeMillis(), value));
1175 log.append(curRegionInfo, fullTName, e, System.currentTimeMillis(), htd);
1176 counts[i % n] += 1;
1177 }
1178 }
1179 log.sync();
1180 if(closeLog) {
1181 log.close();
1182 }
1183 for (int i = 0; i < n; i++) {
1184 LOG.info("region " + hris.get(i).getRegionNameAsString() + " has " + counts[i] + " edits");
1185 }
1186 return;
1187 }
1188
1189 private int countHLog(Path log, FileSystem fs, Configuration conf)
1190 throws IOException {
1191 int count = 0;
1192 HLog.Reader in = HLogFactory.createReader(fs, log, conf);
1193 while (in.next() != null) {
1194 count++;
1195 }
1196 return count;
1197 }
1198
1199 private void blockUntilNoRIT(ZooKeeperWatcher zkw, HMaster master)
1200 throws KeeperException, InterruptedException {
1201 ZKAssign.blockUntilNoRIT(zkw);
1202 master.assignmentManager.waitUntilNoRegionsInTransition(60000);
1203 }
1204
1205 private void putData(HRegion region, byte[] startRow, int numRows, byte [] qf,
1206 byte [] ...families)
1207 throws IOException {
1208 for(int i = 0; i < numRows; i++) {
1209 Put put = new Put(Bytes.add(startRow, Bytes.toBytes(i)));
1210 for(byte [] family : families) {
1211 put.add(family, qf, null);
1212 }
1213 region.put(put);
1214 }
1215 }
1216
1217
1218
1219
1220 private void prepareData(final HTable t, final byte[] f, final byte[] column) throws IOException {
1221 t.setAutoFlush(false);
1222 byte[] k = new byte[3];
1223
1224
1225 for (byte b1 = 'a'; b1 <= 'z'; b1++) {
1226 for (byte b2 = 'a'; b2 <= 'z'; b2++) {
1227 for (byte b3 = 'a'; b3 <= 'z'; b3++) {
1228 k[0] = b1;
1229 k[1] = b2;
1230 k[2] = b3;
1231 Put put = new Put(k);
1232 put.add(f, column, k);
1233 t.put(put);
1234 }
1235 }
1236 }
1237 t.flushCommits();
1238
1239 for (byte b3 = 'a'; b3 <= 'z'; b3++) {
1240 k[0] = 'a';
1241 k[1] = 'a';
1242 k[2] = b3;
1243 Delete del = new Delete(k);
1244 t.delete(del);
1245 }
1246 t.flushCommits();
1247 }
1248
1249 private NavigableSet<String> getAllOnlineRegions(MiniHBaseCluster cluster)
1250 throws IOException {
1251 NavigableSet<String> online = new TreeSet<String>();
1252 for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
1253 for (HRegionInfo region : ProtobufUtil.getOnlineRegions(rst.getRegionServer())) {
1254 online.add(region.getRegionNameAsString());
1255 }
1256 }
1257 return online;
1258 }
1259
1260 private void waitForCounter(AtomicLong ctr, long oldval, long newval,
1261 long timems) {
1262 long curt = System.currentTimeMillis();
1263 long endt = curt + timems;
1264 while (curt < endt) {
1265 if (ctr.get() == oldval) {
1266 Thread.yield();
1267 curt = System.currentTimeMillis();
1268 } else {
1269 assertEquals(newval, ctr.get());
1270 return;
1271 }
1272 }
1273 assertTrue(false);
1274 }
1275
1276 private void abortMaster(MiniHBaseCluster cluster) throws InterruptedException {
1277 for (MasterThread mt : cluster.getLiveMasterThreads()) {
1278 if (mt.getMaster().isActiveMaster()) {
1279 mt.getMaster().abort("Aborting for tests", new Exception("Trace info"));
1280 mt.join();
1281 break;
1282 }
1283 }
1284 LOG.debug("Master is aborted");
1285 }
1286
1287 private void startMasterAndWaitUntilLogSplit(MiniHBaseCluster cluster)
1288 throws IOException, InterruptedException {
1289 cluster.startMaster();
1290 HMaster master = cluster.getMaster();
1291 while (!master.isInitialized()) {
1292 Thread.sleep(100);
1293 }
1294 ServerManager serverManager = master.getServerManager();
1295 while (serverManager.areDeadServersInProgress()) {
1296 Thread.sleep(100);
1297 }
1298 }
1299
1300
1301
1302
1303
1304
1305
1306
1307 private HRegionServer findRSToKill(boolean hasMetaRegion, String tableName) throws Exception {
1308 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1309 int numOfRSs = rsts.size();
1310 List<HRegionInfo> regions = null;
1311 HRegionServer hrs = null;
1312
1313 for (int i = 0; i < numOfRSs; i++) {
1314 boolean isCarryingMeta = false;
1315 boolean foundTableRegion = false;
1316 hrs = rsts.get(i).getRegionServer();
1317 regions = ProtobufUtil.getOnlineRegions(hrs);
1318 for (HRegionInfo region : regions) {
1319 if (region.isMetaRegion()) {
1320 isCarryingMeta = true;
1321 }
1322 if (tableName == null || region.getTableName().getNameAsString().equals(tableName)) {
1323 foundTableRegion = true;
1324 }
1325 if (foundTableRegion && (isCarryingMeta || !hasMetaRegion)) {
1326 break;
1327 }
1328 }
1329 if (isCarryingMeta && hasMetaRegion) {
1330
1331 if (!foundTableRegion) {
1332 final HRegionServer destRS = hrs;
1333
1334 List<HRegionInfo> tableRegions =
1335 TEST_UTIL.getHBaseAdmin().getTableRegions(Bytes.toBytes(tableName));
1336 final HRegionInfo hri = tableRegions.get(0);
1337 TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
1338 Bytes.toBytes(destRS.getServerName().getServerName()));
1339
1340 final RegionStates regionStates =
1341 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
1342 TEST_UTIL.waitFor(45000, 200, new Waiter.Predicate<Exception>() {
1343 @Override
1344 public boolean evaluate() throws Exception {
1345 ServerName sn = regionStates.getRegionServerOfRegion(hri);
1346 return (sn != null && sn.equals(destRS.getServerName()));
1347 }
1348 });
1349 }
1350 return hrs;
1351 } else if (hasMetaRegion || isCarryingMeta) {
1352 continue;
1353 }
1354 if (foundTableRegion) break;
1355 }
1356
1357 return hrs;
1358 }
1359
1360 }