1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.replication;
20
21
22 import org.apache.commons.logging.Log;
23 import org.apache.commons.logging.LogFactory;
24 import org.apache.hadoop.hbase.HBaseTestingUtility;
25 import org.apache.hadoop.hbase.LargeTests;
26 import org.apache.hadoop.hbase.UnknownScannerException;
27 import org.apache.hadoop.hbase.client.Result;
28 import org.apache.hadoop.hbase.client.ResultScanner;
29 import org.apache.hadoop.hbase.client.Scan;
30 import org.junit.Test;
31 import org.junit.experimental.categories.Category;
32
33 import static org.junit.Assert.fail;
34
35 @Category(LargeTests.class)
36 public class TestReplicationQueueFailover extends TestReplicationBase {
37
38 private static final Log LOG = LogFactory.getLog(TestReplicationQueueFailover.class);
39
40
41
42
43
44
45
46
47
48 @Test(timeout=300000)
49 public void queueFailover() throws Exception {
50
51
52 int rsToKill1 =
53 utility1.getHBaseCluster().getServerWithMeta() == 0 ? 1 : 0;
54 int rsToKill2 =
55 utility2.getHBaseCluster().getServerWithMeta() == 0 ? 1 : 0;
56
57
58 Thread killer1 = killARegionServer(utility1, 7500, rsToKill1);
59 Thread killer2 = killARegionServer(utility2, 10000, rsToKill2);
60
61 LOG.info("Start loading table");
62 int initialCount = utility1.loadTable(htable1, famName);
63 LOG.info("Done loading table");
64 killer1.join(5000);
65 killer2.join(5000);
66 LOG.info("Done waiting for threads");
67
68 Result[] res;
69 while (true) {
70 try {
71 Scan scan = new Scan();
72 ResultScanner scanner = htable1.getScanner(scan);
73 res = scanner.next(initialCount);
74 scanner.close();
75 break;
76 } catch (UnknownScannerException ex) {
77 LOG.info("Cluster wasn't ready yet, restarting scanner");
78 }
79 }
80
81
82 if (res.length != initialCount) {
83 LOG.warn("We lost some rows on the master cluster!");
84
85 initialCount = res.length;
86 }
87
88 int lastCount = 0;
89
90 final long start = System.currentTimeMillis();
91 int i = 0;
92 while (true) {
93 if (i==NB_RETRIES-1) {
94 fail("Waited too much time for queueFailover replication. " +
95 "Waited "+(System.currentTimeMillis() - start)+"ms.");
96 }
97 Scan scan2 = new Scan();
98 ResultScanner scanner2 = htable2.getScanner(scan2);
99 Result[] res2 = scanner2.next(initialCount * 2);
100 scanner2.close();
101 if (res2.length < initialCount) {
102 if (lastCount < res2.length) {
103 i--;
104 } else {
105 i++;
106 }
107 lastCount = res2.length;
108 LOG.info("Only got " + lastCount + " rows instead of " +
109 initialCount + " current i=" + i);
110 Thread.sleep(SLEEP_TIME*2);
111 } else {
112 break;
113 }
114 }
115 }
116
117 private static Thread killARegionServer(final HBaseTestingUtility utility,
118 final long timeout, final int rs) {
119 Thread killer = new Thread() {
120 public void run() {
121 try {
122 Thread.sleep(timeout);
123 utility.expireRegionServerSession(rs);
124 } catch (Exception e) {
125 LOG.error("Couldn't kill a region server", e);
126 }
127 }
128 };
129 killer.setDaemon(true);
130 killer.start();
131 return killer;
132 }
133 }