package org.apache.hadoop.hbase.replication;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.EmptyWatcher;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MiniZooKeeperCluster;
import org.apache.hadoop.hbase.UnknownScannerException;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

public class TestReplication {

  private static final Log LOG = LogFactory.getLog(TestReplication.class);

  private static Configuration conf1;
  private static Configuration conf2;

  private static ZooKeeperWrapper zkw1;
  private static ZooKeeperWrapper zkw2;

  private static HTable htable1;
  private static HTable htable2;

  private static HBaseTestingUtility utility1;
  private static HBaseTestingUtility utility2;
  private static final int NB_ROWS_IN_BATCH = 100;
  private static final long SLEEP_TIME = 500;
  private static final int NB_RETRIES = 10;

  private static final byte[] tableName = Bytes.toBytes("test");
  private static final byte[] famName = Bytes.toBytes("f");
  private static final byte[] row = Bytes.toBytes("row");
  private static final byte[] noRepfamName = Bytes.toBytes("norep");

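  /**
   * Start two mini clusters that share one mini ZooKeeper ensemble (under the
   * /1 and /2 znode parents), wire the replication znodes so that cluster 1
   * replicates to cluster 2, and create the test table on both sides.
   * @throws Exception on failure
   */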
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    conf1 = HBaseConfiguration.create();
    conf1.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/1");

    // Small log block size and source capacity to trigger more log rolls
    // and replication shipments during the tests
    conf1.setInt("hbase.regionserver.hlog.blocksize", 1024 * 20);
    conf1.setInt("replication.source.size.capacity", 1024);
    conf1.setLong("replication.source.sleepforretries", 100);
    conf1.setInt("hbase.regionserver.maxlogs", 10);
    conf1.setLong("hbase.master.logcleaner.ttl", 10);
    conf1.setLong("hbase.client.retries.number", 5);
    conf1.setLong("hbase.regions.percheckin", 1);
    conf1.setBoolean(HConstants.REPLICATION_ENABLE_KEY, true);
    conf1.setBoolean("dfs.support.append", true);
    conf1.setLong(HConstants.THREAD_WAKE_FREQUENCY, 100);

    utility1 = new HBaseTestingUtility(conf1);
    utility1.startMiniZKCluster();
    MiniZooKeeperCluster miniZK = utility1.getZkCluster();
    zkw1 = ZooKeeperWrapper.createInstance(conf1, "cluster1");
    zkw1.writeZNode("/1", "replication", "");
    zkw1.writeZNode("/1/replication", "master",
        conf1.get(HConstants.ZOOKEEPER_QUORUM) + ":" +
        conf1.get("hbase.zookeeper.property.clientPort") + ":/1");
    setIsReplication(true);

    LOG.info("Setup first Zk");

    conf2 = HBaseConfiguration.create();
    conf2.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/2");
    conf2.setInt("hbase.client.retries.number", 6);
    conf2.setBoolean(HConstants.REPLICATION_ENABLE_KEY, true);
    conf2.setBoolean("dfs.support.append", true);
    conf2.setLong("hbase.regions.percheckin", 1);

    utility2 = new HBaseTestingUtility(conf2);
    // Both clusters share the same mini ZooKeeper cluster
    utility2.setZkCluster(miniZK);
    zkw2 = ZooKeeperWrapper.createInstance(conf2, "cluster2");
    zkw2.writeZNode("/2", "replication", "");
    zkw2.writeZNode("/2/replication", "master",
        conf1.get(HConstants.ZOOKEEPER_QUORUM) + ":" +
        conf1.get("hbase.zookeeper.property.clientPort") + ":/1");

    // Register cluster 2 as peer "1" of cluster 1
    zkw1.writeZNode("/1/replication/peers", "1",
        conf2.get(HConstants.ZOOKEEPER_QUORUM) + ":" +
        conf2.get("hbase.zookeeper.property.clientPort") + ":/2");

    LOG.info("Setup second Zk");

    utility1.startMiniCluster(2);
    utility2.startMiniCluster(2);

    // One family with global replication scope and one that is not replicated
    HTableDescriptor table = new HTableDescriptor(tableName);
    table.setDeferredLogFlush(false);
    HColumnDescriptor fam = new HColumnDescriptor(famName);
    fam.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
    table.addFamily(fam);
    fam = new HColumnDescriptor(noRepfamName);
    table.addFamily(fam);
    HBaseAdmin admin1 = new HBaseAdmin(conf1);
    HBaseAdmin admin2 = new HBaseAdmin(conf2);
    admin1.createTable(table);
    admin2.createTable(table);

    htable1 = new HTable(conf1, tableName);
    htable1.setWriteBufferSize(1024);
    htable2 = new HTable(conf2, tableName);
  }

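  /**
   * Toggles the replication "state" znode of cluster 1 and gives ZooKeeper
   * some time to fire the watchers.
   */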
  private static void setIsReplication(boolean rep) throws Exception {
    LOG.info("Set rep " + rep);
    zkw1.writeZNode("/1/replication", "state", Boolean.toString(rep));
    // Takes a moment for ZooKeeper to fire the watcher
    Thread.sleep(SLEEP_TIME);
  }

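  /**
   * Pause replication, truncate the test table on both clusters, wait for the
   * truncation to settle, then re-enable replication.
   * @throws Exception on failure
   */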
  @Before
  public void setUp() throws Exception {
    setIsReplication(false);
    utility1.truncateTable(tableName);
    utility2.truncateTable(tableName);
    // Let the truncation settle before turning replication back on
    Thread.sleep(SLEEP_TIME * 8);
    setIsReplication(true);
  }

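  /**
   * Shut down both mini clusters.
   * @throws Exception on failure
   */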
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    utility2.shutdownMiniCluster();
    utility1.shutdownMiniCluster();
  }

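  /**
   * Add a single row on the master cluster, check that it shows up on the
   * slave, then delete it and check that the delete is replicated as well.
   * @throws Exception on failure
   */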
  @Test
  public void testSimplePutDelete() throws Exception {
    LOG.info("testSimplePutDelete");
    Put put = new Put(row);
    put.add(famName, row, row);

    htable1 = new HTable(conf1, tableName);
    htable1.put(put);

    HTable table2 = new HTable(conf2, tableName);
    Get get = new Get(row);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for put replication");
      }
      Result res = table2.get(get);
      if (res.size() == 0) {
        LOG.info("Row not available");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(res.value(), row);
        break;
      }
    }

    Delete del = new Delete(row);
    htable1.delete(del);

    table2 = new HTable(conf2, tableName);
    get = new Get(row);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for del replication");
      }
      Result res = table2.get(get);
      if (res.size() >= 1) {
        LOG.info("Row not deleted");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }
  }

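  /**
   * Load a small batch of rows through the write buffer, make sure the master
   * cluster has them all, then wait until they all appear on the slave.
   * @throws Exception on failure
   */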
  @Test
  public void testSmallBatch() throws Exception {
    LOG.info("testSmallBatch");
    Put put;
    // Buffer the puts client-side and flush them in one go
    htable1.setAutoFlush(false);
    for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
      put = new Put(Bytes.toBytes(i));
      put.add(famName, row, row);
      htable1.put(put);
    }
    htable1.flushCommits();

    Scan scan = new Scan();

    ResultScanner scanner1 = htable1.getScanner(scan);
    Result[] res1 = scanner1.next(NB_ROWS_IN_BATCH);
    scanner1.close();
    assertEquals(NB_ROWS_IN_BATCH, res1.length);

    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for normal batch replication");
      }
      ResultScanner scanner = htable2.getScanner(scan);
      Result[] res = scanner.next(NB_ROWS_IN_BATCH);
      scanner.close();
      if (res.length != NB_ROWS_IN_BATCH) {
        LOG.info("Only got " + res.length + " rows");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }

    htable1.setAutoFlush(true);
  }

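  /**
   * Disable replication and check that a new row never shows up on the slave;
   * re-enable replication and check that the same row now gets replicated;
   * finally write to the non-replicated family and make sure it stays local.
   * @throws Exception on failure
   */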
  @Test
  public void testStartStop() throws Exception {

    // Stop replication first
    setIsReplication(false);

    Put put = new Put(Bytes.toBytes("stop start"));
    put.add(famName, row, row);
    htable1.put(put);

    Get get = new Get(Bytes.toBytes("stop start"));
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        break;
      }
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        fail("Replication wasn't stopped");
      } else {
        LOG.info("Row not replicated, let's wait a bit more...");
        Thread.sleep(SLEEP_TIME);
      }
    }

    // Restart replication and put the same row again
    setIsReplication(true);

    htable1.put(put);

    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for put replication");
      }
      Result res = htable2.get(get);
      if (res.size() == 0) {
        LOG.info("Row not available");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(res.value(), row);
        break;
      }
    }

    // A put to the non-replicated family must never reach the slave
    put = new Put(Bytes.toBytes("do not rep"));
    put.add(noRepfamName, row, row);
    htable1.put(put);

    get = new Get(Bytes.toBytes("do not rep"));
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        break;
      }
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        fail("Not supposed to be replicated");
      } else {
        LOG.info("Row not replicated, let's wait a bit more...");
        Thread.sleep(SLEEP_TIME);
      }
    }
  }

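  /**
   * Load ten batches worth of rows, verify the count on the master cluster,
   * then wait for the full set to be replicated to the slave, logging any
   * missing rows on the last attempt.
   * @throws Exception on failure
   */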
  @Test
  public void loadTesting() throws Exception {
    htable1.setWriteBufferSize(1024);
    htable1.setAutoFlush(false);
    for (int i = 0; i < NB_ROWS_IN_BATCH * 10; i++) {
      Put put = new Put(Bytes.toBytes(i));
      put.add(famName, row, row);
      htable1.put(put);
    }
    htable1.flushCommits();

    Scan scan = new Scan();

    ResultScanner scanner = htable1.getScanner(scan);
    Result[] res = scanner.next(NB_ROWS_IN_BATCH * 100);
    scanner.close();

    assertEquals(NB_ROWS_IN_BATCH * 10, res.length);

    scan = new Scan();

    for (int i = 0; i < NB_RETRIES; i++) {

      scanner = htable2.getScanner(scan);
      res = scanner.next(NB_ROWS_IN_BATCH * 100);
      scanner.close();
      if (res.length != NB_ROWS_IN_BATCH * 10) {
        if (i == NB_RETRIES - 1) {
          // Report which rows never made it to the slave before failing
          int lastRow = -1;
          for (Result result : res) {
            int currentRow = Bytes.toInt(result.getRow());
            for (int row = lastRow + 1; row < currentRow; row++) {
              LOG.error("Row missing: " + row);
            }
            lastRow = currentRow;
          }
          LOG.error("Last row: " + lastRow);
          fail("Waited too much time for normal batch replication, "
              + res.length + " instead of " + NB_ROWS_IN_BATCH * 10);
        } else {
          LOG.info("Only got " + res.length + " rows");
          Thread.sleep(SLEEP_TIME);
        }
      } else {
        break;
      }
    }
  }

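  /**
   * Load the table while a region server is being killed on each cluster,
   * then make sure the slave eventually catches up with whatever the master
   * cluster managed to keep. Exercises replication queue failover.
   * @throws Exception on failure
   */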
  @Test
  public void queueFailover() throws Exception {
    utility1.createMultiRegions(htable1, famName);

    // Pick, on each cluster, a region server that does not host .META.
    int rsToKill1 =
        utility1.getHBaseCluster().getServerWithMeta() == 0 ? 1 : 0;
    int rsToKill2 =
        utility2.getHBaseCluster().getServerWithMeta() == 0 ? 1 : 0;

    // Kill a region server on each cluster partway through the load
    Thread killer1 = killARegionServer(utility1, 7500, rsToKill1);
    Thread killer2 = killARegionServer(utility2, 10000, rsToKill2);

    LOG.info("Start loading table");
    int initialCount = utility1.loadTable(htable1, famName);
    LOG.info("Done loading table");
    killer1.join(5000);
    killer2.join(5000);
    LOG.info("Done waiting for threads");

    Result[] res;
    while (true) {
      try {
        Scan scan = new Scan();
        ResultScanner scanner = htable1.getScanner(scan);
        res = scanner.next(initialCount);
        scanner.close();
        break;
      } catch (UnknownScannerException ex) {
        LOG.info("Cluster wasn't ready yet, restarting scanner");
      }
    }

    // Some edits may not have survived the region server kill on the master
    // cluster, so only expect the rows that are actually there
    if (res.length != initialCount) {
      LOG.warn("We lost some rows on the master cluster!");
      initialCount = res.length;
    }

    Scan scan2 = new Scan();

    int lastCount = 0;

    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for queueFailover replication");
      }
      ResultScanner scanner2 = htable2.getScanner(scan2);
      Result[] res2 = scanner2.next(initialCount * 2);
      scanner2.close();
      if (res2.length < initialCount) {
        // As long as the slave keeps making progress, don't burn a retry
        if (lastCount < res2.length) {
          i--;
        }
        lastCount = res2.length;
        LOG.info("Only got " + lastCount + " rows instead of " +
            initialCount + " current i=" + i);
        Thread.sleep(SLEEP_TIME * 2);
      } else {
        break;
      }
    }
  }

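  /**
   * Starts a background thread that waits for the given timeout and then
   * expires the ZooKeeper session of the region server at the given index.
   */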
  private static Thread killARegionServer(final HBaseTestingUtility utility,
      final long timeout, final int rs) {
    Thread killer = new Thread() {
      @Override
      public void run() {
        try {
          Thread.sleep(timeout);
          utility.expireRegionServerSession(rs);
        } catch (Exception e) {
          LOG.error(e);
        }
      }
    };
    killer.start();
    return killer;
  }
}