1   package org.apache.hadoop.hbase.replication;
2   
3   
4   import org.apache.commons.logging.Log;
5   import org.apache.commons.logging.LogFactory;
6   import org.apache.hadoop.hbase.KeyValue;
7   import org.apache.hadoop.hbase.LargeTests;
8   import org.apache.hadoop.hbase.client.*;
9   import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
10  import org.apache.hadoop.hbase.util.Bytes;
11  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
12  import org.apache.hadoop.hbase.util.JVMClusterUtil;
13  import org.apache.hadoop.mapreduce.Job;
14  import org.junit.Before;
15  import org.junit.Test;
16  import org.junit.experimental.categories.Category;
17  
18  import static org.junit.Assert.assertArrayEquals;
19  import static org.junit.Assert.assertEquals;
20  import static org.junit.Assert.fail;
21  
22  @Category(LargeTests.class)
23  public class TestReplicationSmallTests extends TestReplicationBase {
24  
25    private static final Log LOG = LogFactory.getLog(TestReplicationSmallTests.class);
26  
27    /**
28     * @throws java.lang.Exception
29     */
30    @Before
31    public void setUp() throws Exception {
32      htable1.setAutoFlush(true);
33      // Starting and stopping replication can make us miss new logs,
34      // rolling like this makes sure the most recent one gets added to the queue
35      for ( JVMClusterUtil.RegionServerThread r :
36          utility1.getHBaseCluster().getRegionServerThreads()) {
37        r.getRegionServer().getWAL().rollWriter();
38      }
39      utility1.truncateTable(tableName);
40      // truncating the table will send one Delete per row to the slave cluster
41      // in an async fashion, which is why we cannot just call truncateTable on
42      // utility2 since late writes could make it to the slave in some way.
43      // Instead, we truncate the first table and wait for all the Deletes to
44      // make it to the slave.
45      Scan scan = new Scan();
46      int lastCount = 0;
47      for (int i = 0; i < NB_RETRIES; i++) {
48        if (i==NB_RETRIES-1) {
49          fail("Waited too much time for truncate");
50        }
51        ResultScanner scanner = htable2.getScanner(scan);
52        Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
53        scanner.close();
54        if (res.length != 0) {
55          if (res.length < lastCount) {
56            i--; // Don't increment timeout if we make progress
57          }
58          lastCount = res.length;
59          LOG.info("Still got " + res.length + " rows");
60          Thread.sleep(SLEEP_TIME);
61        } else {
62          break;
63        }
64      }
65    }
66  
67    /**
68     * Verify that version and column delete marker types are replicated
69     * correctly.
70     * @throws Exception
71     */
72    @Test(timeout=300000)
73    public void testDeleteTypes() throws Exception {
74      LOG.info("testDeleteTypes");
75      final byte[] v1 = Bytes.toBytes("v1");
76      final byte[] v2 = Bytes.toBytes("v2");
77      final byte[] v3 = Bytes.toBytes("v3");
78      htable1 = new HTable(conf1, tableName);
79  
80      long t = EnvironmentEdgeManager.currentTimeMillis();
81      // create three versions for "row"
82      Put put = new Put(row);
83      put.add(famName, row, t, v1);
84      htable1.put(put);
85  
86      put = new Put(row);
87      put.add(famName, row, t+1, v2);
88      htable1.put(put);
89  
90      put = new Put(row);
91      put.add(famName, row, t+2, v3);
92      htable1.put(put);
93  
94      Get get = new Get(row);
95      get.setMaxVersions();
96      for (int i = 0; i < NB_RETRIES; i++) {
97        if (i==NB_RETRIES-1) {
98          fail("Waited too much time for put replication");
99        }
100       Result res = htable2.get(get);
101       if (res.size() < 3) {
102         LOG.info("Rows not available");
103         Thread.sleep(SLEEP_TIME);
104       } else {
105         assertArrayEquals(res.raw()[0].getValue(), v3);
106         assertArrayEquals(res.raw()[1].getValue(), v2);
107         assertArrayEquals(res.raw()[2].getValue(), v1);
108         break;
109       }
110     }
111     // place a version delete marker (delete last version)
112     Delete d = new Delete(row);
113     d.deleteColumn(famName, row, t);
114     htable1.delete(d);
115 
116     get = new Get(row);
117     get.setMaxVersions();
118     for (int i = 0; i < NB_RETRIES; i++) {
119       if (i==NB_RETRIES-1) {
120         fail("Waited too much time for put replication");
121       }
122       Result res = htable2.get(get);
123       if (res.size() > 2) {
124         LOG.info("Version not deleted");
125         Thread.sleep(SLEEP_TIME);
126       } else {
127         assertArrayEquals(res.raw()[0].getValue(), v3);
128         assertArrayEquals(res.raw()[1].getValue(), v2);
129         break;
130       }
131     }
132 
133     // place a column delete marker
134     d = new Delete(row);
135     d.deleteColumns(famName, row, t+2);
136     htable1.delete(d);
137 
138     // now *both* of the remaining version should be deleted
139     // at the replica
140     get = new Get(row);
141     for (int i = 0; i < NB_RETRIES; i++) {
142       if (i==NB_RETRIES-1) {
143         fail("Waited too much time for del replication");
144       }
145       Result res = htable2.get(get);
146       if (res.size() >= 1) {
147         LOG.info("Rows not deleted");
148         Thread.sleep(SLEEP_TIME);
149       } else {
150         break;
151       }
152     }
153   }
154 
155   /**
156    * Add a row, check it's replicated, delete it, check's gone
157    * @throws Exception
158    */
159   @Test(timeout=300000)
160   public void testSimplePutDelete() throws Exception {
161     LOG.info("testSimplePutDelete");
162     Put put = new Put(row);
163     put.add(famName, row, row);
164 
165     htable1 = new HTable(conf1, tableName);
166     htable1.put(put);
167 
168     Get get = new Get(row);
169     for (int i = 0; i < NB_RETRIES; i++) {
170       if (i==NB_RETRIES-1) {
171         fail("Waited too much time for put replication");
172       }
173       Result res = htable2.get(get);
174       if (res.size() == 0) {
175         LOG.info("Row not available");
176         Thread.sleep(SLEEP_TIME);
177       } else {
178         assertArrayEquals(res.value(), row);
179         break;
180       }
181     }
182 
183     Delete del = new Delete(row);
184     htable1.delete(del);
185 
186     get = new Get(row);
187     for (int i = 0; i < NB_RETRIES; i++) {
188       if (i==NB_RETRIES-1) {
189         fail("Waited too much time for del replication");
190       }
191       Result res = htable2.get(get);
192       if (res.size() >= 1) {
193         LOG.info("Row not deleted");
194         Thread.sleep(SLEEP_TIME);
195       } else {
196         break;
197       }
198     }
199   }
200 
201   /**
202    * Try a small batch upload using the write buffer, check it's replicated
203    * @throws Exception
204    */
205   @Test(timeout=300000)
206   public void testSmallBatch() throws Exception {
207     LOG.info("testSmallBatch");
208     Put put;
209     // normal Batch tests
210     htable1.setAutoFlush(false);
211     for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
212       put = new Put(Bytes.toBytes(i));
213       put.add(famName, row, row);
214       htable1.put(put);
215     }
216     htable1.flushCommits();
217 
218     Scan scan = new Scan();
219 
220     ResultScanner scanner1 = htable1.getScanner(scan);
221     Result[] res1 = scanner1.next(NB_ROWS_IN_BATCH);
222     scanner1.close();
223     assertEquals(NB_ROWS_IN_BATCH, res1.length);
224 
225     for (int i = 0; i < NB_RETRIES; i++) {
226       if (i==NB_RETRIES-1) {
227         fail("Waited too much time for normal batch replication");
228       }
229       ResultScanner scanner = htable2.getScanner(scan);
230       Result[] res = scanner.next(NB_ROWS_IN_BATCH);
231       scanner.close();
232       if (res.length != NB_ROWS_IN_BATCH) {
233         LOG.info("Only got " + res.length + " rows");
234         Thread.sleep(SLEEP_TIME);
235       } else {
236         break;
237       }
238     }
239   }
240 
241   /**
242    * Test stopping replication, trying to insert, make sure nothing's
243    * replicated, enable it, try replicating and it should work
244    * @throws Exception
245    */
246   @Test(timeout=300000)
247   public void testStartStop() throws Exception {
248 
249     // Test stopping replication
250     setIsReplication(false);
251 
252     Put put = new Put(Bytes.toBytes("stop start"));
253     put.add(famName, row, row);
254     htable1.put(put);
255 
256     Get get = new Get(Bytes.toBytes("stop start"));
257     for (int i = 0; i < NB_RETRIES; i++) {
258       if (i==NB_RETRIES-1) {
259         break;
260       }
261       Result res = htable2.get(get);
262       if(res.size() >= 1) {
263         fail("Replication wasn't stopped");
264 
265       } else {
266         LOG.info("Row not replicated, let's wait a bit more...");
267         Thread.sleep(SLEEP_TIME);
268       }
269     }
270 
271     // Test restart replication
272     setIsReplication(true);
273 
274     htable1.put(put);
275 
276     for (int i = 0; i < NB_RETRIES; i++) {
277       if (i==NB_RETRIES-1) {
278         fail("Waited too much time for put replication");
279       }
280       Result res = htable2.get(get);
281       if(res.size() == 0) {
282         LOG.info("Row not available");
283         Thread.sleep(SLEEP_TIME);
284       } else {
285         assertArrayEquals(res.value(), row);
286         break;
287       }
288     }
289 
290     put = new Put(Bytes.toBytes("do not rep"));
291     put.add(noRepfamName, row, row);
292     htable1.put(put);
293 
294     get = new Get(Bytes.toBytes("do not rep"));
295     for (int i = 0; i < NB_RETRIES; i++) {
296       if (i == NB_RETRIES-1) {
297         break;
298       }
299       Result res = htable2.get(get);
300       if (res.size() >= 1) {
301         fail("Not supposed to be replicated");
302       } else {
303         LOG.info("Row not replicated, let's wait a bit more...");
304         Thread.sleep(SLEEP_TIME);
305       }
306     }
307 
308   }
309 
310   /**
311    * Test disable/enable replication, trying to insert, make sure nothing's
312    * replicated, enable it, the insert should be replicated
313    *
314    * @throws Exception
315    */
316   @Test(timeout = 300000)
317   public void testDisableEnable() throws Exception {
318 
319     // Test disabling replication
320     admin.disablePeer("2");
321 
322     byte[] rowkey = Bytes.toBytes("disable enable");
323     Put put = new Put(rowkey);
324     put.add(famName, row, row);
325     htable1.put(put);
326 
327     Get get = new Get(rowkey);
328     for (int i = 0; i < NB_RETRIES; i++) {
329       Result res = htable2.get(get);
330       if (res.size() >= 1) {
331         fail("Replication wasn't disabled");
332       } else {
333         LOG.info("Row not replicated, let's wait a bit more...");
334         Thread.sleep(SLEEP_TIME);
335       }
336     }
337 
338     // Test enable replication
339     admin.enablePeer("2");
340 
341     for (int i = 0; i < NB_RETRIES; i++) {
342       Result res = htable2.get(get);
343       if (res.size() == 0) {
344         LOG.info("Row not available");
345         Thread.sleep(SLEEP_TIME);
346       } else {
347         assertArrayEquals(res.value(), row);
348         return;
349       }
350     }
351     fail("Waited too much time for put replication");
352   }
353 
354   /**
355    * Integration test for TestReplicationAdmin, removes and re-add a peer
356    * cluster
357    *
358    * @throws Exception
359    */
360   @Test(timeout=300000)
361   public void testAddAndRemoveClusters() throws Exception {
362     LOG.info("testAddAndRemoveClusters");
363     admin.removePeer("2");
364     Thread.sleep(SLEEP_TIME);
365     byte[] rowKey = Bytes.toBytes("Won't be replicated");
366     Put put = new Put(rowKey);
367     put.add(famName, row, row);
368     htable1.put(put);
369 
370     Get get = new Get(rowKey);
371     for (int i = 0; i < NB_RETRIES; i++) {
372       if (i == NB_RETRIES-1) {
373         break;
374       }
375       Result res = htable2.get(get);
376       if (res.size() >= 1) {
377         fail("Not supposed to be replicated");
378       } else {
379         LOG.info("Row not replicated, let's wait a bit more...");
380         Thread.sleep(SLEEP_TIME);
381       }
382     }
383 
384     admin.addPeer("2", utility2.getClusterKey());
385     Thread.sleep(SLEEP_TIME);
386     rowKey = Bytes.toBytes("do rep");
387     put = new Put(rowKey);
388     put.add(famName, row, row);
389     LOG.info("Adding new row");
390     htable1.put(put);
391 
392     get = new Get(rowKey);
393     for (int i = 0; i < NB_RETRIES; i++) {
394       if (i==NB_RETRIES-1) {
395         fail("Waited too much time for put replication");
396       }
397       Result res = htable2.get(get);
398       if (res.size() == 0) {
399         LOG.info("Row not available");
400         Thread.sleep(SLEEP_TIME*i);
401       } else {
402         assertArrayEquals(res.value(), row);
403         break;
404       }
405     }
406   }
407 
408 
409   /**
410    * Do a more intense version testSmallBatch, one  that will trigger
411    * hlog rolling and other non-trivial code paths
412    * @throws Exception
413    */
414   @Test(timeout=300000)
415   public void loadTesting() throws Exception {
416     htable1.setWriteBufferSize(1024);
417     htable1.setAutoFlush(false);
418     for (int i = 0; i < NB_ROWS_IN_BIG_BATCH; i++) {
419       Put put = new Put(Bytes.toBytes(i));
420       put.add(famName, row, row);
421       htable1.put(put);
422     }
423     htable1.flushCommits();
424 
425     Scan scan = new Scan();
426 
427     ResultScanner scanner = htable1.getScanner(scan);
428     Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
429     scanner.close();
430 
431     assertEquals(NB_ROWS_IN_BATCH *10, res.length);
432 
433     scan = new Scan();
434 
435     for (int i = 0; i < NB_RETRIES_FOR_BIG_BATCH; i++) {
436 
437       scanner = htable2.getScanner(scan);
438       res = scanner.next(NB_ROWS_IN_BIG_BATCH);
439       scanner.close();
440       if (res.length != NB_ROWS_IN_BIG_BATCH) {
441         if (i == NB_RETRIES_FOR_BIG_BATCH-1) {
442           int lastRow = -1;
443           for (Result result : res) {
444             int currentRow = Bytes.toInt(result.getRow());
445             for (int row = lastRow+1; row < currentRow; row++) {
446               LOG.error("Row missing: " + row);
447             }
448             lastRow = currentRow;
449           }
450           LOG.error("Last row: " + lastRow);
451           fail("Waited too much time for normal batch replication, "
452               + res.length + " instead of " + NB_ROWS_IN_BIG_BATCH);
453         } else {
454           LOG.info("Only got " + res.length + " rows");
455           Thread.sleep(SLEEP_TIME);
456         }
457       } else {
458         break;
459       }
460     }
461   }
462 
463   /**
464    * Do a small loading into a table, make sure the data is really the same,
465    * then run the VerifyReplication job to check the results. Do a second
466    * comparison where all the cells are different.
467    * @throws Exception
468    */
469   @Test(timeout=300000)
470   public void testVerifyRepJob() throws Exception {
471     // Populate the tables, at the same time it guarantees that the tables are
472     // identical since it does the check
473     testSmallBatch();
474 
475     String[] args = new String[] {"2", Bytes.toString(tableName)};
476     Job job = VerifyReplication.createSubmittableJob(CONF_WITH_LOCALFS, args);
477     if (job == null) {
478       fail("Job wasn't created, see the log");
479     }
480     if (!job.waitForCompletion(true)) {
481       fail("Job failed, see the log");
482     }
483     assertEquals(NB_ROWS_IN_BATCH, job.getCounters().
484         findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
485     assertEquals(0, job.getCounters().
486         findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
487 
488     Scan scan = new Scan();
489     ResultScanner rs = htable2.getScanner(scan);
490     Put put = null;
491     for (Result result : rs) {
492       put = new Put(result.getRow());
493       KeyValue firstVal = result.raw()[0];
494       put.add(firstVal.getFamily(),
495           firstVal.getQualifier(), Bytes.toBytes("diff data"));
496       htable2.put(put);
497     }
498     Delete delete = new Delete(put.getRow());
499     htable2.delete(delete);
500     job = VerifyReplication.createSubmittableJob(CONF_WITH_LOCALFS, args);
501     if (job == null) {
502       fail("Job wasn't created, see the log");
503     }
504     if (!job.waitForCompletion(true)) {
505       fail("Job failed, see the log");
506     }
507     assertEquals(0, job.getCounters().
508         findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
509     assertEquals(NB_ROWS_IN_BATCH, job.getCounters().
510         findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
511   }
512 
513 }