1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.replication;
19  
20  
21  import org.apache.commons.logging.Log;
22  import org.apache.commons.logging.LogFactory;
23  import org.apache.hadoop.hbase.KeyValue;
24  import org.apache.hadoop.hbase.LargeTests;
25  import org.apache.hadoop.hbase.client.*;
26  import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
27  import org.apache.hadoop.hbase.util.Bytes;
28  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
29  import org.apache.hadoop.hbase.util.JVMClusterUtil;
30  import org.apache.hadoop.mapreduce.Job;
31  import org.junit.Before;
32  import org.junit.Test;
33  import org.junit.experimental.categories.Category;
34  
35  import static org.junit.Assert.assertArrayEquals;
36  import static org.junit.Assert.assertEquals;
37  import static org.junit.Assert.fail;
38  
39  @Category(LargeTests.class)
40  public class TestReplicationSmallTests extends TestReplicationBase {
41  
42    private static final Log LOG = LogFactory.getLog(TestReplicationSmallTests.class);
43  
44    /**
45     * @throws java.lang.Exception
46     */
47    @Before
48    public void setUp() throws Exception {
49      htable1.setAutoFlush(true);
50      // Starting and stopping replication can make us miss new logs,
51      // rolling like this makes sure the most recent one gets added to the queue
52      for ( JVMClusterUtil.RegionServerThread r :
53          utility1.getHBaseCluster().getRegionServerThreads()) {
54        r.getRegionServer().getWAL().rollWriter();
55      }
56      utility1.truncateTable(tableName);
57      // truncating the table will send one Delete per row to the slave cluster
58      // in an async fashion, which is why we cannot just call truncateTable on
59      // utility2 since late writes could make it to the slave in some way.
60      // Instead, we truncate the first table and wait for all the Deletes to
61      // make it to the slave.
62      Scan scan = new Scan();
63      int lastCount = 0;
64      for (int i = 0; i < NB_RETRIES; i++) {
65        if (i==NB_RETRIES-1) {
66          fail("Waited too much time for truncate");
67        }
68        ResultScanner scanner = htable2.getScanner(scan);
69        Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
70        scanner.close();
71        if (res.length != 0) {
72          if (res.length < lastCount) {
73            i--; // Don't increment timeout if we make progress
74          }
75          lastCount = res.length;
76          LOG.info("Still got " + res.length + " rows");
77          Thread.sleep(SLEEP_TIME);
78        } else {
79          break;
80        }
81      }
82    }
83  
84    /**
85     * Verify that version and column delete marker types are replicated
86     * correctly.
87     * @throws Exception
88     */
89    @Test(timeout=300000)
90    public void testDeleteTypes() throws Exception {
91      LOG.info("testDeleteTypes");
92      final byte[] v1 = Bytes.toBytes("v1");
93      final byte[] v2 = Bytes.toBytes("v2");
94      final byte[] v3 = Bytes.toBytes("v3");
95      htable1 = new HTable(conf1, tableName);
96  
97      long t = EnvironmentEdgeManager.currentTimeMillis();
98      // create three versions for "row"
99      Put put = new Put(row);
100     put.add(famName, row, t, v1);
101     htable1.put(put);
102 
103     put = new Put(row);
104     put.add(famName, row, t+1, v2);
105     htable1.put(put);
106 
107     put = new Put(row);
108     put.add(famName, row, t+2, v3);
109     htable1.put(put);
110 
111     Get get = new Get(row);
112     get.setMaxVersions();
113     for (int i = 0; i < NB_RETRIES; i++) {
114       if (i==NB_RETRIES-1) {
115         fail("Waited too much time for put replication");
116       }
117       Result res = htable2.get(get);
118       if (res.size() < 3) {
119         LOG.info("Rows not available");
120         Thread.sleep(SLEEP_TIME);
121       } else {
122         assertArrayEquals(res.raw()[0].getValue(), v3);
123         assertArrayEquals(res.raw()[1].getValue(), v2);
124         assertArrayEquals(res.raw()[2].getValue(), v1);
125         break;
126       }
127     }
128     // place a version delete marker (delete last version)
129     Delete d = new Delete(row);
130     d.deleteColumn(famName, row, t);
131     htable1.delete(d);
132 
133     get = new Get(row);
134     get.setMaxVersions();
135     for (int i = 0; i < NB_RETRIES; i++) {
136       if (i==NB_RETRIES-1) {
137         fail("Waited too much time for put replication");
138       }
139       Result res = htable2.get(get);
140       if (res.size() > 2) {
141         LOG.info("Version not deleted");
142         Thread.sleep(SLEEP_TIME);
143       } else {
144         assertArrayEquals(res.raw()[0].getValue(), v3);
145         assertArrayEquals(res.raw()[1].getValue(), v2);
146         break;
147       }
148     }
149 
150     // place a column delete marker
151     d = new Delete(row);
152     d.deleteColumns(famName, row, t+2);
153     htable1.delete(d);
154 
155     // now *both* of the remaining version should be deleted
156     // at the replica
157     get = new Get(row);
158     for (int i = 0; i < NB_RETRIES; i++) {
159       if (i==NB_RETRIES-1) {
160         fail("Waited too much time for del replication");
161       }
162       Result res = htable2.get(get);
163       if (res.size() >= 1) {
164         LOG.info("Rows not deleted");
165         Thread.sleep(SLEEP_TIME);
166       } else {
167         break;
168       }
169     }
170   }
171 
172   /**
173    * Add a row, check it's replicated, delete it, check's gone
174    * @throws Exception
175    */
176   @Test(timeout=300000)
177   public void testSimplePutDelete() throws Exception {
178     LOG.info("testSimplePutDelete");
179     Put put = new Put(row);
180     put.add(famName, row, row);
181 
182     htable1 = new HTable(conf1, tableName);
183     htable1.put(put);
184 
185     Get get = new Get(row);
186     for (int i = 0; i < NB_RETRIES; i++) {
187       if (i==NB_RETRIES-1) {
188         fail("Waited too much time for put replication");
189       }
190       Result res = htable2.get(get);
191       if (res.size() == 0) {
192         LOG.info("Row not available");
193         Thread.sleep(SLEEP_TIME);
194       } else {
195         assertArrayEquals(res.value(), row);
196         break;
197       }
198     }
199 
200     Delete del = new Delete(row);
201     htable1.delete(del);
202 
203     get = new Get(row);
204     for (int i = 0; i < NB_RETRIES; i++) {
205       if (i==NB_RETRIES-1) {
206         fail("Waited too much time for del replication");
207       }
208       Result res = htable2.get(get);
209       if (res.size() >= 1) {
210         LOG.info("Row not deleted");
211         Thread.sleep(SLEEP_TIME);
212       } else {
213         break;
214       }
215     }
216   }
217 
218   /**
219    * Try a small batch upload using the write buffer, check it's replicated
220    * @throws Exception
221    */
222   @Test(timeout=300000)
223   public void testSmallBatch() throws Exception {
224     LOG.info("testSmallBatch");
225     Put put;
226     // normal Batch tests
227     htable1.setAutoFlush(false);
228     for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
229       put = new Put(Bytes.toBytes(i));
230       put.add(famName, row, row);
231       htable1.put(put);
232     }
233     htable1.flushCommits();
234 
235     Scan scan = new Scan();
236 
237     ResultScanner scanner1 = htable1.getScanner(scan);
238     Result[] res1 = scanner1.next(NB_ROWS_IN_BATCH);
239     scanner1.close();
240     assertEquals(NB_ROWS_IN_BATCH, res1.length);
241 
242     for (int i = 0; i < NB_RETRIES; i++) {
243       if (i==NB_RETRIES-1) {
244         fail("Waited too much time for normal batch replication");
245       }
246       ResultScanner scanner = htable2.getScanner(scan);
247       Result[] res = scanner.next(NB_ROWS_IN_BATCH);
248       scanner.close();
249       if (res.length != NB_ROWS_IN_BATCH) {
250         LOG.info("Only got " + res.length + " rows");
251         Thread.sleep(SLEEP_TIME);
252       } else {
253         break;
254       }
255     }
256   }
257 
258   /**
259    * Test stopping replication, trying to insert, make sure nothing's
260    * replicated, enable it, try replicating and it should work
261    * @throws Exception
262    */
263   @Test(timeout=300000)
264   public void testStartStop() throws Exception {
265 
266     // Test stopping replication
267     setIsReplication(false);
268 
269     Put put = new Put(Bytes.toBytes("stop start"));
270     put.add(famName, row, row);
271     htable1.put(put);
272 
273     Get get = new Get(Bytes.toBytes("stop start"));
274     for (int i = 0; i < NB_RETRIES; i++) {
275       if (i==NB_RETRIES-1) {
276         break;
277       }
278       Result res = htable2.get(get);
279       if(res.size() >= 1) {
280         fail("Replication wasn't stopped");
281 
282       } else {
283         LOG.info("Row not replicated, let's wait a bit more...");
284         Thread.sleep(SLEEP_TIME);
285       }
286     }
287 
288     // Test restart replication
289     setIsReplication(true);
290 
291     htable1.put(put);
292 
293     for (int i = 0; i < NB_RETRIES; i++) {
294       if (i==NB_RETRIES-1) {
295         fail("Waited too much time for put replication");
296       }
297       Result res = htable2.get(get);
298       if(res.size() == 0) {
299         LOG.info("Row not available");
300         Thread.sleep(SLEEP_TIME);
301       } else {
302         assertArrayEquals(res.value(), row);
303         break;
304       }
305     }
306 
307     put = new Put(Bytes.toBytes("do not rep"));
308     put.add(noRepfamName, row, row);
309     htable1.put(put);
310 
311     get = new Get(Bytes.toBytes("do not rep"));
312     for (int i = 0; i < NB_RETRIES; i++) {
313       if (i == NB_RETRIES-1) {
314         break;
315       }
316       Result res = htable2.get(get);
317       if (res.size() >= 1) {
318         fail("Not supposed to be replicated");
319       } else {
320         LOG.info("Row not replicated, let's wait a bit more...");
321         Thread.sleep(SLEEP_TIME);
322       }
323     }
324 
325   }
326 
327   /**
328    * Test disable/enable replication, trying to insert, make sure nothing's
329    * replicated, enable it, the insert should be replicated
330    *
331    * @throws Exception
332    */
333   @Test(timeout = 300000)
334   public void testDisableEnable() throws Exception {
335 
336     // Test disabling replication
337     admin.disablePeer("2");
338 
339     byte[] rowkey = Bytes.toBytes("disable enable");
340     Put put = new Put(rowkey);
341     put.add(famName, row, row);
342     htable1.put(put);
343 
344     Get get = new Get(rowkey);
345     for (int i = 0; i < NB_RETRIES; i++) {
346       Result res = htable2.get(get);
347       if (res.size() >= 1) {
348         fail("Replication wasn't disabled");
349       } else {
350         LOG.info("Row not replicated, let's wait a bit more...");
351         Thread.sleep(SLEEP_TIME);
352       }
353     }
354 
355     // Test enable replication
356     admin.enablePeer("2");
357 
358     for (int i = 0; i < NB_RETRIES; i++) {
359       Result res = htable2.get(get);
360       if (res.size() == 0) {
361         LOG.info("Row not available");
362         Thread.sleep(SLEEP_TIME);
363       } else {
364         assertArrayEquals(res.value(), row);
365         return;
366       }
367     }
368     fail("Waited too much time for put replication");
369   }
370 
371   /**
372    * Integration test for TestReplicationAdmin, removes and re-add a peer
373    * cluster
374    *
375    * @throws Exception
376    */
377   @Test(timeout=300000)
378   public void testAddAndRemoveClusters() throws Exception {
379     LOG.info("testAddAndRemoveClusters");
380     admin.removePeer("2");
381     Thread.sleep(SLEEP_TIME);
382     byte[] rowKey = Bytes.toBytes("Won't be replicated");
383     Put put = new Put(rowKey);
384     put.add(famName, row, row);
385     htable1.put(put);
386 
387     Get get = new Get(rowKey);
388     for (int i = 0; i < NB_RETRIES; i++) {
389       if (i == NB_RETRIES-1) {
390         break;
391       }
392       Result res = htable2.get(get);
393       if (res.size() >= 1) {
394         fail("Not supposed to be replicated");
395       } else {
396         LOG.info("Row not replicated, let's wait a bit more...");
397         Thread.sleep(SLEEP_TIME);
398       }
399     }
400 
401     admin.addPeer("2", utility2.getClusterKey());
402     Thread.sleep(SLEEP_TIME);
403     rowKey = Bytes.toBytes("do rep");
404     put = new Put(rowKey);
405     put.add(famName, row, row);
406     LOG.info("Adding new row");
407     htable1.put(put);
408 
409     get = new Get(rowKey);
410     for (int i = 0; i < NB_RETRIES; i++) {
411       if (i==NB_RETRIES-1) {
412         fail("Waited too much time for put replication");
413       }
414       Result res = htable2.get(get);
415       if (res.size() == 0) {
416         LOG.info("Row not available");
417         Thread.sleep(SLEEP_TIME*i);
418       } else {
419         assertArrayEquals(res.value(), row);
420         break;
421       }
422     }
423   }
424 
425 
426   /**
427    * Do a more intense version testSmallBatch, one  that will trigger
428    * hlog rolling and other non-trivial code paths
429    * @throws Exception
430    */
431   @Test(timeout=300000)
432   public void loadTesting() throws Exception {
433     htable1.setWriteBufferSize(1024);
434     htable1.setAutoFlush(false);
435     for (int i = 0; i < NB_ROWS_IN_BIG_BATCH; i++) {
436       Put put = new Put(Bytes.toBytes(i));
437       put.add(famName, row, row);
438       htable1.put(put);
439     }
440     htable1.flushCommits();
441 
442     Scan scan = new Scan();
443 
444     ResultScanner scanner = htable1.getScanner(scan);
445     Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
446     scanner.close();
447 
448     assertEquals(NB_ROWS_IN_BIG_BATCH, res.length);
449 
450     scan = new Scan();
451 
452     for (int i = 0; i < NB_RETRIES_FOR_BIG_BATCH; i++) {
453 
454       scanner = htable2.getScanner(scan);
455       res = scanner.next(NB_ROWS_IN_BIG_BATCH);
456       scanner.close();
457       if (res.length != NB_ROWS_IN_BIG_BATCH) {
458         if (i == NB_RETRIES_FOR_BIG_BATCH-1) {
459           int lastRow = -1;
460           for (Result result : res) {
461             int currentRow = Bytes.toInt(result.getRow());
462             for (int row = lastRow+1; row < currentRow; row++) {
463               LOG.error("Row missing: " + row);
464             }
465             lastRow = currentRow;
466           }
467           LOG.error("Last row: " + lastRow);
468           fail("Waited too much time for normal batch replication, "
469               + res.length + " instead of " + NB_ROWS_IN_BIG_BATCH);
470         } else {
471           LOG.info("Only got " + res.length + " rows");
472           Thread.sleep(SLEEP_TIME);
473         }
474       } else {
475         break;
476       }
477     }
478   }
479 
480   /**
481    * Do a small loading into a table, make sure the data is really the same,
482    * then run the VerifyReplication job to check the results. Do a second
483    * comparison where all the cells are different.
484    * @throws Exception
485    */
486   @Test(timeout=300000)
487   public void testVerifyRepJob() throws Exception {
488     // Populate the tables, at the same time it guarantees that the tables are
489     // identical since it does the check
490     testSmallBatch();
491 
492     String[] args = new String[] {"2", Bytes.toString(tableName)};
493     Job job = VerifyReplication.createSubmittableJob(CONF_WITH_LOCALFS, args);
494     if (job == null) {
495       fail("Job wasn't created, see the log");
496     }
497     if (!job.waitForCompletion(true)) {
498       fail("Job failed, see the log");
499     }
500     assertEquals(NB_ROWS_IN_BATCH, job.getCounters().
501         findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
502     assertEquals(0, job.getCounters().
503         findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
504 
505     Scan scan = new Scan();
506     ResultScanner rs = htable2.getScanner(scan);
507     Put put = null;
508     for (Result result : rs) {
509       put = new Put(result.getRow());
510       KeyValue firstVal = result.raw()[0];
511       put.add(firstVal.getFamily(),
512           firstVal.getQualifier(), Bytes.toBytes("diff data"));
513       htable2.put(put);
514     }
515     Delete delete = new Delete(put.getRow());
516     htable2.delete(delete);
517     job = VerifyReplication.createSubmittableJob(CONF_WITH_LOCALFS, args);
518     if (job == null) {
519       fail("Job wasn't created, see the log");
520     }
521     if (!job.waitForCompletion(true)) {
522       fail("Job failed, see the log");
523     }
524     assertEquals(0, job.getCounters().
525         findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
526     assertEquals(NB_ROWS_IN_BATCH, job.getCounters().
527         findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
528   }
529 
530 }