1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.replication;
19  
20  import java.util.HashMap;
21  import java.util.List;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.hbase.HColumnDescriptor;
26  import org.apache.hadoop.hbase.HConstants;
27  import org.apache.hadoop.hbase.HTableDescriptor;
28  import org.apache.hadoop.hbase.KeyValue;
29  import org.apache.hadoop.hbase.LargeTests;
30  import org.apache.hadoop.hbase.client.*;
31  import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
32  import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
33  import org.apache.hadoop.hbase.util.Bytes;
34  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
35  import org.apache.hadoop.hbase.util.JVMClusterUtil;
36  import org.apache.hadoop.mapreduce.Job;
37  import org.junit.Before;
38  import org.junit.Test;
39  import org.junit.experimental.categories.Category;
40  
41  import static org.junit.Assert.assertArrayEquals;
42  import static org.junit.Assert.assertEquals;
43  import static org.junit.Assert.fail;
44  import static org.junit.Assert.assertTrue;
45  
46  @Category(LargeTests.class)
47  public class TestReplicationSmallTests extends TestReplicationBase {
48  
49    private static final Log LOG = LogFactory.getLog(TestReplicationSmallTests.class);
50  
51    /**
52     * @throws java.lang.Exception
53     */
54    @Before
55    public void setUp() throws Exception {
56      htable1.setAutoFlush(true);
57      // Starting and stopping replication can make us miss new logs,
58      // rolling like this makes sure the most recent one gets added to the queue
59      for ( JVMClusterUtil.RegionServerThread r :
60          utility1.getHBaseCluster().getRegionServerThreads()) {
61        r.getRegionServer().getWAL().rollWriter();
62      }
63      utility1.truncateTable(tableName);
64      // truncating the table will send one Delete per row to the slave cluster
65      // in an async fashion, which is why we cannot just call truncateTable on
66      // utility2 since late writes could make it to the slave in some way.
67      // Instead, we truncate the first table and wait for all the Deletes to
68      // make it to the slave.
69      Scan scan = new Scan();
70      int lastCount = 0;
71      for (int i = 0; i < NB_RETRIES; i++) {
72        if (i==NB_RETRIES-1) {
73          fail("Waited too much time for truncate");
74        }
75        ResultScanner scanner = htable2.getScanner(scan);
76        Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
77        scanner.close();
78        if (res.length != 0) {
79          if (res.length < lastCount) {
80            i--; // Don't increment timeout if we make progress
81          }
82          lastCount = res.length;
83          LOG.info("Still got " + res.length + " rows");
84          Thread.sleep(SLEEP_TIME);
85        } else {
86          break;
87        }
88      }
89    }
90  
91    /**
92     * Verify that version and column delete marker types are replicated
93     * correctly.
94     * @throws Exception
95     */
96    @Test(timeout=300000)
97    public void testDeleteTypes() throws Exception {
98      LOG.info("testDeleteTypes");
99      final byte[] v1 = Bytes.toBytes("v1");
100     final byte[] v2 = Bytes.toBytes("v2");
101     final byte[] v3 = Bytes.toBytes("v3");
102     htable1 = new HTable(conf1, tableName);
103 
104     long t = EnvironmentEdgeManager.currentTimeMillis();
105     // create three versions for "row"
106     Put put = new Put(row);
107     put.add(famName, row, t, v1);
108     htable1.put(put);
109 
110     put = new Put(row);
111     put.add(famName, row, t+1, v2);
112     htable1.put(put);
113 
114     put = new Put(row);
115     put.add(famName, row, t+2, v3);
116     htable1.put(put);
117 
118     Get get = new Get(row);
119     get.setMaxVersions();
120     for (int i = 0; i < NB_RETRIES; i++) {
121       if (i==NB_RETRIES-1) {
122         fail("Waited too much time for put replication");
123       }
124       Result res = htable2.get(get);
125       if (res.size() < 3) {
126         LOG.info("Rows not available");
127         Thread.sleep(SLEEP_TIME);
128       } else {
129         assertArrayEquals(res.raw()[0].getValue(), v3);
130         assertArrayEquals(res.raw()[1].getValue(), v2);
131         assertArrayEquals(res.raw()[2].getValue(), v1);
132         break;
133       }
134     }
135     // place a version delete marker (delete last version)
136     Delete d = new Delete(row);
137     d.deleteColumn(famName, row, t);
138     htable1.delete(d);
139 
140     get = new Get(row);
141     get.setMaxVersions();
142     for (int i = 0; i < NB_RETRIES; i++) {
143       if (i==NB_RETRIES-1) {
144         fail("Waited too much time for put replication");
145       }
146       Result res = htable2.get(get);
147       if (res.size() > 2) {
148         LOG.info("Version not deleted");
149         Thread.sleep(SLEEP_TIME);
150       } else {
151         assertArrayEquals(res.raw()[0].getValue(), v3);
152         assertArrayEquals(res.raw()[1].getValue(), v2);
153         break;
154       }
155     }
156 
157     // place a column delete marker
158     d = new Delete(row);
159     d.deleteColumns(famName, row, t+2);
160     htable1.delete(d);
161 
162     // now *both* of the remaining version should be deleted
163     // at the replica
164     get = new Get(row);
165     for (int i = 0; i < NB_RETRIES; i++) {
166       if (i==NB_RETRIES-1) {
167         fail("Waited too much time for del replication");
168       }
169       Result res = htable2.get(get);
170       if (res.size() >= 1) {
171         LOG.info("Rows not deleted");
172         Thread.sleep(SLEEP_TIME);
173       } else {
174         break;
175       }
176     }
177   }
178 
179   /**
180    * Add a row, check it's replicated, delete it, check's gone
181    * @throws Exception
182    */
183   @Test(timeout=300000)
184   public void testSimplePutDelete() throws Exception {
185     LOG.info("testSimplePutDelete");
186     Put put = new Put(row);
187     put.add(famName, row, row);
188 
189     htable1 = new HTable(conf1, tableName);
190     htable1.put(put);
191 
192     Get get = new Get(row);
193     for (int i = 0; i < NB_RETRIES; i++) {
194       if (i==NB_RETRIES-1) {
195         fail("Waited too much time for put replication");
196       }
197       Result res = htable2.get(get);
198       if (res.size() == 0) {
199         LOG.info("Row not available");
200         Thread.sleep(SLEEP_TIME);
201       } else {
202         assertArrayEquals(res.value(), row);
203         break;
204       }
205     }
206 
207     Delete del = new Delete(row);
208     htable1.delete(del);
209 
210     get = new Get(row);
211     for (int i = 0; i < NB_RETRIES; i++) {
212       if (i==NB_RETRIES-1) {
213         fail("Waited too much time for del replication");
214       }
215       Result res = htable2.get(get);
216       if (res.size() >= 1) {
217         LOG.info("Row not deleted");
218         Thread.sleep(SLEEP_TIME);
219       } else {
220         break;
221       }
222     }
223   }
224 
225   /**
226    * Try a small batch upload using the write buffer, check it's replicated
227    * @throws Exception
228    */
229   @Test(timeout=300000)
230   public void testSmallBatch() throws Exception {
231     LOG.info("testSmallBatch");
232     Put put;
233     // normal Batch tests
234     htable1.setAutoFlush(false);
235     for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
236       put = new Put(Bytes.toBytes(i));
237       put.add(famName, row, row);
238       htable1.put(put);
239     }
240     htable1.flushCommits();
241 
242     Scan scan = new Scan();
243 
244     ResultScanner scanner1 = htable1.getScanner(scan);
245     Result[] res1 = scanner1.next(NB_ROWS_IN_BATCH);
246     scanner1.close();
247     assertEquals(NB_ROWS_IN_BATCH, res1.length);
248 
249     for (int i = 0; i < NB_RETRIES; i++) {
250       if (i==NB_RETRIES-1) {
251         fail("Waited too much time for normal batch replication");
252       }
253       ResultScanner scanner = htable2.getScanner(scan);
254       Result[] res = scanner.next(NB_ROWS_IN_BATCH);
255       scanner.close();
256       if (res.length != NB_ROWS_IN_BATCH) {
257         LOG.info("Only got " + res.length + " rows");
258         Thread.sleep(SLEEP_TIME);
259       } else {
260         break;
261       }
262     }
263   }
264 
265   /**
266    * Test stopping replication, trying to insert, make sure nothing's
267    * replicated, enable it, try replicating and it should work
268    * @throws Exception
269    */
270   @Test(timeout=300000)
271   public void testStartStop() throws Exception {
272 
273     // Test stopping replication
274     setIsReplication(false);
275 
276     Put put = new Put(Bytes.toBytes("stop start"));
277     put.add(famName, row, row);
278     htable1.put(put);
279 
280     Get get = new Get(Bytes.toBytes("stop start"));
281     for (int i = 0; i < NB_RETRIES; i++) {
282       if (i==NB_RETRIES-1) {
283         break;
284       }
285       Result res = htable2.get(get);
286       if(res.size() >= 1) {
287         fail("Replication wasn't stopped");
288 
289       } else {
290         LOG.info("Row not replicated, let's wait a bit more...");
291         Thread.sleep(SLEEP_TIME);
292       }
293     }
294 
295     // Test restart replication
296     setIsReplication(true);
297 
298     htable1.put(put);
299 
300     for (int i = 0; i < NB_RETRIES; i++) {
301       if (i==NB_RETRIES-1) {
302         fail("Waited too much time for put replication");
303       }
304       Result res = htable2.get(get);
305       if(res.size() == 0) {
306         LOG.info("Row not available");
307         Thread.sleep(SLEEP_TIME);
308       } else {
309         assertArrayEquals(res.value(), row);
310         break;
311       }
312     }
313 
314     put = new Put(Bytes.toBytes("do not rep"));
315     put.add(noRepfamName, row, row);
316     htable1.put(put);
317 
318     get = new Get(Bytes.toBytes("do not rep"));
319     for (int i = 0; i < NB_RETRIES; i++) {
320       if (i == NB_RETRIES-1) {
321         break;
322       }
323       Result res = htable2.get(get);
324       if (res.size() >= 1) {
325         fail("Not supposed to be replicated");
326       } else {
327         LOG.info("Row not replicated, let's wait a bit more...");
328         Thread.sleep(SLEEP_TIME);
329       }
330     }
331 
332   }
333 
334   /**
335    * Test disable/enable replication, trying to insert, make sure nothing's
336    * replicated, enable it, the insert should be replicated
337    *
338    * @throws Exception
339    */
340   @Test(timeout = 300000)
341   public void testDisableEnable() throws Exception {
342 
343     // Test disabling replication
344     admin.disablePeer("2");
345 
346     byte[] rowkey = Bytes.toBytes("disable enable");
347     Put put = new Put(rowkey);
348     put.add(famName, row, row);
349     htable1.put(put);
350 
351     Get get = new Get(rowkey);
352     for (int i = 0; i < NB_RETRIES; i++) {
353       Result res = htable2.get(get);
354       if (res.size() >= 1) {
355         fail("Replication wasn't disabled");
356       } else {
357         LOG.info("Row not replicated, let's wait a bit more...");
358         Thread.sleep(SLEEP_TIME);
359       }
360     }
361 
362     // Test enable replication
363     admin.enablePeer("2");
364 
365     for (int i = 0; i < NB_RETRIES; i++) {
366       Result res = htable2.get(get);
367       if (res.size() == 0) {
368         LOG.info("Row not available");
369         Thread.sleep(SLEEP_TIME);
370       } else {
371         assertArrayEquals(res.value(), row);
372         return;
373       }
374     }
375     fail("Waited too much time for put replication");
376   }
377 
378   /**
379    * Integration test for TestReplicationAdmin, removes and re-add a peer
380    * cluster
381    *
382    * @throws Exception
383    */
384   @Test(timeout=300000)
385   public void testAddAndRemoveClusters() throws Exception {
386     LOG.info("testAddAndRemoveClusters");
387     admin.removePeer("2");
388     Thread.sleep(SLEEP_TIME);
389     byte[] rowKey = Bytes.toBytes("Won't be replicated");
390     Put put = new Put(rowKey);
391     put.add(famName, row, row);
392     htable1.put(put);
393 
394     Get get = new Get(rowKey);
395     for (int i = 0; i < NB_RETRIES; i++) {
396       if (i == NB_RETRIES-1) {
397         break;
398       }
399       Result res = htable2.get(get);
400       if (res.size() >= 1) {
401         fail("Not supposed to be replicated");
402       } else {
403         LOG.info("Row not replicated, let's wait a bit more...");
404         Thread.sleep(SLEEP_TIME);
405       }
406     }
407 
408     admin.addPeer("2", utility2.getClusterKey());
409     Thread.sleep(SLEEP_TIME);
410     rowKey = Bytes.toBytes("do rep");
411     put = new Put(rowKey);
412     put.add(famName, row, row);
413     LOG.info("Adding new row");
414     htable1.put(put);
415 
416     get = new Get(rowKey);
417     for (int i = 0; i < NB_RETRIES; i++) {
418       if (i==NB_RETRIES-1) {
419         fail("Waited too much time for put replication");
420       }
421       Result res = htable2.get(get);
422       if (res.size() == 0) {
423         LOG.info("Row not available");
424         Thread.sleep(SLEEP_TIME*i);
425       } else {
426         assertArrayEquals(res.value(), row);
427         break;
428       }
429     }
430   }
431 
432 
433   /**
434    * Do a more intense version testSmallBatch, one  that will trigger
435    * hlog rolling and other non-trivial code paths
436    * @throws Exception
437    */
438   @Test(timeout=300000)
439   public void loadTesting() throws Exception {
440     htable1.setWriteBufferSize(1024);
441     htable1.setAutoFlush(false);
442     for (int i = 0; i < NB_ROWS_IN_BIG_BATCH; i++) {
443       Put put = new Put(Bytes.toBytes(i));
444       put.add(famName, row, row);
445       htable1.put(put);
446     }
447     htable1.flushCommits();
448 
449     Scan scan = new Scan();
450 
451     ResultScanner scanner = htable1.getScanner(scan);
452     Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
453     scanner.close();
454 
455     assertEquals(NB_ROWS_IN_BIG_BATCH, res.length);
456 
457     scan = new Scan();
458 
459     for (int i = 0; i < NB_RETRIES_FOR_BIG_BATCH; i++) {
460 
461       scanner = htable2.getScanner(scan);
462       res = scanner.next(NB_ROWS_IN_BIG_BATCH);
463       scanner.close();
464       if (res.length != NB_ROWS_IN_BIG_BATCH) {
465         if (i == NB_RETRIES_FOR_BIG_BATCH-1) {
466           int lastRow = -1;
467           for (Result result : res) {
468             int currentRow = Bytes.toInt(result.getRow());
469             for (int row = lastRow+1; row < currentRow; row++) {
470               LOG.error("Row missing: " + row);
471             }
472             lastRow = currentRow;
473           }
474           LOG.error("Last row: " + lastRow);
475           fail("Waited too much time for normal batch replication, "
476               + res.length + " instead of " + NB_ROWS_IN_BIG_BATCH);
477         } else {
478           LOG.info("Only got " + res.length + " rows");
479           Thread.sleep(SLEEP_TIME);
480         }
481       } else {
482         break;
483       }
484     }
485   }
486 
487   /**
488    * Do a small loading into a table, make sure the data is really the same,
489    * then run the VerifyReplication job to check the results. Do a second
490    * comparison where all the cells are different.
491    * @throws Exception
492    */
493   @Test(timeout=300000)
494   public void testVerifyRepJob() throws Exception {
495     // Populate the tables, at the same time it guarantees that the tables are
496     // identical since it does the check
497     testSmallBatch();
498 
499     String[] args = new String[] {"2", Bytes.toString(tableName)};
500     Job job = VerifyReplication.createSubmittableJob(CONF_WITH_LOCALFS, args);
501     if (job == null) {
502       fail("Job wasn't created, see the log");
503     }
504     if (!job.waitForCompletion(true)) {
505       fail("Job failed, see the log");
506     }
507     assertEquals(NB_ROWS_IN_BATCH, job.getCounters().
508         findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
509     assertEquals(0, job.getCounters().
510         findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
511 
512     Scan scan = new Scan();
513     ResultScanner rs = htable2.getScanner(scan);
514     Put put = null;
515     for (Result result : rs) {
516       put = new Put(result.getRow());
517       KeyValue firstVal = result.raw()[0];
518       put.add(firstVal.getFamily(),
519           firstVal.getQualifier(), Bytes.toBytes("diff data"));
520       htable2.put(put);
521     }
522     Delete delete = new Delete(put.getRow());
523     htable2.delete(delete);
524     job = VerifyReplication.createSubmittableJob(CONF_WITH_LOCALFS, args);
525     if (job == null) {
526       fail("Job wasn't created, see the log");
527     }
528     if (!job.waitForCompletion(true)) {
529       fail("Job failed, see the log");
530     }
531     assertEquals(0, job.getCounters().
532         findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
533     assertEquals(NB_ROWS_IN_BATCH, job.getCounters().
534         findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
535   }
536 
537     
538     /**
539      * Test for HBASE-8663
540      * Create two new Tables with colfamilies enabled for replication then run
541      * ReplicationAdmin.listReplicated(). Finally verify the table:colfamilies. Note:
542      * TestReplicationAdmin is a better place for this testing but it would need mocks.
543      * @throws Exception
544      */
545     @Test(timeout = 300000)
546     public void testVerifyListReplicatedTable() throws Exception {
547       LOG.info("testVerifyListReplicatedTable");
548    
549       final String tName = "VerifyListReplicated_";
550       final String colFam = "cf1";
551       final int numOfTables = 3;
552   
553       HBaseAdmin hadmin = new HBaseAdmin(conf1);
554   
555       // Create Tables
556       for (int i = 0; i < numOfTables; i++) {
557         HTableDescriptor ht = new HTableDescriptor(tName + i);
558         HColumnDescriptor cfd = new HColumnDescriptor(colFam);
559         cfd.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
560         ht.addFamily(cfd);
561         hadmin.createTable(ht);
562       }
563   
564       // verify the result
565       List<HashMap<String, String>> replicationColFams = admin.listReplicated();
566       int[] match = new int[numOfTables]; // array of 3 with init value of zero
567   
568       for (int i = 0; i < replicationColFams.size(); i++) {
569         HashMap<String, String> replicationEntry = replicationColFams.get(i);
570         String tn = replicationEntry.get(ReplicationAdmin.TNAME);
571         if ((tn.startsWith(tName)) && replicationEntry.get(ReplicationAdmin.CFNAME).equals(colFam)) {
572           int m = Integer.parseInt(tn.substring(tn.length() - 1)); // get the last digit
573           match[m]++; // should only increase once
574         }
575       }
576   
577       // check the matching result
578       for (int i = 0; i < match.length; i++) {
579         assertTrue("listReplicated() does not match table " + i, (match[i] == 1));
580       }
581   
582       // drop tables
583       for (int i = 0; i < numOfTables; i++) {
584         String ht = tName + i;
585         hadmin.disableTable(ht);
586         hadmin.deleteTable(ht);
587       }
588   
589       hadmin.close();
590     }  
591 }