/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.replication;

import java.util.HashMap;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos;
import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.replication.regionserver.Replication;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.mapreduce.Job;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

@Category(LargeTests.class)
public class TestReplicationSmallTests extends TestReplicationBase {

  private static final Log LOG = LogFactory.getLog(TestReplicationSmallTests.class);

  /**
   * Roll all the WALs and truncate the source table before each test.
   * @throws Exception
   */
  @Before
  public void setUp() throws Exception {
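    // flush every Put to the region servers right away instead of buffering it client-side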
    htable1.setAutoFlush(true);
    // Starting and stopping replication can make us miss new logs,
    // rolling like this makes sure the most recent one gets added to the queue
    for (JVMClusterUtil.RegionServerThread r :
        utility1.getHBaseCluster().getRegionServerThreads()) {
      r.getRegionServer().getWAL().rollWriter();
    }
    utility1.truncateTable(tableName);
    // truncating the table will send one Delete per row to the slave cluster
    // in an async fashion, which is why we cannot just call truncateTable on
    // utility2 since late writes could make it to the slave in some way.
    // Instead, we truncate the first table and wait for all the Deletes to
    // make it to the slave.
    Scan scan = new Scan();
    int lastCount = 0;
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for truncate");
      }
      ResultScanner scanner = htable2.getScanner(scan);
      Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
      scanner.close();
      if (res.length != 0) {
        if (res.length < lastCount) {
          i--; // Don't increment timeout if we make progress
        }
        lastCount = res.length;
        LOG.info("Still got " + res.length + " rows");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }
  }

  /**
   * Verify that version and column delete marker types are replicated
   * correctly.
   * @throws Exception
   */
  @Test(timeout=300000)
  public void testDeleteTypes() throws Exception {
    LOG.info("testDeleteTypes");
    final byte[] v1 = Bytes.toBytes("v1");
    final byte[] v2 = Bytes.toBytes("v2");
    final byte[] v3 = Bytes.toBytes("v3");
    htable1 = new HTable(conf1, tableName);

    long t = EnvironmentEdgeManager.currentTimeMillis();
    // create three versions for "row"
    Put put = new Put(row);
    put.add(famName, row, t, v1);
    htable1.put(put);

    put = new Put(row);
    put.add(famName, row, t + 1, v2);
    htable1.put(put);

    put = new Put(row);
    put.add(famName, row, t + 2, v3);
    htable1.put(put);

    Get get = new Get(row);
    get.setMaxVersions();
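    // wait until all three versions are visible on the slave cluster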
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for put replication");
      }
      Result res = htable2.get(get);
      if (res.size() < 3) {
        LOG.info("Rows not available");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(v3, res.raw()[0].getValue());
        assertArrayEquals(v2, res.raw()[1].getValue());
        assertArrayEquals(v1, res.raw()[2].getValue());
        break;
      }
    }
    // place a version delete marker (delete last version)
    Delete d = new Delete(row);
    d.deleteColumn(famName, row, t);
    htable1.delete(d);

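    // the slave should now be down to the two remaining versions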
    get = new Get(row);
    get.setMaxVersions();
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for del replication");
      }
      Result res = htable2.get(get);
      if (res.size() > 2) {
        LOG.info("Version not deleted");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(v3, res.raw()[0].getValue());
        assertArrayEquals(v2, res.raw()[1].getValue());
        break;
      }
    }

    // place a column delete marker
    d = new Delete(row);
    d.deleteColumns(famName, row, t + 2);
    htable1.delete(d);

    // now *both* of the remaining versions should be deleted
    // at the replica
    get = new Get(row);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for del replication");
      }
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        LOG.info("Rows not deleted");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }
  }

  /**
   * Add a row, check it's replicated, delete it, check it's gone
   * @throws Exception
   */
  @Test(timeout=300000)
  public void testSimplePutDelete() throws Exception {
    LOG.info("testSimplePutDelete");
    Put put = new Put(row);
    put.add(famName, row, row);

    htable1 = new HTable(conf1, tableName);
    htable1.put(put);

    Get get = new Get(row);
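    // wait for the single put to show up on the slave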
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for put replication");
      }
      Result res = htable2.get(get);
      if (res.size() == 0) {
        LOG.info("Row not available");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(row, res.value());
        break;
      }
    }

    Delete del = new Delete(row);
    htable1.delete(del);

    get = new Get(row);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for del replication");
      }
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        LOG.info("Row not deleted");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }
  }

  /**
   * Try a small batch upload using the write buffer, check it's replicated
   * @throws Exception
   */
  @Test(timeout=300000)
  public void testSmallBatch() throws Exception {
    LOG.info("testSmallBatch");
    Put put;
    // normal Batch tests
    htable1.setAutoFlush(false);
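    // puts are buffered client-side until flushCommits() pushes them out as one batch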
    for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
      put = new Put(Bytes.toBytes(i));
      put.add(famName, row, row);
      htable1.put(put);
    }
    htable1.flushCommits();

    Scan scan = new Scan();

    ResultScanner scanner1 = htable1.getScanner(scan);
    Result[] res1 = scanner1.next(NB_ROWS_IN_BATCH);
    scanner1.close();
    assertEquals(NB_ROWS_IN_BATCH, res1.length);

    for (int i = 0; i < NB_RETRIES; i++) {
      scan = new Scan();
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for normal batch replication");
      }
      ResultScanner scanner = htable2.getScanner(scan);
      Result[] res = scanner.next(NB_ROWS_IN_BATCH);
      scanner.close();
      if (res.length != NB_ROWS_IN_BATCH) {
        LOG.info("Only got " + res.length + " rows");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }
  }

  /**
   * Test disabling and re-enabling replication: insert a row while the peer
   * is disabled and make sure nothing is replicated, then enable the peer
   * and check that the insert gets replicated.
   *
   * @throws Exception
   */
  @Test(timeout = 300000)
  public void testDisableEnable() throws Exception {

    // Test disabling replication
    admin.disablePeer("2");

    byte[] rowkey = Bytes.toBytes("disable enable");
    Put put = new Put(rowkey);
    put.add(famName, row, row);
    htable1.put(put);

    Get get = new Get(rowkey);
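    // the row must never appear on the slave while the peer is disabled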
    for (int i = 0; i < NB_RETRIES; i++) {
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        fail("Replication wasn't disabled");
      } else {
        LOG.info("Row not replicated, let's wait a bit more...");
        Thread.sleep(SLEEP_TIME);
      }
    }

    // Test enable replication
    admin.enablePeer("2");

    for (int i = 0; i < NB_RETRIES; i++) {
      Result res = htable2.get(get);
      if (res.size() == 0) {
        LOG.info("Row not available");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(row, res.value());
        return;
      }
    }
    fail("Waited too much time for put replication");
  }

  /**
   * Integration test for TestReplicationAdmin: removes and re-adds a peer
   * cluster
   *
   * @throws Exception
   */
  @Test(timeout=300000)
  public void testAddAndRemoveClusters() throws Exception {
    LOG.info("testAddAndRemoveClusters");
    admin.removePeer("2");
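    // give the peer removal time to propagate to the region servers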
    Thread.sleep(SLEEP_TIME);
    byte[] rowKey = Bytes.toBytes("Won't be replicated");
    Put put = new Put(rowKey);
    put.add(famName, row, row);
    htable1.put(put);

    Get get = new Get(rowKey);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        break;
      }
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        fail("Not supposed to be replicated");
      } else {
        LOG.info("Row not replicated, let's wait a bit more...");
        Thread.sleep(SLEEP_TIME);
      }
    }

    admin.addPeer("2", utility2.getClusterKey());
    Thread.sleep(SLEEP_TIME);
    rowKey = Bytes.toBytes("do rep");
    put = new Put(rowKey);
    put.add(famName, row, row);
    LOG.info("Adding new row");
    htable1.put(put);

    get = new Get(rowKey);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for put replication");
      }
      Result res = htable2.get(get);
      if (res.size() == 0) {
        LOG.info("Row not available");
        Thread.sleep(SLEEP_TIME * i);
      } else {
        assertArrayEquals(row, res.value());
        break;
      }
    }
  }

  /**
   * Do a more intense version of testSmallBatch, one that will trigger
   * HLog rolling and other non-trivial code paths
   * @throws Exception
   */
  @Test(timeout=300000)
  public void loadTesting() throws Exception {
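    // a tiny write buffer forces frequent flushes, which exercises log rolling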
    htable1.setWriteBufferSize(1024);
    htable1.setAutoFlush(false);
    for (int i = 0; i < NB_ROWS_IN_BIG_BATCH; i++) {
      Put put = new Put(Bytes.toBytes(i));
      put.add(famName, row, row);
      htable1.put(put);
    }
    htable1.flushCommits();

    Scan scan = new Scan();

    ResultScanner scanner = htable1.getScanner(scan);
    Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
    scanner.close();

    assertEquals(NB_ROWS_IN_BIG_BATCH, res.length);

    long start = System.currentTimeMillis();
    for (int i = 0; i < NB_RETRIES; i++) {
      scan = new Scan();

      scanner = htable2.getScanner(scan);
      res = scanner.next(NB_ROWS_IN_BIG_BATCH);
      scanner.close();
      if (res.length != NB_ROWS_IN_BIG_BATCH) {
        if (i == NB_RETRIES - 1) {
          int lastRow = -1;
          for (Result result : res) {
            int currentRow = Bytes.toInt(result.getRow());
            for (int missingRow = lastRow + 1; missingRow < currentRow; missingRow++) {
              LOG.error("Row missing: " + missingRow);
            }
            lastRow = currentRow;
          }
          LOG.error("Last row: " + lastRow);
          fail("Waited too much time for normal batch replication, " +
            res.length + " instead of " + NB_ROWS_IN_BIG_BATCH + "; waited=" +
            (System.currentTimeMillis() - start) + "ms");
        } else {
          LOG.info("Only got " + res.length + " rows");
          Thread.sleep(SLEEP_TIME);
        }
      } else {
        break;
      }
    }
  }

  /**
   * Do a small loading into a table, make sure the data is really the same,
   * then run the VerifyReplication job to check the results. Do a second
   * comparison where all the cells are different.
   * @throws Exception
   */
  @Test(timeout=300000)
  public void testVerifyRepJob() throws Exception {
    // Populate the tables, at the same time it guarantees that the tables are
    // identical since it does the check
    testSmallBatch();

    String[] args = new String[] {"2", Bytes.toString(tableName)};
    Job job = VerifyReplication.createSubmittableJob(CONF_WITH_LOCALFS, args);
    if (job == null) {
      fail("Job wasn't created, see the log");
    }
    if (!job.waitForCompletion(true)) {
      fail("Job failed, see the log");
    }
    assertEquals(NB_ROWS_IN_BATCH, job.getCounters().
        findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
    assertEquals(0, job.getCounters().
        findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());

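    // overwrite every cell on the slave so the second run flags each row as bad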
    Scan scan = new Scan();
    ResultScanner rs = htable2.getScanner(scan);
    Put put = null;
    for (Result result : rs) {
      put = new Put(result.getRow());
      KeyValue firstVal = result.raw()[0];
      put.add(firstVal.getFamily(),
          firstVal.getQualifier(), Bytes.toBytes("diff data"));
      htable2.put(put);
    }
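    // also remove the last row entirely on the slave so a missing row is exercised too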
    Delete delete = new Delete(put.getRow());
    htable2.delete(delete);
    job = VerifyReplication.createSubmittableJob(CONF_WITH_LOCALFS, args);
    if (job == null) {
      fail("Job wasn't created, see the log");
    }
    if (!job.waitForCompletion(true)) {
      fail("Job failed, see the log");
    }
    assertEquals(0, job.getCounters().
        findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
    assertEquals(NB_ROWS_IN_BATCH, job.getCounters().
        findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
  }

  /**
   * Test for HBASE-9038: Replication.scopeWALEdits would NPE if it wasn't
   * filtering out the compaction WALEdit
   * @throws Exception
   */
  @Test(timeout=300000)
  public void testCompactionWALEdits() throws Exception {
    WALProtos.CompactionDescriptor compactionDescriptor =
        WALProtos.CompactionDescriptor.getDefaultInstance();
    WALEdit edit = WALEdit.createCompaction(compactionDescriptor);
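    // scoping a compaction edit must not throw a NullPointerException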
    Replication.scopeWALEdits(htable1.getTableDescriptor(), new HLogKey(), edit);
  }

  /**
   * Test for HBASE-8663:
   * Create three new tables with column families enabled for replication, then run
   * ReplicationAdmin.listReplicated(). Finally verify the table:colfamily pairs. Note:
   * TestReplicationAdmin is a better place for this testing but it would need mocks.
   * @throws Exception
   */
  @Test(timeout = 300000)
  public void testVerifyListReplicatedTable() throws Exception {
    LOG.info("testVerifyListReplicatedTable");

    final String tName = "VerifyListReplicated_";
    final String colFam = "cf1";
    final int numOfTables = 3;

    HBaseAdmin hadmin = new HBaseAdmin(conf1);

    // Create Tables
    for (int i = 0; i < numOfTables; i++) {
      HTableDescriptor ht = new HTableDescriptor(TableName.valueOf(tName + i));
      HColumnDescriptor cfd = new HColumnDescriptor(colFam);
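      // REPLICATION_SCOPE_GLOBAL marks the family as replicated to peer clusters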
      cfd.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
      ht.addFamily(cfd);
      hadmin.createTable(ht);
    }

    // verify the result
    List<HashMap<String, String>> replicationColFams = admin.listReplicated();
    int[] match = new int[numOfTables]; // array of 3 with init value of zero

    for (int i = 0; i < replicationColFams.size(); i++) {
      HashMap<String, String> replicationEntry = replicationColFams.get(i);
      String tn = replicationEntry.get(ReplicationAdmin.TNAME);
      if (tn.startsWith(tName) && replicationEntry.get(ReplicationAdmin.CFNAME).equals(colFam)) {
        int m = Integer.parseInt(tn.substring(tn.length() - 1)); // get the last digit
        match[m]++; // should only increase once
      }
    }

    // check the matching result
    for (int i = 0; i < match.length; i++) {
      assertTrue("listReplicated() does not match table " + i, match[i] == 1);
    }

    // drop tables
    for (int i = 0; i < numOfTables; i++) {
      String ht = tName + i;
      hadmin.disableTable(ht);
      hadmin.deleteTable(ht);
    }

    hadmin.close();
  }

}