View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.replication;
20  
21  import static org.junit.Assert.assertArrayEquals;
22  import static org.junit.Assert.assertEquals;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.util.HashMap;
27  import java.util.List;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.hbase.Cell;
32  import org.apache.hadoop.hbase.CellUtil;
33  import org.apache.hadoop.hbase.HColumnDescriptor;
34  import org.apache.hadoop.hbase.HConstants;
35  import org.apache.hadoop.hbase.HTableDescriptor;
36  import org.apache.hadoop.hbase.LargeTests;
37  import org.apache.hadoop.hbase.TableName;
38  import org.apache.hadoop.hbase.client.Delete;
39  import org.apache.hadoop.hbase.client.Get;
40  import org.apache.hadoop.hbase.client.HBaseAdmin;
41  import org.apache.hadoop.hbase.client.HTable;
42  import org.apache.hadoop.hbase.client.Put;
43  import org.apache.hadoop.hbase.client.Result;
44  import org.apache.hadoop.hbase.client.ResultScanner;
45  import org.apache.hadoop.hbase.client.Scan;
46  import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
47  import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
48  import org.apache.hadoop.hbase.protobuf.generated.WALProtos;
49  import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
50  import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
51  import org.apache.hadoop.hbase.replication.regionserver.Replication;
52  import org.apache.hadoop.hbase.util.Bytes;
53  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
54  import org.apache.hadoop.hbase.util.JVMClusterUtil;
55  import org.apache.hadoop.mapreduce.Job;
56  import org.junit.Before;
57  import org.junit.Test;
58  import org.junit.experimental.categories.Category;
59  
60  @Category(LargeTests.class)
61  public class TestReplicationSmallTests extends TestReplicationBase {
62  
63    private static final Log LOG = LogFactory.getLog(TestReplicationSmallTests.class);
64  
  /**
   * Reset both clusters before each test: roll all source WALs, truncate the
   * source table, and wait until the resulting Deletes have been replicated so
   * every test starts with empty tables on both sides.
   * @throws java.lang.Exception
   */
  @Before
  public void setUp() throws Exception {
    htable1.setAutoFlush(true, true);
    // Starting and stopping replication can make us miss new logs,
    // rolling like this makes sure the most recent one gets added to the queue
    for ( JVMClusterUtil.RegionServerThread r :
        utility1.getHBaseCluster().getRegionServerThreads()) {
      r.getRegionServer().getWAL().rollWriter();
    }
    utility1.truncateTable(tableName);
    // truncating the table will send one Delete per row to the slave cluster
    // in an async fashion, which is why we cannot just call truncateTable on
    // utility2 since late writes could make it to the slave in some way.
    // Instead, we truncate the first table and wait for all the Deletes to
    // make it to the slave.
    Scan scan = new Scan();
    int lastCount = 0;
    // Poll the slave until it is empty; fail when the retry budget runs out.
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i==NB_RETRIES-1) {
        fail("Waited too much time for truncate");
      }
      ResultScanner scanner = htable2.getScanner(scan);
      Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
      scanner.close();
      if (res.length != 0) {
        // Fewer rows than last time means the Deletes are flowing; extend the
        // retry budget by one iteration so slow-but-progressing runs don't fail.
        if (res.length < lastCount) {
          i--; // Don't increment timeout if we make progress
        }
        lastCount = res.length;
        LOG.info("Still got " + res.length + " rows");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }
  }
104 
  /**
   * Verify that version and column delete marker types are replicated
   * correctly.
   * @throws Exception
   */
  @Test(timeout=300000)
  public void testDeleteTypes() throws Exception {
    LOG.info("testDeleteTypes");
    final byte[] v1 = Bytes.toBytes("v1");
    final byte[] v2 = Bytes.toBytes("v2");
    final byte[] v3 = Bytes.toBytes("v3");
    htable1 = new HTable(conf1, tableName);

    long t = EnvironmentEdgeManager.currentTimeMillis();
    // create three versions for "row"
    Put put = new Put(row);
    put.add(famName, row, t, v1);
    htable1.put(put);

    put = new Put(row);
    put.add(famName, row, t+1, v2);
    htable1.put(put);

    put = new Put(row);
    put.add(famName, row, t+2, v3);
    htable1.put(put);

    // Wait until all three versions show up on the slave cluster.
    Get get = new Get(row);
    get.setMaxVersions();
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i==NB_RETRIES-1) {
        fail("Waited too much time for put replication");
      }
      Result res = htable2.get(get);
      if (res.size() < 3) {
        LOG.info("Rows not available");
        Thread.sleep(SLEEP_TIME);
      } else {
        // Cells come back newest timestamp first: v3, v2, v1.
        assertArrayEquals(CellUtil.cloneValue(res.rawCells()[0]), v3);
        assertArrayEquals(CellUtil.cloneValue(res.rawCells()[1]), v2);
        assertArrayEquals(CellUtil.cloneValue(res.rawCells()[2]), v1);
        break;
      }
    }
    // place a version delete marker (delete last version)
    Delete d = new Delete(row);
    d.deleteColumn(famName, row, t);
    htable1.delete(d);

    // Wait for the version delete marker to replicate: only the two newer
    // versions (v3, v2) should remain visible on the slave.
    get = new Get(row);
    get.setMaxVersions();
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i==NB_RETRIES-1) {
        fail("Waited too much time for put replication");
      }
      Result res = htable2.get(get);
      if (res.size() > 2) {
        LOG.info("Version not deleted");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(CellUtil.cloneValue(res.rawCells()[0]), v3);
        assertArrayEquals(CellUtil.cloneValue(res.rawCells()[1]), v2);
        break;
      }
    }

    // place a column delete marker
    d = new Delete(row);
    d.deleteColumns(famName, row, t+2);
    htable1.delete(d);

    // now *both* of the remaining version should be deleted
    // at the replica
    get = new Get(row);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i==NB_RETRIES-1) {
        fail("Waited too much time for del replication");
      }
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        LOG.info("Rows not deleted");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }
  }
192 
193   /**
194    * Add a row, check it's replicated, delete it, check's gone
195    * @throws Exception
196    */
197   @Test(timeout=300000)
198   public void testSimplePutDelete() throws Exception {
199     LOG.info("testSimplePutDelete");
200     Put put = new Put(row);
201     put.add(famName, row, row);
202 
203     htable1 = new HTable(conf1, tableName);
204     htable1.put(put);
205 
206     Get get = new Get(row);
207     for (int i = 0; i < NB_RETRIES; i++) {
208       if (i==NB_RETRIES-1) {
209         fail("Waited too much time for put replication");
210       }
211       Result res = htable2.get(get);
212       if (res.size() == 0) {
213         LOG.info("Row not available");
214         Thread.sleep(SLEEP_TIME);
215       } else {
216         assertArrayEquals(res.value(), row);
217         break;
218       }
219     }
220 
221     Delete del = new Delete(row);
222     htable1.delete(del);
223 
224     get = new Get(row);
225     for (int i = 0; i < NB_RETRIES; i++) {
226       if (i==NB_RETRIES-1) {
227         fail("Waited too much time for del replication");
228       }
229       Result res = htable2.get(get);
230       if (res.size() >= 1) {
231         LOG.info("Row not deleted");
232         Thread.sleep(SLEEP_TIME);
233       } else {
234         break;
235       }
236     }
237   }
238 
239   /**
240    * Try a small batch upload using the write buffer, check it's replicated
241    * @throws Exception
242    */
243   @Test(timeout=300000)
244   public void testSmallBatch() throws Exception {
245     LOG.info("testSmallBatch");
246     Put put;
247     // normal Batch tests
248     htable1.setAutoFlush(false, true);
249     for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
250       put = new Put(Bytes.toBytes(i));
251       put.add(famName, row, row);
252       htable1.put(put);
253     }
254     htable1.flushCommits();
255 
256     Scan scan = new Scan();
257 
258     ResultScanner scanner1 = htable1.getScanner(scan);
259     Result[] res1 = scanner1.next(NB_ROWS_IN_BATCH);
260     scanner1.close();
261     assertEquals(NB_ROWS_IN_BATCH, res1.length);
262 
263     for (int i = 0; i < NB_RETRIES; i++) {
264       scan = new Scan();
265       if (i==NB_RETRIES-1) {
266         fail("Waited too much time for normal batch replication");
267       }
268       ResultScanner scanner = htable2.getScanner(scan);
269       Result[] res = scanner.next(NB_ROWS_IN_BATCH);
270       scanner.close();
271       if (res.length != NB_ROWS_IN_BATCH) {
272         LOG.info("Only got " + res.length + " rows");
273         Thread.sleep(SLEEP_TIME);
274       } else {
275         break;
276       }
277     }
278   }
279 
  /**
   * Test disable/enable replication, trying to insert, make sure nothing's
   * replicated, enable it, the insert should be replicated
   *
   * @throws Exception
   */
  @Test(timeout = 300000)
  public void testDisableEnable() throws Exception {

    // Test disabling replication
    admin.disablePeer("2");

    byte[] rowkey = Bytes.toBytes("disable enable");
    Put put = new Put(rowkey);
    put.add(famName, row, row);
    htable1.put(put);

    // Deliberately burn the whole retry budget: the row must NEVER show up
    // on the slave while the peer is disabled.
    Get get = new Get(rowkey);
    for (int i = 0; i < NB_RETRIES; i++) {
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        fail("Replication wasn't disabled");
      } else {
        LOG.info("Row not replicated, let's wait a bit more...");
        Thread.sleep(SLEEP_TIME);
      }
    }

    // Test enable replication
    admin.enablePeer("2");

    // Once re-enabled, the queued edit should be shipped; poll until it lands.
    for (int i = 0; i < NB_RETRIES; i++) {
      Result res = htable2.get(get);
      if (res.size() == 0) {
        LOG.info("Row not available");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(res.value(), row);
        return;
      }
    }
    fail("Waited too much time for put replication");
  }
323 
324   /**
325    * Integration test for TestReplicationAdmin, removes and re-add a peer
326    * cluster
327    *
328    * @throws Exception
329    */
330   @Test(timeout=300000)
331   public void testAddAndRemoveClusters() throws Exception {
332     LOG.info("testAddAndRemoveClusters");
333     admin.removePeer("2");
334     Thread.sleep(SLEEP_TIME);
335     byte[] rowKey = Bytes.toBytes("Won't be replicated");
336     Put put = new Put(rowKey);
337     put.add(famName, row, row);
338     htable1.put(put);
339 
340     Get get = new Get(rowKey);
341     for (int i = 0; i < NB_RETRIES; i++) {
342       if (i == NB_RETRIES-1) {
343         break;
344       }
345       Result res = htable2.get(get);
346       if (res.size() >= 1) {
347         fail("Not supposed to be replicated");
348       } else {
349         LOG.info("Row not replicated, let's wait a bit more...");
350         Thread.sleep(SLEEP_TIME);
351       }
352     }
353 
354     admin.addPeer("2", utility2.getClusterKey());
355     Thread.sleep(SLEEP_TIME);
356     rowKey = Bytes.toBytes("do rep");
357     put = new Put(rowKey);
358     put.add(famName, row, row);
359     LOG.info("Adding new row");
360     htable1.put(put);
361 
362     get = new Get(rowKey);
363     for (int i = 0; i < NB_RETRIES; i++) {
364       if (i==NB_RETRIES-1) {
365         fail("Waited too much time for put replication");
366       }
367       Result res = htable2.get(get);
368       if (res.size() == 0) {
369         LOG.info("Row not available");
370         Thread.sleep(SLEEP_TIME*i);
371       } else {
372         assertArrayEquals(res.value(), row);
373         break;
374       }
375     }
376   }
377 
378 
379   /**
380    * Do a more intense version testSmallBatch, one  that will trigger
381    * hlog rolling and other non-trivial code paths
382    * @throws Exception
383    */
384   @Test(timeout=300000)
385   public void loadTesting() throws Exception {
386     htable1.setWriteBufferSize(1024);
387     htable1.setAutoFlush(false, true);
388     for (int i = 0; i < NB_ROWS_IN_BIG_BATCH; i++) {
389       Put put = new Put(Bytes.toBytes(i));
390       put.add(famName, row, row);
391       htable1.put(put);
392     }
393     htable1.flushCommits();
394 
395     Scan scan = new Scan();
396 
397     ResultScanner scanner = htable1.getScanner(scan);
398     Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
399     scanner.close();
400 
401     assertEquals(NB_ROWS_IN_BIG_BATCH, res.length);
402 
403 
404     long start = System.currentTimeMillis();
405     for (int i = 0; i < NB_RETRIES; i++) {
406       scan = new Scan();
407 
408       scanner = htable2.getScanner(scan);
409       res = scanner.next(NB_ROWS_IN_BIG_BATCH);
410       scanner.close();
411       if (res.length != NB_ROWS_IN_BIG_BATCH) {
412         if (i == NB_RETRIES - 1) {
413           int lastRow = -1;
414           for (Result result : res) {
415             int currentRow = Bytes.toInt(result.getRow());
416             for (int row = lastRow+1; row < currentRow; row++) {
417               LOG.error("Row missing: " + row);
418             }
419             lastRow = currentRow;
420           }
421           LOG.error("Last row: " + lastRow);
422           fail("Waited too much time for normal batch replication, " +
423             res.length + " instead of " + NB_ROWS_IN_BIG_BATCH + "; waited=" +
424             (System.currentTimeMillis() - start) + "ms");
425         } else {
426           LOG.info("Only got " + res.length + " rows");
427           Thread.sleep(SLEEP_TIME);
428         }
429       } else {
430         break;
431       }
432     }
433   }
434 
435   /**
436    * Do a small loading into a table, make sure the data is really the same,
437    * then run the VerifyReplication job to check the results. Do a second
438    * comparison where all the cells are different.
439    * @throws Exception
440    */
441   @Test(timeout=300000)
442   public void testVerifyRepJob() throws Exception {
443     // Populate the tables, at the same time it guarantees that the tables are
444     // identical since it does the check
445     testSmallBatch();
446 
447     String[] args = new String[] {"2", Bytes.toString(tableName)};
448     Job job = VerifyReplication.createSubmittableJob(CONF_WITH_LOCALFS, args);
449     if (job == null) {
450       fail("Job wasn't created, see the log");
451     }
452     if (!job.waitForCompletion(true)) {
453       fail("Job failed, see the log");
454     }
455     assertEquals(NB_ROWS_IN_BATCH, job.getCounters().
456         findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
457     assertEquals(0, job.getCounters().
458         findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
459 
460     Scan scan = new Scan();
461     ResultScanner rs = htable2.getScanner(scan);
462     Put put = null;
463     for (Result result : rs) {
464       put = new Put(result.getRow());
465       Cell firstVal = result.rawCells()[0];
466       put.add(CellUtil.cloneFamily(firstVal),
467           CellUtil.cloneQualifier(firstVal), Bytes.toBytes("diff data"));
468       htable2.put(put);
469     }
470     Delete delete = new Delete(put.getRow());
471     htable2.delete(delete);
472     job = VerifyReplication.createSubmittableJob(CONF_WITH_LOCALFS, args);
473     if (job == null) {
474       fail("Job wasn't created, see the log");
475     }
476     if (!job.waitForCompletion(true)) {
477       fail("Job failed, see the log");
478     }
479     assertEquals(0, job.getCounters().
480         findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
481     assertEquals(NB_ROWS_IN_BATCH, job.getCounters().
482         findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
483   }
484 
485   /**
486    * Test for HBASE-9038, Replication.scopeWALEdits would NPE if it wasn't filtering out
487    * the compaction WALEdit
488    * @throws Exception
489    */
490   @Test(timeout=300000)
491   public void testCompactionWALEdits() throws Exception {
492     WALProtos.CompactionDescriptor compactionDescriptor =
493         WALProtos.CompactionDescriptor.getDefaultInstance();
494     WALEdit edit = WALEdit.createCompaction(compactionDescriptor);
495     Replication.scopeWALEdits(htable1.getTableDescriptor(), new HLogKey(), edit);
496   }
497   
498   /**
499    * Test for HBASE-8663
500    * Create two new Tables with colfamilies enabled for replication then run
501    * ReplicationAdmin.listReplicated(). Finally verify the table:colfamilies. Note:
502    * TestReplicationAdmin is a better place for this testing but it would need mocks.
503    * @throws Exception
504    */
505   @Test(timeout = 300000)
506   public void testVerifyListReplicatedTable() throws Exception {
507 	LOG.info("testVerifyListReplicatedTable");
508 
509     final String tName = "VerifyListReplicated_";
510     final String colFam = "cf1";
511     final int numOfTables = 3;
512 
513     HBaseAdmin hadmin = new HBaseAdmin(conf1);
514 
515     // Create Tables
516     for (int i = 0; i < numOfTables; i++) {
517       HTableDescriptor ht = new HTableDescriptor(TableName.valueOf(tName + i));
518       HColumnDescriptor cfd = new HColumnDescriptor(colFam);
519       cfd.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
520       ht.addFamily(cfd);
521       hadmin.createTable(ht);
522     }
523 
524     // verify the result
525     List<HashMap<String, String>> replicationColFams = admin.listReplicated();
526     int[] match = new int[numOfTables]; // array of 3 with init value of zero
527 
528     for (int i = 0; i < replicationColFams.size(); i++) {
529       HashMap<String, String> replicationEntry = replicationColFams.get(i);
530       String tn = replicationEntry.get(ReplicationAdmin.TNAME);
531       if ((tn.startsWith(tName)) && replicationEntry.get(ReplicationAdmin.CFNAME).equals(colFam)) {
532         int m = Integer.parseInt(tn.substring(tn.length() - 1)); // get the last digit
533         match[m]++; // should only increase once
534       }
535     }
536 
537     // check the matching result
538     for (int i = 0; i < match.length; i++) {
539       assertTrue("listReplicated() does not match table " + i, (match[i] == 1));
540     }
541 
542     // drop tables
543     for (int i = 0; i < numOfTables; i++) {
544       String ht = tName + i;
545       hadmin.disableTable(ht);
546       hadmin.deleteTable(ht);
547     }
548 
549     hadmin.close();
550   }
551 
552 }