/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.replication;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.util.HashMap;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos;
import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.replication.regionserver.Replication;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.mapreduce.Job;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;

@Category(LargeTests.class)
public class TestReplicationSmallTests extends TestReplicationBase {

  private static final Log LOG = LogFactory.getLog(TestReplicationSmallTests.class);

  /**
   * Rolls the WALs and truncates the test table on the master cluster, then
   * waits for the resulting Deletes to reach the slave before each test.
   * @throws java.lang.Exception
   */
  @Before
  public void setUp() throws Exception {
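    // Autoflush on: each Put is sent to the server (and hence the WAL that
    // replication tails) immediately instead of sitting in the write buffer.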
    htable1.setAutoFlush(true, true);
    // Starting and stopping replication can make us miss new logs,
    // rolling like this makes sure the most recent one gets added to the queue
    for (JVMClusterUtil.RegionServerThread r :
        utility1.getHBaseCluster().getRegionServerThreads()) {
      r.getRegionServer().getWAL().rollWriter();
    }
    utility1.truncateTable(tableName);
    // truncating the table will send one Delete per row to the slave cluster
    // in an async fashion, which is why we cannot just call truncateTable on
    // utility2 since late writes could make it to the slave in some way.
    // Instead, we truncate the first table and wait for all the Deletes to
    // make it to the slave.
    Scan scan = new Scan();
    int lastCount = 0;
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i==NB_RETRIES-1) {
        fail("Waited too much time for truncate");
      }
      ResultScanner scanner = htable2.getScanner(scan);
      Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
      scanner.close();
      if (res.length != 0) {
        if (res.length < lastCount) {
          i--; // Don't increment timeout if we make progress
        }
        lastCount = res.length;
        LOG.info("Still got " + res.length + " rows");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }
  }

  /**
   * Verify that version and column delete marker types are replicated
   * correctly.
   * @throws Exception
   */
  @Test(timeout=300000)
  public void testDeleteTypes() throws Exception {
    LOG.info("testDeleteTypes");
    final byte[] v1 = Bytes.toBytes("v1");
    final byte[] v2 = Bytes.toBytes("v2");
    final byte[] v3 = Bytes.toBytes("v3");
    htable1 = new HTable(conf1, tableName);

    long t = EnvironmentEdgeManager.currentTimeMillis();
    // create three versions for "row"
    Put put = new Put(row);
    put.add(famName, row, t, v1);
    htable1.put(put);

    put = new Put(row);
    put.add(famName, row, t+1, v2);
    htable1.put(put);

    put = new Put(row);
    put.add(famName, row, t+2, v3);
    htable1.put(put);

    Get get = new Get(row);
    get.setMaxVersions();
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i==NB_RETRIES-1) {
        fail("Waited too much time for put replication");
      }
      Result res = htable2.get(get);
      if (res.size() < 3) {
        LOG.info("Rows not available");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(CellUtil.cloneValue(res.rawCells()[0]), v3);
        assertArrayEquals(CellUtil.cloneValue(res.rawCells()[1]), v2);
        assertArrayEquals(CellUtil.cloneValue(res.rawCells()[2]), v1);
        break;
      }
    }
    // place a version delete marker (delete last version)
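    // (deleteColumn masks only the cell at exactly this timestamp; contrast
    // with deleteColumns below, which masks all versions up to a timestamp)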
    Delete d = new Delete(row);
    d.deleteColumn(famName, row, t);
    htable1.delete(d);

    get = new Get(row);
    get.setMaxVersions();
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i==NB_RETRIES-1) {
        fail("Waited too much time for del replication");
      }
      Result res = htable2.get(get);
      if (res.size() > 2) {
        LOG.info("Version not deleted");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(CellUtil.cloneValue(res.rawCells()[0]), v3);
        assertArrayEquals(CellUtil.cloneValue(res.rawCells()[1]), v2);
        break;
      }
    }

    // place a column delete marker
    d = new Delete(row);
    d.deleteColumns(famName, row, t+2);
    htable1.delete(d);

    // now *both* of the remaining versions should be deleted
    // at the replica
    get = new Get(row);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i==NB_RETRIES-1) {
        fail("Waited too much time for del replication");
      }
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        LOG.info("Rows not deleted");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }
  }

  /**
   * Add a row, check it's replicated, delete it, check it's gone
   * @throws Exception
   */
  @Test(timeout=300000)
  public void testSimplePutDelete() throws Exception {
    LOG.info("testSimplePutDelete");
    Put put = new Put(row);
    put.add(famName, row, row);

    htable1 = new HTable(conf1, tableName);
    htable1.put(put);

    Get get = new Get(row);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i==NB_RETRIES-1) {
        fail("Waited too much time for put replication");
      }
      Result res = htable2.get(get);
      if (res.size() == 0) {
        LOG.info("Row not available");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(res.value(), row);
        break;
      }
    }

    Delete del = new Delete(row);
    htable1.delete(del);

    get = new Get(row);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i==NB_RETRIES-1) {
        fail("Waited too much time for del replication");
      }
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        LOG.info("Row not deleted");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }
  }

  /**
   * Try a small batch upload using the write buffer and check that it's replicated
   * @throws Exception
   */
  @Test(timeout=300000)
  public void testSmallBatch() throws Exception {
    LOG.info("testSmallBatch");
    Put put;
    // normal Batch tests
    htable1.setAutoFlush(false, true);
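    // Autoflush off: the puts accumulate in the client-side write buffer
    // until flushCommits() below ships them to the server in one batch.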
    for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
      put = new Put(Bytes.toBytes(i));
      put.add(famName, row, row);
      htable1.put(put);
    }
    htable1.flushCommits();

    Scan scan = new Scan();

    ResultScanner scanner1 = htable1.getScanner(scan);
    Result[] res1 = scanner1.next(NB_ROWS_IN_BATCH);
    scanner1.close();
    assertEquals(NB_ROWS_IN_BATCH, res1.length);

    for (int i = 0; i < NB_RETRIES; i++) {
      scan = new Scan();
      if (i==NB_RETRIES-1) {
        fail("Waited too much time for normal batch replication");
      }
      ResultScanner scanner = htable2.getScanner(scan);
      Result[] res = scanner.next(NB_ROWS_IN_BATCH);
      scanner.close();
      if (res.length != NB_ROWS_IN_BATCH) {
        LOG.info("Only got " + res.length + " rows");
        Thread.sleep(SLEEP_TIME);
      } else {
        break;
      }
    }
  }

  /**
   * Test disabling and enabling replication: while the peer is disabled,
   * insert a row and make sure nothing is replicated; then enable the peer
   * and verify that the insert is replicated
   *
   * @throws Exception
   */
  @Test(timeout = 300000)
  public void testDisableEnable() throws Exception {

    // Test disabling replication
    admin.disablePeer("2");

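    // While the peer is disabled the source keeps queuing its WALs, so the
    // edit below is not lost; it ships once the peer is re-enabled.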
    byte[] rowkey = Bytes.toBytes("disable enable");
    Put put = new Put(rowkey);
    put.add(famName, row, row);
    htable1.put(put);

    Get get = new Get(rowkey);
    for (int i = 0; i < NB_RETRIES; i++) {
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        fail("Replication wasn't disabled");
      } else {
        LOG.info("Row not replicated, let's wait a bit more...");
        Thread.sleep(SLEEP_TIME);
      }
    }

    // Test enable replication
    admin.enablePeer("2");

    for (int i = 0; i < NB_RETRIES; i++) {
      Result res = htable2.get(get);
      if (res.size() == 0) {
        LOG.info("Row not available");
        Thread.sleep(SLEEP_TIME);
      } else {
        assertArrayEquals(res.value(), row);
        return;
      }
    }
    fail("Waited too much time for put replication");
  }

  /**
   * Integration test for TestReplicationAdmin, removes and re-adds a peer
   * cluster
   *
   * @throws Exception
   */
  @Test(timeout=300000)
  public void testAddAndRemoveClusters() throws Exception {
    LOG.info("testAddAndRemoveClusters");
    admin.removePeer("2");
    Thread.sleep(SLEEP_TIME);
    byte[] rowKey = Bytes.toBytes("Won't be replicated");
    Put put = new Put(rowKey);
    put.add(famName, row, row);
    htable1.put(put);

    Get get = new Get(rowKey);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES-1) {
        break;
      }
      Result res = htable2.get(get);
      if (res.size() >= 1) {
        fail("Not supposed to be replicated");
      } else {
        LOG.info("Row not replicated, let's wait a bit more...");
        Thread.sleep(SLEEP_TIME);
      }
    }

    admin.addPeer("2", utility2.getClusterKey());
    Thread.sleep(SLEEP_TIME);
    rowKey = Bytes.toBytes("do rep");
    put = new Put(rowKey);
    put.add(famName, row, row);
    LOG.info("Adding new row");
    htable1.put(put);

    get = new Get(rowKey);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i==NB_RETRIES-1) {
        fail("Waited too much time for put replication");
      }
      Result res = htable2.get(get);
      if (res.size() == 0) {
        LOG.info("Row not available");
        Thread.sleep(SLEEP_TIME*i);
      } else {
        assertArrayEquals(res.value(), row);
        break;
      }
    }
  }

  /**
   * Do a more intense version of testSmallBatch, one that will trigger
   * hlog rolling and other non-trivial code paths
   * @throws Exception
   */
  @Test(timeout=300000)
  public void testLoading() throws Exception {
    LOG.info("Writing out rows to table1 in testLoading");
    htable1.setWriteBufferSize(1024);
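    // A tiny (1 KB) write buffer: with autoflush off, the puts below go out
    // in many small batches, presumably to generate steadier WAL traffic for
    // the log rolling this test is meant to exercise.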
    htable1.setAutoFlush(false, true);
    for (int i = 0; i < NB_ROWS_IN_BIG_BATCH; i++) {
      Put put = new Put(Bytes.toBytes(i));
      put.add(famName, row, row);
      htable1.put(put);
    }
    htable1.flushCommits();

    Scan scan = new Scan();

    ResultScanner scanner = htable1.getScanner(scan);
    Result[] res = scanner.next(NB_ROWS_IN_BIG_BATCH);
    scanner.close();

    assertEquals(NB_ROWS_IN_BIG_BATCH, res.length);

    LOG.info("Looking in table2 for replicated rows in testLoading");
    long start = System.currentTimeMillis();
    // Retry more than NB_RETRIES.  As it was, retries were done in 5 seconds and we'd fail
    // sometimes.
    final long retries = NB_RETRIES * 10;
    for (int i = 0; i < retries; i++) {
      scan = new Scan();
      scanner = htable2.getScanner(scan);
      res = scanner.next(NB_ROWS_IN_BIG_BATCH);
      scanner.close();
      if (res.length != NB_ROWS_IN_BIG_BATCH) {
        if (i == retries - 1) {
          int lastRow = -1;
          for (Result result : res) {
            int currentRow = Bytes.toInt(result.getRow());
            for (int row = lastRow+1; row < currentRow; row++) {
              LOG.error("Row missing: " + row);
            }
            lastRow = currentRow;
          }
          LOG.error("Last row: " + lastRow);
          fail("Waited too much time for normal batch replication, " +
            res.length + " instead of " + NB_ROWS_IN_BIG_BATCH + "; waited=" +
            (System.currentTimeMillis() - start) + "ms");
        } else {
          LOG.info("Only got " + res.length + " rows... retrying");
          Thread.sleep(SLEEP_TIME);
        }
      } else {
        break;
      }
    }
  }

  /**
   * Do a small loading into a table, make sure the data is really the same,
   * then run the VerifyReplication job to check the results. Do a second
   * comparison where all the cells are different.
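   * <p>Outside of tests, the same check is typically launched from the shell;
   * a sketch of the usual invocation (exact flags vary by HBase version):
   * <pre>
   * hbase org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication \
   *     [--starttime=X] [--stoptime=Y] [--families=A] &lt;peerId&gt; &lt;tableName&gt;
   * </pre>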
   * @throws Exception
   */
  @Test(timeout=300000)
  public void testVerifyRepJob() throws Exception {
    // Populate the tables; this also guarantees that the two tables are
    // identical, since testSmallBatch does that check itself
    testSmallBatch();

    String[] args = new String[] {"2", Bytes.toString(tableName)};
    Job job = VerifyReplication.createSubmittableJob(CONF_WITH_LOCALFS, args);
    if (job == null) {
      fail("Job wasn't created, see the log");
    }
    if (!job.waitForCompletion(true)) {
      fail("Job failed, see the log");
    }
    assertEquals(NB_ROWS_IN_BATCH, job.getCounters().
        findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
    assertEquals(0, job.getCounters().
        findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());

    Scan scan = new Scan();
    ResultScanner rs = htable2.getScanner(scan);
    Put put = null;
    for (Result result : rs) {
      put = new Put(result.getRow());
      Cell firstVal = result.rawCells()[0];
      put.add(CellUtil.cloneFamily(firstVal),
          CellUtil.cloneQualifier(firstVal), Bytes.toBytes("diff data"));
      htable2.put(put);
    }
    Delete delete = new Delete(put.getRow());
    htable2.delete(delete);
    job = VerifyReplication.createSubmittableJob(CONF_WITH_LOCALFS, args);
    if (job == null) {
      fail("Job wasn't created, see the log");
    }
    if (!job.waitForCompletion(true)) {
      fail("Job failed, see the log");
    }
    assertEquals(0, job.getCounters().
        findCounter(VerifyReplication.Verifier.Counters.GOODROWS).getValue());
    assertEquals(NB_ROWS_IN_BATCH, job.getCounters().
        findCounter(VerifyReplication.Verifier.Counters.BADROWS).getValue());
  }

  /**
   * Test for HBASE-9038: Replication.scopeWALEdits would NPE if it didn't
   * filter out the compaction WALEdit
   * @throws Exception
   */
  @Test(timeout=300000)
  public void testCompactionWALEdits() throws Exception {
    WALProtos.CompactionDescriptor compactionDescriptor =
        WALProtos.CompactionDescriptor.getDefaultInstance();
    HRegionInfo hri = new HRegionInfo(htable1.getName(),
      HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
    WALEdit edit = WALEdit.createCompaction(hri, compactionDescriptor);
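    // The test passes if this call simply returns: before HBASE-9038 it
    // would NPE on the synthetic compaction edit instead of filtering it out.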
    Replication.scopeWALEdits(htable1.getTableDescriptor(), new HLogKey(), edit);
  }

  /**
   * Test for HBASE-8663
   * Create three new tables with column families enabled for replication, then run
   * ReplicationAdmin.listReplicated() and verify the table:colfamily pairs. Note:
   * TestReplicationAdmin is a better place for this testing but it would need mocks.
   * @throws Exception
   */
  @Test(timeout = 300000)
  public void testVerifyListReplicatedTable() throws Exception {
    LOG.info("testVerifyListReplicatedTable");

    final String tName = "VerifyListReplicated_";
    final String colFam = "cf1";
    final int numOfTables = 3;

    HBaseAdmin hadmin = new HBaseAdmin(conf1);

    // Create Tables
    for (int i = 0; i < numOfTables; i++) {
      HTableDescriptor ht = new HTableDescriptor(TableName.valueOf(tName + i));
      HColumnDescriptor cfd = new HColumnDescriptor(colFam);
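      // REPLICATION_SCOPE_GLOBAL (1) marks the family for replication;
      // the default scope is LOCAL (0), which is never replicated.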
      cfd.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
      ht.addFamily(cfd);
      hadmin.createTable(ht);
    }

    // verify the result
    List<HashMap<String, String>> replicationColFams = admin.listReplicated();
    int[] match = new int[numOfTables]; // array of 3 with init value of zero

    for (int i = 0; i < replicationColFams.size(); i++) {
      HashMap<String, String> replicationEntry = replicationColFams.get(i);
      String tn = replicationEntry.get(ReplicationAdmin.TNAME);
      if ((tn.startsWith(tName)) && replicationEntry.get(ReplicationAdmin.CFNAME).equals(colFam)) {
        int m = Integer.parseInt(tn.substring(tn.length() - 1)); // get the last digit
        match[m]++; // should only increase once
      }
    }

    // check the matching result
    for (int i = 0; i < match.length; i++) {
      assertTrue("listReplicated() does not match table " + i, (match[i] == 1));
    }

    // drop tables
    for (int i = 0; i < numOfTables; i++) {
      String ht = tName + i;
      hadmin.disableTable(ht);
      hadmin.deleteTable(ht);
    }

    hadmin.close();
  }

}