1   /*
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.replication;
21  
22  import org.apache.commons.logging.Log;
23  import org.apache.commons.logging.LogFactory;
24  import org.apache.hadoop.conf.Configuration;
25  import org.apache.hadoop.hbase.EmptyWatcher;
26  import org.apache.hadoop.hbase.HBaseConfiguration;
27  import org.apache.hadoop.hbase.HBaseTestingUtility;
28  import org.apache.hadoop.hbase.HColumnDescriptor;
29  import org.apache.hadoop.hbase.HConstants;
30  import org.apache.hadoop.hbase.HTableDescriptor;
31  import org.apache.hadoop.hbase.MiniZooKeeperCluster;
32  import org.apache.hadoop.hbase.UnknownScannerException;
33  import org.apache.hadoop.hbase.client.Delete;
34  import org.apache.hadoop.hbase.client.Get;
35  import org.apache.hadoop.hbase.client.HBaseAdmin;
36  import org.apache.hadoop.hbase.client.HTable;
37  import org.apache.hadoop.hbase.client.Put;
38  import org.apache.hadoop.hbase.client.Result;
39  import org.apache.hadoop.hbase.client.ResultScanner;
40  import org.apache.hadoop.hbase.client.Scan;
41  import org.apache.hadoop.hbase.util.Bytes;
42  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
43  import org.junit.After;
44  import org.junit.AfterClass;
45  import org.junit.Before;
46  import org.junit.BeforeClass;
47  import org.junit.Test;
48  
49  import static org.junit.Assert.assertArrayEquals;
50  import static org.junit.Assert.assertEquals;
51  import static org.junit.Assert.fail;
52  
53  public class TestReplication {
54  
55    private static final Log LOG = LogFactory.getLog(TestReplication.class);
56  
57    private static Configuration conf1;
58    private static Configuration conf2;
59  
60    private static ZooKeeperWrapper zkw1;
61    private static ZooKeeperWrapper zkw2;
62  
63    private static HTable htable1;
64    private static HTable htable2;
65  
66    private static HBaseTestingUtility utility1;
67    private static HBaseTestingUtility utility2;
68    private static final int NB_ROWS_IN_BATCH = 100;
69    private static final long SLEEP_TIME = 500;
70    private static final int NB_RETRIES = 10;
71  
72    private static final byte[] tableName = Bytes.toBytes("test");
73    private static final byte[] famName = Bytes.toBytes("f");
74    private static final byte[] row = Bytes.toBytes("row");
75    private static final byte[] noRepfamName = Bytes.toBytes("norep");
76  
77    /**
78     * @throws java.lang.Exception
79     */
80    @BeforeClass
81    public static void setUpBeforeClass() throws Exception {
82      conf1 = HBaseConfiguration.create();
83      conf1.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/1");
84      // smaller block size and capacity to trigger more operations
85      // and test them
86      conf1.setInt("hbase.regionserver.hlog.blocksize", 1024*20);
87      conf1.setInt("replication.source.size.capacity", 1024);
88      conf1.setLong("replication.source.sleepforretries", 100);
89      conf1.setInt("hbase.regionserver.maxlogs", 10);
90      conf1.setLong("hbase.master.logcleaner.ttl", 10);
91      conf1.setLong("hbase.client.retries.number", 5);
92      conf1.setLong("hbase.regions.percheckin", 1);
93      conf1.setBoolean(HConstants.REPLICATION_ENABLE_KEY, true);
94      conf1.setBoolean("dfs.support.append", true);
95      conf1.setLong(HConstants.THREAD_WAKE_FREQUENCY, 100);
96  
97      utility1 = new HBaseTestingUtility(conf1);
98      utility1.startMiniZKCluster();
99      MiniZooKeeperCluster miniZK = utility1.getZkCluster();
100     zkw1 = ZooKeeperWrapper.createInstance(conf1, "cluster1");
101     zkw1.writeZNode("/1", "replication", "");
102     zkw1.writeZNode("/1/replication", "master",
103         conf1.get(HConstants.ZOOKEEPER_QUORUM)+":" +
104             conf1.get("hbase.zookeeper.property.clientPort")+":/1");
105     setIsReplication(true);
106 
107     LOG.info("Setup first Zk");
108 
109     conf2 = HBaseConfiguration.create();
110     conf2.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/2");
111     conf2.setInt("hbase.client.retries.number", 6);
112     conf2.setBoolean(HConstants.REPLICATION_ENABLE_KEY, true);
113     conf2.setBoolean("dfs.support.append", true);
114     conf2.setLong("hbase.regions.percheckin", 1);
115 
116     utility2 = new HBaseTestingUtility(conf2);
117     utility2.setZkCluster(miniZK);
118     zkw2 = ZooKeeperWrapper.createInstance(conf2, "cluster2");
119     zkw2.writeZNode("/2", "replication", "");
120     zkw2.writeZNode("/2/replication", "master",
121         conf1.get(HConstants.ZOOKEEPER_QUORUM)+":" +
122             conf1.get("hbase.zookeeper.property.clientPort")+":/1");
123 
124     zkw1.writeZNode("/1/replication/peers", "1",
125         conf2.get(HConstants.ZOOKEEPER_QUORUM)+":" +
126             conf2.get("hbase.zookeeper.property.clientPort")+":/2");
127 
128     LOG.info("Setup second Zk");
129 
130     utility1.startMiniCluster(2);
131     utility2.startMiniCluster(2);
132 
133     HTableDescriptor table = new HTableDescriptor(tableName);
134     table.setDeferredLogFlush(false);
135     HColumnDescriptor fam = new HColumnDescriptor(famName);
136     fam.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
137     table.addFamily(fam);
138     fam = new HColumnDescriptor(noRepfamName);
139     table.addFamily(fam);
140     HBaseAdmin admin1 = new HBaseAdmin(conf1);
141     HBaseAdmin admin2 = new HBaseAdmin(conf2);
142     admin1.createTable(table);
143     admin2.createTable(table);
144 
145     htable1 = new HTable(conf1, tableName);
146     htable1.setWriteBufferSize(1024);
147     htable2 = new HTable(conf2, tableName);
148   }
149 
150   private static void setIsReplication(boolean rep) throws Exception {
151     LOG.info("Set rep " + rep);
152     zkw1.writeZNode("/1/replication", "state", Boolean.toString(rep));
153     // Takes some ms for ZK to fire the watcher
154     Thread.sleep(SLEEP_TIME);
155   }
156 
157   /**
158    * @throws java.lang.Exception
159    */
160   @Before
161   public void setUp() throws Exception {
162     setIsReplication(false);
163     utility1.truncateTable(tableName);
164     utility2.truncateTable(tableName);
165     // If test is flaky, set that sleep higher
166     Thread.sleep(SLEEP_TIME*8);
167     setIsReplication(true);
168   }
169 
170   /**
171    * @throws java.lang.Exception
172    */
173   @AfterClass
174   public static void tearDownAfterClass() throws Exception {
175     utility2.shutdownMiniCluster();
176     utility1.shutdownMiniCluster();
177   }
178 
179   /**
180    * Add a row, check it's replicated, delete it, check's gone
181    * @throws Exception
182    */
183   @Test
184   public void testSimplePutDelete() throws Exception {
185     LOG.info("testSimplePutDelete");
186     Put put = new Put(row);
187     put.add(famName, row, row);
188 
189     htable1 = new HTable(conf1, tableName);
190     htable1.put(put);
191 
192     HTable table2 = new HTable(conf2, tableName);
193     Get get = new Get(row);
194     for (int i = 0; i < NB_RETRIES; i++) {
195       if (i==NB_RETRIES-1) {
196         fail("Waited too much time for put replication");
197       }
198       Result res = table2.get(get);
199       if (res.size() == 0) {
200         LOG.info("Row not available");
201         Thread.sleep(SLEEP_TIME);
202       } else {
203         assertArrayEquals(res.value(), row);
204         break;
205       }
206     }
207 
208     Delete del = new Delete(row);
209     htable1.delete(del);
210 
211     table2 = new HTable(conf2, tableName);
212     get = new Get(row);
213     for (int i = 0; i < NB_RETRIES; i++) {
214       if (i==NB_RETRIES-1) {
215         fail("Waited too much time for del replication");
216       }
217       Result res = table2.get(get);
218       if (res.size() >= 1) {
219         LOG.info("Row not deleted");
220         Thread.sleep(SLEEP_TIME);
221       } else {
222         break;
223       }
224     }
225   }
226 
227   /**
228    * Try a small batch upload using the write buffer, check it's replicated
229    * @throws Exception
230    */
231   @Test
232   public void testSmallBatch() throws Exception {
233     LOG.info("testSmallBatch");
234     Put put;
235     // normal Batch tests
236     htable1.setAutoFlush(false);
237     for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
238       put = new Put(Bytes.toBytes(i));
239       put.add(famName, row, row);
240       htable1.put(put);
241     }
242     htable1.flushCommits();
243 
244     Scan scan = new Scan();
245 
246     ResultScanner scanner1 = htable1.getScanner(scan);
247     Result[] res1 = scanner1.next(NB_ROWS_IN_BATCH);
248     scanner1.close();
249     assertEquals(NB_ROWS_IN_BATCH, res1.length);
250 
251     for (int i = 0; i < NB_RETRIES; i++) {
252       if (i==NB_RETRIES-1) {
253         fail("Waited too much time for normal batch replication");
254       }
255       ResultScanner scanner = htable2.getScanner(scan);
256       Result[] res = scanner.next(NB_ROWS_IN_BATCH);
257       scanner.close();
258       if (res.length != NB_ROWS_IN_BATCH) {
259         LOG.info("Only got " + res.length + " rows");
260         Thread.sleep(SLEEP_TIME);
261       } else {
262         break;
263       }
264     }
265 
266     htable1.setAutoFlush(true);
267 
268   }
269 
270   /**
271    * Test stopping replication, trying to insert, make sure nothing's
272    * replicated, enable it, try replicating and it should work
273    * @throws Exception
274    */
275   @Test
276   public void testStartStop() throws Exception {
277 
278     // Test stopping replication
279     setIsReplication(false);
280 
281     Put put = new Put(Bytes.toBytes("stop start"));
282     put.add(famName, row, row);
283     htable1.put(put);
284 
285     Get get = new Get(Bytes.toBytes("stop start"));
286     for (int i = 0; i < NB_RETRIES; i++) {
287       if (i==NB_RETRIES-1) {
288         break;
289       }
290       Result res = htable2.get(get);
291       if(res.size() >= 1) {
292         fail("Replication wasn't stopped");
293 
294       } else {
295         LOG.info("Row not replicated, let's wait a bit more...");
296         Thread.sleep(SLEEP_TIME);
297       }
298     }
299 
300     // Test restart replication
301     setIsReplication(true);
302 
303     htable1.put(put);
304 
305     for (int i = 0; i < NB_RETRIES; i++) {
306       if (i==NB_RETRIES-1) {
307         fail("Waited too much time for put replication");
308       }
309       Result res = htable2.get(get);
310       if(res.size() == 0) {
311         LOG.info("Row not available");
312         Thread.sleep(SLEEP_TIME);
313       } else {
314         assertArrayEquals(res.value(), row);
315         break;
316       }
317     }
318 
319     put = new Put(Bytes.toBytes("do not rep"));
320     put.add(noRepfamName, row, row);
321     htable1.put(put);
322 
323     get = new Get(Bytes.toBytes("do not rep"));
324     for (int i = 0; i < NB_RETRIES; i++) {
325       if (i == NB_RETRIES-1) {
326         break;
327       }
328       Result res = htable2.get(get);
329       if (res.size() >= 1) {
330         fail("Not supposed to be replicated");
331       } else {
332         LOG.info("Row not replicated, let's wait a bit more...");
333         Thread.sleep(SLEEP_TIME);
334       }
335     }
336 
337   }
338 
339   /**
340    * Do a more intense version testSmallBatch, one  that will trigger
341    * hlog rolling and other non-trivial code paths
342    * @throws Exception
343    */
344   @Test
345   public void loadTesting() throws Exception {
346     htable1.setWriteBufferSize(1024);
347     htable1.setAutoFlush(false);
348     for (int i = 0; i < NB_ROWS_IN_BATCH *10; i++) {
349       Put put = new Put(Bytes.toBytes(i));
350       put.add(famName, row, row);
351       htable1.put(put);
352     }
353     htable1.flushCommits();
354 
355     Scan scan = new Scan();
356 
357     ResultScanner scanner = htable1.getScanner(scan);
358     Result[] res = scanner.next(NB_ROWS_IN_BATCH * 100);
359     scanner.close();
360 
361     assertEquals(NB_ROWS_IN_BATCH *10, res.length);
362 
363     scan = new Scan();
364 
365     for (int i = 0; i < NB_RETRIES; i++) {
366 
367       scanner = htable2.getScanner(scan);
368       res = scanner.next(NB_ROWS_IN_BATCH * 100);
369       scanner.close();
370       if (res.length != NB_ROWS_IN_BATCH *10) {
371         if (i == NB_RETRIES-1) {
372           int lastRow = -1;
373           for (Result result : res) {
374             int currentRow = Bytes.toInt(result.getRow());
375             for (int row = lastRow+1; row < currentRow; row++) {
376               LOG.error("Row missing: " + row);
377             }
378             lastRow = currentRow;
379           }
380           LOG.error("Last row: " + lastRow);
381           fail("Waited too much time for normal batch replication, "
382               + res.length + " instead of " + NB_ROWS_IN_BATCH *10);
383         } else {
384           LOG.info("Only got " + res.length + " rows");
385           Thread.sleep(SLEEP_TIME);
386         }
387       } else {
388         break;
389       }
390     }
391   }
392 
393   /**
394    * Load up multiple tables over 2 region servers and kill a source during
395    * the upload. The failover happens internally.
396    * @throws Exception
397    */
398   @Test
399   public void queueFailover() throws Exception {
400     utility1.createMultiRegions(htable1, famName);
401 
402     // killing the RS with .META. can result into failed puts until we solve
403     // IO fencing
404     int rsToKill1 =
405         utility1.getHBaseCluster().getServerWithMeta() == 0 ? 1 : 0;
406     int rsToKill2 =
407         utility2.getHBaseCluster().getServerWithMeta() == 0 ? 1 : 0;
408 
409     // Takes about 20 secs to run the full loading, kill around the middle
410     Thread killer1 = killARegionServer(utility1, 7500, rsToKill1);
411     Thread killer2 = killARegionServer(utility2, 10000, rsToKill2);
412 
413     LOG.info("Start loading table");
414     int initialCount = utility1.loadTable(htable1, famName);
415     LOG.info("Done loading table");
416     killer1.join(5000);
417     killer2.join(5000);
418     LOG.info("Done waiting for threads");
419 
420     Result[] res;
421     while (true) {
422       try {
423         Scan scan = new Scan();
424         ResultScanner scanner = htable1.getScanner(scan);
425         res = scanner.next(initialCount);
426         scanner.close();
427         break;
428       } catch (UnknownScannerException ex) {
429         LOG.info("Cluster wasn't ready yet, restarting scanner");
430       }
431     }
432     // Test we actually have all the rows, we may miss some because we
433     // don't have IO fencing.
434     if (res.length != initialCount) {
435       LOG.warn("We lost some rows on the master cluster!");
436       // We don't really expect the other cluster to have more rows
437       initialCount = res.length;
438     }
439 
440     Scan scan2 = new Scan();
441 
442     int lastCount = 0;
443 
444     for (int i = 0; i < NB_RETRIES; i++) {
445       if (i==NB_RETRIES-1) {
446         fail("Waited too much time for queueFailover replication");
447       }
448       ResultScanner scanner2 = htable2.getScanner(scan2);
449       Result[] res2 = scanner2.next(initialCount * 2);
450       scanner2.close();
451       if (res2.length < initialCount) {
452         if (lastCount < res2.length) {
453           i--; // Don't increment timeout if we make progress
454         }
455         lastCount = res2.length;
456         LOG.info("Only got " + lastCount + " rows instead of " +
457             initialCount + " current i=" + i);
458         Thread.sleep(SLEEP_TIME*2);
459       } else {
460         break;
461       }
462     }
463   }
464 
465   private static Thread killARegionServer(final HBaseTestingUtility utility,
466                                    final long timeout, final int rs) {
467     Thread killer = new Thread() {
468       public void run() {
469         try {
470           Thread.sleep(timeout);
471           utility.expireRegionServerSession(rs);
472         } catch (Exception e) {
473           LOG.error(e);
474         }
475       }
476     };
477     killer.start();
478     return killer;
479   }
480 }