View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.client;
21  
22  import org.apache.commons.logging.Log;
23  import org.apache.commons.logging.LogFactory;
24  import org.apache.commons.logging.impl.Log4JLogger;
25  import org.apache.hadoop.conf.Configuration;
26  import org.apache.hadoop.hbase.Cell;
27  import org.apache.hadoop.hbase.HBaseTestingUtility;
28  import org.apache.hadoop.hbase.HConstants;
29  import org.apache.hadoop.hbase.HRegionInfo;
30  import org.apache.hadoop.hbase.HTableDescriptor;
31  import org.apache.hadoop.hbase.testclassification.MediumTests;
32  import org.apache.hadoop.hbase.NotServingRegionException;
33  import org.apache.hadoop.hbase.RegionLocations;
34  import org.apache.hadoop.hbase.TableNotFoundException;
35  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
36  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
37  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
38  import org.apache.hadoop.hbase.protobuf.RequestConverter;
39  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
40  import org.apache.hadoop.hbase.regionserver.HRegionServer;
41  import org.apache.hadoop.hbase.regionserver.InternalScanner;
42  import org.apache.hadoop.hbase.regionserver.RegionScanner;
43  import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore;
44  import org.apache.hadoop.hbase.regionserver.TestRegionServerNoMaster;
45  import org.apache.hadoop.hbase.util.Bytes;
46  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
47  import org.apache.log4j.Level;
48  import org.apache.zookeeper.KeeperException;
49  import org.junit.After;
50  import org.junit.AfterClass;
51  import org.junit.Assert;
52  import org.junit.Before;
53  import org.junit.BeforeClass;
54  import org.junit.Test;
55  import org.junit.experimental.categories.Category;
56  
57  import java.io.IOException;
58  import java.util.HashMap;
59  import java.util.Iterator;
60  import java.util.List;
61  import java.util.Random;
62  import java.util.concurrent.CountDownLatch;
63  import java.util.concurrent.TimeUnit;
64  import java.util.concurrent.atomic.AtomicBoolean;
65  import java.util.concurrent.atomic.AtomicInteger;
66  import java.util.concurrent.atomic.AtomicLong;
67  import java.util.concurrent.atomic.AtomicReference;
68  
69  /**
70   * Tests for region replicas. Sad that we cannot isolate these without bringing up a whole
71   * cluster. See {@link org.apache.hadoop.hbase.regionserver.TestRegionServerNoMaster}.
72   */
73  @Category(MediumTests.class)
74  public class TestReplicasClient {
75    private static final Log LOG = LogFactory.getLog(TestReplicasClient.class);
76  
77    static {
78      ((Log4JLogger)RpcRetryingCaller.LOG).getLogger().setLevel(Level.ALL);
79    }
80  
81    private static final int NB_SERVERS = 1;
82    private static HTable table = null;
83    private static final byte[] row = TestReplicasClient.class.getName().getBytes();
84  
85    private static HRegionInfo hriPrimary;
86    private static HRegionInfo hriSecondary;
87  
88    private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
89    private static final byte[] f = HConstants.CATALOG_FAMILY;
90  
91    private final static int REFRESH_PERIOD = 1000;
92  
93    /**
94     * This copro is used to synchronize the tests.
95     */
96    public static class SlowMeCopro extends BaseRegionObserver {
97      static final AtomicLong sleepTime = new AtomicLong(0);
98      static final AtomicBoolean slowDownNext = new AtomicBoolean(false);
99      static final AtomicInteger countOfNext = new AtomicInteger(0);
100     static final AtomicReference<CountDownLatch> cdl =
101         new AtomicReference<CountDownLatch>(new CountDownLatch(0));
102     Random r = new Random();
103     public SlowMeCopro() {
104     }
105 
106     @Override
107     public void preGetOp(final ObserverContext<RegionCoprocessorEnvironment> e,
108                          final Get get, final List<Cell> results) throws IOException {
109       slowdownCode(e);
110     }
111 
112     @Override
113     public RegionScanner preScannerOpen(final ObserverContext<RegionCoprocessorEnvironment> e,
114         final Scan scan, final RegionScanner s) throws IOException {
115       slowdownCode(e);
116       return s;
117     }
118 
119     @Override
120     public boolean preScannerNext(final ObserverContext<RegionCoprocessorEnvironment> e,
121         final InternalScanner s, final List<Result> results,
122         final int limit, final boolean hasMore) throws IOException {
123       //this will slow down a certain next operation if the conditions are met. The slowness
124       //will allow the call to go to a replica
125       if (slowDownNext.get()) {
126         //have some "next" return successfully from the primary; hence countOfNext checked
127         if (countOfNext.incrementAndGet() == 2) {
128           sleepTime.set(2000);
129           slowdownCode(e);
130         }
131       }
132       return true;
133     }
134 
135     private void slowdownCode(final ObserverContext<RegionCoprocessorEnvironment> e) {
136       if (e.getEnvironment().getRegion().getRegionInfo().getReplicaId() == 0) {
137         CountDownLatch latch = cdl.get();
138         try {
139           if (sleepTime.get() > 0) {
140             LOG.info("Sleeping for " + sleepTime.get() + " ms");
141             Thread.sleep(sleepTime.get());
142           } else if (latch.getCount() > 0) {
143             LOG.info("Waiting for the counterCountDownLatch");
144             latch.await(2, TimeUnit.MINUTES); // To help the tests to finish.
145             if (latch.getCount() > 0) {
146               throw new RuntimeException("Can't wait more");
147             }
148           }
149         } catch (InterruptedException e1) {
150           LOG.error(e1);
151         }
152       } else {
153         LOG.info("We're not the primary replicas.");
154       }
155     }
156   }
157 
158   @BeforeClass
159   public static void beforeClass() throws Exception {
160     // enable store file refreshing
161     HTU.getConfiguration().setInt(
162         StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, REFRESH_PERIOD);
163     HTU.getConfiguration().setBoolean("hbase.client.log.scanner.activity", true);
164     ConnectionUtils.setupMasterlessConnection(HTU.getConfiguration());
165     HTU.startMiniCluster(NB_SERVERS);
166 
167     // Create table then get the single region for our new table.
168     HTableDescriptor hdt = HTU.createTableDescriptor(TestReplicasClient.class.getSimpleName());
169     hdt.addCoprocessor(SlowMeCopro.class.getName());
170     table = HTU.createTable(hdt, new byte[][]{f}, HTU.getConfiguration());
171 
172     hriPrimary = table.getRegionLocation(row, false).getRegionInfo();
173 
174     // mock a secondary region info to open
175     hriSecondary = new HRegionInfo(hriPrimary.getTable(), hriPrimary.getStartKey(),
176         hriPrimary.getEndKey(), hriPrimary.isSplit(), hriPrimary.getRegionId(), 1);
177 
178     // No master
179     LOG.info("Master is going to be stopped");
180     TestRegionServerNoMaster.stopMasterAndAssignMeta(HTU);
181     Configuration c = new Configuration(HTU.getConfiguration());
182     c.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
183     LOG.info("Master has stopped");
184   }
185 
186   @AfterClass
187   public static void afterClass() throws Exception {
188     if (table != null) table.close();
189     HTU.shutdownMiniCluster();
190   }
191 
192   @Before
193   public void before() throws IOException {
194     HTU.getHBaseAdmin().getConnection().clearRegionCache();
195     try {
196       openRegion(hriPrimary);
197     } catch (Exception ignored) {
198     }
199     try {
200       openRegion(hriSecondary);
201     } catch (Exception ignored) {
202     }
203   }
204 
205   @After
206   public void after() throws IOException, KeeperException {
207     try {
208       closeRegion(hriSecondary);
209     } catch (Exception ignored) {
210     }
211     try {
212       closeRegion(hriPrimary);
213     } catch (Exception ignored) {
214     }
215     ZKAssign.deleteNodeFailSilent(HTU.getZooKeeperWatcher(), hriPrimary);
216     ZKAssign.deleteNodeFailSilent(HTU.getZooKeeperWatcher(), hriSecondary);
217 
218     HTU.getHBaseAdmin().getConnection().clearRegionCache();
219   }
220 
221   private HRegionServer getRS() {
222     return HTU.getMiniHBaseCluster().getRegionServer(0);
223   }
224 
225   private void openRegion(HRegionInfo hri) throws Exception {
226     try {
227       if (isRegionOpened(hri)) return;
228     } catch (Exception e){}
229     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
230     // first version is '0'
231     AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
232       getRS().getServerName(), hri, 0, null, null);
233     AdminProtos.OpenRegionResponse responseOpen = getRS().getRSRpcServices().openRegion(null, orr);
234     Assert.assertEquals(responseOpen.getOpeningStateCount(), 1);
235     Assert.assertEquals(responseOpen.getOpeningState(0),
236       AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED);
237     checkRegionIsOpened(hri);
238   }
239 
240   private void closeRegion(HRegionInfo hri) throws Exception {
241     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
242 
243     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
244       getRS().getServerName(), hri.getEncodedName(), true);
245     AdminProtos.CloseRegionResponse responseClose = getRS()
246         .getRSRpcServices().closeRegion(null, crr);
247     Assert.assertTrue(responseClose.getClosed());
248 
249     checkRegionIsClosed(hri.getEncodedName());
250 
251     ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(), null);
252   }
253 
254   private void checkRegionIsOpened(HRegionInfo hri) throws Exception {
255 
256     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
257       Thread.sleep(1);
258     }
259 
260     Assert.assertTrue(
261         ZKAssign.deleteOpenedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(), null));
262   }
263 
264   private boolean isRegionOpened(HRegionInfo hri) throws Exception {
265     return getRS().getRegionByEncodedName(hri.getEncodedName()).isAvailable();
266   }
267 
268   private void checkRegionIsClosed(String encodedRegionName) throws Exception {
269 
270     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
271       Thread.sleep(1);
272     }
273 
274     try {
275       Assert.assertFalse(getRS().getRegionByEncodedName(encodedRegionName).isAvailable());
276     } catch (NotServingRegionException expected) {
277       // That's how it work: if the region is closed we have an exception.
278     }
279 
280     // We don't delete the znode here, because there is not always a znode.
281   }
282 
283   private void flushRegion(HRegionInfo regionInfo) throws IOException {
284     TestRegionServerNoMaster.flushRegion(HTU, regionInfo);
285   }
286 
287   @Test
288   public void testUseRegionWithoutReplica() throws Exception {
289     byte[] b1 = "testUseRegionWithoutReplica".getBytes();
290     openRegion(hriSecondary);
291     SlowMeCopro.cdl.set(new CountDownLatch(0));
292     try {
293       Get g = new Get(b1);
294       Result r = table.get(g);
295       Assert.assertFalse(r.isStale());
296     } finally {
297       closeRegion(hriSecondary);
298     }
299   }
300 
301   @Test
302   public void testLocations() throws Exception {
303     byte[] b1 = "testLocations".getBytes();
304     openRegion(hriSecondary);
305     ClusterConnection hc = (ClusterConnection) HTU.getHBaseAdmin().getConnection();
306 
307     try {
308       hc.clearRegionCache();
309       RegionLocations rl = hc.locateRegion(table.getName(), b1, false, false);
310       Assert.assertEquals(2, rl.size());
311 
312       rl = hc.locateRegion(table.getName(), b1, true, false);
313       Assert.assertEquals(2, rl.size());
314 
315       hc.clearRegionCache();
316       rl = hc.locateRegion(table.getName(), b1, true, false);
317       Assert.assertEquals(2, rl.size());
318 
319       rl = hc.locateRegion(table.getName(), b1, false, false);
320       Assert.assertEquals(2, rl.size());
321     } finally {
322       closeRegion(hriSecondary);
323     }
324   }
325 
326   @Test
327   public void testGetNoResultNoStaleRegionWithReplica() throws Exception {
328     byte[] b1 = "testGetNoResultNoStaleRegionWithReplica".getBytes();
329     openRegion(hriSecondary);
330 
331     try {
332       // A get works and is not stale
333       Get g = new Get(b1);
334       Result r = table.get(g);
335       Assert.assertFalse(r.isStale());
336     } finally {
337       closeRegion(hriSecondary);
338     }
339   }
340 
341 
342   @Test
343   public void testGetNoResultStaleRegionWithReplica() throws Exception {
344     byte[] b1 = "testGetNoResultStaleRegionWithReplica".getBytes();
345     openRegion(hriSecondary);
346 
347     SlowMeCopro.cdl.set(new CountDownLatch(1));
348     try {
349       Get g = new Get(b1);
350       g.setConsistency(Consistency.TIMELINE);
351       Result r = table.get(g);
352       Assert.assertTrue(r.isStale());
353     } finally {
354       SlowMeCopro.cdl.get().countDown();
355       closeRegion(hriSecondary);
356     }
357   }
358 
359   @Test
360   public void testGetNoResultNotStaleSleepRegionWithReplica() throws Exception {
361     byte[] b1 = "testGetNoResultNotStaleSleepRegionWithReplica".getBytes();
362     openRegion(hriSecondary);
363 
364     try {
365       // We sleep; but we won't go to the stale region as we don't get the stale by default.
366       SlowMeCopro.sleepTime.set(2000);
367       Get g = new Get(b1);
368       Result r = table.get(g);
369       Assert.assertFalse(r.isStale());
370 
371     } finally {
372       SlowMeCopro.sleepTime.set(0);
373       closeRegion(hriSecondary);
374     }
375   }
376 
377 
378   @Test
379   public void testFlushTable() throws Exception {
380     openRegion(hriSecondary);
381     try {
382       flushRegion(hriPrimary);
383       flushRegion(hriSecondary);
384 
385       Put p = new Put(row);
386       p.add(f, row, row);
387       table.put(p);
388 
389       flushRegion(hriPrimary);
390       flushRegion(hriSecondary);
391     } finally {
392       Delete d = new Delete(row);
393       table.delete(d);
394       closeRegion(hriSecondary);
395     }
396   }
397 
398   @Test
399   public void testFlushPrimary() throws Exception {
400     openRegion(hriSecondary);
401 
402     try {
403       flushRegion(hriPrimary);
404 
405       Put p = new Put(row);
406       p.add(f, row, row);
407       table.put(p);
408 
409       flushRegion(hriPrimary);
410     } finally {
411       Delete d = new Delete(row);
412       table.delete(d);
413       closeRegion(hriSecondary);
414     }
415   }
416 
417   @Test
418   public void testFlushSecondary() throws Exception {
419     openRegion(hriSecondary);
420     try {
421       flushRegion(hriSecondary);
422 
423       Put p = new Put(row);
424       p.add(f, row, row);
425       table.put(p);
426 
427       flushRegion(hriSecondary);
428     } catch (TableNotFoundException expected) {
429     } finally {
430       Delete d = new Delete(row);
431       table.delete(d);
432       closeRegion(hriSecondary);
433     }
434   }
435 
436   @Test
437   public void testUseRegionWithReplica() throws Exception {
438     byte[] b1 = "testUseRegionWithReplica".getBytes();
439     openRegion(hriSecondary);
440 
441     try {
442       // A simple put works, even if there here a second replica
443       Put p = new Put(b1);
444       p.add(f, b1, b1);
445       table.put(p);
446       LOG.info("Put done");
447 
448       // A get works and is not stale
449       Get g = new Get(b1);
450       Result r = table.get(g);
451       Assert.assertFalse(r.isStale());
452       Assert.assertFalse(r.getColumnCells(f, b1).isEmpty());
453       LOG.info("get works and is not stale done");
454 
455       // Even if it we have to wait a little on the main region
456       SlowMeCopro.sleepTime.set(2000);
457       g = new Get(b1);
458       r = table.get(g);
459       Assert.assertFalse(r.isStale());
460       Assert.assertFalse(r.getColumnCells(f, b1).isEmpty());
461       SlowMeCopro.sleepTime.set(0);
462       LOG.info("sleep and is not stale done");
463 
464       // But if we ask for stale we will get it
465       SlowMeCopro.cdl.set(new CountDownLatch(1));
466       g = new Get(b1);
467       g.setConsistency(Consistency.TIMELINE);
468       r = table.get(g);
469       Assert.assertTrue(r.isStale());
470       Assert.assertTrue(r.getColumnCells(f, b1).isEmpty());
471       SlowMeCopro.cdl.get().countDown();
472 
473       LOG.info("stale done");
474 
475       // exists works and is not stale
476       g = new Get(b1);
477       g.setCheckExistenceOnly(true);
478       r = table.get(g);
479       Assert.assertFalse(r.isStale());
480       Assert.assertTrue(r.getExists());
481       LOG.info("exists not stale done");
482 
483       // exists works on stale but don't see the put
484       SlowMeCopro.cdl.set(new CountDownLatch(1));
485       g = new Get(b1);
486       g.setCheckExistenceOnly(true);
487       g.setConsistency(Consistency.TIMELINE);
488       r = table.get(g);
489       Assert.assertTrue(r.isStale());
490       Assert.assertFalse("The secondary has stale data", r.getExists());
491       SlowMeCopro.cdl.get().countDown();
492       LOG.info("exists stale before flush done");
493 
494       flushRegion(hriPrimary);
495       flushRegion(hriSecondary);
496       LOG.info("flush done");
497       Thread.sleep(1000 + REFRESH_PERIOD * 2);
498 
499       // get works and is not stale
500       SlowMeCopro.cdl.set(new CountDownLatch(1));
501       g = new Get(b1);
502       g.setConsistency(Consistency.TIMELINE);
503       r = table.get(g);
504       Assert.assertTrue(r.isStale());
505       Assert.assertFalse(r.isEmpty());
506       SlowMeCopro.cdl.get().countDown();
507       LOG.info("stale done");
508 
509       // exists works on stale and we see the put after the flush
510       SlowMeCopro.cdl.set(new CountDownLatch(1));
511       g = new Get(b1);
512       g.setCheckExistenceOnly(true);
513       g.setConsistency(Consistency.TIMELINE);
514       r = table.get(g);
515       Assert.assertTrue(r.isStale());
516       Assert.assertTrue(r.getExists());
517       SlowMeCopro.cdl.get().countDown();
518       LOG.info("exists stale after flush done");
519 
520     } finally {
521       SlowMeCopro.cdl.get().countDown();
522       SlowMeCopro.sleepTime.set(0);
523       Delete d = new Delete(b1);
524       table.delete(d);
525       closeRegion(hriSecondary);
526     }
527   }
528 
529   @Test
530   public void testScanWithReplicas() throws Exception {
531     //simple scan
532     runMultipleScansOfOneType(false, false);
533   }
534 
535   @Test
536   public void testSmallScanWithReplicas() throws Exception {
537     //small scan
538     runMultipleScansOfOneType(false, true);
539   }
540 
541   @Test
542   public void testReverseScanWithReplicas() throws Exception {
543     //reverse scan
544     runMultipleScansOfOneType(true, false);
545   }
546 
547   private void runMultipleScansOfOneType(boolean reversed, boolean small) throws Exception {
548     openRegion(hriSecondary);
549     int NUMROWS = 100;
550     try {
551       for (int i = 0; i < NUMROWS; i++) {
552         byte[] b1 = Bytes.toBytes("testUseRegionWithReplica" + i);
553         Put p = new Put(b1);
554         p.add(f, b1, b1);
555         table.put(p);
556       }
557       LOG.debug("PUT done");
558       int caching = 20;
559       byte[] start;
560       if (reversed) start = Bytes.toBytes("testUseRegionWithReplica" + (NUMROWS - 1));
561       else start = Bytes.toBytes("testUseRegionWithReplica" + 0);
562 
563       scanWithReplicas(reversed, small, Consistency.TIMELINE, caching, start, NUMROWS, false, false);
564 
565       //Even if we were to slow the server down, unless we ask for stale
566       //we won't get it
567       SlowMeCopro.sleepTime.set(5000);
568       scanWithReplicas(reversed, small, Consistency.STRONG, caching, start, NUMROWS, false, false);
569       SlowMeCopro.sleepTime.set(0);
570 
571       flushRegion(hriPrimary);
572       LOG.info("flush done");
573       Thread.sleep(1000 + REFRESH_PERIOD * 2);
574 
575       //Now set the flag to get a response even if stale
576       SlowMeCopro.sleepTime.set(5000);
577       scanWithReplicas(reversed, small, Consistency.TIMELINE, caching, start, NUMROWS, true, false);
578       SlowMeCopro.sleepTime.set(0);
579 
580       // now make some 'next' calls slow
581       SlowMeCopro.slowDownNext.set(true);
582       SlowMeCopro.countOfNext.set(0);
583       scanWithReplicas(reversed, small, Consistency.TIMELINE, caching, start, NUMROWS, true, true);
584       SlowMeCopro.slowDownNext.set(false);
585       SlowMeCopro.countOfNext.set(0);
586     } finally {
587       SlowMeCopro.cdl.get().countDown();
588       SlowMeCopro.sleepTime.set(0);
589       SlowMeCopro.slowDownNext.set(false);
590       SlowMeCopro.countOfNext.set(0);
591       for (int i = 0; i < NUMROWS; i++) {
592         byte[] b1 = Bytes.toBytes("testUseRegionWithReplica" + i);
593         Delete d = new Delete(b1);
594         table.delete(d);
595       }
596       closeRegion(hriSecondary);
597     }
598   }
599 
600   private void scanWithReplicas(boolean reversed, boolean small, Consistency consistency,
601       int caching, byte[] startRow, int numRows, boolean staleExpected, boolean slowNext)
602           throws Exception {
603     Scan scan = new Scan(startRow);
604     scan.setCaching(caching);
605     scan.setReversed(reversed);
606     scan.setSmall(small);
607     scan.setConsistency(consistency);
608     ResultScanner scanner = table.getScanner(scan);
609     Iterator<Result> iter = scanner.iterator();
610     HashMap<String, Boolean> map = new HashMap<String, Boolean>();
611     int count = 0;
612     int countOfStale = 0;
613     while (iter.hasNext()) {
614       count++;
615       Result r = iter.next();
616       if (map.containsKey(new String(r.getRow()))) {
617         throw new Exception("Unexpected scan result. Repeated row " + Bytes.toString(r.getRow()));
618       }
619       map.put(new String(r.getRow()), true);
620       if (!slowNext) Assert.assertTrue(r.isStale() == staleExpected);
621       if (r.isStale()) countOfStale++;
622     }
623     LOG.debug("Count of rows " + count + " num rows expected " + numRows);
624     Assert.assertTrue(count == numRows);
625     if (slowNext) {
626       LOG.debug("Count of Stale " + countOfStale);
627       Assert.assertTrue(countOfStale > 1 && countOfStale < numRows);
628     }
629   }
630 }