/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.RegionServerServices;
import org.apache.hadoop.hbase.regionserver.Store;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import com.google.common.collect.Lists;
/**
 * Test for the case where a regionserver going down has enough cycles to do damage to regions
 * that have actually been assigned elsewhere.
 *
 * <p>If we happen to assign a region before it is fully closed in its old location -- i.e. it is
 * on two servers at the same time -- all can work fine until the region on the dying server
 * decides to compact or otherwise change the region file set.  The region in its new location
 * will then get a surprise when it tries to do something with a file that was removed by the
 * region in its old location on the dying server.
 *
 * <p>Making a test for this case is a little tough in that even if a file is deleted up on the
 * namenode, a client that opened the file before the delete can keep reading until something
 * changes the state of the cached blocks in the already-open dfsclient (i.e. until the namenode
 * has the datanode clean up the blocks of the deleted file).
 *
 * <p>What we do below instead is an explicit existence check on the files listed by the region
 * that has had files removed by a compaction.  This hurries along and makes deterministic what
 * would otherwise be a chance occurrence.
 */
@Category(MediumTests.class)
public class TestIOFencing {
  static final Log LOG = LogFactory.getLog(TestIOFencing.class);
  static {
    // Uncomment the following lines if more verbosity is needed for
    // debugging (see HBASE-12285 for details).
    //((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL);
    //((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
    //((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL);
    //((Log4JLogger)LogFactory.getLog("org.apache.hadoop.hdfs.server.namenode.FSNamesystem"))
    //    .getLogger().setLevel(Level.ALL);
    //((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL);
    //((Log4JLogger)HLog.LOG).getLogger().setLevel(Level.ALL);
  }
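
  /**
   * Base HRegion for these tests: counts compactions and exposes latches that let a subclass park
   * an in-flight compaction at a chosen point until the test calls allowCompactions().
   */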
  public abstract static class CompactionBlockerRegion extends HRegion {
    volatile int compactCount = 0;
    volatile CountDownLatch compactionsBlocked = new CountDownLatch(0);
    volatile CountDownLatch compactionsWaiting = new CountDownLatch(0);

    @SuppressWarnings("deprecation")
    public CompactionBlockerRegion(Path tableDir, HLog log,
        FileSystem fs, Configuration confParam, HRegionInfo info,
        HTableDescriptor htd, RegionServerServices rsServices) {
      super(tableDir, log, fs, confParam, info, htd, rsServices);
    }
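
    /**
     * Arm the latches so the next compaction parks at its blocking point (tripping
     * compactionsWaiting so waitForCompactionToBlock() returns) until allowCompactions() is called.
     */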
    public void stopCompactions() {
      compactionsBlocked = new CountDownLatch(1);
      compactionsWaiting = new CountDownLatch(1);
    }

    public void allowCompactions() {
      LOG.debug("allowing compactions");
      compactionsBlocked.countDown();
    }

    public void waitForCompactionToBlock() throws IOException {
      try {
        LOG.debug("waiting for compaction to block");
        compactionsWaiting.await();
        LOG.debug("compaction block reached");
      } catch (InterruptedException ex) {
        throw new IOException(ex);
      }
    }
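
    /** Delegate to HRegion but bump compactCount once the compaction attempt finishes. */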
    @Override
    public boolean compact(CompactionContext compaction, Store store) throws IOException {
      try {
        return super.compact(compaction, store);
      } finally {
        compactCount++;
      }
    }
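
    /** @return total number of store files across all stores in this region */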
    public int countStoreFiles() {
      int count = 0;
      for (Store store : stores.values()) {
        count += store.getStorefilesCount();
      }
      return count;
    }
  }

  /**
   * An override of HRegion that allows us to park compactions in a holding pattern and then,
   * when appropriate for the test, let them proceed again.
   */
  public static class BlockCompactionsInPrepRegion extends CompactionBlockerRegion {

    public BlockCompactionsInPrepRegion(Path tableDir, HLog log,
        FileSystem fs, Configuration confParam, HRegionInfo info,
        HTableDescriptor htd, RegionServerServices rsServices) {
      super(tableDir, log, fs, confParam, info, htd, rsServices);
    }

    @Override
    protected void doRegionCompactionPrep() throws IOException {
      compactionsWaiting.countDown();
      try {
        compactionsBlocked.await();
      } catch (InterruptedException ex) {
        throw new IOException(ex);
      }
      super.doRegionCompactionPrep();
    }
  }

  /**
   * An override of HRegion that allows us to park compactions in a holding pattern and then,
   * when appropriate for the test, let them proceed again.  This variant lets the compaction
   * entry go out to the WAL before blocking, but blocks afterwards.
   */
  public static class BlockCompactionsInCompletionRegion extends CompactionBlockerRegion {
    public BlockCompactionsInCompletionRegion(Path tableDir, HLog log,
        FileSystem fs, Configuration confParam, HRegionInfo info,
        HTableDescriptor htd, RegionServerServices rsServices) {
      super(tableDir, log, fs, confParam, info, htd, rsServices);
    }

    @Override
    protected HStore instantiateHStore(final HColumnDescriptor family) throws IOException {
      return new BlockCompactionsInCompletionHStore(this, family, this.conf);
    }
  }
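
  /**
   * HStore that holds the compaction inside completeCompaction(), i.e. after the compaction
   * marker has gone out to the WAL (see BlockCompactionsInCompletionRegion above) but before the
   * compaction finishes cleaning up.
   */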
  public static class BlockCompactionsInCompletionHStore extends HStore {
    CompactionBlockerRegion r;

    protected BlockCompactionsInCompletionHStore(HRegion region, HColumnDescriptor family,
        Configuration confParam) throws IOException {
      super(region, family, confParam);
      r = (CompactionBlockerRegion) region;
    }

    @Override
    protected void completeCompaction(Collection<StoreFile> compactedFiles) throws IOException {
      try {
        r.compactionsWaiting.countDown();
        r.compactionsBlocked.await();
      } catch (InterruptedException ex) {
        throw new IOException(ex);
      }
      super.completeCompaction(compactedFiles);
    }
  }

  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private final static TableName TABLE_NAME =
      TableName.valueOf("tabletest");
  private final static byte[] FAMILY = Bytes.toBytes("family");
  private static final int FIRST_BATCH_COUNT = 4000;
  private static final int SECOND_BATCH_COUNT = FIRST_BATCH_COUNT;

  /**
   * Test that puts up a regionserver, starts a compaction on a loaded region but holds the
   * compaction until after we have killed the server and the region has come up on
   * a new regionserver altogether.  This fakes the double assignment case where the region in one
   * location changes the files out from underneath a region being served elsewhere.
   */
  @Ignore("See HBASE-10298")
  @Test
  public void testFencingAroundCompaction() throws Exception {
    doTest(BlockCompactionsInPrepRegion.class, false);
    doTest(BlockCompactionsInPrepRegion.class, true);
  }

  /**
   * Test that puts up a regionserver, starts a compaction on a loaded region but holds the
   * compaction completion until after we have killed the server and the region has come up on
   * a new regionserver altogether.  This fakes the double assignment case where the region in one
   * location changes the files out from underneath a region being served elsewhere.
   */
  @Ignore("See HBASE-10298")
  @Test
  public void testFencingAroundCompactionAfterWALSync() throws Exception {
    doTest(BlockCompactionsInCompletionRegion.class, false);
    doTest(BlockCompactionsInCompletionRegion.class, true);
  }
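
  /**
   * Drives the scenario described in the class comment using the given CompactionBlockerRegion
   * implementation and distributed-log-replay setting.
   */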
  public void doTest(Class<?> regionClass, boolean distributedLogReplay) throws Exception {
    Configuration c = TEST_UTIL.getConfiguration();
    c.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, distributedLogReplay);
    // Insert our custom region
    c.setClass(HConstants.REGION_IMPL, regionClass, HRegion.class);
    c.setBoolean("dfs.support.append", true);
    // Encourage plenty of flushes
    c.setLong("hbase.hregion.memstore.flush.size", 200000);
    c.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, ConstantSizeRegionSplitPolicy.class.getName());
    // Only run compaction when we tell it to
    c.setInt("hbase.hstore.compactionThreshold", 1000);
    c.setLong("hbase.hstore.blockingStoreFiles", 1000);
    // Compact quickly after we tell it to!
    c.setInt("hbase.regionserver.thread.splitcompactcheckfrequency", 1000);
    LOG.info("Starting mini cluster");
    TEST_UTIL.startMiniCluster(1);
    CompactionBlockerRegion compactingRegion = null;
    HBaseAdmin admin = null;
    try {
      LOG.info("Creating admin");
      admin = new HBaseAdmin(c);
      LOG.info("Creating table");
      TEST_UTIL.createTable(TABLE_NAME, FAMILY);
      HTable table = new HTable(c, TABLE_NAME);
      LOG.info("Loading test table");
      // Find the region
      List<HRegion> testRegions = TEST_UTIL.getMiniHBaseCluster().findRegionsForTable(TABLE_NAME);
      assertEquals(1, testRegions.size());
      compactingRegion = (CompactionBlockerRegion)testRegions.get(0);
      LOG.info("Blocking compactions");
      compactingRegion.stopCompactions();
      long lastFlushTime = compactingRegion.getLastFlushTime();
      // Load some rows
      TEST_UTIL.loadNumericRows(table, FAMILY, 0, FIRST_BATCH_COUNT);

      // add a compaction from an older (non-existing) region to see whether we successfully skip
      // those entries
      HRegionInfo oldHri = new HRegionInfo(table.getName(),
        HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
      CompactionDescriptor compactionDescriptor = ProtobufUtil.toCompactionDescriptor(oldHri,
        FAMILY, Lists.newArrayList(new Path("/a")), Lists.newArrayList(new Path("/b")),
        new Path("store_dir"));
      HLogUtil.writeCompactionMarker(compactingRegion.getLog(), table.getTableDescriptor(),
        oldHri, compactionDescriptor, new AtomicLong(Long.MAX_VALUE-100));

      // Wait till flush has happened, otherwise there won't be multiple store files
      long startWaitTime = System.currentTimeMillis();
      while (compactingRegion.getLastFlushTime() <= lastFlushTime ||
          compactingRegion.countStoreFiles() <= 1) {
        LOG.info("Waiting for the region to flush " + compactingRegion.getRegionNameAsString());
        Thread.sleep(1000);
        assertTrue("Timed out waiting for the region to flush",
          System.currentTimeMillis() - startWaitTime < 30000);
      }
      assertTrue(compactingRegion.countStoreFiles() > 1);
      final byte[] REGION_NAME = compactingRegion.getRegionName();
      LOG.info("Asking for compaction");
      admin.majorCompact(TABLE_NAME.getName());
      LOG.info("Waiting for compaction to be about to start");
      compactingRegion.waitForCompactionToBlock();
      LOG.info("Starting a new server");
      RegionServerThread newServerThread = TEST_UTIL.getMiniHBaseCluster().startRegionServer();
      final HRegionServer newServer = newServerThread.getRegionServer();
      LOG.info("Killing region server ZK lease");
      TEST_UTIL.expireRegionServerSession(0);
      CompactionBlockerRegion newRegion = null;
      startWaitTime = System.currentTimeMillis();
      LOG.info("Waiting for the new server to pick up the region " + Bytes.toString(REGION_NAME));

      // wait for region to be assigned and to go out of log replay if applicable
      Waiter.waitFor(c, 60000, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          HRegion newRegion = newServer.getOnlineRegion(REGION_NAME);
          return newRegion != null && !newRegion.isRecovering();
        }
      });

      newRegion = (CompactionBlockerRegion)newServer.getOnlineRegion(REGION_NAME);

      LOG.info("Allowing compaction to proceed");
      compactingRegion.allowCompactions();
      while (compactingRegion.compactCount == 0) {
        Thread.sleep(1000);
      }
      // The server we killed stays up until the compaction that was started before it was killed
      // completes.  In logs you should see the old regionserver now going down.
      LOG.info("Compaction finished");
      // After compaction of old region finishes on the server that was going down, make sure that
      // all the files we expect are still working when region is up in new location.
      FileSystem fs = newRegion.getFilesystem();
      for (String f: newRegion.getStoreFileList(new byte[][] {FAMILY})) {
        assertTrue("After compaction, does not exist: " + f, fs.exists(new Path(f)));
      }
      // If we survive the split keep going...
      // Now we make sure that the region isn't totally confused.  Load up more rows.
      TEST_UTIL.loadNumericRows(table, FAMILY, FIRST_BATCH_COUNT, FIRST_BATCH_COUNT + SECOND_BATCH_COUNT);
      admin.majorCompact(TABLE_NAME.getName());
      startWaitTime = System.currentTimeMillis();
      while (newRegion.compactCount == 0) {
        Thread.sleep(1000);
        assertTrue("New region never compacted", System.currentTimeMillis() - startWaitTime < 180000);
      }
      assertEquals(FIRST_BATCH_COUNT + SECOND_BATCH_COUNT, TEST_UTIL.countRows(table));
    } finally {
      if (compactingRegion != null) {
        compactingRegion.allowCompactions();
      }
      if (admin != null) {
        admin.close();
      }
      TEST_UTIL.shutdownMiniCluster();
    }
  }
}