View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertTrue;
22  
23  import java.io.IOException;
24  import java.util.Collection;
25  import java.util.List;
26  import java.util.concurrent.CountDownLatch;
27  import java.util.concurrent.atomic.AtomicLong;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.commons.logging.impl.Log4JLogger;
32  import org.apache.hadoop.conf.Configuration;
33  import org.apache.hadoop.fs.FileSystem;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.hbase.client.HBaseAdmin;
36  import org.apache.hadoop.hbase.client.HTable;
37  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
38  import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
39  import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
40  import org.apache.hadoop.hbase.regionserver.HRegion;
41  import org.apache.hadoop.hbase.regionserver.HRegionServer;
42  import org.apache.hadoop.hbase.regionserver.HStore;
43  import org.apache.hadoop.hbase.regionserver.RegionServerServices;
44  import org.apache.hadoop.hbase.regionserver.Store;
45  import org.apache.hadoop.hbase.regionserver.StoreFile;
46  import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
47  import org.apache.hadoop.hbase.regionserver.wal.HLog;
48  import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
51  import org.apache.hadoop.hdfs.DFSClient;
52  import org.apache.hadoop.hdfs.server.datanode.DataNode;
53  import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
54  import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
55  import org.apache.log4j.Level;
56  import org.junit.Ignore;
57  import org.junit.Test;
58  import org.junit.experimental.categories.Category;
59  
60  import com.google.common.collect.Lists;
61  
62  /**
 * Test for the case where a regionserver going down has enough cycles to do damage to regions
 * that have actually been assigned elsewhere.
65   *
 * <p>If we happen to assign a region before it is fully done within its old location -- i.e. it is on two servers at the
67   * same time -- all can work fine until the case where the region on the dying server decides to compact or otherwise
68   * change the region file set.  The region in its new location will then get a surprise when it tries to do something
69   * w/ a file removed by the region in its old location on dying server.
70   *
71   * <p>Making a test for this case is a little tough in that even if a file is deleted up on the namenode,
72   * if the file was opened before the delete, it will continue to let reads happen until something changes the
73   * state of cached blocks in the dfsclient that was already open (a block from the deleted file is cleaned
74   * from the datanode by NN).
75   *
76   * <p>What we will do below is do an explicit check for existence on the files listed in the region that
 * has had some files removed because of a compaction.  This sort of hurries along and makes certain what is a chance
 * occurrence.
79   */
80  @Category(MediumTests.class)
81  public class TestIOFencing {
  // Logger used by the test method and by the nested region/store subclasses in this file.
  static final Log LOG = LogFactory.getLog(TestIOFencing.class);
  // This static initializer is intentionally a no-op: it only holds commented-out switches
  // that raise the log level of individual loggers when chasing a failure.
  static {
    // Uncomment the following lines if more verbosity is needed for
    // debugging (see HBASE-12285 for details).
    //((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL);
    //((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
    //((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL);
    //((Log4JLogger)LogFactory.getLog("org.apache.hadoop.hdfs.server.namenode.FSNamesystem"))
    //    .getLogger().setLevel(Level.ALL);
    //((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL);
    //((Log4JLogger)HLog.LOG).getLogger().setLevel(Level.ALL);
  }
94  
95    public abstract static class CompactionBlockerRegion extends HRegion {
96      volatile int compactCount = 0;
97      volatile CountDownLatch compactionsBlocked = new CountDownLatch(0);
98      volatile CountDownLatch compactionsWaiting = new CountDownLatch(0);
99  
100     @SuppressWarnings("deprecation")
101     public CompactionBlockerRegion(Path tableDir, HLog log,
102         FileSystem fs, Configuration confParam, HRegionInfo info,
103         HTableDescriptor htd, RegionServerServices rsServices) {
104       super(tableDir, log, fs, confParam, info, htd, rsServices);
105     }
106 
107     public void stopCompactions() {
108       compactionsBlocked = new CountDownLatch(1);
109       compactionsWaiting = new CountDownLatch(1);
110     }
111 
112     public void allowCompactions() {
113       LOG.debug("allowing compactions");
114       compactionsBlocked.countDown();
115     }
116     public void waitForCompactionToBlock() throws IOException {
117       try {
118         LOG.debug("waiting for compaction to block");
119         compactionsWaiting.await();
120         LOG.debug("compaction block reached");
121       } catch (InterruptedException ex) {
122         throw new IOException(ex);
123       }
124     }
125     @Override
126     public boolean compact(CompactionContext compaction, Store store) throws IOException {
127       try {
128         return super.compact(compaction, store);
129       } finally {
130         compactCount++;
131       }
132     }
133     public int countStoreFiles() {
134       int count = 0;
135       for (Store store : stores.values()) {
136         count += store.getStorefilesCount();
137       }
138       return count;
139     }
140   }
141 
142   /**
143    * An override of HRegion that allows us park compactions in a holding pattern and
144    * then when appropriate for the test, allow them proceed again.
145    */
146   public static class BlockCompactionsInPrepRegion extends CompactionBlockerRegion {
147 
148     public BlockCompactionsInPrepRegion(Path tableDir, HLog log,
149         FileSystem fs, Configuration confParam, HRegionInfo info,
150         HTableDescriptor htd, RegionServerServices rsServices) {
151       super(tableDir, log, fs, confParam, info, htd, rsServices);
152     }
153     @Override
154     protected void doRegionCompactionPrep() throws IOException {
155       compactionsWaiting.countDown();
156       try {
157         compactionsBlocked.await();
158       } catch (InterruptedException ex) {
159         throw new IOException();
160       }
161       super.doRegionCompactionPrep();
162     }
163   }
164 
165   /**
166    * An override of HRegion that allows us park compactions in a holding pattern and
167    * then when appropriate for the test, allow them proceed again. This allows the compaction
168    * entry to go the WAL before blocking, but blocks afterwards
169    */
170   public static class BlockCompactionsInCompletionRegion extends CompactionBlockerRegion {
171     public BlockCompactionsInCompletionRegion(Path tableDir, HLog log,
172         FileSystem fs, Configuration confParam, HRegionInfo info,
173         HTableDescriptor htd, RegionServerServices rsServices) {
174       super(tableDir, log, fs, confParam, info, htd, rsServices);
175     }
176     @Override
177     protected HStore instantiateHStore(final HColumnDescriptor family) throws IOException {
178       return new BlockCompactionsInCompletionHStore(this, family, this.conf);
179     }
180   }
181 
182   public static class BlockCompactionsInCompletionHStore extends HStore {
183     CompactionBlockerRegion r;
184     protected BlockCompactionsInCompletionHStore(HRegion region, HColumnDescriptor family,
185         Configuration confParam) throws IOException {
186       super(region, family, confParam);
187       r = (CompactionBlockerRegion) region;
188     }
189 
190     @Override
191     protected void completeCompaction(Collection<StoreFile> compactedFiles) throws IOException {
192       try {
193         r.compactionsWaiting.countDown();
194         r.compactionsBlocked.await();
195       } catch (InterruptedException ex) {
196         throw new IOException(ex);
197       }
198       super.completeCompaction(compactedFiles);
199     }
200   }
201 
202   private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
203   private final static TableName TABLE_NAME =
204       TableName.valueOf("tabletest");
205   private final static byte[] FAMILY = Bytes.toBytes("family");
206   private static final int FIRST_BATCH_COUNT = 4000;
207   private static final int SECOND_BATCH_COUNT = FIRST_BATCH_COUNT;
208 
209   /**
210    * Test that puts up a regionserver, starts a compaction on a loaded region but holds the
211    * compaction until after we have killed the server and the region has come up on
212    * a new regionserver altogether.  This fakes the double assignment case where region in one
213    * location changes the files out from underneath a region being served elsewhere.
214    */
215   @Ignore("See HBASE-10298")
216   @Test
217   public void testFencingAroundCompaction() throws Exception {
218     doTest(BlockCompactionsInPrepRegion.class, false);
219     doTest(BlockCompactionsInPrepRegion.class, true);
220   }
221 
222   /**
223    * Test that puts up a regionserver, starts a compaction on a loaded region but holds the
224    * compaction completion until after we have killed the server and the region has come up on
225    * a new regionserver altogether.  This fakes the double assignment case where region in one
226    * location changes the files out from underneath a region being served elsewhere.
227    */
228   @Ignore("See HBASE-10298")
229   @Test
230   public void testFencingAroundCompactionAfterWALSync() throws Exception {
231     doTest(BlockCompactionsInCompletionRegion.class, false);
232     doTest(BlockCompactionsInCompletionRegion.class, true);
233   }
234 
235   public void doTest(Class<?> regionClass, boolean distributedLogReplay) throws Exception {
236     Configuration c = TEST_UTIL.getConfiguration();
237     c.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, distributedLogReplay);
238     // Insert our custom region
239     c.setClass(HConstants.REGION_IMPL, regionClass, HRegion.class);
240     c.setBoolean("dfs.support.append", true);
241     // Encourage plenty of flushes
242     c.setLong("hbase.hregion.memstore.flush.size", 200000);
243     c.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, ConstantSizeRegionSplitPolicy.class.getName());
244     // Only run compaction when we tell it to
245     c.setInt("hbase.hstore.compactionThreshold", 1000);
246     c.setLong("hbase.hstore.blockingStoreFiles", 1000);
247     // Compact quickly after we tell it to!
248     c.setInt("hbase.regionserver.thread.splitcompactcheckfrequency", 1000);
249     LOG.info("Starting mini cluster");
250     TEST_UTIL.startMiniCluster(1);
251     CompactionBlockerRegion compactingRegion = null;
252     HBaseAdmin admin = null;
253     try {
254       LOG.info("Creating admin");
255       admin = new HBaseAdmin(c);
256       LOG.info("Creating table");
257       TEST_UTIL.createTable(TABLE_NAME, FAMILY);
258       HTable table = new HTable(c, TABLE_NAME);
259       LOG.info("Loading test table");
260       // Find the region
261       List<HRegion> testRegions = TEST_UTIL.getMiniHBaseCluster().findRegionsForTable(TABLE_NAME);
262       assertEquals(1, testRegions.size());
263       compactingRegion = (CompactionBlockerRegion)testRegions.get(0);
264       LOG.info("Blocking compactions");
265       compactingRegion.stopCompactions();
266       long lastFlushTime = compactingRegion.getLastFlushTime();
267       // Load some rows
268       TEST_UTIL.loadNumericRows(table, FAMILY, 0, FIRST_BATCH_COUNT);
269 
270       // add a compaction from an older (non-existing) region to see whether we successfully skip
271       // those entries
272       HRegionInfo oldHri = new HRegionInfo(table.getName(),
273         HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
274       CompactionDescriptor compactionDescriptor = ProtobufUtil.toCompactionDescriptor(oldHri,
275         FAMILY, Lists.newArrayList(new Path("/a")), Lists.newArrayList(new Path("/b")),
276         new Path("store_dir"));
277       HLogUtil.writeCompactionMarker(compactingRegion.getLog(), table.getTableDescriptor(),
278         oldHri, compactionDescriptor, new AtomicLong(Long.MAX_VALUE-100));
279 
280       // Wait till flush has happened, otherwise there won't be multiple store files
281       long startWaitTime = System.currentTimeMillis();
282       while (compactingRegion.getLastFlushTime() <= lastFlushTime ||
283           compactingRegion.countStoreFiles() <= 1) {
284         LOG.info("Waiting for the region to flush " + compactingRegion.getRegionNameAsString());
285         Thread.sleep(1000);
286         assertTrue("Timed out waiting for the region to flush",
287           System.currentTimeMillis() - startWaitTime < 30000);
288       }
289       assertTrue(compactingRegion.countStoreFiles() > 1);
290       final byte REGION_NAME[] = compactingRegion.getRegionName();
291       LOG.info("Asking for compaction");
292       admin.majorCompact(TABLE_NAME.getName());
293       LOG.info("Waiting for compaction to be about to start");
294       compactingRegion.waitForCompactionToBlock();
295       LOG.info("Starting a new server");
296       RegionServerThread newServerThread = TEST_UTIL.getMiniHBaseCluster().startRegionServer();
297       final HRegionServer newServer = newServerThread.getRegionServer();
298       LOG.info("Killing region server ZK lease");
299       TEST_UTIL.expireRegionServerSession(0);
300       CompactionBlockerRegion newRegion = null;
301       startWaitTime = System.currentTimeMillis();
302       LOG.info("Waiting for the new server to pick up the region " + Bytes.toString(REGION_NAME));
303 
304       // wait for region to be assigned and to go out of log replay if applicable
305       Waiter.waitFor(c, 60000, new Waiter.Predicate<Exception>() {
306         @Override
307         public boolean evaluate() throws Exception {
308           HRegion newRegion = newServer.getOnlineRegion(REGION_NAME);
309           return newRegion != null && !newRegion.isRecovering();
310         }
311       });
312 
313       newRegion = (CompactionBlockerRegion)newServer.getOnlineRegion(REGION_NAME);
314 
315       LOG.info("Allowing compaction to proceed");
316       compactingRegion.allowCompactions();
317       while (compactingRegion.compactCount == 0) {
318         Thread.sleep(1000);
319       }
320       // The server we killed stays up until the compaction that was started before it was killed completes.  In logs
321       // you should see the old regionserver now going down.
322       LOG.info("Compaction finished");
323       // After compaction of old region finishes on the server that was going down, make sure that
324       // all the files we expect are still working when region is up in new location.
325       FileSystem fs = newRegion.getFilesystem();
326       for (String f: newRegion.getStoreFileList(new byte [][] {FAMILY})) {
327         assertTrue("After compaction, does not exist: " + f, fs.exists(new Path(f)));
328       }
329       // If we survive the split keep going...
330       // Now we make sure that the region isn't totally confused.  Load up more rows.
331       TEST_UTIL.loadNumericRows(table, FAMILY, FIRST_BATCH_COUNT, FIRST_BATCH_COUNT + SECOND_BATCH_COUNT);
332       admin.majorCompact(TABLE_NAME.getName());
333       startWaitTime = System.currentTimeMillis();
334       while (newRegion.compactCount == 0) {
335         Thread.sleep(1000);
336         assertTrue("New region never compacted", System.currentTimeMillis() - startWaitTime < 180000);
337       }
338       assertEquals(FIRST_BATCH_COUNT + SECOND_BATCH_COUNT, TEST_UTIL.countRows(table));
339     } finally {
340       if (compactingRegion != null) {
341         compactingRegion.allowCompactions();
342       }
343       admin.close();
344       TEST_UTIL.shutdownMiniCluster();
345     }
346   }
347 }