/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileArchiveTestingUtil;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.hbase.util.StoppableImplementation;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test that the {@link HFileArchiver} correctly removes all the parts of a region when the region
 * is cleaned up.
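 * <p>
 * The archiver is exercised directly; the typical call, as used throughout these tests, is:
 *
 * <pre>
 * HFileArchiver.archiveRegion(conf, fs, region.getRegionInfo());
 * </pre>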
 */
@Category(MediumTests.class)
public class TestHFileArchiving {

  private static final String STRING_TABLE_NAME = "test_table";

  private static final Log LOG = LogFactory.getLog(TestHFileArchiving.class);
  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  private static final byte[] TABLE_NAME = Bytes.toBytes(STRING_TABLE_NAME);
  private static final byte[] TEST_FAM = Bytes.toBytes("fam");

  /**
   * Set up the config for the cluster
   */
  @BeforeClass
  public static void setupCluster() throws Exception {
    setupConf(UTIL.getConfiguration());
    UTIL.startMiniCluster();
  }

  private static void setupConf(Configuration conf) {
    // disable the ui
    conf.setInt("hbase.regionserver.info.port", -1);
    // drop the memstore size so we get flushes
    conf.setInt("hbase.hregion.memstore.flush.size", 25000);
    // disable major compactions
    conf.setInt(HConstants.MAJOR_COMPACTION_PERIOD, 0);

    // prevent aggressive region split
    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
      ConstantSizeRegionSplitPolicy.class.getName());
  }

  @Before
  public void setup() throws Exception {
    UTIL.createTable(TABLE_NAME, TEST_FAM);
  }

  @After
  public void tearDown() throws Exception {
    // clean up the table if it is still around
    if (UTIL.getHBaseAdmin().tableExists(STRING_TABLE_NAME)) {
      UTIL.deleteTable(TABLE_NAME);
    }
    // and clean up the archive directory
    try {
      clearArchiveDirectory();
    } catch (IOException e) {
      Assert.fail("Failure to delete archive directory: " + e.getMessage());
    }
  }

  @AfterClass
  public static void cleanupTest() throws Exception {
    try {
      UTIL.shutdownMiniCluster();
    } catch (Exception e) {
      // NOOP;
    }
  }

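  /**
   * Test that archiving a region moves all of its store files into the archive directory and
   * removes the original region directory.
   */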
  @Test
  public void testRemovesRegionDirOnArchive() throws Exception {
    final HBaseAdmin admin = UTIL.getHBaseAdmin();

    // get the region serving this table
    List<HRegion> servingRegions = UTIL.getHBaseCluster().getRegions(TABLE_NAME);
    // make sure we only have 1 region serving this table
    assertEquals(1, servingRegions.size());
    HRegion region = servingRegions.get(0);

    // and load the table
    UTIL.loadRegion(region, TEST_FAM);

    // shutdown the table so we can manipulate the files
    admin.disableTable(STRING_TABLE_NAME);

    FileSystem fs = UTIL.getTestFileSystem();

    // now attempt to archive the region
    Path regionDir = HRegion.getRegionDir(region.getTableDir().getParent(), region.getRegionInfo());

    HFileArchiver.archiveRegion(UTIL.getConfiguration(), fs, region.getRegionInfo());

    // check for the existence of the archive directory and some files in it
    Path archiveDir = HFileArchiveTestingUtil.getRegionArchiveDir(UTIL.getConfiguration(), region);
    assertTrue(fs.exists(archiveDir));

    // check to make sure the store directory was archived
    FileStatus[] stores = fs.listStatus(archiveDir);
    assertEquals(1, stores.length);

    // make sure we archived the store files
    FileStatus[] storeFiles = fs.listStatus(stores[0].getPath());
    assertTrue(storeFiles.length > 0);

    // then ensure the region's directory isn't present
    assertFalse(fs.exists(regionDir));
  }

  /**
   * Test that the region directory is removed when we archive a region that has no store files,
   * but still has hidden files.
   * @throws Exception on failure
   */
  @Test
  public void testDeleteRegionWithNoStoreFiles() throws Exception {
    // get the region serving this table
    List<HRegion> servingRegions = UTIL.getHBaseCluster().getRegions(TABLE_NAME);
    // make sure we only have 1 region serving this table
    assertEquals(1, servingRegions.size());
    HRegion region = servingRegions.get(0);

    FileSystem fs = region.getFilesystem();

    // make sure there are some files in the regiondir
    Path rootDir = FSUtils.getRootDir(fs.getConf());
    Path regionDir = HRegion.getRegionDir(rootDir, region.getRegionInfo());
    FileStatus[] regionFiles = FSUtils.listStatus(fs, regionDir, null);
    Assert.assertNotNull("No files in the region directory", regionFiles);
    if (LOG.isDebugEnabled()) {
      List<Path> files = new ArrayList<Path>();
      for (FileStatus file : regionFiles) {
        files.add(file.getPath());
      }
      LOG.debug("Current files:" + files);
    }
    // delete the visible folders so we just have hidden files/folders
    final PathFilter dirFilter = new FSUtils.DirFilter(fs);
    PathFilter nonHidden = new PathFilter() {
      @Override
      public boolean accept(Path file) {
        return dirFilter.accept(file) && !file.getName().startsWith(".");
      }
    };
    FileStatus[] storeDirs = FSUtils.listStatus(fs, regionDir, nonHidden);
    for (FileStatus store : storeDirs) {
      LOG.debug("Deleting store for test");
      fs.delete(store.getPath(), true);
    }

    // then archive the region
    HFileArchiver.archiveRegion(UTIL.getConfiguration(), fs, region.getRegionInfo());

    // and check to make sure the region directory got deleted
    assertFalse("Region directory (" + regionDir + ") still exists", fs.exists(regionDir));
  }

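  /**
   * Test that all of a region's store files are archived when the table is deleted.
   * @throws Exception on failure
   */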
  @Test
  public void testArchiveOnTableDelete() throws Exception {
    List<HRegion> servingRegions = UTIL.getHBaseCluster().getRegions(TABLE_NAME);
    // make sure we only have 1 region serving this table
    assertEquals(1, servingRegions.size());
    HRegion region = servingRegions.get(0);

    // get the region server hosting the region, and its filesystem
    HRegionServer hrs = UTIL.getRSForFirstRegionInTable(TABLE_NAME);
    FileSystem fs = hrs.getFileSystem();

    // put some data on the region
    LOG.debug("-------Loading table");
    UTIL.loadRegion(region, TEST_FAM);

    // re-fetch the region from the region server
    List<HRegion> regions = hrs.getOnlineRegions(TABLE_NAME);
    assertEquals("More than 1 region for test table.", 1, regions.size());

    region = regions.get(0);
    // wait for all the compactions to complete
    region.waitForFlushesAndCompactions();

    // disable table to prevent new updates
    UTIL.getHBaseAdmin().disableTable(TABLE_NAME);
    LOG.debug("Disabled table");

    // remove all the files from the archive to get a fair comparison
    clearArchiveDirectory();

    // then get the current store files
    Path regionDir = region.getRegionDir();
    List<String> storeFiles = getRegionStoreFiles(fs, regionDir);

    // then delete the table so the hfiles get archived
    UTIL.deleteTable(TABLE_NAME);

    // then get the files in the archive directory.
    Path archiveDir = HFileArchiveUtil.getArchivePath(UTIL.getConfiguration());
    List<String> archivedFiles = getAllFileNames(fs, archiveDir);
    Collections.sort(storeFiles);
    Collections.sort(archivedFiles);

    LOG.debug("Store files:");
    for (int i = 0; i < storeFiles.size(); i++) {
      LOG.debug(i + " - " + storeFiles.get(i));
    }
    LOG.debug("Archive files:");
    for (int i = 0; i < archivedFiles.size(); i++) {
      LOG.debug(i + " - " + archivedFiles.get(i));
    }

    assertTrue("Archived files are missing some of the store files!",
      archivedFiles.containsAll(storeFiles));
  }

  /**
   * Test that the store files are archived when a column family is removed.
   * @throws Exception on failure
   */
  @Test
  public void testArchiveOnTableFamilyDelete() throws Exception {
    List<HRegion> servingRegions = UTIL.getHBaseCluster().getRegions(TABLE_NAME);
    // make sure we only have 1 region serving this table
    assertEquals(1, servingRegions.size());
    HRegion region = servingRegions.get(0);

    // get the region server hosting the region, and its filesystem
    HRegionServer hrs = UTIL.getRSForFirstRegionInTable(TABLE_NAME);
    FileSystem fs = hrs.getFileSystem();

    // put some data on the region
    LOG.debug("-------Loading table");
    UTIL.loadRegion(region, TEST_FAM);

    // re-fetch the region from the region server
    List<HRegion> regions = hrs.getOnlineRegions(TABLE_NAME);
    assertEquals("More than 1 region for test table.", 1, regions.size());

    region = regions.get(0);
    // wait for all the compactions to complete
    region.waitForFlushesAndCompactions();

    // disable table to prevent new updates
    UTIL.getHBaseAdmin().disableTable(TABLE_NAME);
    LOG.debug("Disabled table");

    // remove all the files from the archive to get a fair comparison
    clearArchiveDirectory();

    // then get the current store files
    Path regionDir = region.getRegionDir();
    List<String> storeFiles = getRegionStoreFiles(fs, regionDir);

    // then delete the column family so the hfiles get archived
    UTIL.getHBaseAdmin().deleteColumn(TABLE_NAME, TEST_FAM);

    // then get the files in the archive directory.
    Path archiveDir = HFileArchiveUtil.getArchivePath(UTIL.getConfiguration());
    List<String> archivedFiles = getAllFileNames(fs, archiveDir);
    Collections.sort(storeFiles);
    Collections.sort(archivedFiles);

    LOG.debug("Store files:");
    for (int i = 0; i < storeFiles.size(); i++) {
      LOG.debug(i + " - " + storeFiles.get(i));
    }
    LOG.debug("Archive files:");
    for (int i = 0; i < archivedFiles.size(); i++) {
      LOG.debug(i + " - " + archivedFiles.get(i));
    }

    assertTrue("Archived files are missing some of the store files!",
      archivedFiles.containsAll(storeFiles));
  }

  /**
   * Test the HFileArchiver.resolveAndArchive() race condition described in HBASE-7643.
   */
  @Test
  public void testCleaningRace() throws Exception {
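    // how long to run the create/archive race loop, in milliseconds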
    final long TEST_TIME = 20 * 1000;

    Configuration conf = UTIL.getMiniHBaseCluster().getMaster().getConfiguration();
    Path rootDir = UTIL.getDataTestDir("testCleaningRace");
    FileSystem fs = UTIL.getTestFileSystem();

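    // lay out a fake table/region/family directory structure under the test root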
    Path archiveDir = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY);
    Path regionDir = new Path("table", "abcdef");
    Path familyDir = new Path(regionDir, "cf");

    Path sourceRegionDir = new Path(rootDir, regionDir);
    fs.mkdirs(sourceRegionDir);

    Stoppable stoppable = new StoppableImplementation();

    // The cleaner should be looping without long pauses to reproduce the race condition.
    HFileCleaner cleaner = new HFileCleaner(1, stoppable, conf, fs, archiveDir);
    try {
      cleaner.start();

      // Keep creating/archiving new files while the cleaner is running in the other thread
      long startTime = System.currentTimeMillis();
      for (long fid = 0; (System.currentTimeMillis() - startTime) < TEST_TIME; ++fid) {
        Path file = new Path(familyDir, String.valueOf(fid));
        Path sourceFile = new Path(rootDir, file);
        Path archiveFile = new Path(archiveDir, file);

        fs.createNewFile(sourceFile);

        try {
          // Try to archive the file
          HFileArchiver.archiveRegion(fs, rootDir,
              sourceRegionDir.getParent(), sourceRegionDir);

          // The archiver succeeded: the file is no longer in the original location,
          // but it is in the archive location.
          LOG.debug("hfile=" + fid + " should be in the archive");
          assertTrue(fs.exists(archiveFile));
          assertFalse(fs.exists(sourceFile));
        } catch (IOException e) {
          // The archiver was unable to archive the file, probably due to the HBASE-7643 race
          // condition. In this case the file should not have been archived and should still be
          // in its original location.
          LOG.debug("hfile=" + fid + " should be in the source location");
          assertFalse(fs.exists(archiveFile));
          assertTrue(fs.exists(sourceFile));

          // remove the file so it is not picked up on the next iteration
          fs.delete(sourceFile, false);
        }
      }
    } finally {
      stoppable.stop("test end");
      cleaner.join();
      fs.delete(rootDir, true);
    }
  }

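  /**
   * Remove the archive directory (here, the ".archive" directory under the default root path)
   * so each test starts from a clean slate.
   */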
  private void clearArchiveDirectory() throws IOException {
    UTIL.getTestFileSystem().delete(new Path(UTIL.getDefaultRootDirPath(), ".archive"), true);
  }

  /**
   * Get the names of all the files below the given directory
   * @param fs the filesystem to inspect
   * @param archiveDir the directory in which to look
   * @return the names of all files found below the directory
   * @throws IOException on filesystem errors
   */
  private List<String> getAllFileNames(final FileSystem fs, Path archiveDir) throws IOException {
    FileStatus[] files = FSUtils.listStatus(fs, archiveDir, null);
    return recurseOnFiles(fs, files, new ArrayList<String>());
  }

  /** Recursively look up all the file names under the given file[] array **/
  private List<String> recurseOnFiles(FileSystem fs, FileStatus[] files, List<String> fileNames)
      throws IOException {
    if (files == null || files.length == 0) return fileNames;

    for (FileStatus file : files) {
      if (file.isDir()) {
        recurseOnFiles(fs, FSUtils.listStatus(fs, file.getPath(), null), fileNames);
      } else fileNames.add(file.getPath().getName());
    }
    return fileNames;
  }

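  /**
   * Get the names of the store files under the given region directory, filtering out
   * non-store-file entries such as the region info file and the logs.
   */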
  private List<String> getRegionStoreFiles(final FileSystem fs, final Path regionDir)
      throws IOException {
    List<String> storeFiles = getAllFileNames(fs, regionDir);
    // remove all the non-storefile named files for the region
    for (int i = 0; i < storeFiles.size(); i++) {
      String file = storeFiles.get(i);
      if (file.contains(HRegion.REGIONINFO_FILE) || file.contains("hlog")) {
        storeFiles.remove(i--);
      }
    }
    return storeFiles;
  }
}