1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.snapshot;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertTrue;
22  import static org.junit.Assert.fail;
23  
24  import java.io.IOException;
25  import java.util.HashSet;
26  import java.util.List;
27  import java.util.Set;
28  import java.util.concurrent.CountDownLatch;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.conf.Configuration;
33  import org.apache.hadoop.fs.FileSystem;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.hbase.HBaseTestingUtility;
36  import org.apache.hadoop.hbase.HConstants;
37  import org.apache.hadoop.hbase.HRegionInfo;
38  import org.apache.hadoop.hbase.HTableDescriptor;
39  import org.apache.hadoop.hbase.LargeTests;
40  import org.apache.hadoop.hbase.TableNotFoundException;
41  import org.apache.hadoop.hbase.client.HBaseAdmin;
42  import org.apache.hadoop.hbase.client.HTable;
43  import org.apache.hadoop.hbase.master.HMaster;
44  import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
45  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
46  import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
47  import org.apache.hadoop.hbase.regionserver.HRegion;
48  import org.apache.hadoop.hbase.regionserver.HRegionServer;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.apache.hadoop.hbase.util.FSTableDescriptors;
51  import org.apache.hadoop.hbase.util.FSUtils;
52  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
53  import org.junit.After;
54  import org.junit.AfterClass;
55  import org.junit.Before;
56  import org.junit.BeforeClass;
57  import org.junit.Test;
58  import org.junit.experimental.categories.Category;
59  
60  /**
61   * Test creating/using/deleting snapshots from the client
62   * <p>
63   * This is an end-to-end test for the snapshot utility
64   *
 * TODO This is essentially a clone of TestSnapshotFromClient.  It is worth refactoring
 * because there will be a few more flavors of snapshots that need to run these tests.
67   */
68  @Category(LargeTests.class)
69  public class TestFlushSnapshotFromClient {
70    private static final Log LOG = LogFactory.getLog(TestFlushSnapshotFromClient.class);
71    private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
72    private static final int NUM_RS = 2;
73    private static final String STRING_TABLE_NAME = "test";
74    private static final byte[] TEST_FAM = Bytes.toBytes("fam");
75    private static final byte[] TABLE_NAME = Bytes.toBytes(STRING_TABLE_NAME);
76  
77    /**
78     * Setup the config for the cluster
79     * @throws Exception on failure
80     */
81    @BeforeClass
82    public static void setupCluster() throws Exception {
83      setupConf(UTIL.getConfiguration());
84      UTIL.startMiniCluster(NUM_RS);
85    }
86  
87    private static void setupConf(Configuration conf) {
88      // disable the ui
89      conf.setInt("hbase.regionsever.info.port", -1);
90      // change the flush size to a small amount, regulating number of store files
91      conf.setInt("hbase.hregion.memstore.flush.size", 25000);
92      // so make sure we get a compaction when doing a load, but keep around some
93      // files in the store
94      conf.setInt("hbase.hstore.compaction.min", 10);
95      conf.setInt("hbase.hstore.compactionThreshold", 10);
96      // block writes if we get to 12 store files
97      conf.setInt("hbase.hstore.blockingStoreFiles", 12);
98      // drop the number of attempts for the hbase admin
99      conf.setInt("hbase.client.retries.number", 1);
100     // Enable snapshot
101     conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
102     // prevent aggressive region split
103     conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
104       ConstantSizeRegionSplitPolicy.class.getName());
105   }
106 
107   @Before
108   public void setup() throws Exception {
109     UTIL.createTable(TABLE_NAME, TEST_FAM);
110   }
111 
112   @After
113   public void tearDown() throws Exception {
114     UTIL.deleteTable(TABLE_NAME);
115     SnapshotTestingUtils.deleteAllSnapshots(UTIL.getHBaseAdmin());
116     SnapshotTestingUtils.deleteArchiveDirectory(UTIL);
117   }
118 
119   @AfterClass
120   public static void cleanupTest() throws Exception {
121     try {
122       UTIL.shutdownMiniCluster();
123     } catch (Exception e) {
124       LOG.warn("failure shutting down cluster", e);
125     }
126   }
127 
128   /**
129    * Test snapshotting a table that is online without flushing
130    * @throws Exception
131    */
132   @Test
133   public void testSkipFlushTableSnapshot() throws Exception {
134     HBaseAdmin admin = UTIL.getHBaseAdmin();
135     // make sure we don't fail on listing snapshots
136     SnapshotTestingUtils.assertNoSnapshots(admin);
137 
138     // put some stuff in the table
139     HTable table = new HTable(UTIL.getConfiguration(), TABLE_NAME);
140     UTIL.loadTable(table, TEST_FAM);
141 
142     // get the name of all the regionservers hosting the snapshotted table
143     Set<String> snapshotServers = new HashSet<String>();
144     List<RegionServerThread> servers = UTIL.getMiniHBaseCluster().getLiveRegionServerThreads();
145     for (RegionServerThread server : servers) {
146       if (server.getRegionServer().getOnlineRegions(TABLE_NAME).size() > 0) {
147         snapshotServers.add(server.getRegionServer().getServerName().toString());
148       }
149     }
150 
151     LOG.debug("FS state before snapshot:");
152     FSUtils.logFileSystemState(UTIL.getTestFileSystem(),
153         FSUtils.getRootDir(UTIL.getConfiguration()), LOG);
154 
155     // take a snapshot of the enabled table
156     String snapshotString = "skipFlushTableSnapshot";
157     byte[] snapshot = Bytes.toBytes(snapshotString);
158     admin.snapshot(snapshotString, STRING_TABLE_NAME, SnapshotDescription.Type.SKIPFLUSH);
159     LOG.debug("Snapshot completed.");
160 
161     // make sure we have the snapshot
162     List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin,
163         snapshot, TABLE_NAME);
164 
165     // make sure its a valid snapshot
166     FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
167     Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
168     LOG.debug("FS state after snapshot:");
169     FSUtils.logFileSystemState(UTIL.getTestFileSystem(),
170         FSUtils.getRootDir(UTIL.getConfiguration()), LOG);
171 
172     SnapshotTestingUtils.confirmSnapshotValid(snapshots.get(0), TABLE_NAME, TEST_FAM, rootDir,
173         admin, fs, false, new Path(rootDir, HConstants.HREGION_LOGDIR_NAME), snapshotServers);
174 
175     admin.deleteSnapshot(snapshot);
176     snapshots = admin.listSnapshots();
177     SnapshotTestingUtils.assertNoSnapshots(admin);
178   }
179   /**
180    * Test simple flush snapshotting a table that is online
181    * @throws Exception
182    */
183   @Test
184   public void testFlushTableSnapshot() throws Exception {
185     HBaseAdmin admin = UTIL.getHBaseAdmin();
186     // make sure we don't fail on listing snapshots
187     SnapshotTestingUtils.assertNoSnapshots(admin);
188 
189     // put some stuff in the table
190     HTable table = new HTable(UTIL.getConfiguration(), TABLE_NAME);
191     UTIL.loadTable(table, TEST_FAM);
192 
193     // get the name of all the regionservers hosting the snapshotted table
194     Set<String> snapshotServers = new HashSet<String>();
195     List<RegionServerThread> servers = UTIL.getMiniHBaseCluster().getLiveRegionServerThreads();
196     for (RegionServerThread server : servers) {
197       if (server.getRegionServer().getOnlineRegions(TABLE_NAME).size() > 0) {
198         snapshotServers.add(server.getRegionServer().getServerName().toString());
199       }
200     }
201 
202     LOG.debug("FS state before snapshot:");
203     FSUtils.logFileSystemState(UTIL.getTestFileSystem(),
204       FSUtils.getRootDir(UTIL.getConfiguration()), LOG);
205 
206     // take a snapshot of the enabled table
207     String snapshotString = "offlineTableSnapshot";
208     byte[] snapshot = Bytes.toBytes(snapshotString);
209     admin.snapshot(snapshotString, STRING_TABLE_NAME, SnapshotDescription.Type.FLUSH);
210     LOG.debug("Snapshot completed.");
211 
212     // make sure we have the snapshot
213     List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin,
214       snapshot, TABLE_NAME);
215 
216     // make sure its a valid snapshot
217     FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
218     Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
219     LOG.debug("FS state after snapshot:");
220     FSUtils.logFileSystemState(UTIL.getTestFileSystem(),
221       FSUtils.getRootDir(UTIL.getConfiguration()), LOG);
222 
223     SnapshotTestingUtils.confirmSnapshotValid(snapshots.get(0), TABLE_NAME, TEST_FAM, rootDir,
224       admin, fs, false, new Path(rootDir, HConstants.HREGION_LOGDIR_NAME), snapshotServers);
225 
226     admin.deleteSnapshot(snapshot);
227     snapshots = admin.listSnapshots();
228     SnapshotTestingUtils.assertNoSnapshots(admin);
229   }
230 
231   @Test
232   public void testSnapshotFailsOnNonExistantTable() throws Exception {
233     HBaseAdmin admin = UTIL.getHBaseAdmin();
234     // make sure we don't fail on listing snapshots
235     SnapshotTestingUtils.assertNoSnapshots(admin);
236     String tableName = "_not_a_table";
237 
238     // make sure the table doesn't exist
239     boolean fail = false;
240     do {
241     try {
242       admin.getTableDescriptor(Bytes.toBytes(tableName));
243       fail = true;
244           LOG.error("Table:" + tableName + " already exists, checking a new name");
245       tableName = tableName+"!";
246     } catch (TableNotFoundException e) {
247       fail = false;
248       }
249     } while (fail);
250 
251     // snapshot the non-existant table
252     try {
253       admin.snapshot("fail", tableName, SnapshotDescription.Type.FLUSH);
254       fail("Snapshot succeeded even though there is not table.");
255     } catch (SnapshotCreationException e) {
256       LOG.info("Correctly failed to snapshot a non-existant table:" + e.getMessage());
257     }
258   }
259 
260   @Test(timeout = 60000)
261   public void testAsyncFlushSnapshot() throws Exception {
262     HBaseAdmin admin = UTIL.getHBaseAdmin();
263     SnapshotDescription snapshot = SnapshotDescription.newBuilder().setName("asyncSnapshot")
264         .setTable(STRING_TABLE_NAME).setType(SnapshotDescription.Type.FLUSH).build();
265 
266     // take the snapshot async
267     admin.takeSnapshotAsync(snapshot);
268 
269     // constantly loop, looking for the snapshot to complete
270     HMaster master = UTIL.getMiniHBaseCluster().getMaster();
271     SnapshotTestingUtils.waitForSnapshotToComplete(master, new HSnapshotDescription(snapshot), 200);
272     LOG.info(" === Async Snapshot Completed ===");
273     FSUtils.logFileSystemState(UTIL.getTestFileSystem(),
274       FSUtils.getRootDir(UTIL.getConfiguration()), LOG);
275     // make sure we get the snapshot
276     SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot);
277 
278     // test that we can delete the snapshot
279     admin.deleteSnapshot(snapshot.getName());
280     LOG.info(" === Async Snapshot Deleted ===");
281     FSUtils.logFileSystemState(UTIL.getTestFileSystem(),
282       FSUtils.getRootDir(UTIL.getConfiguration()), LOG);
283     // make sure we don't have any snapshots
284     SnapshotTestingUtils.assertNoSnapshots(admin);
285     LOG.info(" === Async Snapshot Test Completed ===");
286 
287   }
288 
289   /**
290    * Basic end-to-end test of simple-flush-based snapshots
291    */
292   @Test
293   public void testFlushCreateListDestroy() throws Exception {
294     LOG.debug("------- Starting Snapshot test -------------");
295     HBaseAdmin admin = UTIL.getHBaseAdmin();
296     // make sure we don't fail on listing snapshots
297     SnapshotTestingUtils.assertNoSnapshots(admin);
298     // load the table so we have some data
299     UTIL.loadTable(new HTable(UTIL.getConfiguration(), TABLE_NAME), TEST_FAM);
300     // and wait until everything stabilizes
301     waitForTableToBeOnline(TABLE_NAME);
302 
303     String snapshotName = "flushSnapshotCreateListDestroy";
304     // test creating the snapshot
305     admin.snapshot(snapshotName, STRING_TABLE_NAME, SnapshotDescription.Type.FLUSH);
306     logFSTree(new Path(UTIL.getConfiguration().get(HConstants.HBASE_DIR)));
307 
308     // make sure we only have 1 matching snapshot
309     List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin,
310       snapshotName, STRING_TABLE_NAME);
311 
312     // check the directory structure
313     FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
314     Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
315     Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshots.get(0), rootDir);
316     assertTrue(fs.exists(snapshotDir));
317     FSUtils.logFileSystemState(UTIL.getTestFileSystem(), snapshotDir, LOG);
318     Path snapshotinfo = new Path(snapshotDir, SnapshotDescriptionUtils.SNAPSHOTINFO_FILE);
319     assertTrue(fs.exists(snapshotinfo));
320 
321     // check the table info
322     HTableDescriptor desc = FSTableDescriptors.getTableDescriptor(fs, rootDir, TABLE_NAME);
323     HTableDescriptor snapshotDesc = FSTableDescriptors.getTableDescriptor(fs,
324       SnapshotDescriptionUtils.getSnapshotsDir(rootDir), Bytes.toBytes(snapshotName));
325     assertEquals(desc, snapshotDesc);
326 
327     // check the region snapshot for all the regions
328     List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME);
329     for (HRegionInfo info : regions) {
330       String regionName = info.getEncodedName();
331       Path regionDir = new Path(snapshotDir, regionName);
332       HRegionInfo snapshotRegionInfo = HRegion.loadDotRegionInfoFileContent(fs, regionDir);
333       assertEquals(info, snapshotRegionInfo);
334       // check to make sure we have the family
335       Path familyDir = new Path(regionDir, Bytes.toString(TEST_FAM));
336       assertTrue(fs.exists(familyDir));
337       // make sure we have some file references
338       assertTrue(fs.listStatus(familyDir).length > 0);
339     }
340 
341     // test that we can delete the snapshot
342     admin.deleteSnapshot(snapshotName);
343     FSUtils.logFileSystemState(UTIL.getTestFileSystem(),
344       FSUtils.getRootDir(UTIL.getConfiguration()), LOG);
345 
346     // make sure we don't have any snapshots
347     SnapshotTestingUtils.assertNoSnapshots(admin);
348     LOG.debug("------- Flush-Snapshot Create List Destroy-------------");
349   }
350 
351   /**
352    * Demonstrate that we reject snapshot requests if there is a snapshot already running on the
353    * same table currently running and that concurrent snapshots on different tables can both
354    * succeed concurretly.
355    */
356   @Test(timeout=60000)
357   public void testConcurrentSnapshottingAttempts() throws IOException, InterruptedException {
358     final String STRING_TABLE2_NAME = STRING_TABLE_NAME + "2";
359     final byte[] TABLE2_NAME = Bytes.toBytes(STRING_TABLE2_NAME);
360 
361     int ssNum = 20;
362     HBaseAdmin admin = UTIL.getHBaseAdmin();
363     // make sure we don't fail on listing snapshots
364     SnapshotTestingUtils.assertNoSnapshots(admin);
365     // create second testing table
366     UTIL.createTable(TABLE2_NAME, TEST_FAM);
367     // load the table so we have some data
368     UTIL.loadTable(new HTable(UTIL.getConfiguration(), TABLE_NAME), TEST_FAM);
369     UTIL.loadTable(new HTable(UTIL.getConfiguration(), TABLE2_NAME), TEST_FAM);
370     // and wait until everything stabilizes
371     waitForTableToBeOnline(TABLE_NAME);
372     waitForTableToBeOnline(TABLE2_NAME);
373 
374     final CountDownLatch toBeSubmitted = new CountDownLatch(ssNum);
375     // We'll have one of these per thread
376     class SSRunnable implements Runnable {
377       SnapshotDescription ss;
378       SSRunnable(SnapshotDescription ss) {
379         this.ss = ss;
380       }
381 
382       @Override
383       public void run() {
384         try {
385           HBaseAdmin admin = UTIL.getHBaseAdmin();
386           LOG.info("Submitting snapshot request: " + SnapshotDescriptionUtils.toString(ss));
387           admin.takeSnapshotAsync(ss);
388         } catch (Exception e) {
389           LOG.info("Exception during snapshot request: " + SnapshotDescriptionUtils.toString(ss)
390               + ".  This is ok, we expect some", e);
391         }
392         LOG.info("Submitted snapshot request: " + SnapshotDescriptionUtils.toString(ss));
393         toBeSubmitted.countDown();
394       }
395     };
396 
397     // build descriptions
398     SnapshotDescription[] descs = new SnapshotDescription[ssNum];
399     for (int i = 0; i < ssNum; i++) {
400       SnapshotDescription.Builder builder = SnapshotDescription.newBuilder();
401       builder.setTable((i % 2) == 0 ? STRING_TABLE_NAME : STRING_TABLE2_NAME);
402       builder.setName("ss"+i);
403       builder.setType(SnapshotDescription.Type.FLUSH);
404       descs[i] = builder.build();
405     }
406 
407     // kick each off its own thread
408     for (int i=0 ; i < ssNum; i++) {
409       new Thread(new SSRunnable(descs[i])).start();
410     }
411 
412     // wait until all have been submitted
413     toBeSubmitted.await();
414 
415     // loop until all are done.
416     while (true) {
417       int doneCount = 0;
418       for (SnapshotDescription ss : descs) {
419         try {
420           if (admin.isSnapshotFinished(ss)) {
421             doneCount++;
422           }
423         } catch (Exception e) {
424           LOG.warn("Got an exception when checking for snapshot " + ss.getName(), e);
425           doneCount++;
426         }
427       }
428       if (doneCount == descs.length) {
429         break;
430       }
431       Thread.sleep(100);
432     }
433 
434     // dump for debugging
435     logFSTree(new Path(UTIL.getConfiguration().get(HConstants.HBASE_DIR)));
436 
437     List<SnapshotDescription> taken = admin.listSnapshots();
438     int takenSize = taken.size();
439     LOG.info("Taken " + takenSize + " snapshots:  " + taken);
440     assertTrue("We expect at least 1 request to be rejected because of we concurrently" +
441         " issued many requests", takenSize < ssNum && takenSize > 0);
442 
443     // Verify that there's at least one snapshot per table
444     int t1SnapshotsCount = 0;
445     int t2SnapshotsCount = 0;
446     for (SnapshotDescription ss : taken) {
447       if (ss.getTable().equals(STRING_TABLE_NAME)) {
448         t1SnapshotsCount++;
449       } else if (ss.getTable().equals(STRING_TABLE2_NAME)) {
450         t2SnapshotsCount++;
451       }
452     }
453     assertTrue("We expect at least 1 snapshot of table1 ", t1SnapshotsCount > 0);
454     assertTrue("We expect at least 1 snapshot of table2 ", t2SnapshotsCount > 0);
455 
456     // delete snapshots so subsequent tests are clean.
457     for (SnapshotDescription ss : taken) {
458       admin.deleteSnapshot(ss.getName());
459     }
460     UTIL.deleteTable(TABLE2_NAME);
461   }
462 
463   private void logFSTree(Path root) throws IOException {
464     FSUtils.logFileSystemState(UTIL.getDFSCluster().getFileSystem(), root, LOG);
465   }
466 
467   private void waitForTableToBeOnline(final byte[] tableName) throws IOException {
468     HRegionServer rs = UTIL.getRSForFirstRegionInTable(tableName);
469     List<HRegion> onlineRegions = rs.getOnlineRegions(tableName);
470     for (HRegion region : onlineRegions) {
471       region.waitForFlushesAndCompactions();
472     }
473   }
474 }