/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.mapreduce;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.LruBlockCache;
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.TableSnapshotRegionSplit;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.junit.After;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import com.google.common.collect.Lists;

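/**
 * Tests for {@link TableSnapshotInputFormat}: split generation and record reader behavior
 * against a mocked MapReduce context, end-to-end MapReduce jobs over a table snapshot
 * (including with HBase offline), and the host ranking done by getBestLocations().
 */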
@Category(LargeTests.class)
public class TestTableSnapshotInputFormat {

  private static final Log LOG = LogFactory.getLog(TestTableSnapshotInputFormat.class);
  private final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  private static final int NUM_REGION_SERVERS = 2;
  private static final String TABLE_NAME_STR = "TestTableSnapshotInputFormat";
  private static final byte[][] FAMILIES = {Bytes.toBytes("f1"), Bytes.toBytes("f2")};
  private static final TableName TABLE_NAME = TableName.valueOf(TABLE_NAME_STR);
  public static byte[] bbb = Bytes.toBytes("bbb");
  public static byte[] yyy = Bytes.toBytes("yyy");

  private FileSystem fs;
  private Path rootDir;

  public void setupCluster() throws Exception {
    setupConf(UTIL.getConfiguration());
    UTIL.startMiniCluster(NUM_REGION_SERVERS);
    rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
    fs = rootDir.getFileSystem(UTIL.getConfiguration());
  }

  public void tearDownCluster() throws Exception {
    UTIL.shutdownMiniCluster();
  }

  private static void setupConf(Configuration conf) {
    // Enable snapshot
    conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
  }

  @After
  public void tearDown() throws Exception {
  }

  @Test
  public void testGetBestLocations() throws IOException {
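    // getBestLocations() ranks hosts by aggregate block weight and drops hosts whose weight
    // falls too far below that of the top host, so the assertions below verify both which
    // hosts are returned and in what order.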
    TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
    Configuration conf = UTIL.getConfiguration();

    HDFSBlocksDistribution blockDistribution = new HDFSBlocksDistribution();
    Assert.assertEquals(Lists.newArrayList(), tsif.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"), tsif.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"), tsif.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"), tsif.getBestLocations(conf, blockDistribution));

    blockDistribution = new HDFSBlocksDistribution();
    blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 10);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 7);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 5);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"), tsif.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 2);
    Assert.assertEquals(Lists.newArrayList("h1", "h2"),
      tsif.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 3);
    Assert.assertEquals(Lists.newArrayList("h2", "h1"),
      tsif.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 6);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 9);

    Assert.assertEquals(Lists.newArrayList("h2", "h3", "h4", "h1"),
      tsif.getBestLocations(conf, blockDistribution));
  }

  public static enum TestTableSnapshotCounters {
    VALIDATION_ERROR
  }

  public static class TestTableSnapshotMapper
    extends TableMapper<ImmutableBytesWritable, NullWritable> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value,
        Context context) throws IOException, InterruptedException {
      // Validate a single row coming from the snapshot, and emit the row key
      verifyRowFromMap(key, value);
      context.write(key, NullWritable.get());
    }
  }

  public static class TestTableSnapshotReducer
    extends Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
    HBaseTestingUtility.SeenRowTracker rowTracker =
        new HBaseTestingUtility.SeenRowTracker(bbb, yyy);
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<NullWritable> values,
        Context context) throws IOException, InterruptedException {
      rowTracker.addRow(key.get());
    }

    @Override
    protected void cleanup(Context context) throws IOException,
        InterruptedException {
      rowTracker.validate();
    }
  }

  public static void createTableAndSnapshot(HBaseTestingUtility util, TableName tableName,
      String snapshotName, int numRegions)
      throws Exception {
    try {
      util.deleteTable(tableName);
    } catch (Exception ex) {
      // ignore
    }

    if (numRegions > 1) {
      util.createTable(tableName, FAMILIES, 1, bbb, yyy, numRegions);
    } else {
      util.createTable(tableName, FAMILIES);
    }
    HBaseAdmin admin = util.getHBaseAdmin();

    // put some stuff in the table
    HTable table = new HTable(util.getConfiguration(), tableName);
    util.loadTable(table, FAMILIES);

    Path rootDir = new Path(util.getConfiguration().get(HConstants.HBASE_DIR));
    FileSystem fs = rootDir.getFileSystem(util.getConfiguration());

    SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName,
        Arrays.asList(FAMILIES), null, snapshotName, rootDir, fs, true);

    // load different values so the live table no longer matches the snapshot contents
    byte[] value = Bytes.toBytes("after_snapshot_value");
    util.loadTable(table, FAMILIES, value);

    // cause flush to create new files in the region
    admin.flush(tableName.toString());
    table.close();
  }

  @Test
  public void testInitTableSnapshotMapperJobConfig() throws Exception {
    setupCluster();
    TableName tableName = TableName.valueOf("testInitTableSnapshotMapperJobConfig");
    String snapshotName = "foo";

    try {
      createTableAndSnapshot(UTIL, tableName, snapshotName, 1);
      Job job = new Job(UTIL.getConfiguration());
      Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);

      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
        new Scan(), TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, job, false, tmpTableDir);

      // TODO: would be better to examine directly the cache instance that results from this
      // config. Currently this is not possible because BlockCache initialization is static.
      Assert.assertEquals(
        "Snapshot job should be configured for default LruBlockCache.",
        HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT,
        job.getConfiguration().getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01);
      Assert.assertEquals(
        "Snapshot job should not use SlabCache.",
        0, job.getConfiguration().getFloat("hbase.offheapcache.percentage", -1), 0.01);
      Assert.assertEquals(
        "Snapshot job should not use BucketCache.",
        0, job.getConfiguration().getFloat("hbase.bucketcache.size", -1), 0.01);
    } finally {
      UTIL.getHBaseAdmin().deleteSnapshot(snapshotName);
      UTIL.deleteTable(tableName);
      tearDownCluster();
    }
  }

  @Test
  public void testWithMockedMapReduceSingleRegion() throws Exception {
    testWithMockedMapReduce(UTIL, "testWithMockedMapReduceSingleRegion", 1, 1);
  }

  @Test
  public void testWithMockedMapReduceMultiRegion() throws Exception {
    testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 8);
  }

  public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
      int numRegions, int expectedNumSplits)
      throws Exception {
    setupCluster();
    TableName tableName = TableName.valueOf("testWithMockedMapReduce");
    try {
      createTableAndSnapshot(util, tableName, snapshotName, numRegions);

      Job job = new Job(util.getConfiguration());
      Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);
      Scan scan = new Scan(bbb, yyy); // limit the scan

      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
          scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
          NullWritable.class, job, false, tmpTableDir);

      verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, bbb, yyy);

    } finally {
      util.getHBaseAdmin().deleteSnapshot(snapshotName);
      util.deleteTable(tableName);
      tearDownCluster();
    }
  }

  private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumSplits,
      byte[] startRow, byte[] stopRow)
      throws IOException, InterruptedException {
    TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
    List<InputSplit> splits = tsif.getSplits(job);

    Assert.assertEquals(expectedNumSplits, splits.size());

    HBaseTestingUtility.SeenRowTracker rowTracker =
        new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);

    for (int i = 0; i < splits.size(); i++) {
      // validate input split
      InputSplit split = splits.get(i);
      Assert.assertTrue(split instanceof TableSnapshotRegionSplit);

      // validate record reader
      TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
      when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
      RecordReader<ImmutableBytesWritable, Result> rr =
          tsif.createRecordReader(split, taskAttemptContext);
      rr.initialize(split, taskAttemptContext);

      // validate we can read all the data back
      while (rr.nextKeyValue()) {
        byte[] row = rr.getCurrentKey().get();
        verifyRowFromMap(rr.getCurrentKey(), rr.getCurrentValue());
        rowTracker.addRow(row);
      }

      rr.close();
    }

    // validate all rows are seen
    rowTracker.validate();
  }

  public static void verifyRowFromMap(ImmutableBytesWritable key, Result result)
      throws IOException {
    byte[] row = key.get();
    CellScanner scanner = result.cellScanner();
    while (scanner.advance()) {
      Cell cell = scanner.current();

      // assert that all Cells in the Result have the same row key as the map key
      Assert.assertEquals(0, Bytes.compareTo(row, 0, row.length,
          cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
    }

    for (int j = 0; j < FAMILIES.length; j++) {
      byte[] actual = result.getValue(FAMILIES[j], null);
      Assert.assertArrayEquals("Row in snapshot does not match, expected: " + Bytes.toString(row)
          + ", actual: " + Bytes.toString(actual), row, actual);
    }
  }

  @Test
  public void testWithMapReduceSingleRegion() throws Exception {
    testWithMapReduce(UTIL, "testWithMapReduceSingleRegion", 1, 1, false);
  }

  @Test
  public void testWithMapReduceMultiRegion() throws Exception {
    testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 8, false);
  }

  @Test
  // run the MR job while HBase is offline
  public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception {
    testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 8, true);
  }

  private void testWithMapReduce(HBaseTestingUtility util, String snapshotName,
      int numRegions, int expectedNumSplits, boolean shutdownCluster) throws Exception {
    setupCluster();
    util.startMiniMapReduceCluster();
    try {
      Path tableDir = util.getDataTestDirOnTestFS(snapshotName);
      TableName tableName = TableName.valueOf("testWithMapReduce");
      doTestWithMapReduce(util, tableName, snapshotName, tableDir, numRegions,
        expectedNumSplits, shutdownCluster);
    } finally {
      util.shutdownMiniMapReduceCluster();
      tearDownCluster();
    }
  }

  // this is also called by the IntegrationTestTableSnapshotInputFormat
  public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
      String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
      boolean shutdownCluster) throws Exception {

    // create the table and snapshot
    createTableAndSnapshot(util, tableName, snapshotName, numRegions);

    if (shutdownCluster) {
      util.shutdownMiniHBaseCluster();
    }
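    // With the HBase cluster down, the job can only succeed if the snapshot is read directly
    // from the files on the filesystem, without going through region servers, which is what
    // TableSnapshotInputFormat is designed to do.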

    try {
      // create the job
      Job job = new Job(util.getConfiguration());
      Scan scan = new Scan(bbb, yyy); // limit the scan

      job.setJarByClass(util.getClass());
      TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
        TestTableSnapshotInputFormat.class);

      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
        scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, job, true, tableDir);

      job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
      job.setNumReduceTasks(1);
      job.setOutputFormatClass(NullOutputFormat.class);

      Assert.assertTrue(job.waitForCompletion(true));
    } finally {
      if (!shutdownCluster) {
        util.getHBaseAdmin().deleteSnapshot(snapshotName);
        util.deleteTable(tableName);
      }
    }
  }
}