/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.mapreduce;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.TableSnapshotRegionSplit;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import com.google.common.collect.Lists;

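/**
 * Tests for {@link TableSnapshotInputFormat}: best-location selection for splits,
 * reading snapshot data through mocked MapReduce plumbing, and running full MapReduce
 * jobs over a snapshot, both with HBase online and with the cluster shut down.
 *
 * A client sets up a snapshot-backed job the same way these tests do. A minimal sketch
 * (the mapper, key, and value classes here are placeholders, not part of this test):
 *
 * <pre>{@code
 * Job job = new Job(conf);
 * Scan scan = new Scan(startRow, stopRow);
 * TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan,
 *     MyMapper.class, MyMapOutputKey.class, MyMapOutputValue.class, job,
 *     true, restoreDir); // restoreDir: temp dir the snapshot is restored into
 * }</pre>
 */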
@Category(LargeTests.class)
public class TestTableSnapshotInputFormat {

  private static final Log LOG = LogFactory.getLog(TestTableSnapshotInputFormat.class);
  private final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  private static final int NUM_REGION_SERVERS = 2;
  private static final String TABLE_NAME_STR = "TestTableSnapshotInputFormat";
  private static final byte[][] FAMILIES = {Bytes.toBytes("f1"), Bytes.toBytes("f2")};
  private static final TableName TABLE_NAME = TableName.valueOf(TABLE_NAME_STR);
  public static byte[] bbb = Bytes.toBytes("bbb");
  public static byte[] yyy = Bytes.toBytes("yyy");

  private FileSystem fs;
  private Path rootDir;

  public void setupCluster() throws Exception {
    setupConf(UTIL.getConfiguration());
    UTIL.startMiniCluster(NUM_REGION_SERVERS);
    rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
    fs = rootDir.getFileSystem(UTIL.getConfiguration());
  }

  public void tearDownCluster() throws Exception {
    UTIL.shutdownMiniCluster();
  }

  private static void setupConf(Configuration conf) {
    // Enable snapshot
    conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
  }

  @After
  public void tearDown() throws Exception {
  }

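  /**
   * Exercises the host-selection logic behind split locations: hosts come back in
   * descending order of cumulative block weight, and hosts whose weight falls too far
   * below the top host's weight are dropped. The assertions below show that, with the
   * default configuration, a host at 70% of the top weight is excluded while one at
   * 90% is kept (consistent with an assumed locality cutoff multiplier of 0.8).
   */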
  @Test
  public void testGetBestLocations() throws IOException {
    TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
    Configuration conf = UTIL.getConfiguration();

    HDFSBlocksDistribution blockDistribution = new HDFSBlocksDistribution();
    Assert.assertEquals(Lists.newArrayList(), tsif.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"), tsif.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"), tsif.getBestLocations(conf, blockDistribution));

    // h2 (weight 1) is far below h1 (weight 2), so only h1 is returned
    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"), tsif.getBestLocations(conf, blockDistribution));

    blockDistribution = new HDFSBlocksDistribution();
    blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 10);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 7);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 5);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"), tsif.getBestLocations(conf, blockDistribution));

    // weights are now h1=10, h2=9; both make the cut
    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 2);
    Assert.assertEquals(Lists.newArrayList("h1", "h2"),
      tsif.getBestLocations(conf, blockDistribution));

    // h2 (weight 12) overtakes h1 (weight 10)
    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 3);
    Assert.assertEquals(Lists.newArrayList("h2", "h1"),
      tsif.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 6);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 9);

    // ordered by descending weight: h2=12, h3=11, h4=10, h1=10
    Assert.assertEquals(Lists.newArrayList("h2", "h3", "h4", "h1"),
      tsif.getBestLocations(conf, blockDistribution));
  }

  public enum TestTableSnapshotCounters {
    VALIDATION_ERROR
  }

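  /**
   * Mapper that checks each row read from the snapshot against its expected contents
   * (see {@link #verifyRowFromMap(ImmutableBytesWritable, Result)}) and emits the row key.
   */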
  public static class TestTableSnapshotMapper
    extends TableMapper<ImmutableBytesWritable, NullWritable> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value,
        Context context) throws IOException, InterruptedException {
      // Validate a single row coming from the snapshot, and emit the row key
      verifyRowFromMap(key, value);
      context.write(key, NullWritable.get());
    }
  }

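  /**
   * Reducer that records every row key it receives and, in cleanup, asserts that the
   * whole bbb..yyy key range was seen.
   */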
  public static class TestTableSnapshotReducer
    extends Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
    HBaseTestingUtility.SeenRowTracker rowTracker =
      new HBaseTestingUtility.SeenRowTracker(bbb, yyy);

    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<NullWritable> values,
        Context context) throws IOException, InterruptedException {
      rowTracker.addRow(key.get());
    }

    @Override
    protected void cleanup(Context context) throws IOException,
        InterruptedException {
      rowTracker.validate();
    }
  }

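  /**
   * Creates {@code tableName} with rows spanning bbb..yyy, snapshots it as
   * {@code snapshotName}, then overwrites the live table with different values and
   * flushes. A reader of the snapshot must therefore see the original values, not the
   * post-snapshot ones.
   */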
  public static void createTableAndSnapshot(HBaseTestingUtility util, TableName tableName,
      String snapshotName, int numRegions)
      throws Exception {
    try {
      util.deleteTable(tableName);
    } catch (Exception ex) {
      // ignore if the table does not exist yet
    }

    if (numRegions > 1) {
      util.createTable(tableName, FAMILIES, 1, bbb, yyy, numRegions);
    } else {
      util.createTable(tableName, FAMILIES);
    }
    HBaseAdmin admin = util.getHBaseAdmin();

    // put some data in the table
    HTable table = new HTable(util.getConfiguration(), tableName);
    util.loadTable(table, FAMILIES);

    Path rootDir = new Path(util.getConfiguration().get(HConstants.HBASE_DIR));
    FileSystem fs = rootDir.getFileSystem(util.getConfiguration());

    SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName,
        Arrays.asList(FAMILIES), null, snapshotName, rootDir, fs, true);

    // load different values so the live table diverges from the snapshot
    byte[] value = Bytes.toBytes("after_snapshot_value");
    util.loadTable(table, FAMILIES, value);

    // cause a flush to create new files in the region
    admin.flush(tableName.toString());
    table.close();
  }

  @Test
  public void testWithMockedMapReduceSingleRegion() throws Exception {
    testWithMockedMapReduce(UTIL, "testWithMockedMapReduceSingleRegion", 1, 1);
  }

  @Test
  public void testWithMockedMapReduceMultiRegion() throws Exception {
    testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 8);
  }

  public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
      int numRegions, int expectedNumSplits) throws Exception {
    setupCluster();
    TableName tableName = TableName.valueOf("testWithMockedMapReduce");
    try {
      createTableAndSnapshot(util, tableName, snapshotName, numRegions);

      Job job = new Job(util.getConfiguration());
      Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);
      Scan scan = new Scan(bbb, yyy); // limit the scan

      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
          scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
          NullWritable.class, job, false, tmpTableDir);

      verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, bbb, yyy);

    } finally {
      util.getHBaseAdmin().deleteSnapshot(snapshotName);
      util.deleteTable(tableName);
      tearDownCluster();
    }
  }

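  /**
   * Drives the input format by hand with a mocked {@link TaskAttemptContext}: checks
   * the split count, runs a record reader over each split, and verifies that every row
   * in startRow..stopRow comes back exactly once.
   */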
  private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumSplits,
      byte[] startRow, byte[] stopRow)
      throws IOException, InterruptedException {
    TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
    List<InputSplit> splits = tsif.getSplits(job);

    Assert.assertEquals(expectedNumSplits, splits.size());

    HBaseTestingUtility.SeenRowTracker rowTracker =
      new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);

    for (int i = 0; i < splits.size(); i++) {
      // validate input split
      InputSplit split = splits.get(i);
      Assert.assertTrue(split instanceof TableSnapshotRegionSplit);

      // validate record reader
      TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
      when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
      RecordReader<ImmutableBytesWritable, Result> rr =
        tsif.createRecordReader(split, taskAttemptContext);
      rr.initialize(split, taskAttemptContext);

      // validate we can read all the data back
      while (rr.nextKeyValue()) {
        byte[] row = rr.getCurrentKey().get();
        verifyRowFromMap(rr.getCurrentKey(), rr.getCurrentValue());
        rowTracker.addRow(row);
      }

      rr.close();
    }

    // validate all rows are seen
    rowTracker.validate();
  }

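  /**
   * Asserts that every cell in {@code result} belongs to {@code key}'s row and that each
   * family's value equals the row key, i.e. the data loaded before the snapshot was taken.
   */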
  public static void verifyRowFromMap(ImmutableBytesWritable key, Result result)
      throws IOException {
    byte[] row = key.get();
    CellScanner scanner = result.cellScanner();
    while (scanner.advance()) {
      Cell cell = scanner.current();

      // assert that all Cells in the Result have the same key
      Assert.assertEquals(0, Bytes.compareTo(row, 0, row.length,
          cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
    }

    for (int j = 0; j < FAMILIES.length; j++) {
      byte[] actual = result.getValue(FAMILIES[j], null);
      Assert.assertArrayEquals("Row in snapshot does not match, expected: "
          + Bytes.toString(row) + ", actual: " + Bytes.toString(actual), row, actual);
    }
  }

  @Test
  public void testWithMapReduceSingleRegion() throws Exception {
    testWithMapReduce(UTIL, "testWithMapReduceSingleRegion", 1, 1, false);
  }

  @Test
  public void testWithMapReduceMultiRegion() throws Exception {
    testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 8, false);
  }

  // run the MR job while HBase is offline
  @Test
  public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception {
    testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 8, true);
  }

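  /**
   * Runs a real MapReduce job over the snapshot. When {@code shutdownCluster} is true,
   * the HBase cluster is stopped after the snapshot is taken, showing that the job reads
   * the snapshot files directly from the filesystem without any region servers.
   */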
  private void testWithMapReduce(HBaseTestingUtility util, String snapshotName,
      int numRegions, int expectedNumSplits, boolean shutdownCluster) throws Exception {
    setupCluster();
    util.startMiniMapReduceCluster();
    try {
      Path tableDir = util.getDataTestDirOnTestFS(snapshotName);
      TableName tableName = TableName.valueOf("testWithMapReduce");
      doTestWithMapReduce(util, tableName, snapshotName, tableDir, numRegions,
        expectedNumSplits, shutdownCluster);
    } finally {
      util.shutdownMiniMapReduceCluster();
      tearDownCluster();
    }
  }

  // this is also called by the IntegrationTestTableSnapshotInputFormat
  public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
      String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
      boolean shutdownCluster) throws Exception {

    // create the table and snapshot
    createTableAndSnapshot(util, tableName, snapshotName, numRegions);

    if (shutdownCluster) {
      util.shutdownMiniHBaseCluster();
    }

    try {
      // create the job
      Job job = new Job(util.getConfiguration());
      Scan scan = new Scan(bbb, yyy); // limit the scan

      job.setJarByClass(util.getClass());
      TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
        TestTableSnapshotInputFormat.class);

      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
        scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, job, true, tableDir);

      job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
      job.setNumReduceTasks(1);
      job.setOutputFormatClass(NullOutputFormat.class);

      Assert.assertTrue(job.waitForCompletion(true));
    } finally {
      if (!shutdownCluster) {
        util.getHBaseAdmin().deleteSnapshot(snapshotName);
        util.deleteTable(tableName);
      }
    }
  }
}
354 }