/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.mapreduce;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.TableSnapshotRegionSplit;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;

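/**
 * Tests for {@link TableSnapshotInputFormat}: verifies that snapshot-backed
 * scans produce the expected splits and rows, both by driving the input
 * format directly with a mocked task context and by running full MapReduce
 * jobs, including against an offline HBase cluster.
 */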
@Category(LargeTests.class)
public class TestTableSnapshotInputFormat {

  private final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  private static final int NUM_REGION_SERVERS = 2;
  private static final byte[][] FAMILIES = {Bytes.toBytes("f1"), Bytes.toBytes("f2")};
  // row range [bbb, yyy) used to bound the scans and to validate row coverage
  public static byte[] bbb = Bytes.toBytes("bbb");
  public static byte[] yyy = Bytes.toBytes("yyy");

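  /**
   * Starts a mini cluster with {@link #NUM_REGION_SERVERS} region servers and
   * snapshot support enabled.
   */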
  public void setupCluster() throws Exception {
    setupConf(UTIL.getConfiguration());
    UTIL.startMiniCluster(NUM_REGION_SERVERS);
  }

  public void tearDownCluster() throws Exception {
    UTIL.shutdownMiniCluster();
  }

  private static void setupConf(Configuration conf) {
    // Enable snapshot support
    conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
  }

  @After
  public void tearDown() throws Exception {
    // nothing to do here; each test manages its own cluster via
    // setupCluster()/tearDownCluster()
  }

  public enum TestTableSnapshotCounters {
    VALIDATION_ERROR
  }

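  /**
   * Mapper that validates each row read from the snapshot and emits its row
   * key, so the reducer can check that the full key range was covered.
   */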
  public static class TestTableSnapshotMapper
    extends TableMapper<ImmutableBytesWritable, NullWritable> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value,
        Context context) throws IOException, InterruptedException {
      // Validate a single row coming from the snapshot, and emit the row key
      verifyRowFromMap(key, value);
      context.write(key, NullWritable.get());
    }
  }

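  /**
   * Reducer that records every row key it receives and, in cleanup, asserts
   * that the whole range [bbb, yyy) was seen.
   */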
  public static class TestTableSnapshotReducer
    extends Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
    HBaseTestingUtility.SeenRowTracker rowTracker =
        new HBaseTestingUtility.SeenRowTracker(bbb, yyy);

    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<NullWritable> values,
       Context context) throws IOException, InterruptedException {
      rowTracker.addRow(key.get());
    }

    @Override
    protected void cleanup(Context context) throws IOException,
        InterruptedException {
      rowTracker.validate();
    }
  }

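  /**
   * Creates a table, loads it with data, snapshots it, then loads a second
   * set of values and flushes, so that the snapshot and the live table
   * diverge.
   */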
  public static void createTableAndSnapshot(HBaseTestingUtility util, byte[] tableName,
      String snapshotName, int numRegions)
      throws Exception {
    try {
      util.deleteTable(tableName);
    } catch (Exception ex) {
      // ignore if the table does not exist
    }

    if (numRegions > 1) {
      util.createTable(tableName, FAMILIES, 1, bbb, yyy, numRegions);
    } else {
      util.createTable(tableName, FAMILIES);
    }
    HBaseAdmin admin = util.getHBaseAdmin();

    // load some data into the table
    HTable table = new HTable(util.getConfiguration(), tableName);
    util.loadTable(table, FAMILIES);

    Path rootDir = new Path(util.getConfiguration().get(HConstants.HBASE_DIR));
    FileSystem fs = rootDir.getFileSystem(util.getConfiguration());

    SnapshotTestingUtils.createSnapshotAndValidate(admin, Bytes.toString(tableName),
        Arrays.asList(FAMILIES), null, snapshotName, rootDir, fs, true);

    // load different values so the live table no longer matches the snapshot
    byte[] value = Bytes.toBytes("after_snapshot_value");
    util.loadTable(table, FAMILIES, value);

    // cause a flush to create new files in the region
    admin.flush(tableName);
    table.close();
  }

  @Test
  public void testWithMockedMapReduceSingleRegion() throws Exception {
    testWithMockedMapReduce(UTIL, "testWithMockedMapReduceSingleRegion", 1, 1);
  }

  @Test
  public void testWithMockedMapReduceMultiRegion() throws Exception {
    testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 8);
  }

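  /**
   * Sets up a cluster, creates a table and snapshot, then exercises the input
   * format directly (with a mocked TaskAttemptContext) instead of running a
   * full MapReduce job.
   */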
  public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
      int numRegions, int expectedNumSplits) throws Exception {
    setupCluster();
    byte[] tableName = Bytes.toBytes("testWithMockedMapReduce");
    try {
      createTableAndSnapshot(util, tableName, snapshotName, numRegions);

      Job job = new Job(util.getConfiguration());
      Path tmpTableDir = util.getDataTestDir(snapshotName);
      Scan scan = new Scan(bbb, yyy); // limit the scan

      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
          scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
          NullWritable.class, job, false, tmpTableDir);

      verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, bbb, yyy);
    } finally {
      util.getHBaseAdmin().deleteSnapshot(snapshotName);
      util.deleteTable(tableName);
      tearDownCluster();
    }
  }

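  /**
   * Checks the number of splits, then reads every split through the record
   * reader and verifies that, together, they cover [startRow, stopRow).
   */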
  private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumSplits,
      byte[] startRow, byte[] stopRow)
      throws IOException, InterruptedException {
    TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
    List<InputSplit> splits = tsif.getSplits(job);

    Assert.assertEquals(expectedNumSplits, splits.size());

    HBaseTestingUtility.SeenRowTracker rowTracker =
        new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);

    for (int i = 0; i < splits.size(); i++) {
      // validate input split
      InputSplit split = splits.get(i);
      Assert.assertTrue(split instanceof TableSnapshotRegionSplit);

      // validate record reader
      TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
      when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
      RecordReader<ImmutableBytesWritable, Result> rr =
          tsif.createRecordReader(split, taskAttemptContext);
      rr.initialize(split, taskAttemptContext);

      // validate we can read all the data back
      while (rr.nextKeyValue()) {
        byte[] row = rr.getCurrentKey().get();
        verifyRowFromMap(rr.getCurrentKey(), rr.getCurrentValue());
        rowTracker.addRow(row);
      }

      rr.close();
    }

    // validate that all rows are seen
    rowTracker.validate();
  }

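  /**
   * Asserts that every cell in the result belongs to the given row, and that
   * each family's value equals the row key (the value written by loadTable).
   */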
  public static void verifyRowFromMap(ImmutableBytesWritable key, Result result)
      throws IOException {
    byte[] row = key.get();
    for (KeyValue kv : result.list()) {
      // assert that all cells in the Result have the same row key
      Assert.assertEquals(0, Bytes.compareTo(row, 0, row.length,
          kv.getBuffer(), kv.getRowOffset(), kv.getRowLength()));
    }

    for (int j = 0; j < FAMILIES.length; j++) {
      byte[] actual = result.getValue(FAMILIES[j], null);
      Assert.assertArrayEquals("Row in snapshot does not match, expected: " + Bytes.toString(row)
          + ", actual: " + Bytes.toString(actual), row, actual);
    }
  }

  @Test
  public void testWithMapReduceSingleRegion() throws Exception {
    testWithMapReduce(UTIL, "testWithMapReduceSingleRegion", 1, 1, false);
  }

  @Test
  public void testWithMapReduceMultiRegion() throws Exception {
    testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 8, false);
  }

  // run the MR job while HBase is offline
  @Test
  public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception {
    testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 8, true);
  }

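  /**
   * Runs the snapshot job on a mini MapReduce cluster; when shutdownCluster is
   * true, HBase is stopped before the job runs, so the job must read from the
   * snapshot files alone.
   */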
  private void testWithMapReduce(HBaseTestingUtility util, String snapshotName,
      int numRegions, int expectedNumSplits, boolean shutdownCluster) throws Exception {
    setupCluster();
    util.startMiniMapReduceCluster();
    try {
      Path tableDir = util.getDataTestDir(snapshotName);
      byte[] tableName = Bytes.toBytes("testWithMapReduce");
      doTestWithMapReduce(util, tableName, snapshotName, tableDir, numRegions,
          expectedNumSplits, shutdownCluster);
    } finally {
      util.shutdownMiniMapReduceCluster();
      tearDownCluster();
    }
  }

  // this is also called by IntegrationTestTableSnapshotInputFormat
  public static void doTestWithMapReduce(HBaseTestingUtility util, byte[] tableName,
      String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
      boolean shutdownCluster) throws Exception {
    // create the table and snapshot
    createTableAndSnapshot(util, tableName, snapshotName, numRegions);

    if (shutdownCluster) {
      util.shutdownMiniHBaseCluster();
    }

    try {
      // create the job
      Job job = new Job(util.getConfiguration());
      Scan scan = new Scan(bbb, yyy); // limit the scan

      job.setJarByClass(util.getClass());
      TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
          TestTableSnapshotInputFormat.class);

      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
          scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
          NullWritable.class, job, true, tableDir);

      job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
      job.setNumReduceTasks(1);
      job.setOutputFormatClass(NullOutputFormat.class);

      Assert.assertTrue(job.waitForCompletion(true));
    } finally {
      if (!shutdownCluster) {
        util.getHBaseAdmin().deleteSnapshot(snapshotName);
        util.deleteTable(tableName);
      }
    }
  }
}