/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.mapred;

import static org.mockito.Mockito.mock;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatTestBase;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.lib.NullOutputFormat;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;

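/**
 * Tests {@link TableSnapshotInputFormat} for the legacy org.apache.hadoop.hbase.mapred API:
 * splits are derived from the snapshot's regions and rows are read directly from the snapshot
 * files on the filesystem, without going through the region servers.
 */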
@Category(LargeTests.class)
public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBase {

  private static final byte[] aaa = Bytes.toBytes("aaa");
  private static final byte[] after_zzz = Bytes.toBytes("zz{"); // 'z' + 1 => '{'
  private static final String COLUMNS =
    Bytes.toString(FAMILIES[0]) + " " + Bytes.toString(FAMILIES[1]);

  @Override
  protected byte[] getStartRow() {
    return aaa;
  }

  @Override
  protected byte[] getEndRow() {
    return after_zzz;
  }

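  /**
   * Mapper that verifies each row handed to it by the snapshot record reader and emits the row
   * key, so the reducer can confirm full coverage of the key range.
   */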
  static class TestTableSnapshotMapper extends MapReduceBase
      implements TableMap<ImmutableBytesWritable, NullWritable> {
    @Override
    public void map(ImmutableBytesWritable key, Result value,
        OutputCollector<ImmutableBytesWritable, NullWritable> collector, Reporter reporter)
        throws IOException {
      verifyRowFromMap(key, value);
      collector.collect(key, NullWritable.get());
    }
  }

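  /**
   * Reducer that records every row key it receives and, when the task closes, validates that
   * all rows in the range [aaa, zz{) were seen.
   */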
  public static class TestTableSnapshotReducer extends MapReduceBase
      implements Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
    HBaseTestingUtility.SeenRowTracker rowTracker =
      new HBaseTestingUtility.SeenRowTracker(aaa, after_zzz);

    @Override
    public void reduce(ImmutableBytesWritable key, Iterator<NullWritable> values,
        OutputCollector<NullWritable, NullWritable> collector, Reporter reporter)
        throws IOException {
      rowTracker.addRow(key.get());
    }

    @Override
    public void close() {
      rowTracker.validate();
    }
  }

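  /**
   * Verifies that initTableSnapshotMapJob configures the job with the default LruBlockCache
   * size and leaves BucketCache disabled.
   */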
  @Test
  public void testInitTableSnapshotMapperJobConfig() throws Exception {
    setupCluster();
    TableName tableName = TableName.valueOf("testInitTableSnapshotMapperJobConfig");
    String snapshotName = "foo";

    try {
      createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
      JobConf job = new JobConf(UTIL.getConfiguration());
      Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);

      TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
        COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, job, false, tmpTableDir);

      // TODO: would be better to examine directly the cache instance that results from this
      // config. Currently this is not possible because BlockCache initialization is static.
      Assert.assertEquals(
        "Snapshot job should be configured for default LruBlockCache.",
        HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT,
        job.getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01);
      Assert.assertEquals(
        "Snapshot job should not use BucketCache.",
        0, job.getFloat("hbase.bucketcache.size", -1), 0.01);
    } finally {
      UTIL.getHBaseAdmin().deleteSnapshot(snapshotName);
      UTIL.deleteTable(tableName);
      tearDownCluster();
    }
  }

  // TODO: mapred does not support limiting the input range by startrow and endrow.
  // Thus the following tests must override the parameter verification done by the base class.

  @Test
  @Override
  public void testWithMockedMapReduceMultiRegion() throws Exception {
    testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 10);
  }

  @Test
  @Override
  public void testWithMapReduceMultiRegion() throws Exception {
    testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 10, false);
  }

  @Test
  @Override
  // run the MR job while HBase is offline
  public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception {
    testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 10, true);
  }

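  /**
   * Creates a table and snapshot, configures a snapshot map job, and verifies the resulting
   * splits and record readers against a mocked MapReduce context; no actual job is run.
   */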
  @Override
  protected void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
      int numRegions, int expectedNumSplits) throws Exception {
    setupCluster();
    TableName tableName = TableName.valueOf("testWithMockedMapReduce");
    try {
      createTableAndSnapshot(
        util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);

      JobConf job = new JobConf(util.getConfiguration());
      Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);

      TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
        COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, job, false, tmpTableDir);

      // mapred does not support restricting the scan by start and end keys (see the TODO
      // above), so verify against the full key range of the table
      verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());

    } finally {
      util.getHBaseAdmin().deleteSnapshot(snapshotName);
      util.deleteTable(tableName);
      tearDownCluster();
    }
  }

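  /**
   * Computes the splits directly from the input format, then reads every split back through a
   * record reader and checks that each row in [startRow, stopRow) is seen.
   */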
  private void verifyWithMockedMapReduce(JobConf job, int numRegions, int expectedNumSplits,
      byte[] startRow, byte[] stopRow) throws IOException, InterruptedException {
    TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
    InputSplit[] splits = tsif.getSplits(job, 0);

    Assert.assertEquals(expectedNumSplits, splits.length);

    HBaseTestingUtility.SeenRowTracker rowTracker =
      new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);

    for (int i = 0; i < splits.length; i++) {
      // validate input split
      InputSplit split = splits[i];
      Assert.assertTrue(split instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit);

      // validate record reader
      Reporter reporter = mock(Reporter.class);
      RecordReader<ImmutableBytesWritable, Result> rr = tsif.getRecordReader(split, job, reporter);

      // validate we can read all the data back
      ImmutableBytesWritable key = rr.createKey();
      Result value = rr.createValue();
      while (rr.next(key, value)) {
        verifyRowFromMap(key, value);
        rowTracker.addRow(key.copyBytes());
      }

      rr.close();
    }

    // validate all rows are seen
    rowTracker.validate();
  }

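  /** Delegates to doTestWithMapReduce using this class's start and end rows. */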
  @Override
  protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
      String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
      boolean shutdownCluster) throws Exception {
    doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir,
      numRegions, expectedNumSplits, shutdownCluster);
  }

  /**
   * Creates the table and snapshot, then runs an actual MapReduce job over the snapshot and
   * asserts that it succeeds. Also called by IntegrationTestTableSnapshotInputFormat.
   */
  public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
      String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
      int expectedNumSplits, boolean shutdownCluster) throws Exception {

    // create the table and snapshot
    createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);

    if (shutdownCluster) {
      util.shutdownMiniHBaseCluster();
    }

    try {
      // create the job
      JobConf jobConf = new JobConf(util.getConfiguration());

      jobConf.setJarByClass(util.getClass());
      org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(jobConf,
        TestTableSnapshotInputFormat.class);

      TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
        TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, jobConf, true, tableDir);

      jobConf.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
      jobConf.setNumReduceTasks(1);
      jobConf.setOutputFormat(NullOutputFormat.class);

      RunningJob job = JobClient.runJob(jobConf);
      Assert.assertTrue(job.isSuccessful());
    } finally {
      if (!shutdownCluster) {
        util.getHBaseAdmin().deleteSnapshot(snapshotName);
        util.deleteTable(tableName);
      }
    }
  }
}