/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Arrays;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;

/**
 * Simple test for {@link KeyValueSortReducer} and {@link HFileOutputFormat}.
 * Sets up and runs a mapreduce job that writes hfile output.
 * Uses {@link NMapInputFormat} to run as many map tasks as "mapred.map.tasks",
 * each emitting keys and values like those of {@link PerformanceEvaluation}.
 */
public class TestHFileOutputFormat {
  private final static int ROWSPERSPLIT = 1024;

  private static final byte[] FAMILY_NAME = PerformanceEvaluation.FAMILY_NAME;
  private static final byte[] TABLE_NAME = Bytes.toBytes("TestTable");

  private HBaseTestingUtility util = new HBaseTestingUtility();

  private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat.class);

  /**
   * Simple mapper that makes KeyValue output.
   */
  static class RandomKVGeneratingMapper
  extends Mapper<NullWritable, NullWritable,
                 ImmutableBytesWritable, KeyValue> {

    private int keyLength;
    private static final int KEYLEN_DEFAULT = 10;
    private static final String KEYLEN_CONF = "randomkv.key.length";

    private int valLength;
    private static final int VALLEN_DEFAULT = 10;
    private static final String VALLEN_CONF = "randomkv.val.length";

    @Override
    protected void setup(Context context) throws IOException,
        InterruptedException {
      super.setup(context);

      Configuration conf = context.getConfiguration();
      keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
      valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
    }

    @Override
    protected void map(
        NullWritable n1, NullWritable n2,
        Context context)
        throws IOException, InterruptedException {
      byte[] keyBytes = new byte[keyLength];
      byte[] valBytes = new byte[valLength];

      Random random = new Random(System.currentTimeMillis());
      for (int i = 0; i < ROWSPERSPLIT; i++) {
        // Refill the same arrays on each iteration; this is safe because the
        // framework serializes the key and value when write() is called.
        random.nextBytes(keyBytes);
        random.nextBytes(valBytes);
        ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

        KeyValue kv = new KeyValue(keyBytes, PerformanceEvaluation.FAMILY_NAME,
            PerformanceEvaluation.QUALIFIER_NAME, valBytes);
        context.write(key, kv);
      }
    }
  }

  @Before
  public void cleanupDir() throws IOException {
    util.cleanupTestDir();
  }

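  /**
   * Configure the job to use {@link NMapInputFormat} (which makes one split
   * per configured map task) and the random KV generating mapper above, with
   * the map output types that {@link HFileOutputFormat} expects.
   */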
  private void setupRandomGeneratorMapper(Job job) {
    job.setInputFormatClass(NMapInputFormat.class);
    job.setMapperClass(RandomKVGeneratingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
  }

  /**
   * Test that {@link HFileOutputFormat} RecordWriter amends timestamps if
   * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
   * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
   */
  @Test
  public void test_LATEST_TIMESTAMP_isReplaced()
  throws IOException, InterruptedException {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
    TaskAttemptContext context = null;
    Path dir =
      HBaseTestingUtility.getTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = new TaskAttemptContext(job.getConfiguration(),
        new TaskAttemptID());
      HFileOutputFormat hof = new HFileOutputFormat();
      writer = hof.getRecordWriter(context);
      final byte[] b = Bytes.toBytes("b");

      // Test 1.  Pass a KV that has a ts of LATEST_TIMESTAMP.  It should be
      // changed by the call to write.  Check that everything in the kv is the
      // same except the timestamp.
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(original.getRow(), kv.getRow()));
      assertTrue(original.matchingColumn(kv.getFamily(), kv.getQualifier()));
      // Compare the long values directly; assertNotSame on autoboxed longs
      // only checks reference inequality and would always pass.
      assertTrue(original.getTimestamp() != kv.getTimestamp());
      assertTrue(kv.getTimestamp() != HConstants.LATEST_TIMESTAMP);

      // Test 2. Now pass a kv that has an explicit ts.  It should not be
      // changed by the call to write.
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  /**
   * Run a small MR job that writes random data through HFileOutputFormat and
   * verify that output files are produced.
   */
  @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = HBaseTestingUtility.getTestDir("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

    // Turn this value down or we OOME in Eclipse.
    conf.setInt("io.sort.mb", 20);
    // Keep the region max filesize small so we write a few files.
    conf.setLong("hbase.hregion.max.filesize", 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job);
    // This partitioner doesn't work well for number keys, but we use it
    // anyway just to demonstrate how to configure it.
    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte)0);
    Arrays.fill(endKey, (byte)0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
    // Set start and end rows for partitioner.
    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setNumReduceTasks(4);

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus[] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }

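  /**
   * Test that {@link HFileOutputFormat#configureIncrementalLoad(Job, HTable)}
   * creates one reduce task per region of the target table.
   */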
  @Test
  public void testJobConfiguration() throws Exception {
    Job job = new Job();
    HTable table = Mockito.mock(HTable.class);
    byte[][] mockKeys = new byte[][] {
        HConstants.EMPTY_BYTE_ARRAY,
        Bytes.toBytes("aaa"),
        Bytes.toBytes("ggg"),
        Bytes.toBytes("zzz")
    };
    Mockito.doReturn(mockKeys).when(table).getStartKeys();

    HFileOutputFormat.configureIncrementalLoad(job, table);
    // Four region start keys should yield four reduce tasks.
    assertEquals(4, job.getNumReduceTasks());
  }

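  /**
   * Generate numKeys region start keys: the first is empty, as required for
   * the first region of a table, and the rest are random values.
   */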
  private byte[][] generateRandomStartKeys(int numKeys) {
    Random random = new Random();
    byte[][] ret = new byte[numKeys][];
    // first region start key is always empty
    ret[0] = HConstants.EMPTY_BYTE_ARRAY;
    for (int i = 1; i < numKeys; i++) {
      ret[i] = PerformanceEvaluation.generateValue(random);
    }
    return ret;
  }

  @Test
  public void testMRIncrementalLoad() throws Exception {
    doIncrementalLoadTest(false);
  }

  @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    doIncrementalLoadTest(true);
  }

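  /**
   * End-to-end incremental load: start a mini cluster, create a pre-split
   * table, write hfiles with an MR job, bulk load them with
   * {@link LoadIncrementalHFiles}, and verify row counts and checksums,
   * optionally re-splitting the table between the write and the load.
   */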
  private void doIncrementalLoadTest(
      boolean shouldChangeRegions) throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = HBaseTestingUtility.getTestDir("testLocalMRIncrementalLoad");
    byte[][] startKeys = generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILY_NAME);
      int numRegions = util.createMultiRegions(
          util.getConfiguration(), table, FAMILY_NAME,
          startKeys);
      assertEquals("Should make 5 regions", 5, numRegions);
      assertEquals("Should start with empty table",
          0, util.countRows(table));

      // Generate the bulk load files
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table",
          0, util.countRows(table));

      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getTableName());
        byte[][] newStartKeys = generateRandomStartKeys(15);
        util.createMultiRegions(util.getConfiguration(),
            table, FAMILY_NAME, newStartKeys);
        admin.enableTable(table.getTableName());
        while (table.getRegionsInfo().size() != 15 ||
            !admin.isTableAvailable(table.getTableName())) {
          Thread.sleep(1000);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = conf.getInt("mapred.map.tasks", 1) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));
      String tableDigestBefore = util.checksumRows(table);

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (table.getRegionsInfo().size() != 0) {
        Thread.sleep(1000);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME, 30000);

      assertEquals("Data should remain after reopening of regions",
          tableDigestBefore, util.checksumRows(table));
    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

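  /**
   * Run the random KV generating job against the given table, writing hfiles
   * into outDir; configureIncrementalLoad should set one reducer per region.
   */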
  private void runIncrementalPELoad(
      Configuration conf, HTable table, Path outDir)
  throws Exception {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    setupRandomGeneratorMapper(job);
    HFileOutputFormat.configureIncrementalLoad(job, table);
    FileOutputFormat.setOutputPath(job, outDir);

    assertEquals(table.getRegionsInfo().size(),
        job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }

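  /**
   * Entry point for {@link #manualTest(String[])} below.
   */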
  public static void main(String[] args) throws Exception {
    new TestHFileOutputFormat().manualTest(args);
  }

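  /**
   * Manual driver against a running cluster: "newtable &lt;name&gt;" creates a
   * pre-split table; "incremental &lt;name&gt;" writes hfiles for an existing
   * table into a local "incremental-out" directory.
   */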
  public void manualTest(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    util = new HBaseTestingUtility(conf);
    if ("newtable".equals(args[0])) {
      byte[] tname = Bytes.toBytes(args[1]);
      HTable table = util.createTable(tname, FAMILY_NAME);
      HBaseAdmin admin = new HBaseAdmin(conf);
      admin.disableTable(tname);
      util.createMultiRegions(conf, table, FAMILY_NAME,
          generateRandomStartKeys(5));
      admin.enableTable(tname);
    } else if ("incremental".equals(args[0])) {
      byte[] tname = Bytes.toBytes(args[1]);
      HTable table = new HTable(conf, tname);
      Path outDir = new Path("incremental-out");
      runIncrementalPELoad(conf, table, outDir);
    } else {
      throw new RuntimeException(
          "usage: TestHFileOutputFormat newtable <tablename> | incremental <tablename>");
    }
  }
}