1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertNotSame;
25  import static org.junit.Assert.assertTrue;
26  import static org.junit.Assert.fail;
27  
28  import java.io.IOException;
29  import java.util.Arrays;
30  import java.util.HashMap;
31  import java.util.Map;
32  import java.util.Map.Entry;
33  import java.util.Random;
34  import java.util.Set;
35  import java.util.concurrent.Callable;
36  
37  import org.apache.commons.logging.Log;
38  import org.apache.commons.logging.LogFactory;
39  import org.apache.hadoop.conf.Configuration;
40  import org.apache.hadoop.fs.FileStatus;
41  import org.apache.hadoop.fs.FileSystem;
42  import org.apache.hadoop.fs.Path;
43  import org.apache.hadoop.hbase.Cell;
44  import org.apache.hadoop.hbase.CellUtil;
45  import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
46  import org.apache.hadoop.hbase.HBaseConfiguration;
47  import org.apache.hadoop.hbase.HBaseTestingUtility;
48  import org.apache.hadoop.hbase.HColumnDescriptor;
49  import org.apache.hadoop.hbase.HConstants;
50  import org.apache.hadoop.hbase.HTableDescriptor;
51  import org.apache.hadoop.hbase.HadoopShims;
52  import org.apache.hadoop.hbase.KeyValue;
53  import org.apache.hadoop.hbase.testclassification.LargeTests;
54  import org.apache.hadoop.hbase.PerformanceEvaluation;
55  import org.apache.hadoop.hbase.TableName;
56  import org.apache.hadoop.hbase.client.HBaseAdmin;
57  import org.apache.hadoop.hbase.client.HTable;
58  import org.apache.hadoop.hbase.client.Put;
59  import org.apache.hadoop.hbase.client.Result;
60  import org.apache.hadoop.hbase.client.ResultScanner;
61  import org.apache.hadoop.hbase.client.Scan;
62  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
63  import org.apache.hadoop.hbase.io.compress.Compression;
64  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
65  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
66  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
67  import org.apache.hadoop.hbase.io.hfile.HFile;
68  import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
69  import org.apache.hadoop.hbase.regionserver.BloomType;
70  import org.apache.hadoop.hbase.regionserver.StoreFile;
71  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
72  import org.apache.hadoop.hbase.util.Bytes;
73  import org.apache.hadoop.hbase.util.FSUtils;
74  import org.apache.hadoop.hbase.util.Threads;
75  import org.apache.hadoop.hbase.util.Writables;
76  import org.apache.hadoop.io.NullWritable;
77  import org.apache.hadoop.mapreduce.Job;
78  import org.apache.hadoop.mapreduce.Mapper;
79  import org.apache.hadoop.mapreduce.RecordWriter;
80  import org.apache.hadoop.mapreduce.TaskAttemptContext;
81  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
82  import org.junit.Ignore;
83  import org.junit.Test;
84  import org.junit.experimental.categories.Category;
85  import org.mockito.Mockito;
86  
87  /**
88   * Simple test for {@link KeyValueSortReducer} and {@link HFileOutputFormat2}.
89   * Sets up and runs a mapreduce job that writes hfile output.
90   * Creates a few inner classes to implement splits and an inputformat that
91   * emits keys and values like those of {@link PerformanceEvaluation}.
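     * <p>
     * A rough sketch of the bulk-load flow exercised here, assuming an existing
     * table and an empty output directory (the mapper name is hypothetical; this
     * is not a complete program):
     * <pre>
     *   Job job = new Job(conf, "bulkload");
     *   job.setMapperClass(MyCellEmittingMapper.class);   // hypothetical mapper
     *   HFileOutputFormat2.configureIncrementalLoad(job, table);
     *   FileOutputFormat.setOutputPath(job, outputDir);
     *   job.waitForCompletion(true);
     *   new LoadIncrementalHFiles(conf).doBulkLoad(outputDir, table);
     * </pre>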
92   */
93  @Category(LargeTests.class)
94  public class TestHFileOutputFormat2  {
95    private final static int ROWSPERSPLIT = 1024;
96  
97    private static final byte[][] FAMILIES
98      = { Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A"))
99        , Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B"))};
100   private static final TableName TABLE_NAME =
101       TableName.valueOf("TestTable");
102 
103   private HBaseTestingUtility util = new HBaseTestingUtility();
104 
105   private static Log LOG = LogFactory.getLog(TestHFileOutputFormat2.class);
106 
107   /**
108    * Simple mapper that makes KeyValue output.
109    */
110   static class RandomKVGeneratingMapper
111       extends Mapper<NullWritable, NullWritable,
112                  ImmutableBytesWritable, Cell> {
113 
114     private int keyLength;
115     private static final int KEYLEN_DEFAULT=10;
116     private static final String KEYLEN_CONF="randomkv.key.length";
117 
118     private int valLength;
119     private static final int VALLEN_DEFAULT=10;
120     private static final String VALLEN_CONF="randomkv.val.length";
121 
122     @Override
123     protected void setup(Context context) throws IOException,
124         InterruptedException {
125       super.setup(context);
126 
127       Configuration conf = context.getConfiguration();
128       keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
129       valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
130     }
131 
132     protected void map(
133         NullWritable n1, NullWritable n2,
134         Mapper<NullWritable, NullWritable,
135                ImmutableBytesWritable,Cell>.Context context)
136         throws java.io.IOException, InterruptedException
137     {
138 
139       byte keyBytes[] = new byte[keyLength];
140       byte valBytes[] = new byte[valLength];
141 
142       int taskId = context.getTaskAttemptID().getTaskID().getId();
143       assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
144 
145       Random random = new Random();
146       for (int i = 0; i < ROWSPERSPLIT; i++) {
147 
148         random.nextBytes(keyBytes);
149         // Ensure that unique tasks generate unique keys
150         keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
151         random.nextBytes(valBytes);
152         ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
153 
154         for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
155           Cell kv = new KeyValue(keyBytes, family,
156               PerformanceEvaluation.QUALIFIER_NAME, valBytes);
157           context.write(key, kv);
158         }
159       }
160     }
161   }
162 
163   private void setupRandomGeneratorMapper(Job job) {
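        // NMapInputFormat runs a configurable number of map tasks over empty
        // (NullWritable) input; each RandomKVGeneratingMapper invocation then
        // emits ROWSPERSPLIT random rows, one cell per column family.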
164     job.setInputFormatClass(NMapInputFormat.class);
165     job.setMapperClass(RandomKVGeneratingMapper.class);
166     job.setMapOutputKeyClass(ImmutableBytesWritable.class);
167     job.setMapOutputValueClass(KeyValue.class);
168   }
169 
170   /**
171    * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if
172    * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
173    * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
174    */
175   @Test
176   public void test_LATEST_TIMESTAMP_isReplaced()
177   throws Exception {
178     Configuration conf = new Configuration(this.util.getConfiguration());
179     RecordWriter<ImmutableBytesWritable, Cell> writer = null;
180     TaskAttemptContext context = null;
181     Path dir =
182       util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
183     try {
184       Job job = new Job(conf);
185       FileOutputFormat.setOutputPath(job, dir);
186       context = createTestTaskAttemptContext(job);
187       HFileOutputFormat2 hof = new HFileOutputFormat2();
188       writer = hof.getRecordWriter(context);
189       final byte [] b = Bytes.toBytes("b");
190 
191       // Test 1.  Pass a KV that has a ts of LATEST_TIMESTAMP.  The write call
192       // should replace it.  Check that everything in the kv is unchanged except the ts.
193       KeyValue kv = new KeyValue(b, b, b);
194       KeyValue original = kv.clone();
195       writer.write(new ImmutableBytesWritable(), kv);
196       assertFalse(original.equals(kv));
197       assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
198       assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
199       assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
200       assertNotSame(original.getTimestamp(), kv.getTimestamp());
201       assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());
202 
203       // Test 2. Now pass a kv that has an explicit ts.  It should not be
204       // changed by the write call.
205       kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
206       original = kv.clone();
207       writer.write(new ImmutableBytesWritable(), kv);
208       assertTrue(original.equals(kv));
209     } finally {
210       if (writer != null && context != null) writer.close(context);
211       dir.getFileSystem(conf).delete(dir, true);
212     }
213   }
214 
215   private TaskAttemptContext createTestTaskAttemptContext(final Job job)
216   throws Exception {
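        // The HadoopShims singleton (provided by the Hadoop compatibility modules)
        // hides the Hadoop 1 vs Hadoop 2 differences in constructing a
        // TaskAttemptContext for tests.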
217     HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
218     TaskAttemptContext context = hadoop.createTestTaskAttemptContext(
219       job, "attempt_201402131733_0001_m_000000_0");
220     return context;
221   }
222 
223   /**
224    * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
225    * metadata used by time-restricted scans.
226    */
227   @Test
228   public void test_TIMERANGE() throws Exception {
229     Configuration conf = new Configuration(this.util.getConfiguration());
230     RecordWriter<ImmutableBytesWritable, Cell> writer = null;
231     TaskAttemptContext context = null;
232     Path dir =
233       util.getDataTestDir("test_TIMERANGE_present");
234     LOG.info("Writing test_TIMERANGE output to dir: " + dir);
235     try {
236       // build a record writer using HFileOutputFormat2
237       Job job = new Job(conf);
238       FileOutputFormat.setOutputPath(job, dir);
239       context = createTestTaskAttemptContext(job);
240       HFileOutputFormat2 hof = new HFileOutputFormat2();
241       writer = hof.getRecordWriter(context);
242 
243       // Pass two key values with explicit timestamps
244       final byte [] b = Bytes.toBytes("b");
245 
246       // value 1 with timestamp 2000
247       KeyValue kv = new KeyValue(b, b, b, 2000, b);
248       KeyValue original = kv.clone();
249       writer.write(new ImmutableBytesWritable(), kv);
250       assertEquals(original,kv);
251 
252       // value 2 with timestamp 1000
253       kv = new KeyValue(b, b, b, 1000, b);
254       original = kv.clone();
255       writer.write(new ImmutableBytesWritable(), kv);
256       assertEquals(original, kv);
257 
258       // verify that the file has the proper FileInfo.
259       writer.close(context);
260 
261       // the generated file lives 1 directory down from the attempt directory
262       // and is the only file, e.g.
263       // _attempt__0000_r_000000_0/b/1979617994050536795
264       FileSystem fs = FileSystem.get(conf);
265       Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
266       FileStatus[] sub1 = fs.listStatus(attemptDirectory);
267       FileStatus[] file = fs.listStatus(sub1[0].getPath());
268 
269       // open as HFile Reader and pull out TIMERANGE FileInfo.
270       HFile.Reader rd = HFile.createReader(fs, file[0].getPath(),
271           new CacheConfig(conf), conf);
272       Map<byte[],byte[]> finfo = rd.loadFileInfo();
273       byte[] range = finfo.get("TIMERANGE".getBytes());
274       assertNotNull(range);
275 
276       // unmarshall and check values.
277       TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
278       Writables.copyWritable(range, timeRangeTracker);
279       LOG.info(timeRangeTracker.getMinimumTimestamp() +
280           "...." + timeRangeTracker.getMaximumTimestamp());
281       assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
282       assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
283       rd.close();
284     } finally {
285       if (writer != null && context != null) writer.close(context);
286       dir.getFileSystem(conf).delete(dir, true);
287     }
288   }
289 
290   /**
291    * Run small MR job.
292    */
293   @Test
294   public void testWritingPEData() throws Exception {
295     Configuration conf = util.getConfiguration();
296     Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
297     FileSystem fs = testDir.getFileSystem(conf);
298 
299     // Lower this value or we OOME in Eclipse.
300     conf.setInt("io.sort.mb", 20);
301     // Write a few files.
302     conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);
303 
304     Job job = new Job(conf, "testWritingPEData");
305     setupRandomGeneratorMapper(job);
306     // This partitioner doesn't work well for number keys, but we use it anyway
307     // just to demonstrate how to configure it.
308     byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
309     byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
310 
311     Arrays.fill(startKey, (byte)0);
312     Arrays.fill(endKey, (byte)0xff);
313 
314     job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
315     // Set start and end rows for partitioner.
316     SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
317     SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
318     job.setReducerClass(KeyValueSortReducer.class);
319     job.setOutputFormatClass(HFileOutputFormat2.class);
320     job.setNumReduceTasks(4);
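        // Register HBase's serializations so KeyValue (and Mutation/Result) values
        // can be serialized through the MapReduce shuffle.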
321     job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
322         MutationSerialization.class.getName(), ResultSerialization.class.getName(),
323         KeyValueSerialization.class.getName());
324 
325     FileOutputFormat.setOutputPath(job, testDir);
326     assertTrue(job.waitForCompletion(false));
327     FileStatus [] files = fs.listStatus(testDir);
328     assertTrue(files.length > 0);
329   }
330 
331   @Test
332   public void testJobConfiguration() throws Exception {
333     Configuration conf = new Configuration(this.util.getConfiguration());
334     conf.set("hbase.fs.tmp.dir", util.getDataTestDir("testJobConfiguration").toString());
335     Job job = new Job(conf);
336     job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
337     HTable table = Mockito.mock(HTable.class);
338     setupMockStartKeys(table);
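        // configureIncrementalLoad creates one reduce task per region; the mocked
        // table reports 4 start keys, hence the 4 reducers asserted below.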
339     HFileOutputFormat2.configureIncrementalLoad(job, table);
340     assertEquals(4, job.getNumReduceTasks());
341   }
342 
343   private byte [][] generateRandomStartKeys(int numKeys) {
344     Random random = new Random();
345     byte[][] ret = new byte[numKeys][];
346     // first region start key is always empty
347     ret[0] = HConstants.EMPTY_BYTE_ARRAY;
348     for (int i = 1; i < numKeys; i++) {
349       ret[i] = PerformanceEvaluation.generateData(random, PerformanceEvaluation.VALUE_LENGTH);
350     }
351     return ret;
352   }
353 
354   @Test
355   public void testMRIncrementalLoad() throws Exception {
356     LOG.info("\nStarting test testMRIncrementalLoad\n");
357     doIncrementalLoadTest(false);
358   }
359 
360   @Test
361   public void testMRIncrementalLoadWithSplit() throws Exception {
362     LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
363     doIncrementalLoadTest(true);
364   }
365 
366   private void doIncrementalLoadTest(
367       boolean shouldChangeRegions) throws Exception {
368     util = new HBaseTestingUtility();
369     Configuration conf = util.getConfiguration();
370     byte[][] startKeys = generateRandomStartKeys(5);
371     HBaseAdmin admin = null;
372     try {
373       util.startMiniCluster();
374       Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
375       admin = new HBaseAdmin(conf);
376       HTable table = util.createTable(TABLE_NAME, FAMILIES);
377       assertEquals("Should start with empty table",
378           0, util.countRows(table));
379       int numRegions = util.createMultiRegions(
380           util.getConfiguration(), table, FAMILIES[0], startKeys);
381       assertEquals("Should make 5 regions", numRegions, 5);
382 
383       // Generate the bulk load files
384       util.startMiniMapReduceCluster();
385       runIncrementalPELoad(conf, table, testDir);
386       // This doesn't write into the table, just makes files
387       assertEquals("HFOF should not touch actual table",
388           0, util.countRows(table));
389 
390 
391       // Make sure that a directory was created for every CF
392       int dir = 0;
393       for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
394         for (byte[] family : FAMILIES) {
395           if (Bytes.toString(family).equals(f.getPath().getName())) {
396             ++dir;
397           }
398         }
399       }
400       assertEquals("Column family not found in FS.", FAMILIES.length, dir);
401 
402       // handle the split case
403       if (shouldChangeRegions) {
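            // Re-region the table (5 -> 15 regions) between generating the HFiles and
            // bulk loading them, so LoadIncrementalHFiles must split files that now
            // straddle the new region boundaries.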
404         LOG.info("Changing regions in table");
405         admin.disableTable(table.getTableName());
406         while(util.getMiniHBaseCluster().getMaster().getAssignmentManager().
407             getRegionStates().isRegionsInTransition()) {
408           Threads.sleep(200);
409           LOG.info("Waiting on table to finish disabling");
410         }
411         byte[][] newStartKeys = generateRandomStartKeys(15);
412         util.createMultiRegions(
413             util.getConfiguration(), table, FAMILIES[0], newStartKeys);
414         admin.enableTable(table.getTableName());
415         while (table.getRegionLocations().size() != 15 ||
416             !admin.isTableAvailable(table.getTableName())) {
417           Thread.sleep(200);
418           LOG.info("Waiting for new region assignment to happen");
419         }
420       }
421 
422       // Perform the actual load
423       new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
424 
425       // Ensure data shows up
426       int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
427       assertEquals("LoadIncrementalHFiles should put expected data in table",
428           expectedRows, util.countRows(table));
429       Scan scan = new Scan();
430       ResultScanner results = table.getScanner(scan);
431       for (Result res : results) {
432         assertEquals(FAMILIES.length, res.rawCells().length);
433         Cell first = res.rawCells()[0];
434         for (Cell kv : res.rawCells()) {
435           assertTrue(CellUtil.matchingRow(first, kv));
436           assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
437         }
438       }
439       results.close();
440       String tableDigestBefore = util.checksumRows(table);
441 
442       // Cause regions to reopen
443       admin.disableTable(TABLE_NAME);
444       while (!admin.isTableDisabled(TABLE_NAME)) {
445         Thread.sleep(200);
446         LOG.info("Waiting for table to disable");
447       }
448       admin.enableTable(TABLE_NAME);
449       util.waitTableAvailable(TABLE_NAME.getName());
450       assertEquals("Data should remain after reopening of regions",
451           tableDigestBefore, util.checksumRows(table));
452     } finally {
453       if (admin != null) admin.close();
454       util.shutdownMiniMapReduceCluster();
455       util.shutdownMiniCluster();
456     }
457   }
458 
459   private void runIncrementalPELoad(
460       Configuration conf, HTable table, Path outDir)
461   throws Exception {
462     Job job = new Job(conf, "testLocalMRIncrementalLoad");
463     job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
464     job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
465         MutationSerialization.class.getName(), ResultSerialization.class.getName(),
466         KeyValueSerialization.class.getName());
467     setupRandomGeneratorMapper(job);
468     HFileOutputFormat2.configureIncrementalLoad(job, table);
469     FileOutputFormat.setOutputPath(job, outDir);
470 
471     assertFalse(util.getTestFileSystem().exists(outDir));
472 
473     assertEquals(table.getRegionLocations().size(), job.getNumReduceTasks());
474 
475     assertTrue(job.waitForCompletion(true));
476   }
477 
478   /**
479    * Test for {@link HFileOutputFormat2#configureCompression(HTable,
480    * Configuration)} and
481    * {@link HFileOutputFormat2#createFamilyCompressionMap(Configuration)}.
482    * Tests that the compression map is correctly serialized into
483    * and deserialized from the configuration.
484    *
485    * @throws IOException
486    */
487   @Test
488   public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
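        // numCfs == 0 exercises a table with no column families; higher counts add
        // mock families whose names include awkward special characters.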
489     for (int numCfs = 0; numCfs <= 3; numCfs++) {
490       Configuration conf = new Configuration(this.util.getConfiguration());
491       Map<String, Compression.Algorithm> familyToCompression =
492           getMockColumnFamiliesForCompression(numCfs);
493       HTable table = Mockito.mock(HTable.class);
494       setupMockColumnFamiliesForCompression(table, familyToCompression);
495       HFileOutputFormat2.configureCompression(table, conf);
496 
497       // read back family specific compression setting from the configuration
498       Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat2
499           .createFamilyCompressionMap(conf);
500 
501       // test that we have a value for all column families that matches with the
502       // used mock values
503       for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
504         assertEquals("Compression configuration incorrect for column family:"
505             + entry.getKey(), entry.getValue(),
506             retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
507       }
508     }
509   }
510 
511   private void setupMockColumnFamiliesForCompression(HTable table,
512       Map<String, Compression.Algorithm> familyToCompression) throws IOException {
513     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
514     for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
515       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
516           .setMaxVersions(1)
517           .setCompressionType(entry.getValue())
518           .setBlockCacheEnabled(false)
519           .setTimeToLive(0));
520     }
521     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
522   }
523 
524   /**
525    * @return a map from column family names to compression algorithms for
526    *         testing column family compression. Column family names have special characters
527    */
528   private Map<String, Compression.Algorithm>
529       getMockColumnFamiliesForCompression (int numCfs) {
530     Map<String, Compression.Algorithm> familyToCompression
531       = new HashMap<String, Compression.Algorithm>();
532     // use column family names having special characters
533     if (numCfs-- > 0) {
534       familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
535     }
536     if (numCfs-- > 0) {
537       familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
538     }
539     if (numCfs-- > 0) {
540       familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
541     }
542     if (numCfs-- > 0) {
543       familyToCompression.put("Family3", Compression.Algorithm.NONE);
544     }
545     return familyToCompression;
546   }
547 
548 
549   /**
550    * Test for {@link HFileOutputFormat2#configureBloomType(HTable,
551    * Configuration)} and
552    * {@link HFileOutputFormat2#createFamilyBloomTypeMap(Configuration)}.
553    * Tests that the bloom type map is correctly serialized into
554    * and deserialized from the configuration.
555    *
556    * @throws IOException
557    */
558   @Test
559   public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
560     for (int numCfs = 0; numCfs <= 2; numCfs++) {
561       Configuration conf = new Configuration(this.util.getConfiguration());
562       Map<String, BloomType> familyToBloomType =
563           getMockColumnFamiliesForBloomType(numCfs);
564       HTable table = Mockito.mock(HTable.class);
565       setupMockColumnFamiliesForBloomType(table,
566           familyToBloomType);
567       HFileOutputFormat2.configureBloomType(table, conf);
568 
569       // read back family specific bloom filter settings from the
570       // configuration
571       Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
572           HFileOutputFormat2
573               .createFamilyBloomTypeMap(conf);
574 
575       // test that we have a value for all column families that matches with the
576       // used mock values
577       for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
578         assertEquals("BloomType configuration incorrect for column family:"
579             + entry.getKey(), entry.getValue(),
580             retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes()));
581       }
582     }
583   }
584 
585   private void setupMockColumnFamiliesForBloomType(HTable table,
586       Map<String, BloomType> familyToDataBlockEncoding) throws IOException {
587     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
588     for (Entry<String, BloomType> entry : familyToDataBlockEncoding.entrySet()) {
589       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
590           .setMaxVersions(1)
591           .setBloomFilterType(entry.getValue())
592           .setBlockCacheEnabled(false)
593           .setTimeToLive(0));
594     }
595     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
596   }
597 
598   /**
599    * @return a map from column family names to bloom filter types for
600    *         testing column family bloom settings. Column family names have special characters
601    */
602   private Map<String, BloomType>
603   getMockColumnFamiliesForBloomType (int numCfs) {
604     Map<String, BloomType> familyToBloomType =
605         new HashMap<String, BloomType>();
606     // use column family names having special characters
607     if (numCfs-- > 0) {
608       familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
609     }
610     if (numCfs-- > 0) {
611       familyToBloomType.put("Family2=asdads&!AASD",
612           BloomType.ROWCOL);
613     }
614     if (numCfs-- > 0) {
615       familyToBloomType.put("Family3", BloomType.NONE);
616     }
617     return familyToBloomType;
618   }
619 
620   /**
621    * Test for {@link HFileOutputFormat2#configureBlockSize(HTable,
622    * Configuration)} and
623    * {@link HFileOutputFormat2#createFamilyBlockSizeMap(Configuration)}.
624    * Tests that the block size map is correctly serialized into
625    * and deserialized from the configuration.
626    *
627    * @throws IOException
628    */
629   @Test
630   public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
631     for (int numCfs = 0; numCfs <= 3; numCfs++) {
632       Configuration conf = new Configuration(this.util.getConfiguration());
633       Map<String, Integer> familyToBlockSize =
634           getMockColumnFamiliesForBlockSize(numCfs);
635       HTable table = Mockito.mock(HTable.class);
636       setupMockColumnFamiliesForBlockSize(table,
637           familyToBlockSize);
638       HFileOutputFormat2.configureBlockSize(table, conf);
639 
640       // read back family specific block size settings from the
641       // configuration
642       Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
643           HFileOutputFormat2
644               .createFamilyBlockSizeMap(conf);
645 
646       // test that we have a value for all column families that matches with the
647       // used mock values
648       for (Entry<String, Integer> entry : familyToBlockSize.entrySet()
649           ) {
650         assertEquals("BlockSize configuration incorrect for column family:"
651             + entry.getKey(), entry.getValue(),
652             retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes()));
653       }
654     }
655   }
656 
657   private void setupMockColumnFamiliesForBlockSize(HTable table,
658       Map<String, Integer> familyToDataBlockEncoding) throws IOException {
659     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
660     for (Entry<String, Integer> entry : familyToDataBlockEncoding.entrySet()) {
661       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
662           .setMaxVersions(1)
663           .setBlocksize(entry.getValue())
664           .setBlockCacheEnabled(false)
665           .setTimeToLive(0));
666     }
667     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
668   }
669 
670   /**
671    * @return a map from column family names to block sizes for
672    *         testing column family block size settings. Column family names have special characters
673    */
674   private Map<String, Integer>
675   getMockColumnFamiliesForBlockSize (int numCfs) {
676     Map<String, Integer> familyToBlockSize =
677         new HashMap<String, Integer>();
678     // use column family names having special characters
679     if (numCfs-- > 0) {
680       familyToBlockSize.put("Family1!@#!@#&", 1234);
681     }
682     if (numCfs-- > 0) {
683       familyToBlockSize.put("Family2=asdads&!AASD",
684           Integer.MAX_VALUE);
685     }
686     if (numCfs-- > 0) {
687       familyToBlockSize.put("Family2=asdads&!AASD",
688           Integer.MAX_VALUE);
689     }
690     if (numCfs-- > 0) {
691       familyToBlockSize.put("Family3", 0);
692     }
693     return familyToBlockSize;
694   }
695 
696   /**
697    * Test for {@link HFileOutputFormat2#configureDataBlockEncoding(HTable,
698    * Configuration)} and
699    * {@link HFileOutputFormat2#createFamilyDataBlockEncodingMap(Configuration)}.
700    * Tests that the data block encoding map is correctly serialized into
701    * and deserialized from the configuration.
702    *
703    * @throws IOException
704    */
705   @Test
706   public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
707     for (int numCfs = 0; numCfs <= 3; numCfs++) {
708       Configuration conf = new Configuration(this.util.getConfiguration());
709       Map<String, DataBlockEncoding> familyToDataBlockEncoding =
710           getMockColumnFamiliesForDataBlockEncoding(numCfs);
711       HTable table = Mockito.mock(HTable.class);
712       setupMockColumnFamiliesForDataBlockEncoding(table,
713           familyToDataBlockEncoding);
714       HFileOutputFormat2.configureDataBlockEncoding(table, conf);
715 
716       // read back family specific data block encoding settings from the
717       // configuration
718       Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
719           HFileOutputFormat2
720           .createFamilyDataBlockEncodingMap(conf);
721 
722       // test that we have a value for all column families that matches with the
723       // used mock values
724       for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
725         assertEquals("DataBlockEncoding configuration incorrect for column family:"
726             + entry.getKey(), entry.getValue(),
727             retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes()));
728       }
729     }
730   }
731 
732   private void setupMockColumnFamiliesForDataBlockEncoding(HTable table,
733       Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
734     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
735     for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
736       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
737           .setMaxVersions(1)
738           .setDataBlockEncoding(entry.getValue())
739           .setBlockCacheEnabled(false)
740           .setTimeToLive(0));
741     }
742     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
743   }
744 
745   /**
746    * @return a map from column family names to data block encodings for
747    *         testing column family encoding settings. Column family names have special characters
748    */
749   private Map<String, DataBlockEncoding>
750       getMockColumnFamiliesForDataBlockEncoding (int numCfs) {
751     Map<String, DataBlockEncoding> familyToDataBlockEncoding =
752         new HashMap<String, DataBlockEncoding>();
753     // use column family names having special characters
754     if (numCfs-- > 0) {
755       familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
756     }
757     if (numCfs-- > 0) {
758       familyToDataBlockEncoding.put("Family2=asdads&!AASD",
759           DataBlockEncoding.FAST_DIFF);
760     }
761     if (numCfs-- > 0) {
762       familyToDataBlockEncoding.put("Family2=asdads&!AASD",
763           DataBlockEncoding.PREFIX);
764     }
765     if (numCfs-- > 0) {
766       familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
767     }
768     return familyToDataBlockEncoding;
769   }
770 
771   private void setupMockStartKeys(HTable table) throws IOException {
772     byte[][] mockKeys = new byte[][] {
773         HConstants.EMPTY_BYTE_ARRAY,
774         Bytes.toBytes("aaa"),
775         Bytes.toBytes("ggg"),
776         Bytes.toBytes("zzz")
777     };
778     Mockito.doReturn(mockKeys).when(table).getStartKeys();
779   }
780 
781   /**
782    * Test that {@link HFileOutputFormat2} RecordWriter uses compression and
783    * bloom filter settings from the column family descriptor
784    */
785   @Test
786   public void testColumnFamilySettings() throws Exception {
787     Configuration conf = new Configuration(this.util.getConfiguration());
788     RecordWriter<ImmutableBytesWritable, Cell> writer = null;
789     TaskAttemptContext context = null;
790     Path dir = util.getDataTestDir("testColumnFamilySettings");
791 
792     // Setup table descriptor
793     HTable table = Mockito.mock(HTable.class);
794     HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
795     Mockito.doReturn(htd).when(table).getTableDescriptor();
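        // generateColumnDescriptors() produces families covering the available
        // compression, data block encoding and bloom type combinations, so the
        // checks below exercise each of those settings.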
796     for (HColumnDescriptor hcd: HBaseTestingUtility.generateColumnDescriptors()) {
797       htd.addFamily(hcd);
798     }
799 
800     // set up the table to return some mock keys
801     setupMockStartKeys(table);
802 
803     try {
804       // partial map red setup to get an operational writer for testing
805       // We turn off the sequence file compression, because DefaultCodec
806       // pollutes the GZip codec pool with an incompatible compressor.
807       conf.set("io.seqfile.compression.type", "NONE");
808       conf.set("hbase.fs.tmp.dir", dir.toString());
809       Job job = new Job(conf, "testLocalMRIncrementalLoad");
810       job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
811       setupRandomGeneratorMapper(job);
812       HFileOutputFormat2.configureIncrementalLoad(job, table);
813       FileOutputFormat.setOutputPath(job, dir);
814       context = createTestTaskAttemptContext(job);
815       HFileOutputFormat2 hof = new HFileOutputFormat2();
816       writer = hof.getRecordWriter(context);
817 
818       // write out random rows
819       writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
820       writer.close(context);
821 
822       // Make sure that a directory was created for every CF
823       FileSystem fs = dir.getFileSystem(conf);
824 
825       // commit so that the filesystem has one directory per column family
826       hof.getOutputCommitter(context).commitTask(context);
827       hof.getOutputCommitter(context).commitJob(context);
828       FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
829       assertEquals(htd.getFamilies().size(), families.length);
830       for (FileStatus f : families) {
831         String familyStr = f.getPath().getName();
832         HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
833         // verify that the compression on this file matches the configured
834         // compression
835         Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
836         Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
837         Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
838 
839         byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
840         if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
841         assertEquals("Incorrect bloom filter used for column family " + familyStr +
842           "(reader: " + reader + ")",
843           hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
844         assertEquals("Incorrect compression used for column family " + familyStr +
845           "(reader: " + reader + ")", hcd.getCompression(), reader.getFileContext().getCompression());
846       }
847     } finally {
848       dir.getFileSystem(conf).delete(dir, true);
849     }
850   }
851 
852   /**
853    * Write random values to the writer, emitting one KeyValue per row for each
854    * of the given column families.
855    */
856   private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
857       TaskAttemptContext context, Set<byte[]> families, int numRows)
858       throws IOException, InterruptedException {
859     byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
860     int valLength = 10;
861     byte valBytes[] = new byte[valLength];
862 
863     int taskId = context.getTaskAttemptID().getTaskID().getId();
864     assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
865 
866     Random random = new Random();
867     for (int i = 0; i < numRows; i++) {
868 
869       Bytes.putInt(keyBytes, 0, i);
870       random.nextBytes(valBytes);
871       ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
872 
873       for (byte[] family : families) {
874         Cell kv = new KeyValue(keyBytes, family,
875             PerformanceEvaluation.QUALIFIER_NAME, valBytes);
876         writer.write(key, kv);
877       }
878     }
879   }
880 
881   /**
882    * This test is to test the scenario happened in HBASE-6901.
883    * All files are bulk loaded and excluded from minor compaction.
884    * Without the fix of HBASE-6901, an ArrayIndexOutOfBoundsException
885    * will be thrown.
886    */
887   @Ignore ("Flakey: See HBASE-9051") @Test
888   public void testExcludeAllFromMinorCompaction() throws Exception {
889     Configuration conf = util.getConfiguration();
890     conf.setInt("hbase.hstore.compaction.min", 2);
891     generateRandomStartKeys(5);
892 
893     try {
894       util.startMiniCluster();
895       final FileSystem fs = util.getDFSCluster().getFileSystem();
896       HBaseAdmin admin = new HBaseAdmin(conf);
897       HTable table = util.createTable(TABLE_NAME, FAMILIES);
898       assertEquals("Should start with empty table", 0, util.countRows(table));
899 
900       // deep inspection: get the StoreFile dir
901       final Path storePath = new Path(
902         FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
903           new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
904             Bytes.toString(FAMILIES[0])));
905       assertEquals(0, fs.listStatus(storePath).length);
906 
907       // Generate two bulk load files
908       conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
909           true);
910       util.startMiniMapReduceCluster();
911 
912       for (int i = 0; i < 2; i++) {
913         Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
914         runIncrementalPELoad(conf, table, testDir);
915         // Perform the actual load
916         new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
917       }
918 
919       // Ensure data shows up
920       int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
921       assertEquals("LoadIncrementalHFiles should put expected data in table",
922           expectedRows, util.countRows(table));
923 
924       // should have a second StoreFile now
925       assertEquals(2, fs.listStatus(storePath).length);
926 
927       // minor compactions shouldn't get rid of the file
928       admin.compact(TABLE_NAME.getName());
929       try {
930         quickPoll(new Callable<Boolean>() {
931           public Boolean call() throws Exception {
932             return fs.listStatus(storePath).length == 1;
933           }
934         }, 5000);
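            // quickPoll returning normally would mean the store collapsed back to a
            // single file, i.e. a minor compaction picked up the bulk-loaded files;
            // flag that as an error.  The expected outcome is the AssertionError from
            // quickPoll timing out, which is swallowed below.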
935         throw new IOException("SF# = " + fs.listStatus(storePath).length);
936       } catch (AssertionError ae) {
937         // this is expected behavior
938       }
939 
940       // a major compaction should work though
941       admin.majorCompact(TABLE_NAME.getName());
942       quickPoll(new Callable<Boolean>() {
943         public Boolean call() throws Exception {
944           return fs.listStatus(storePath).length == 1;
945         }
946       }, 5000);
947 
948     } finally {
949       util.shutdownMiniMapReduceCluster();
950       util.shutdownMiniCluster();
951     }
952   }
953 
954   @Test
955   public void testExcludeMinorCompaction() throws Exception {
956     Configuration conf = util.getConfiguration();
957     conf.setInt("hbase.hstore.compaction.min", 2);
958     generateRandomStartKeys(5);
959 
960     try {
961       util.startMiniCluster();
962       Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
963       final FileSystem fs = util.getDFSCluster().getFileSystem();
964       HBaseAdmin admin = new HBaseAdmin(conf);
965       HTable table = util.createTable(TABLE_NAME, FAMILIES);
966       assertEquals("Should start with empty table", 0, util.countRows(table));
967 
968       // deep inspection: get the StoreFile dir
969       final Path storePath = new Path(
970         FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
971           new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
972             Bytes.toString(FAMILIES[0])));
973       assertEquals(0, fs.listStatus(storePath).length);
974 
975       // put some data in it and flush to create a storefile
976       Put p = new Put(Bytes.toBytes("test"));
977       p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
978       table.put(p);
979       admin.flush(TABLE_NAME.getName());
980       assertEquals(1, util.countRows(table));
981       quickPoll(new Callable<Boolean>() {
982         public Boolean call() throws Exception {
983           return fs.listStatus(storePath).length == 1;
984         }
985       }, 5000);
986 
987       // Generate a bulk load file with more rows
988       conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
989           true);
990       util.startMiniMapReduceCluster();
991       runIncrementalPELoad(conf, table, testDir);
992 
993       // Perform the actual load
994       new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
995 
996       // Ensure data shows up
997       int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
998       assertEquals("LoadIncrementalHFiles should put expected data in table",
999           expectedRows + 1, util.countRows(table));
1000 
1001       // should have a second StoreFile now
1002       assertEquals(2, fs.listStatus(storePath).length);
1003 
1004       // minor compactions shouldn't get rid of the file
1005       admin.compact(TABLE_NAME.getName());
1006       try {
1007         quickPoll(new Callable<Boolean>() {
1008           public Boolean call() throws Exception {
1009             return fs.listStatus(storePath).length == 1;
1010           }
1011         }, 5000);
1012         throw new IOException("SF# = " + fs.listStatus(storePath).length);
1013       } catch (AssertionError ae) {
1014         // this is expected behavior
1015       }
1016 
1017       // a major compaction should work though
1018       admin.majorCompact(TABLE_NAME.getName());
1019       quickPoll(new Callable<Boolean>() {
1020         public Boolean call() throws Exception {
1021           return fs.listStatus(storePath).length == 1;
1022         }
1023       }, 5000);
1024 
1025     } finally {
1026       util.shutdownMiniMapReduceCluster();
1027       util.shutdownMiniCluster();
1028     }
1029   }
1030 
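      // Polls the callable roughly every 10 ms until it returns true or waitMs
      // elapses, failing the test on timeout.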
1031   private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
1032     int sleepMs = 10;
1033     int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
1034     while (retries-- > 0) {
1035       if (c.call().booleanValue()) {
1036         return;
1037       }
1038       Thread.sleep(sleepMs);
1039     }
1040     fail();
1041   }
1042 
1043   public static void main(String args[]) throws Exception {
1044     new TestHFileOutputFormat2().manualTest(args);
1045   }
1046 
1047   public void manualTest(String args[]) throws Exception {
1048     Configuration conf = HBaseConfiguration.create();
1049     util = new HBaseTestingUtility(conf);
1050     if ("newtable".equals(args[0])) {
1051      byte[] tname = Bytes.toBytes(args[1]);
1052       HTable table = util.createTable(tname, FAMILIES);
1053       HBaseAdmin admin = new HBaseAdmin(conf);
1054       admin.disableTable(tname);
1055       byte[][] startKeys = generateRandomStartKeys(5);
1056       util.createMultiRegions(conf, table, FAMILIES[0], startKeys);
1057       admin.enableTable(tname);
1058     } else if ("incremental".equals(args[0])) {
1059      byte[] tname = Bytes.toBytes(args[1]);
1060       HTable table = new HTable(conf, tname);
1061       Path outDir = new Path("incremental-out");
1062       runIncrementalPELoad(conf, table, outDir);
1063     } else {
1064       throw new RuntimeException(
1065           "usage: TestHFileOutputFormat2 newtable | incremental");
1066     }
1067   }
1068 
1069 }
1070