/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.concurrent.Callable;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HadoopShims;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;

import com.google.common.collect.Lists;

/**
 * Simple test for {@link KeyValueSortReducer} and {@link HFileOutputFormat2}.
 * Sets up and runs a MapReduce job that writes HFile output.
 * Creates a few inner classes to implement splits and an InputFormat that
 * emits keys and values like those of {@link PerformanceEvaluation}.
 */
@Category(LargeTests.class)
public class TestHFileOutputFormat2 {
  private final static int ROWSPERSPLIT = 1024;

  private static final byte[][] FAMILIES
    = { Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A"))
      , Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B"))};
  private static final TableName TABLE_NAME =
      TableName.valueOf("TestTable");

  private HBaseTestingUtility util = new HBaseTestingUtility();

  private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat2.class);

  /**
   * Simple mapper that makes KeyValue output.
   */
  static class RandomKVGeneratingMapper
      extends Mapper<NullWritable, NullWritable,
                     ImmutableBytesWritable, KeyValue> {

    private int keyLength;
    private static final int KEYLEN_DEFAULT = 10;
    private static final String KEYLEN_CONF = "randomkv.key.length";

    private int valLength;
    private static final int VALLEN_DEFAULT = 10;
    private static final String VALLEN_CONF = "randomkv.val.length";

    @Override
    protected void setup(Context context) throws IOException,
        InterruptedException {
      super.setup(context);

      Configuration conf = context.getConfiguration();
      keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
      valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
    }

    @Override
    protected void map(
        NullWritable n1, NullWritable n2,
        Mapper<NullWritable, NullWritable,
               ImmutableBytesWritable, KeyValue>.Context context)
        throws java.io.IOException, InterruptedException {

      byte[] keyBytes = new byte[keyLength];
      byte[] valBytes = new byte[valLength];

      int taskId = context.getTaskAttemptID().getTaskID().getId();
      assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

      Random random = new Random();
      for (int i = 0; i < ROWSPERSPLIT; i++) {

        random.nextBytes(keyBytes);
        // Ensure that unique tasks generate unique keys
        keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
        random.nextBytes(valBytes);
        ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

        for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
          KeyValue kv = new KeyValue(keyBytes, family,
              PerformanceEvaluation.QUALIFIER_NAME, valBytes);
          context.write(key, kv);
        }
      }
    }
  }

  private void setupRandomGeneratorMapper(Job job) {
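    // NMapInputFormat is a test-only input format that produces a configurable
    // number of map tasks from no real input; each map invocation then
    // synthesizes ROWSPERSPLIT random rows, one KeyValue per family, via
    // RandomKVGeneratingMapper.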
    job.setInputFormatClass(NMapInputFormat.class);
    job.setMapperClass(RandomKVGeneratingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
  }

  /**
   * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if
   * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
   * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
   */
  @Test
  public void test_LATEST_TIMESTAMP_isReplaced()
      throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir =
        util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);
      final byte[] b = Bytes.toBytes("b");

      // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be
      // changed by the call to write. Check that everything in the kv is the
      // same except the ts.
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(original.getRow(), kv.getRow()));
      assertTrue(original.matchingColumn(kv.getFamily(), kv.getQualifier()));
      assertNotSame(original.getTimestamp(), kv.getTimestamp());
      assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());

      // Test 2. Now pass a kv that has an explicit ts. It should not be
      // changed by the call to record write.
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  private TaskAttemptContext createTestTaskAttemptContext(final Job job)
      throws Exception {
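    // HadoopShims hides the Hadoop-1 vs Hadoop-2 difference in how a
    // TaskAttemptContext is constructed (it became an interface in Hadoop 2),
    // so this test class does not depend on a particular Hadoop version.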
    HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
    TaskAttemptContext context = hadoop.createTestTaskAttemptContext(job,
        "attempt_200707121733_0001_m_000000_0");
    return context;
  }

  /*
   * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
   * metadata used by time-restricted scans.
   */
  @Test
  public void test_TIMERANGE() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir =
        util.getDataTestDir("test_TIMERANGE_present");
    LOG.info("Timerange dir writing to dir: " + dir);
    try {
      // build a record writer using HFileOutputFormat2
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // Pass two key values with explicit timestamps
      final byte[] b = Bytes.toBytes("b");

      // value 1 with timestamp 2000
      KeyValue kv = new KeyValue(b, b, b, 2000, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // value 2 with timestamp 1000
      kv = new KeyValue(b, b, b, 1000, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // verify that the file has the proper FileInfo.
      writer.close(context);

      // the generated file lives 1 directory down from the attempt directory
      // and is the only file, e.g.
      // _attempt__0000_r_000000_0/b/1979617994050536795
      FileSystem fs = FileSystem.get(conf);
      Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
      FileStatus[] sub1 = fs.listStatus(attemptDirectory);
      FileStatus[] file = fs.listStatus(sub1[0].getPath());

      // open as HFile Reader and pull out TIMERANGE FileInfo.
      HFile.Reader rd = HFile.createReader(fs, file[0].getPath(),
          new CacheConfig(conf));
      Map<byte[], byte[]> finfo = rd.loadFileInfo();
      byte[] range = finfo.get("TIMERANGE".getBytes());
      assertNotNull(range);

      // unmarshall and check values.
      TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
      Writables.copyWritable(range, timeRangeTracker);
      LOG.info(timeRangeTracker.getMinimumTimestamp() +
          "...." + timeRangeTracker.getMaximumTimestamp());
      assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
      assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
      rd.close();
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  /**
   * Run small MR job.
   */
  @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

    // Lower this value or we OOME in Eclipse.
    conf.setInt("io.sort.mb", 20);
    // Write a few files.
    conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job);
    // This partitioner doesn't work well for number keys but we use it anyway
    // just to demonstrate how to configure it.
    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte)0);
    Arrays.fill(endKey, (byte)0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
    // Set start and end rows for partitioner.
    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat2.class);
    job.setNumReduceTasks(4);
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus[] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }

  @Test
  public void testJobConfiguration() throws Exception {
    Job job = new Job(util.getConfiguration());
    job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
    HTable table = Mockito.mock(HTable.class);
    setupMockStartKeys(table);
    HFileOutputFormat2.configureIncrementalLoad(job, table);
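    // setupMockStartKeys supplies four region start keys, and
    // configureIncrementalLoad sizes the reduce phase to the region count,
    // so four reducers are expected here.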
    assertEquals(4, job.getNumReduceTasks());
  }

  private byte[][] generateRandomStartKeys(int numKeys) {
    Random random = new Random();
    byte[][] ret = new byte[numKeys][];
    // first region start key is always empty
    ret[0] = HConstants.EMPTY_BYTE_ARRAY;
    for (int i = 1; i < numKeys; i++) {
      ret[i] = PerformanceEvaluation.generateData(random, PerformanceEvaluation.ROW_LENGTH);
    }
    return ret;
  }

  @Test
  public void testMRIncrementalLoad() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoad\n");
    doIncrementalLoadTest(false);
  }

  @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
    doIncrementalLoadTest(true);
  }

  private void doIncrementalLoadTest(
      boolean shouldChangeRegions) throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    byte[][] startKeys = generateRandomStartKeys(5);
    HBaseAdmin admin = null;
    try {
      util.startMiniCluster();
      Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
      admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table",
          0, util.countRows(table));
      int numRegions = util.createMultiRegions(
          util.getConfiguration(), table, FAMILIES[0], startKeys);
      assertEquals("Should make 5 regions", 5, numRegions);

      // Generate the bulk load files
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table",
          0, util.countRows(table));

      // Make sure that a directory was created for every CF
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // handle the split case
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getTableName());
        while (util.getMiniHBaseCluster().getMaster().getAssignmentManager().
            getRegionStates().isRegionsInTransition()) {
          Threads.sleep(200);
          LOG.info("Waiting on table to finish disabling");
        }
        byte[][] newStartKeys = generateRandomStartKeys(15);
        util.createMultiRegions(
            util.getConfiguration(), table, FAMILIES[0], newStartKeys);
        admin.enableTable(table.getTableName());
        while (table.getRegionLocations().size() != 15 ||
            !admin.isTableAvailable(table.getTableName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
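      // doBulkLoad moves the generated HFiles into the regions' store
      // directories (splitting any file that straddles a region boundary),
      // so all of the mapper output now becomes visible to scans.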

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
      String tableDigestBefore = util.checksumRows(table);

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME.getName());
      assertEquals("Data should remain after reopening of regions",
          tableDigestBefore, util.checksumRows(table));
    } finally {
      if (admin != null) admin.close();
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  private void runIncrementalPELoad(
      Configuration conf, HTable table, Path outDir)
      throws Exception {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    setupRandomGeneratorMapper(job);
    HFileOutputFormat2.configureIncrementalLoad(job, table);
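    // configureIncrementalLoad installs a total-order partitioner over the
    // table's region start keys, picks KeyValueSortReducer because the map
    // output value class is KeyValue, and sets one reduce task per region;
    // the region-count assertion below relies on that.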
    FileOutputFormat.setOutputPath(job, outDir);

    assertFalse(util.getTestFileSystem().exists(outDir));

    assertEquals(table.getRegionLocations().size(), job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }

  /**
   * Test for
   * {@link HFileOutputFormat2#createFamilyCompressionMap(Configuration)}. Tests
   * that the compression map is correctly deserialized from configuration
   *
   * @throws IOException
   */
  @Test
  public void testCreateFamilyCompressionMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Compression.Algorithm> familyToCompression = getMockColumnFamilies(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamilies(table, familyToCompression);
      HFileOutputFormat2.configureCompression(table, conf);

      // read back family specific compression setting from the configuration
      Map<byte[], String> retrievedFamilyToCompressionMap =
          HFileOutputFormat2.createFamilyCompressionMap(conf);

      // test that we have a value for all column families that matches with the
      // used mock values
      for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
        assertEquals("Compression configuration incorrect for column family:" + entry.getKey(),
            entry.getValue().getName(),
            retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamilies(HTable table,
      Map<String, Compression.Algorithm> familyToCompression) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setCompressionType(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  private void setupMockStartKeys(HTable table) throws IOException {
    byte[][] mockKeys = new byte[][] {
        HConstants.EMPTY_BYTE_ARRAY,
        Bytes.toBytes("aaa"),
        Bytes.toBytes("ggg"),
        Bytes.toBytes("zzz")
    };
    Mockito.doReturn(mockKeys).when(table).getStartKeys();
  }

  /**
   * @return a map from column family names to compression algorithms for
   *         testing column family compression. Column family names have special characters
   */
  private Map<String, Compression.Algorithm> getMockColumnFamilies(int numCfs) {
    Map<String, Compression.Algorithm> familyToCompression =
        new HashMap<String, Compression.Algorithm>();
    // use column family names having special characters; keep the names
    // distinct so that later entries do not overwrite earlier ones in the map
    if (numCfs-- > 0) {
      familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family3=asdads&!AASD", Compression.Algorithm.GZ);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family4", Compression.Algorithm.NONE);
    }
    return familyToCompression;
  }

  /**
   * Test that {@link HFileOutputFormat2} RecordWriter uses compression settings
   * from the column family descriptor
   */
  @Test
  public void testColumnFamilyCompression() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir =
        util.getDataTestDirOnTestFS("testColumnFamilyCompression");

    HTable table = Mockito.mock(HTable.class);

    Map<String, Compression.Algorithm> configuredCompression =
        new HashMap<String, Compression.Algorithm>();
    Compression.Algorithm[] supportedAlgos = getSupportedCompressionAlgorithms();

    int familyIndex = 0;
    for (byte[] family : FAMILIES) {
      configuredCompression.put(Bytes.toString(family),
          supportedAlgos[familyIndex++ % supportedAlgos.length]);
    }
    setupMockColumnFamilies(table, configuredCompression);

    // set up the table to return some mock keys
    setupMockStartKeys(table);

    try {
      // partial map red setup to get an operational writer for testing
      // We turn off the sequence file compression, because DefaultCodec
      // pollutes the GZip codec pool with an incompatible compressor.
      conf.set("io.seqfile.compression.type", "NONE");
      Job job = new Job(conf, "testLocalMRIncrementalLoad");
      job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilyCompression"));
      setupRandomGeneratorMapper(job);
      HFileOutputFormat2.configureIncrementalLoad(job, table);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // write out random rows
      writeRandomKeyValues(writer, context, ROWSPERSPLIT);
      writer.close(context);

      // Make sure that a directory was created for every CF
      FileSystem fileSystem = dir.getFileSystem(conf);

      // commit so that the filesystem has one directory per column family
      hof.getOutputCommitter(context).commitTask(context);
      hof.getOutputCommitter(context).commitJob(context);
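      // Committing the task and then the job makes the FileOutputCommitter
      // promote the files out of the temporary attempt directory into the job
      // output directory, so the per-family directories now sit directly
      // under dir.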
      for (byte[] family : FAMILIES) {
        String familyStr = Bytes.toString(family);
        boolean found = false;
        for (FileStatus f : fileSystem.listStatus(dir)) {
          if (familyStr.equals(f.getPath().getName())) {
            // we found a matching directory
            found = true;

            // verify that the compression on this file matches the configured
            // compression
            Path dataFilePath = fileSystem.listStatus(f.getPath())[0].getPath();
            Reader reader = HFile.createReader(fileSystem, dataFilePath,
                new CacheConfig(conf));
            reader.loadFileInfo();
            assertEquals("Incorrect compression used for column family " + familyStr
                + " (reader: " + reader + ")",
                configuredCompression.get(familyStr), reader.getCompressionAlgorithm());
            break;
          }
        }

        if (!found) {
          fail("HFile for column family " + familyStr + " not found");
        }
      }

    } finally {
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  /**
   * @return the compression algorithms that are actually usable on this
   *         machine, i.e. those for which a compressor can be obtained.
   */
  private Compression.Algorithm[] getSupportedCompressionAlgorithms() {
    String[] allAlgos = HFile.getSupportedCompressionAlgorithms();
    List<Compression.Algorithm> supportedAlgos = Lists.newArrayList();

    for (String algoName : allAlgos) {
      try {
        Compression.Algorithm algo = Compression.getCompressionAlgorithmByName(algoName);
        algo.getCompressor();
        supportedAlgos.add(algo);
      } catch (Throwable t) {
        // this algo is not available
      }
    }

    return supportedAlgos.toArray(new Compression.Algorithm[0]);
  }

  /**
   * Write random values to the writer assuming a table created using
   * {@link #FAMILIES} as column family descriptors
   */
  private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
      TaskAttemptContext context, int numRows)
      throws IOException, InterruptedException {
    byte[] keyBytes = new byte[Bytes.SIZEOF_INT];
    int valLength = 10;
    byte[] valBytes = new byte[valLength];

    int taskId = context.getTaskAttemptID().getTaskID().getId();
    assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

    Random random = new Random();
    for (int i = 0; i < numRows; i++) {

      Bytes.putInt(keyBytes, 0, i);
      random.nextBytes(valBytes);
      ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

      for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
        KeyValue kv = new KeyValue(keyBytes, family,
            PerformanceEvaluation.QUALIFIER_NAME, valBytes);
        writer.write(key, kv);
      }
    }
  }

  /**
   * This test covers the scenario from HBASE-6901: all files are bulk loaded
   * and excluded from minor compaction. Without the fix for HBASE-6901, an
   * ArrayIndexOutOfBoundsException would be thrown.
   */
  @Ignore("Flakey: See HBASE-9051") @Test
  public void testExcludeAllFromMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath = HStore.getStoreHomedir(
          FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
          admin.getTableRegions(TABLE_NAME).get(0),
          FAMILIES[0]);
      assertEquals(0, fs.listStatus(storePath).length);

      // Generate two bulk load files
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
          true);
      util.startMiniMapReduceCluster();

      for (int i = 0; i < 2; i++) {
        Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
        runIncrementalPELoad(conf, table, testDir);
        // Perform the actual load
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
      }

      // Ensure data shows up
      int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // minor compactions shouldn't get rid of the file
      admin.compact(TABLE_NAME.getName());
      try {
        quickPoll(new Callable<Boolean>() {
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME.getName());
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  @Test
  public void testExcludeMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath = HStore.getStoreHomedir(
          FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
          admin.getTableRegions(TABLE_NAME).get(0),
          FAMILIES[0]);
      assertEquals(0, fs.listStatus(storePath).length);

      // put some data in it and flush to create a storefile
      Put p = new Put(Bytes.toBytes("test"));
      p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
      table.put(p);
      admin.flush(TABLE_NAME.getName());
      assertEquals(1, util.countRows(table));
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

      // Generate a bulk load file with more rows
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
          true);
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows + 1, util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // minor compactions shouldn't get rid of the file
      admin.compact(TABLE_NAME.getName());
      try {
        quickPoll(new Callable<Boolean>() {
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME.getName());
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
    int sleepMs = 10;
    int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
    while (retries-- > 0) {
      if (c.call().booleanValue()) {
        return;
      }
      Thread.sleep(sleepMs);
    }
    fail();
  }

  public static void main(String[] args) throws Exception {
    new TestHFileOutputFormat2().manualTest(args);
  }

  public void manualTest(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    util = new HBaseTestingUtility(conf);
    if ("newtable".equals(args[0])) {
      byte[] tname = args[1].getBytes();
      HTable table = util.createTable(tname, FAMILIES);
      HBaseAdmin admin = new HBaseAdmin(conf);
      admin.disableTable(tname);
      byte[][] startKeys = generateRandomStartKeys(5);
      util.createMultiRegions(conf, table, FAMILIES[0], startKeys);
      admin.enableTable(tname);
    } else if ("incremental".equals(args[0])) {
      byte[] tname = args[1].getBytes();
      HTable table = new HTable(conf, tname);
      Path outDir = new Path("incremental-out");
      runIncrementalPELoad(conf, table, outDir);
    } else {
      throw new RuntimeException(
          "usage: TestHFileOutputFormat2 newtable | incremental");
    }
  }

}