/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.concurrent.Callable;

import com.google.common.collect.Lists;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HadoopShims;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;

/**
 * Simple test for {@link KeyValueSortReducer} and {@link HFileOutputFormat}.
 * Sets up and runs a MapReduce job that writes HFile output.
 * Creates a few inner classes to implement splits and an input format that
 * emits keys and values like those of {@link PerformanceEvaluation}.
 */
@Category(LargeTests.class)
public class TestHFileOutputFormat {
  private static final int ROWSPERSPLIT = 1024;

  private static final byte[][] FAMILIES
    = { Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A"))
      , Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B"))};
  private static final TableName TABLE_NAME =
      TableName.valueOf("TestTable");

  private HBaseTestingUtility util = new HBaseTestingUtility();

  private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat.class);

  /**
   * Simple mapper that makes KeyValue output.
   */
  static class RandomKVGeneratingMapper
      extends Mapper<NullWritable, NullWritable, ImmutableBytesWritable, KeyValue> {

    private int keyLength;
    private static final int KEYLEN_DEFAULT = 10;
    private static final String KEYLEN_CONF = "randomkv.key.length";

    private int valLength;
    private static final int VALLEN_DEFAULT = 10;
    private static final String VALLEN_CONF = "randomkv.val.length";

    @Override
    protected void setup(Context context) throws IOException,
        InterruptedException {
      super.setup(context);

      Configuration conf = context.getConfiguration();
      keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
      valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
    }

    @Override
    protected void map(
        NullWritable n1, NullWritable n2,
        Mapper<NullWritable, NullWritable,
               ImmutableBytesWritable, KeyValue>.Context context)
        throws IOException, InterruptedException {

      byte[] keyBytes = new byte[keyLength];
      byte[] valBytes = new byte[valLength];

      int taskId = context.getTaskAttemptID().getTaskID().getId();
      assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

      Random random = new Random();
      for (int i = 0; i < ROWSPERSPLIT; i++) {

        random.nextBytes(keyBytes);
        // Ensure that unique tasks generate unique keys
        keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
        random.nextBytes(valBytes);
        ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

        for (byte[] family : TestHFileOutputFormat.FAMILIES) {
          KeyValue kv = new KeyValue(keyBytes, family,
              PerformanceEvaluation.QUALIFIER_NAME, valBytes);
          context.write(key, kv);
        }
      }
    }
  }

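  /**
   * Configures the given job to use {@link NMapInputFormat} and the
   * {@link RandomKVGeneratingMapper} above, emitting
   * {@link ImmutableBytesWritable}/{@link KeyValue} pairs.
   */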
  private void setupRandomGeneratorMapper(Job job) {
    job.setInputFormatClass(NMapInputFormat.class);
    job.setMapperClass(RandomKVGeneratingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
  }

  /**
   * Test that {@link HFileOutputFormat} RecordWriter amends timestamps if
   * passed a KeyValue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
   * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
   */
  @Test
  public void test_LATEST_TIMESTAMP_isReplaced() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
    TaskAttemptContext context = null;
    Path dir =
      util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat hof = new HFileOutputFormat();
      writer = hof.getRecordWriter(context);
      final byte [] b = Bytes.toBytes("b");

      // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be
      // changed by the call to write. Check that everything in the kv is the
      // same except the timestamp.
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(original.getRow(), kv.getRow()));
      assertTrue(original.matchingColumn(kv.getFamily(), kv.getQualifier()));
      // Compare the timestamp values directly; assertNotSame on autoboxed
      // longs only checks reference inequality and would pass vacuously.
      assertTrue(original.getTimestamp() != kv.getTimestamp());
      assertTrue(HConstants.LATEST_TIMESTAMP != kv.getTimestamp());

      // Test 2. Now pass a kv that has an explicit ts. It should not be
      // changed by the call to write.
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

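  /**
   * Creates a {@link TaskAttemptContext} for a fixed attempt id via the
   * Hadoop compatibility shim, so the same test code runs against different
   * Hadoop versions.
   */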
  private TaskAttemptContext createTestTaskAttemptContext(final Job job)
      throws Exception {
    HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
    TaskAttemptContext context = hadoop.createTestTaskAttemptContext(job, "attempt_200707121733_0001_m_000000_0");
    return context;
  }

  /**
   * Test that {@link HFileOutputFormat} creates an HFile with TIMERANGE
   * metadata used by time-restricted scans.
   */
  @Test
  public void test_TIMERANGE() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
    TaskAttemptContext context = null;
    Path dir =
      util.getDataTestDir("test_TIMERANGE_present");
    LOG.info("Writing test_TIMERANGE output to dir: " + dir);
    try {
      // build a record writer using HFileOutputFormat
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat hof = new HFileOutputFormat();
      writer = hof.getRecordWriter(context);

      // Pass two key values with explicit timestamps
      final byte [] b = Bytes.toBytes("b");

      // value 1 with timestamp 2000
      KeyValue kv = new KeyValue(b, b, b, 2000, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // value 2 with timestamp 1000
      kv = new KeyValue(b, b, b, 1000, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // verify that the file has the proper FileInfo.
      writer.close(context);

      // the generated file lives 1 directory down from the attempt directory
      // and is the only file, e.g.
      // _attempt__0000_r_000000_0/b/1979617994050536795
      FileSystem fs = FileSystem.get(conf);
      Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
      FileStatus[] sub1 = fs.listStatus(attemptDirectory);
      FileStatus[] file = fs.listStatus(sub1[0].getPath());

      // open as HFile Reader and pull out TIMERANGE FileInfo.
      HFile.Reader rd = HFile.createReader(fs, file[0].getPath(),
          new CacheConfig(conf));
      Map<byte[], byte[]> finfo = rd.loadFileInfo();
      byte[] range = finfo.get("TIMERANGE".getBytes());
      assertNotNull(range);

      // unmarshal and check values.
      TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
      Writables.copyWritable(range, timeRangeTracker);
      LOG.info(timeRangeTracker.getMinimumTimestamp() +
          "...." + timeRangeTracker.getMaximumTimestamp());
      assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
      assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
      rd.close();
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  /**
   * Run a small MR job.
   */
  @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

    // Lower this value or we OOME in Eclipse.
    conf.setInt("io.sort.mb", 20);
    // Write a few files.
    conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job);
    // This partitioner doesn't work well for number keys, but we use it anyway
    // just to demonstrate how to configure it.
    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte)0);
    Arrays.fill(endKey, (byte)0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
    // Set start and end rows for partitioner.
    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setNumReduceTasks(4);
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus[] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }

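  /**
   * Verifies that configureIncrementalLoad sets the number of reduce tasks
   * to match the number of regions of the (mocked) table.
   */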
  @Test
  public void testJobConfiguration() throws Exception {
    Job job = new Job(util.getConfiguration());
    job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
    HTable table = Mockito.mock(HTable.class);
    setupMockStartKeys(table);
    HFileOutputFormat.configureIncrementalLoad(job, table);
    assertEquals(4, job.getNumReduceTasks());
  }

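  /**
   * @return numKeys random region start keys; the first start key is always
   *         the empty byte array
   */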
  private byte[][] generateRandomStartKeys(int numKeys) {
    Random random = new Random();
    byte[][] ret = new byte[numKeys][];
    // first region start key is always empty
    ret[0] = HConstants.EMPTY_BYTE_ARRAY;
    for (int i = 1; i < numKeys; i++) {
      ret[i] = PerformanceEvaluation.generateValue(random);
    }
    return ret;
  }

  @Test
  public void testMRIncrementalLoad() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoad\n");
    doIncrementalLoadTest(false);
  }

  @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
    doIncrementalLoadTest(true);
  }

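  /**
   * Runs a full incremental-load round trip: generate HFiles with a MapReduce
   * job, bulk load them with {@link LoadIncrementalHFiles}, and verify the
   * table contents survive a region reopen. If shouldChangeRegions is true,
   * the table's regions are changed between writing the HFiles and loading
   * them.
   */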
  private void doIncrementalLoadTest(
      boolean shouldChangeRegions) throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    byte[][] startKeys = generateRandomStartKeys(5);
    HBaseAdmin admin = null;
    try {
      util.startMiniCluster();
      Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
      admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table",
          0, util.countRows(table));
      int numRegions = util.createMultiRegions(
          util.getConfiguration(), table, FAMILIES[0], startKeys);
      assertEquals("Should make 5 regions", 5, numRegions);

      // Generate the bulk load files
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table",
          0, util.countRows(table));

      // Make sure that a directory was created for every CF
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // handle the split case
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getTableName());
        while (util.getMiniHBaseCluster().getMaster().getAssignmentManager().
            getRegionStates().isRegionsInTransition()) {
          Threads.sleep(200);
          LOG.info("Waiting on table to finish disabling");
        }
        byte[][] newStartKeys = generateRandomStartKeys(15);
        util.createMultiRegions(
            util.getConfiguration(), table, FAMILIES[0], newStartKeys);
        admin.enableTable(table.getTableName());
        while (table.getRegionLocations().size() != 15 ||
            !admin.isTableAvailable(table.getTableName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
      String tableDigestBefore = util.checksumRows(table);

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME.getName());
      assertEquals("Data should remain after reopening of regions",
          tableDigestBefore, util.checksumRows(table));
    } finally {
      if (admin != null) admin.close();
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

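  /**
   * Runs a MapReduce job that writes HFiles for the given table into outDir
   * using {@link HFileOutputFormat#configureIncrementalLoad}.
   */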
  private void runIncrementalPELoad(
      Configuration conf, HTable table, Path outDir) throws Exception {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    setupRandomGeneratorMapper(job);
    HFileOutputFormat.configureIncrementalLoad(job, table);
    FileOutputFormat.setOutputPath(job, outDir);

    assertFalse(util.getTestFileSystem().exists(outDir));

    assertEquals(table.getRegionLocations().size(), job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }

  /**
   * Test for
   * {@link HFileOutputFormat#createFamilyCompressionMap(Configuration)}. Tests
   * that the compression map is correctly deserialized from the configuration.
   *
   * @throws IOException
   */
  @Test
  public void testCreateFamilyCompressionMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Compression.Algorithm> familyToCompression = getMockColumnFamilies(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamilies(table, familyToCompression);
      HFileOutputFormat.configureCompression(table, conf);

      // read back the family-specific compression setting from the configuration
      Map<byte[], String> retrievedFamilyToCompressionMap = HFileOutputFormat.createFamilyCompressionMap(conf);

      // test that we have a value for all column families that matches the
      // mock values used above
      for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
        assertEquals("Compression configuration incorrect for column family: " + entry.getKey(),
            entry.getValue().getName(),
            retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
      }
    }
  }

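  /**
   * Makes the mocked table return an {@link HTableDescriptor} whose column
   * families carry the given compression settings.
   */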
  private void setupMockColumnFamilies(HTable table,
      Map<String, Compression.Algorithm> familyToCompression) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setCompressionType(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

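  /**
   * Makes the mocked table return a fixed set of start keys, implying four
   * regions.
   */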
  private void setupMockStartKeys(HTable table) throws IOException {
    byte[][] mockKeys = new byte[][] {
        HConstants.EMPTY_BYTE_ARRAY,
        Bytes.toBytes("aaa"),
        Bytes.toBytes("ggg"),
        Bytes.toBytes("zzz")
    };
    Mockito.doReturn(mockKeys).when(table).getStartKeys();
  }

  /**
   * @return a map from column family names to compression algorithms for
   *         testing column family compression. Column family names have
   *         special characters.
   */
  private Map<String, Compression.Algorithm> getMockColumnFamilies(int numCfs) {
    Map<String, Compression.Algorithm> familyToCompression = new HashMap<String, Compression.Algorithm>();
    // use column family names having special characters; each branch adds a
    // distinct family so the map size matches numCfs
    if (numCfs-- > 0) {
      familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family3=asdads&!AASD", Compression.Algorithm.GZ);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family4", Compression.Algorithm.NONE);
    }
    return familyToCompression;
  }

  /**
   * Test that the {@link HFileOutputFormat} RecordWriter uses compression
   * settings from the column family descriptor.
   */
  @Test
  public void testColumnFamilyCompression() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
    TaskAttemptContext context = null;
    Path dir =
        util.getDataTestDirOnTestFS("testColumnFamilyCompression");

    HTable table = Mockito.mock(HTable.class);

    Map<String, Compression.Algorithm> configuredCompression =
      new HashMap<String, Compression.Algorithm>();
    Compression.Algorithm[] supportedAlgos = getSupportedCompressionAlgorithms();

    int familyIndex = 0;
    for (byte[] family : FAMILIES) {
      configuredCompression.put(Bytes.toString(family),
                                supportedAlgos[familyIndex++ % supportedAlgos.length]);
    }
    setupMockColumnFamilies(table, configuredCompression);

    // set up the table to return some mock keys
    setupMockStartKeys(table);

    try {
      // partial MapReduce setup to get an operational writer for testing.
      // We turn off sequence file compression, because DefaultCodec
      // pollutes the GZip codec pool with an incompatible compressor.
      conf.set("io.seqfile.compression.type", "NONE");
      Job job = new Job(conf, "testLocalMRIncrementalLoad");
      job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilyCompression"));
      setupRandomGeneratorMapper(job);
      HFileOutputFormat.configureIncrementalLoad(job, table);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat hof = new HFileOutputFormat();
      writer = hof.getRecordWriter(context);

      // write out random rows
      writeRandomKeyValues(writer, context, ROWSPERSPLIT);
      writer.close(context);

      // Make sure that a directory was created for every CF
      FileSystem fileSystem = dir.getFileSystem(conf);

      // commit so that the filesystem has one directory per column family
      hof.getOutputCommitter(context).commitTask(context);
      hof.getOutputCommitter(context).commitJob(context);
      for (byte[] family : FAMILIES) {
        String familyStr = Bytes.toString(family);
        boolean found = false;
        for (FileStatus f : fileSystem.listStatus(dir)) {

          if (familyStr.equals(f.getPath().getName())) {
            // we found a matching directory
            found = true;

            // verify that the compression on this file matches the configured
            // compression
            Path dataFilePath = fileSystem.listStatus(f.getPath())[0].getPath();
            Reader reader = HFile.createReader(fileSystem, dataFilePath,
                new CacheConfig(conf));
            reader.loadFileInfo();
            assertEquals("Incorrect compression used for column family " + familyStr
                         + " (reader: " + reader + ")",
                         configuredCompression.get(familyStr), reader.getCompressionAlgorithm());
            break;
          }
        }

        if (!found) {
          fail("HFile for column family " + familyStr + " not found");
        }
      }

    } finally {
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  /**
   * @return the compression algorithms that are actually usable in this
   *         environment, i.e. those for which a compressor can be obtained
   */
  private Compression.Algorithm[] getSupportedCompressionAlgorithms() {
    String[] allAlgos = HFile.getSupportedCompressionAlgorithms();
    List<Compression.Algorithm> supportedAlgos = Lists.newArrayList();

    for (String algoName : allAlgos) {
      try {
        Compression.Algorithm algo = Compression.getCompressionAlgorithmByName(algoName);
        algo.getCompressor();
        supportedAlgos.add(algo);
      } catch (Throwable t) {
        // this algo is not available
      }
    }

    return supportedAlgos.toArray(new Compression.Algorithm[0]);
  }

  /**
   * Write random values to the writer assuming a table created using
   * {@link #FAMILIES} as column family descriptors.
   */
  private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, KeyValue> writer,
      TaskAttemptContext context, int numRows)
      throws IOException, InterruptedException {
    byte[] keyBytes = new byte[Bytes.SIZEOF_INT];
    int valLength = 10;
    byte[] valBytes = new byte[valLength];

    int taskId = context.getTaskAttemptID().getTaskID().getId();
    assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

    Random random = new Random();
    for (int i = 0; i < numRows; i++) {

      Bytes.putInt(keyBytes, 0, i);
      random.nextBytes(valBytes);
      ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

      for (byte[] family : TestHFileOutputFormat.FAMILIES) {
        KeyValue kv = new KeyValue(keyBytes, family,
            PerformanceEvaluation.QUALIFIER_NAME, valBytes);
        writer.write(key, kv);
      }
    }
  }

  /**
   * Covers the scenario reported in HBASE-6901: all files are bulk loaded
   * and excluded from minor compaction. Without the fix for HBASE-6901, an
   * ArrayIndexOutOfBoundsException would be thrown.
   */
  @Ignore("Flakey: See HBASE-9051") @Test
  public void testExcludeAllFromMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath = HStore.getStoreHomedir(
          FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
          admin.getTableRegions(TABLE_NAME).get(0),
          FAMILIES[0]);
      assertEquals(0, fs.listStatus(storePath).length);

      // Generate two bulk load files
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
          true);
      util.startMiniMapReduceCluster();

      for (int i = 0; i < 2; i++) {
        Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
        runIncrementalPELoad(conf, table, testDir);
        // Perform the actual load
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
      }

      // Ensure data shows up
      int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // minor compactions shouldn't get rid of the file
      admin.compact(TABLE_NAME.getName());
      try {
        quickPoll(new Callable<Boolean>() {
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME.getName());
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

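  /**
   * Like the test above, but the table already has one store file written
   * through the normal write path, so only the bulk loaded files are excluded
   * from minor compaction.
   */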
  @Test
  public void testExcludeMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath = HStore.getStoreHomedir(
          FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
          admin.getTableRegions(TABLE_NAME).get(0),
          FAMILIES[0]);
      assertEquals(0, fs.listStatus(storePath).length);

      // put some data in it and flush to create a storefile
      Put p = new Put(Bytes.toBytes("test"));
      p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
      table.put(p);
      admin.flush(TABLE_NAME.getName());
      assertEquals(1, util.countRows(table));
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

      // Generate a bulk load file with more rows
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
          true);
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows + 1, util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // minor compactions shouldn't get rid of the file
      admin.compact(TABLE_NAME.getName());
      try {
        quickPoll(new Callable<Boolean>() {
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME.getName());
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

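  /**
   * Polls the callable every 10ms until it returns true or waitMs elapses;
   * fails the test if the condition never becomes true.
   */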
  private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
    int sleepMs = 10;
    int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
    while (retries-- > 0) {
      if (c.call().booleanValue()) {
        return;
      }
      Thread.sleep(sleepMs);
    }
    fail();
  }

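  /**
   * Entry point for running {@link #manualTest} against a real cluster from
   * the command line.
   */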
  public static void main(String[] args) throws Exception {
    new TestHFileOutputFormat().manualTest(args);
  }

  public void manualTest(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    util = new HBaseTestingUtility(conf);
    if ("newtable".equals(args[0])) {
      byte[] tname = args[1].getBytes();
      HTable table = util.createTable(tname, FAMILIES);
      HBaseAdmin admin = new HBaseAdmin(conf);
      admin.disableTable(tname);
      byte[][] startKeys = generateRandomStartKeys(5);
      util.createMultiRegions(conf, table, FAMILIES[0], startKeys);
      admin.enableTable(tname);
    } else if ("incremental".equals(args[0])) {
      byte[] tname = args[1].getBytes();
      HTable table = new HTable(conf, tname);
      Path outDir = new Path("incremental-out");
      runIncrementalPELoad(conf, table, outDir);
    } else {
      throw new RuntimeException(
          "usage: TestHFileOutputFormat newtable | incremental");
    }
  }

}