1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertNotSame;
25  import static org.junit.Assert.assertTrue;
26  import static org.junit.Assert.fail;
27  
28  import java.io.IOException;
29  import java.util.Arrays;
30  import java.util.HashMap;
31  import java.util.List;
32  import java.util.Map;
33  import java.util.Map.Entry;
34  import java.util.Random;
36  import java.util.concurrent.Callable;
37  
38  import com.google.common.collect.Lists;
39  
42  import org.apache.commons.logging.Log;
43  import org.apache.commons.logging.LogFactory;
44  import org.apache.hadoop.conf.Configuration;
45  import org.apache.hadoop.fs.FileStatus;
46  import org.apache.hadoop.fs.FileSystem;
47  import org.apache.hadoop.fs.Path;
48  import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
49  import org.apache.hadoop.hbase.TableName;
50  import org.apache.hadoop.hbase.HBaseConfiguration;
51  import org.apache.hadoop.hbase.HBaseTestingUtility;
52  import org.apache.hadoop.hbase.HColumnDescriptor;
53  import org.apache.hadoop.hbase.HConstants;
54  import org.apache.hadoop.hbase.HTableDescriptor;
55  import org.apache.hadoop.hbase.HadoopShims;
56  import org.apache.hadoop.hbase.KeyValue;
57  import org.apache.hadoop.hbase.LargeTests;
58  import org.apache.hadoop.hbase.PerformanceEvaluation;
59  import org.apache.hadoop.hbase.client.HBaseAdmin;
60  import org.apache.hadoop.hbase.client.HTable;
61  import org.apache.hadoop.hbase.client.Put;
62  import org.apache.hadoop.hbase.client.Result;
63  import org.apache.hadoop.hbase.client.ResultScanner;
64  import org.apache.hadoop.hbase.client.Scan;
65  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
66  import org.apache.hadoop.hbase.io.compress.Compression;
67  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
68  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
69  import org.apache.hadoop.hbase.io.hfile.HFile;
70  import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
71  import org.apache.hadoop.hbase.regionserver.HStore;
72  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
73  import org.apache.hadoop.hbase.util.Bytes;
74  import org.apache.hadoop.hbase.util.FSUtils;
75  import org.apache.hadoop.hbase.util.Threads;
76  import org.apache.hadoop.hbase.util.Writables;
77  import org.apache.hadoop.io.NullWritable;
78  import org.apache.hadoop.mapreduce.Job;
79  import org.apache.hadoop.mapreduce.Mapper;
80  import org.apache.hadoop.mapreduce.RecordWriter;
81  import org.apache.hadoop.mapreduce.TaskAttemptContext;
82  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
83  import org.junit.Ignore;
84  import org.junit.Test;
85  import org.junit.experimental.categories.Category;
86  import org.mockito.Mockito;
87  
88  /**
89   * Simple test for {@link KeyValueSortReducer} and {@link HFileOutputFormat}.
90   * Sets up and runs a mapreduce job that writes hfile output.
91   * Uses {@link RandomKVGeneratingMapper} with {@link NMapInputFormat} to emit
92   * keys and values like those of {@link PerformanceEvaluation}.
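 * <p>For orientation, a bulk-load driver typically wires {@link HFileOutputFormat}
 * up roughly as follows (a sketch based only on the calls exercised in this test;
 * {@code MyMapper}, {@code table} and {@code outDir} are placeholders):
 * <pre>{@code
 *   Job job = new Job(conf);
 *   job.setMapperClass(MyMapper.class);                      // emits (ImmutableBytesWritable, KeyValue)
 *   job.setMapOutputKeyClass(ImmutableBytesWritable.class);
 *   job.setMapOutputValueClass(KeyValue.class);
 *   HFileOutputFormat.configureIncrementalLoad(job, table);  // reducer, partitioner, #reducers
 *   FileOutputFormat.setOutputPath(job, outDir);
 *   job.waitForCompletion(true);
 *   new LoadIncrementalHFiles(conf).doBulkLoad(outDir, table); // move hfiles into the table
 * }</pre>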
93   */
94  @Category(LargeTests.class)
95  public class TestHFileOutputFormat  {
96    private final static int ROWSPERSPLIT = 1024;
97  
98    private static final byte[][] FAMILIES
99      = { Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A"))
100       , Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B"))};
101   private static final TableName TABLE_NAME =
102       TableName.valueOf("TestTable");
103 
104   private HBaseTestingUtility util = new HBaseTestingUtility();
105 
106   private static Log LOG = LogFactory.getLog(TestHFileOutputFormat.class);
107 
108   /**
109    * Simple mapper that makes KeyValue output.
110    */
111   static class RandomKVGeneratingMapper
112   extends Mapper<NullWritable, NullWritable,
113                  ImmutableBytesWritable, KeyValue> {
114 
115     private int keyLength;
116     private static final int KEYLEN_DEFAULT=10;
117     private static final String KEYLEN_CONF="randomkv.key.length";
118 
119     private int valLength;
120     private static final int VALLEN_DEFAULT=10;
121     private static final String VALLEN_CONF="randomkv.val.length";
122 
123     @Override
124     protected void setup(Context context) throws IOException,
125         InterruptedException {
126       super.setup(context);
127 
128       Configuration conf = context.getConfiguration();
129       keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
130       valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
131     }
132 
133     protected void map(
134         NullWritable n1, NullWritable n2,
135         Mapper<NullWritable, NullWritable,
136                ImmutableBytesWritable,KeyValue>.Context context)
137         throws IOException, InterruptedException
138     {
139 
140       byte keyBytes[] = new byte[keyLength];
141       byte valBytes[] = new byte[valLength];
142 
143       int taskId = context.getTaskAttemptID().getTaskID().getId();
144       assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
145 
146       Random random = new Random();
147       for (int i = 0; i < ROWSPERSPLIT; i++) {
148 
149         random.nextBytes(keyBytes);
150         // Ensure that unique tasks generate unique keys
151         keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
152         random.nextBytes(valBytes);
153         ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
154 
155         for (byte[] family : TestHFileOutputFormat.FAMILIES) {
156           KeyValue kv = new KeyValue(keyBytes, family,
157               PerformanceEvaluation.QUALIFIER_NAME, valBytes);
158           context.write(key, kv);
159         }
160       }
161     }
162   }
163 
164   private void setupRandomGeneratorMapper(Job job) {
165     job.setInputFormatClass(NMapInputFormat.class);
166     job.setMapperClass(RandomKVGeneratingMapper.class);
167     job.setMapOutputKeyClass(ImmutableBytesWritable.class);
168     job.setMapOutputValueClass(KeyValue.class);
169   }
170 
171   /**
172    * Test that {@link HFileOutputFormat} RecordWriter amends timestamps if
173    * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
174    * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
175    */
176   @Test
177   public void test_LATEST_TIMESTAMP_isReplaced()
178   throws Exception {
179     Configuration conf = new Configuration(this.util.getConfiguration());
180     RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
181     TaskAttemptContext context = null;
182     Path dir =
183       util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
184     try {
185       Job job = new Job(conf);
186       FileOutputFormat.setOutputPath(job, dir);
187       context = createTestTaskAttemptContext(job);
188       HFileOutputFormat hof = new HFileOutputFormat();
189       writer = hof.getRecordWriter(context);
190       final byte [] b = Bytes.toBytes("b");
191 
192       // Test 1.  Pass a KV that has a ts of LATEST_TIMESTAMP.  It should be
193       // changed by the call to write.  Check everything in the kv is unchanged except the ts.
194       KeyValue kv = new KeyValue(b, b, b);
195       KeyValue original = kv.clone();
196       writer.write(new ImmutableBytesWritable(), kv);
197       assertFalse(original.equals(kv));
198       assertTrue(Bytes.equals(original.getRow(), kv.getRow()));
199       assertTrue(original.matchingColumn(kv.getFamily(), kv.getQualifier()));
200       assertTrue(original.getTimestamp() != kv.getTimestamp());
201       assertTrue(HConstants.LATEST_TIMESTAMP != kv.getTimestamp());
202 
203       // Test 2. Now test passing a kv that has explicit ts.  It should not be
204       // changed by call to record write.
205       kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
206       original = kv.clone();
207       writer.write(new ImmutableBytesWritable(), kv);
208       assertTrue(original.equals(kv));
209     } finally {
210       if (writer != null && context != null) writer.close(context);
211       dir.getFileSystem(conf).delete(dir, true);
212     }
213   }
214 
215   private TaskAttemptContext createTestTaskAttemptContext(final Job job)
216   throws Exception {
217     HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
218     TaskAttemptContext context = hadoop.createTestTaskAttemptContext(job, "attempt_200707121733_0001_m_000000_0");
219     return context;
220   }
221 
222   /**
223    * Test that {@link HFileOutputFormat} creates an HFile with TIMERANGE
224    * metadata used by time-restricted scans.
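   * <p>For example, a time-restricted client scan (a sketch, not part of this test)
   * lets the server skip store files whose TIMERANGE does not overlap the request:
   * <pre>{@code
   *   Scan scan = new Scan();
   *   scan.setTimeRange(1000, 2001); // [1000, 2001) covers the two timestamps written below
   * }</pre>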
225    */
226   @Test
227   public void test_TIMERANGE() throws Exception {
228     Configuration conf = new Configuration(this.util.getConfiguration());
229     RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
230     TaskAttemptContext context = null;
231     Path dir =
232       util.getDataTestDir("test_TIMERANGE_present");
233     LOG.info("Timerange dir writing to dir: "+ dir);
234     try {
235       // build a record writer using HFileOutputFormat
236       Job job = new Job(conf);
237       FileOutputFormat.setOutputPath(job, dir);
238       context = createTestTaskAttemptContext(job);
239       HFileOutputFormat hof = new HFileOutputFormat();
240       writer = hof.getRecordWriter(context);
241 
242       // Pass two key values with explicit timestamps
243       final byte [] b = Bytes.toBytes("b");
244 
245       // value 1 with timestamp 2000
246       KeyValue kv = new KeyValue(b, b, b, 2000, b);
247       KeyValue original = kv.clone();
248       writer.write(new ImmutableBytesWritable(), kv);
249       assertEquals(original,kv);
250 
251       // value 2 with timestamp 1000
252       kv = new KeyValue(b, b, b, 1000, b);
253       original = kv.clone();
254       writer.write(new ImmutableBytesWritable(), kv);
255       assertEquals(original, kv);
256 
257       // verify that the file has the proper FileInfo.
258       writer.close(context);
259 
260       // the generated file lives 1 directory down from the attempt directory
261       // and is the only file, e.g.
262       // _attempt__0000_r_000000_0/b/1979617994050536795
263       FileSystem fs = FileSystem.get(conf);
264       Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
265       FileStatus[] sub1 = fs.listStatus(attemptDirectory);
266       FileStatus[] file = fs.listStatus(sub1[0].getPath());
267 
268       // open as HFile Reader and pull out TIMERANGE FileInfo.
269       HFile.Reader rd = HFile.createReader(fs, file[0].getPath(),
270           new CacheConfig(conf));
271       Map<byte[],byte[]> finfo = rd.loadFileInfo();
272       byte[] range = finfo.get(Bytes.toBytes("TIMERANGE"));
273       assertNotNull(range);
274 
275       // unmarshall and check values.
276       TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
277       Writables.copyWritable(range, timeRangeTracker);
278       LOG.info(timeRangeTracker.getMinimumTimestamp() +
279           "...." + timeRangeTracker.getMaximumTimestamp());
280       assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
281       assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
282       rd.close();
283     } finally {
284       if (writer != null && context != null) writer.close(context);
285       dir.getFileSystem(conf).delete(dir, true);
286     }
287   }
288 
289   /**
290    * Run small MR job.
291    */
292   @Test
293   public void testWritingPEData() throws Exception {
294     Configuration conf = util.getConfiguration();
295     Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
296     FileSystem fs = testDir.getFileSystem(conf);
297 
298     // Lower this value or we OOME in Eclipse.
299     conf.setInt("io.sort.mb", 20);
300     // Keep the region max file size small so the writer rolls over and produces several hfiles.
301     conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);
302 
303     Job job = new Job(conf, "testWritingPEData");
304     setupRandomGeneratorMapper(job);
305     // This partitioner doesn't work well for number keys, but we use it anyway
306     // just to demonstrate how to configure it.
307     byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
308     byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
309 
310     Arrays.fill(startKey, (byte)0);
311     Arrays.fill(endKey, (byte)0xff);
312 
313     job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
314     // Set start and end rows for partitioner.
315     SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
316     SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
317     job.setReducerClass(KeyValueSortReducer.class);
318     job.setOutputFormatClass(HFileOutputFormat.class);
319     job.setNumReduceTasks(4);
320     job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
321         MutationSerialization.class.getName(), ResultSerialization.class.getName(),
322         KeyValueSerialization.class.getName());
323 
324     FileOutputFormat.setOutputPath(job, testDir);
325     assertTrue(job.waitForCompletion(false));
326     FileStatus [] files = fs.listStatus(testDir);
327     assertTrue(files.length > 0);
328   }
329 
330   @Test
331   public void testJobConfiguration() throws Exception {
332     Job job = new Job(util.getConfiguration());
333     job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
334     HTable table = Mockito.mock(HTable.class);
335     setupMockStartKeys(table);
336     HFileOutputFormat.configureIncrementalLoad(job, table);
337     assertEquals(4, job.getNumReduceTasks()); // one reduce task per mocked region start key
338   }
339 
340   private byte [][] generateRandomStartKeys(int numKeys) {
341     Random random = new Random();
342     byte[][] ret = new byte[numKeys][];
343     // first region start key is always empty
344     ret[0] = HConstants.EMPTY_BYTE_ARRAY;
345     for (int i = 1; i < numKeys; i++) {
346       ret[i] = PerformanceEvaluation.generateValue(random);
347     }
348     return ret;
349   }
350 
351   @Test
352   public void testMRIncrementalLoad() throws Exception {
353     LOG.info("\nStarting test testMRIncrementalLoad\n");
354     doIncrementalLoadTest(false);
355   }
356 
357   @Test
358   public void testMRIncrementalLoadWithSplit() throws Exception {
359     LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
360     doIncrementalLoadTest(true);
361   }
362 
363   private void doIncrementalLoadTest(
364       boolean shouldChangeRegions) throws Exception {
365     util = new HBaseTestingUtility();
366     Configuration conf = util.getConfiguration();
367     byte[][] startKeys = generateRandomStartKeys(5);
368     HBaseAdmin admin = null;
369     try {
370       util.startMiniCluster();
371       Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
372       admin = new HBaseAdmin(conf);
373       HTable table = util.createTable(TABLE_NAME, FAMILIES);
374       assertEquals("Should start with empty table",
375           0, util.countRows(table));
376       int numRegions = util.createMultiRegions(
377           util.getConfiguration(), table, FAMILIES[0], startKeys);
378       assertEquals("Should make 5 regions", numRegions, 5);
379 
380       // Generate the bulk load files
381       util.startMiniMapReduceCluster();
382       runIncrementalPELoad(conf, table, testDir);
383       // This doesn't write into the table, just makes files
384       assertEquals("HFOF should not touch actual table",
385           0, util.countRows(table));
386 
387 
388       // Make sure that a directory was created for every CF
389       int dir = 0;
390       for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
391         for (byte[] family : FAMILIES) {
392           if (Bytes.toString(family).equals(f.getPath().getName())) {
393             ++dir;
394           }
395         }
396       }
397       assertEquals("Column family not found in FS.", FAMILIES.length, dir);
398 
399       // handle the split case
400       if (shouldChangeRegions) {
401         LOG.info("Changing regions in table");
402         admin.disableTable(table.getTableName());
403         while(util.getMiniHBaseCluster().getMaster().getAssignmentManager().
404             getRegionStates().isRegionsInTransition()) {
405           Threads.sleep(200);
406           LOG.info("Waiting on table to finish disabling");
407         }
408         byte[][] newStartKeys = generateRandomStartKeys(15);
409         util.createMultiRegions(
410             util.getConfiguration(), table, FAMILIES[0], newStartKeys);
411         admin.enableTable(table.getTableName());
412         while (table.getRegionLocations().size() != 15 ||
413             !admin.isTableAvailable(table.getTableName())) {
414           Thread.sleep(200);
415           LOG.info("Waiting for new region assignment to happen");
416         }
417       }
418 
419       // Perform the actual load
420       new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
421 
422       // Ensure data shows up
423       int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
424       assertEquals("LoadIncrementalHFiles should put expected data in table",
425           expectedRows, util.countRows(table));
426       Scan scan = new Scan();
427       ResultScanner results = table.getScanner(scan);
428       for (Result res : results) {
429         assertEquals(FAMILIES.length, res.raw().length);
430         KeyValue first = res.raw()[0];
431         for (KeyValue kv : res.raw()) {
432           assertTrue(KeyValue.COMPARATOR.matchingRows(first, kv));
433           assertTrue(Bytes.equals(first.getValue(), kv.getValue()));
434         }
435       }
436       results.close();
437       String tableDigestBefore = util.checksumRows(table);
438 
439       // Cause regions to reopen
440       admin.disableTable(TABLE_NAME);
441       while (!admin.isTableDisabled(TABLE_NAME)) {
442         Thread.sleep(200);
443         LOG.info("Waiting for table to disable");
444       }
445       admin.enableTable(TABLE_NAME);
446       util.waitTableAvailable(TABLE_NAME.getName());
447       assertEquals("Data should remain after reopening of regions",
448           tableDigestBefore, util.checksumRows(table));
449     } finally {
450       if (admin != null) admin.close();
451       util.shutdownMiniMapReduceCluster();
452       util.shutdownMiniCluster();
453     }
454   }
455 
456   private void runIncrementalPELoad(
457       Configuration conf, HTable table, Path outDir)
458   throws Exception {
459     Job job = new Job(conf, "testLocalMRIncrementalLoad");
460     job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
461     job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
462         MutationSerialization.class.getName(), ResultSerialization.class.getName(),
463         KeyValueSerialization.class.getName());
464     setupRandomGeneratorMapper(job);
465     HFileOutputFormat.configureIncrementalLoad(job, table);
466     FileOutputFormat.setOutputPath(job, outDir);
467 
468     assertFalse(util.getTestFileSystem().exists(outDir));
469 
470     assertEquals(table.getRegionLocations().size(), job.getNumReduceTasks());
471 
472     assertTrue(job.waitForCompletion(true));
473   }
474 
475   /**
476    * Test for
477    * {@link HFileOutputFormat#createFamilyCompressionMap(Configuration)}. Tests
478    * that the compression map is correctly deserialized from configuration
479    *
480    * @throws IOException
481    */
482   @Test
483   public void testCreateFamilyCompressionMap() throws IOException {
484     for (int numCfs = 0; numCfs <= 3; numCfs++) {
485       Configuration conf = new Configuration(this.util.getConfiguration());
486       Map<String, Compression.Algorithm> familyToCompression = getMockColumnFamilies(numCfs);
487       HTable table = Mockito.mock(HTable.class);
488       setupMockColumnFamilies(table, familyToCompression);
489       HFileOutputFormat.configureCompression(table, conf);
490 
491       // read back family specific compression setting from the configuration
492       Map<byte[], String> retrievedFamilyToCompressionMap = HFileOutputFormat.createFamilyCompressionMap(conf);
493 
494       // test that we have a value for all column families that matches with the
495       // used mock values
496       for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
497         assertEquals("Compression configuration incorrect for column family:" + entry.getKey(), entry.getValue()
498                      .getName(), retrievedFamilyToCompressionMap.get(Bytes.toBytes(entry.getKey())));
499       }
500     }
501   }
502 
503   private void setupMockColumnFamilies(HTable table,
504     Map<String, Compression.Algorithm> familyToCompression) throws IOException
505   {
506     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
507     for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
508       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
509           .setMaxVersions(1)
510           .setCompressionType(entry.getValue())
511           .setBlockCacheEnabled(false)
512           .setTimeToLive(0));
513     }
514     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
515   }
516 
517   private void setupMockStartKeys(HTable table) throws IOException {
518     byte[][] mockKeys = new byte[][] {
519         HConstants.EMPTY_BYTE_ARRAY,
520         Bytes.toBytes("aaa"),
521         Bytes.toBytes("ggg"),
522         Bytes.toBytes("zzz")
523     };
524     Mockito.doReturn(mockKeys).when(table).getStartKeys();
525   }
526 
527   /**
528    * @return a map from column family names to compression algorithms for
529    *         testing column family compression. Column family names have special characters
530    */
531   private Map<String, Compression.Algorithm> getMockColumnFamilies(int numCfs) {
532     Map<String, Compression.Algorithm> familyToCompression = new HashMap<String, Compression.Algorithm>();
533     // use column family names having special characters
534     if (numCfs-- > 0) {
535       familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
536     }
537     if (numCfs-- > 0) {
538       familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
539     }
540     if (numCfs-- > 0) {
541       familyToCompression.put("Family3=asdads&!AASD", Compression.Algorithm.GZ);
542     }
543     if (numCfs-- > 0) {
544       familyToCompression.put("Family3", Compression.Algorithm.NONE);
545     }
546     return familyToCompression;
547   }
548 
549   /**
550    * Test that {@link HFileOutputFormat} RecordWriter uses compression settings
551    * from the column family descriptor
552    */
553   @Test
554   public void testColumnFamilyCompression() throws Exception {
555     Configuration conf = new Configuration(this.util.getConfiguration());
556     RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
557     TaskAttemptContext context = null;
558     Path dir =
559         util.getDataTestDirOnTestFS("testColumnFamilyCompression");
560 
561     HTable table = Mockito.mock(HTable.class);
562 
563     Map<String, Compression.Algorithm> configuredCompression =
564       new HashMap<String, Compression.Algorithm>();
565     Compression.Algorithm[] supportedAlgos = getSupportedCompressionAlgorithms();
566 
567     int familyIndex = 0;
568     for (byte[] family : FAMILIES) {
569       configuredCompression.put(Bytes.toString(family),
570                                 supportedAlgos[familyIndex++ % supportedAlgos.length]);
571     }
572     setupMockColumnFamilies(table, configuredCompression);
573 
574     // set up the table to return some mock keys
575     setupMockStartKeys(table);
576 
577     try {
578       // partial map red setup to get an operational writer for testing
579       // We turn off the sequence file compression, because DefaultCodec
580       // pollutes the GZip codec pool with an incompatible compressor.
581       conf.set("io.seqfile.compression.type", "NONE");
582       Job job = new Job(conf, "testLocalMRIncrementalLoad");
583       job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilyCompression"));
584       setupRandomGeneratorMapper(job);
585       HFileOutputFormat.configureIncrementalLoad(job, table);
586       FileOutputFormat.setOutputPath(job, dir);
587       context = createTestTaskAttemptContext(job);
588       HFileOutputFormat hof = new HFileOutputFormat();
589       writer = hof.getRecordWriter(context);
590 
591       // write out random rows
592       writeRandomKeyValues(writer, context, ROWSPERSPLIT);
593       writer.close(context);
594 
595       // Make sure that a directory was created for every CF
596       FileSystem fileSystem = dir.getFileSystem(conf);
597 
598       // commit so that the filesystem has one directory per column family
599       hof.getOutputCommitter(context).commitTask(context);
600       hof.getOutputCommitter(context).commitJob(context);
601       for (byte[] family : FAMILIES) {
602         String familyStr = Bytes.toString(family);
603         boolean found = false;
604         for (FileStatus f : fileSystem.listStatus(dir)) {
605 
606           if (Bytes.toString(family).equals(f.getPath().getName())) {
607             // we found a matching directory
608             found = true;
609 
610             // verify that the compression on this file matches the configured
611             // compression
612             Path dataFilePath = fileSystem.listStatus(f.getPath())[0].getPath();
613             Reader reader = HFile.createReader(fileSystem, dataFilePath,
614                 new CacheConfig(conf));
615             reader.loadFileInfo();
616             assertEquals("Incorrect compression used for column family " + familyStr
617                          + "(reader: " + reader + ")",
618                          configuredCompression.get(familyStr), reader.getCompressionAlgorithm());
619             break;
620           }
621         }
622 
623         if (!found) {
624           fail("HFile for column family " + familyStr + " not found");
625         }
626       }
627 
628     } finally {
629       dir.getFileSystem(conf).delete(dir, true);
630     }
631   }
632 
633 
634   /**
635    * @return the compression algorithms whose codecs can actually be loaded in the current environment
636    */
637   private Compression.Algorithm[] getSupportedCompressionAlgorithms() {
638     String[] allAlgos = HFile.getSupportedCompressionAlgorithms();
639     List<Compression.Algorithm> supportedAlgos = Lists.newArrayList();
640 
641     for (String algoName : allAlgos) {
642       try {
643         Compression.Algorithm algo = Compression.getCompressionAlgorithmByName(algoName);
644         algo.getCompressor();
645         supportedAlgos.add(algo);
646       } catch (Throwable t) {
647         // this algo is not available
648       }
649     }
650 
651     return supportedAlgos.toArray(new Compression.Algorithm[0]);
652   }
653 
654 
655   /**
656    * Write random values to the writer assuming a table created using
657    * {@link #FAMILIES} as column family descriptors
658    */
659   private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, KeyValue> writer, TaskAttemptContext context,
660       int numRows)
661       throws IOException, InterruptedException {
662     byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
663     int valLength = 10;
664     byte valBytes[] = new byte[valLength];
665 
666     int taskId = context.getTaskAttemptID().getTaskID().getId();
667     assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
668 
669     Random random = new Random();
670     for (int i = 0; i < numRows; i++) {
671 
672       Bytes.putInt(keyBytes, 0, i);
673       random.nextBytes(valBytes);
674       ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
675 
676       for (byte[] family : TestHFileOutputFormat.FAMILIES) {
677         KeyValue kv = new KeyValue(keyBytes, family,
678             PerformanceEvaluation.QUALIFIER_NAME, valBytes);
679         writer.write(key, kv);
680       }
681     }
682   }
683 
684   /**
685    * Tests the scenario reported in HBASE-6901: every file in the store is
686    * bulk loaded and excluded from minor compaction.
687    * Without the fix for HBASE-6901, an ArrayIndexOutOfBoundsException
688    * would be thrown.
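   * <p>Exclusion from minor compaction is switched on via the same flag a real
   * bulk-load job would set (this mirrors the configuration applied in the test body):
   * <pre>{@code
   *   conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);
   * }</pre>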
689    */
690   @Ignore ("Flakey: See HBASE-9051") @Test
691   public void testExcludeAllFromMinorCompaction() throws Exception {
692     Configuration conf = util.getConfiguration();
693     conf.setInt("hbase.hstore.compaction.min", 2);
694     generateRandomStartKeys(5);
695 
696     try {
697       util.startMiniCluster();
698       final FileSystem fs = util.getDFSCluster().getFileSystem();
699       HBaseAdmin admin = new HBaseAdmin(conf);
700       HTable table = util.createTable(TABLE_NAME, FAMILIES);
701       assertEquals("Should start with empty table", 0, util.countRows(table));
702 
703       // deep inspection: get the StoreFile dir
704       final Path storePath = HStore.getStoreHomedir(
705           FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
706           admin.getTableRegions(TABLE_NAME).get(0),
707           FAMILIES[0]);
708       assertEquals(0, fs.listStatus(storePath).length);
709 
710       // Generate two bulk load files
711       conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
712           true);
713       util.startMiniMapReduceCluster();
714 
715       for (int i = 0; i < 2; i++) {
716         Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
717         runIncrementalPELoad(conf, table, testDir);
718         // Perform the actual load
719         new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
720       }
721 
722       // Ensure data shows up
723       int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
724       assertEquals("LoadIncrementalHFiles should put expected data in table",
725           expectedRows, util.countRows(table));
726 
727       // should have a second StoreFile now
728       assertEquals(2, fs.listStatus(storePath).length);
729 
730       // minor compactions shouldn't get rid of the file
731       admin.compact(TABLE_NAME.getName());
732       try {
733         quickPoll(new Callable<Boolean>() {
734           public Boolean call() throws Exception {
735             return fs.listStatus(storePath).length == 1;
736           }
737         }, 5000);
738         throw new IOException("SF# = " + fs.listStatus(storePath).length);
739       } catch (AssertionError ae) {
740         // this is expected behavior
741       }
742 
743       // a major compaction should work though
744       admin.majorCompact(TABLE_NAME.getName());
745       quickPoll(new Callable<Boolean>() {
746         public Boolean call() throws Exception {
747           return fs.listStatus(storePath).length == 1;
748         }
749       }, 5000);
750 
751     } finally {
752       util.shutdownMiniMapReduceCluster();
753       util.shutdownMiniCluster();
754     }
755   }
756 
757   @Test
758   public void testExcludeMinorCompaction() throws Exception {
759     Configuration conf = util.getConfiguration();
760     conf.setInt("hbase.hstore.compaction.min", 2);
761     generateRandomStartKeys(5);
762 
763     try {
764       util.startMiniCluster();
765       Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
766       final FileSystem fs = util.getDFSCluster().getFileSystem();
767       HBaseAdmin admin = new HBaseAdmin(conf);
768       HTable table = util.createTable(TABLE_NAME, FAMILIES);
769       assertEquals("Should start with empty table", 0, util.countRows(table));
770 
771       // deep inspection: get the StoreFile dir
772       final Path storePath = HStore.getStoreHomedir(
773           FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
774           admin.getTableRegions(TABLE_NAME).get(0),
775           FAMILIES[0]);
776       assertEquals(0, fs.listStatus(storePath).length);
777 
778       // put some data in it and flush to create a storefile
779       Put p = new Put(Bytes.toBytes("test"));
780       p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
781       table.put(p);
782       admin.flush(TABLE_NAME.getName());
783       assertEquals(1, util.countRows(table));
784       quickPoll(new Callable<Boolean>() {
785         public Boolean call() throws Exception {
786           return fs.listStatus(storePath).length == 1;
787         }
788       }, 5000);
789 
790       // Generate a bulk load file with more rows
791       conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
792           true);
793       util.startMiniMapReduceCluster();
794       runIncrementalPELoad(conf, table, testDir);
795 
796       // Perform the actual load
797       new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
798 
799       // Ensure data shows up
800       int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
801       assertEquals("LoadIncrementalHFiles should put expected data in table",
802           expectedRows + 1, util.countRows(table));
803 
804       // should have a second StoreFile now
805       assertEquals(2, fs.listStatus(storePath).length);
806 
807       // minor compactions shouldn't get rid of the file
808       admin.compact(TABLE_NAME.getName());
809       try {
810         quickPoll(new Callable<Boolean>() {
811           public Boolean call() throws Exception {
812             return fs.listStatus(storePath).length == 1;
813           }
814         }, 5000);
815         throw new IOException("SF# = " + fs.listStatus(storePath).length);
816       } catch (AssertionError ae) {
817         // this is expected behavior
818       }
819 
820       // a major compaction should work though
821       admin.majorCompact(TABLE_NAME.getName());
822       quickPoll(new Callable<Boolean>() {
823         public Boolean call() throws Exception {
824           return fs.listStatus(storePath).length == 1;
825         }
826       }, 5000);
827 
828     } finally {
829       util.shutdownMiniMapReduceCluster();
830       util.shutdownMiniCluster();
831     }
832   }
833 
834   private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
835     int sleepMs = 10;
836     int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
837     while (retries-- > 0) {
838       if (c.call().booleanValue()) {
839         return;
840       }
841       Thread.sleep(sleepMs);
842     }
843     fail();
844   }
845 
846   public static void main(String args[]) throws Exception {
847     new TestHFileOutputFormat().manualTest(args);
848   }
849 
850   public void manualTest(String args[]) throws Exception {
851     Configuration conf = HBaseConfiguration.create();
852     util = new HBaseTestingUtility(conf);
853     if ("newtable".equals(args[0])) {
854       byte[] tname = args[1].getBytes();
855       HTable table = util.createTable(tname, FAMILIES);
856       HBaseAdmin admin = new HBaseAdmin(conf);
857       admin.disableTable(tname);
858       byte[][] startKeys = generateRandomStartKeys(5);
859       util.createMultiRegions(conf, table, FAMILIES[0], startKeys);
860       admin.enableTable(tname);
861     } else if ("incremental".equals(args[0])) {
862       byte[] tname = args[1].getBytes();
863       HTable table = new HTable(conf, tname);
864       Path outDir = new Path("incremental-out");
865       runIncrementalPELoad(conf, table, outDir);
866     } else {
867       throw new RuntimeException(
868           "usage: TestHFileOutputFormat newtable | incremental");
869     }
870   }
871 
872 }
873