
1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertNotSame;
25  import static org.junit.Assert.assertTrue;
26  import static org.junit.Assert.fail;
27  
28  import java.io.IOException;
29  import java.util.Arrays;
30  import java.util.HashMap;
31  import java.util.Map;
32  import java.util.Map.Entry;
33  import java.util.Random;
34  import java.util.Set;
35  import java.util.concurrent.Callable;
36  import junit.framework.Assert;
37  import org.apache.commons.logging.Log;
38  import org.apache.commons.logging.LogFactory;
39  import org.apache.hadoop.conf.Configuration;
40  import org.apache.hadoop.fs.FileStatus;
41  import org.apache.hadoop.fs.FileSystem;
42  import org.apache.hadoop.fs.Path;
43  import org.apache.hadoop.hbase.Cell;
44  import org.apache.hadoop.hbase.CellUtil;
45  import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
46  import org.apache.hadoop.hbase.HBaseConfiguration;
47  import org.apache.hadoop.hbase.HBaseTestingUtility;
48  import org.apache.hadoop.hbase.HColumnDescriptor;
49  import org.apache.hadoop.hbase.HConstants;
50  import org.apache.hadoop.hbase.HTableDescriptor;
51  import org.apache.hadoop.hbase.HadoopShims;
52  import org.apache.hadoop.hbase.KeyValue;
53  import org.apache.hadoop.hbase.testclassification.LargeTests;
54  import org.apache.hadoop.hbase.PerformanceEvaluation;
55  import org.apache.hadoop.hbase.TableName;
56  import org.apache.hadoop.hbase.client.HBaseAdmin;
57  import org.apache.hadoop.hbase.client.HTable;
58  import org.apache.hadoop.hbase.client.Put;
59  import org.apache.hadoop.hbase.client.Result;
60  import org.apache.hadoop.hbase.client.ResultScanner;
61  import org.apache.hadoop.hbase.client.Scan;
62  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
63  import org.apache.hadoop.hbase.io.compress.Compression;
64  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
65  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
66  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
67  import org.apache.hadoop.hbase.io.hfile.HFile;
68  import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
69  import org.apache.hadoop.hbase.regionserver.BloomType;
70  import org.apache.hadoop.hbase.regionserver.HStore;
71  import org.apache.hadoop.hbase.regionserver.StoreFile;
72  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
73  import org.apache.hadoop.hbase.util.Bytes;
74  import org.apache.hadoop.hbase.util.FSUtils;
75  import org.apache.hadoop.hbase.util.Threads;
76  import org.apache.hadoop.hbase.util.Writables;
77  import org.apache.hadoop.io.NullWritable;
78  import org.apache.hadoop.mapreduce.Job;
79  import org.apache.hadoop.mapreduce.Mapper;
80  import org.apache.hadoop.mapreduce.RecordWriter;
81  import org.apache.hadoop.mapreduce.TaskAttemptContext;
82  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
83  import org.junit.Ignore;
84  import org.junit.Test;
85  import org.junit.experimental.categories.Category;
86  import org.mockito.Mockito;
87  
88  /**
89   * Simple test for {@link KeyValueSortReducer} and {@link HFileOutputFormat}.
90   * Sets up and runs a mapreduce job that writes hfile output.
91   * Creates a few inner classes to implement splits and an inputformat that
92   * emits keys and values like those of {@link PerformanceEvaluation}.
93   */
94  @Category(LargeTests.class)
95  public class TestHFileOutputFormat  {
96    private final static int ROWSPERSPLIT = 1024;
97  
98    private static final byte[][] FAMILIES
99      = { Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A"))
100       , Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B"))};
101   private static final TableName TABLE_NAME =
102       TableName.valueOf("TestTable");
103 
104   private HBaseTestingUtility util = new HBaseTestingUtility();
105 
106   private static Log LOG = LogFactory.getLog(TestHFileOutputFormat.class);
107 
108   /**
109    * Simple mapper that makes KeyValue output.
110    */
111   static class RandomKVGeneratingMapper
112   extends Mapper<NullWritable, NullWritable,
113                  ImmutableBytesWritable, KeyValue> {
114 
115     private int keyLength;
116     private static final int KEYLEN_DEFAULT=10;
117     private static final String KEYLEN_CONF="randomkv.key.length";
118 
119     private int valLength;
120     private static final int VALLEN_DEFAULT=10;
121     private static final String VALLEN_CONF="randomkv.val.length";
122 
123     @Override
124     protected void setup(Context context) throws IOException,
125         InterruptedException {
126       super.setup(context);
127 
128       Configuration conf = context.getConfiguration();
129       keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
130       valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
131     }
132 
133     protected void map(
134         NullWritable n1, NullWritable n2,
135         Mapper<NullWritable, NullWritable,
136                ImmutableBytesWritable,KeyValue>.Context context)
137         throws java.io.IOException, InterruptedException
138     {
139 
140       byte keyBytes[] = new byte[keyLength];
141       byte valBytes[] = new byte[valLength];
142 
143       int taskId = context.getTaskAttemptID().getTaskID().getId();
144       assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
145 
146       Random random = new Random();
147       for (int i = 0; i < ROWSPERSPLIT; i++) {
148 
149         random.nextBytes(keyBytes);
150         // Ensure that unique tasks generate unique keys
151         keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
152         random.nextBytes(valBytes);
153         ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
154 
155         for (byte[] family : TestHFileOutputFormat.FAMILIES) {
156           KeyValue kv = new KeyValue(keyBytes, family,
157               PerformanceEvaluation.QUALIFIER_NAME, valBytes);
158           context.write(key, kv);
159         }
160       }
161     }
162   }
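      // Note: HFileOutputFormat's RecordWriter requires keys in sorted order; this
      // mapper emits them in random order, and the jobs below rely on the shuffle
      // plus KeyValueSortReducer (and a total-order partitioner) to sort them.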
163 
164   private void setupRandomGeneratorMapper(Job job) {
165     job.setInputFormatClass(NMapInputFormat.class);
166     job.setMapperClass(RandomKVGeneratingMapper.class);
167     job.setMapOutputKeyClass(ImmutableBytesWritable.class);
168     job.setMapOutputValueClass(KeyValue.class);
169   }
170 
171   /**
172    * Test that {@link HFileOutputFormat} RecordWriter amends timestamps if
173    * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
174    * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
175    */
176   @Test
177   public void test_LATEST_TIMESTAMP_isReplaced()
178   throws Exception {
179     Configuration conf = new Configuration(this.util.getConfiguration());
180     RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
181     TaskAttemptContext context = null;
182     Path dir =
183       util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
184     try {
185       Job job = new Job(conf);
186       FileOutputFormat.setOutputPath(job, dir);
187       context = createTestTaskAttemptContext(job);
188       HFileOutputFormat hof = new HFileOutputFormat();
189       writer = hof.getRecordWriter(context);
190       final byte [] b = Bytes.toBytes("b");
191 
192       // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be
193       // changed by the call to write. Check that everything in the kv is the same except the ts.
194       KeyValue kv = new KeyValue(b, b, b);
195       KeyValue original = kv.clone();
196       writer.write(new ImmutableBytesWritable(), kv);
197       assertFalse(original.equals(kv));
198       assertTrue(Bytes.equals(original.getRow(), kv.getRow()));
199       assertTrue(original.matchingColumn(kv.getFamily(), kv.getQualifier()));
200       assertNotSame(original.getTimestamp(), kv.getTimestamp());
201       assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());
202 
203       // Test 2. Now pass a kv that has an explicit ts. It should not be
204       // changed by the call to write.
205       kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
206       original = kv.clone();
207       writer.write(new ImmutableBytesWritable(), kv);
208       assertTrue(original.equals(kv));
209     } finally {
210       if (writer != null && context != null) writer.close(context);
211       dir.getFileSystem(conf).delete(dir, true);
212     }
213   }
214 
215   private TaskAttemptContext createTestTaskAttemptContext(final Job job)
216   throws Exception {
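        // HadoopShims (obtained through CompatibilitySingletonFactory) papers over
        // the differences between Hadoop versions when building a TaskAttemptContext.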
217     HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
218     TaskAttemptContext context = hadoop.createTestTaskAttemptContext(job, "attempt_200707121733_0001_m_000000_0");
219     return context;
220   }
221 
222   /*
223    * Test that {@link HFileOutputFormat} creates an HFile with TIMERANGE
224    * metadata used by time-restricted scans.
225    */
226   @Test
227   public void test_TIMERANGE() throws Exception {
228     Configuration conf = new Configuration(this.util.getConfiguration());
229     RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
230     TaskAttemptContext context = null;
231     Path dir =
232       util.getDataTestDir("test_TIMERANGE_present");
233     LOG.info("Timerange dir writing to dir: "+ dir);
234     try {
235       // build a record writer using HFileOutputFormat
236       Job job = new Job(conf);
237       FileOutputFormat.setOutputPath(job, dir);
238       context = createTestTaskAttemptContext(job);
239       HFileOutputFormat hof = new HFileOutputFormat();
240       writer = hof.getRecordWriter(context);
241 
242       // Pass two key values with explicit timestamps
243       final byte [] b = Bytes.toBytes("b");
244 
245       // value 1 with timestamp 2000
246       KeyValue kv = new KeyValue(b, b, b, 2000, b);
247       KeyValue original = kv.clone();
248       writer.write(new ImmutableBytesWritable(), kv);
249       assertEquals(original, kv);
250 
251       // value 2 with timestamp 1000
252       kv = new KeyValue(b, b, b, 1000, b);
253       original = kv.clone();
254       writer.write(new ImmutableBytesWritable(), kv);
255       assertEquals(original, kv);
256 
257       // verify that the file has the proper FileInfo.
258       writer.close(context);
259 
260       // the generated file lives 1 directory down from the attempt directory
261       // and is the only file, e.g.
262       // _attempt__0000_r_000000_0/b/1979617994050536795
263       FileSystem fs = FileSystem.get(conf);
264       Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
265       FileStatus[] sub1 = fs.listStatus(attemptDirectory);
266       FileStatus[] file = fs.listStatus(sub1[0].getPath());
267 
268       // open as HFile Reader and pull out TIMERANGE FileInfo.
269       HFile.Reader rd = HFile.createReader(fs, file[0].getPath(),
270           new CacheConfig(conf), conf);
271       Map<byte[],byte[]> finfo = rd.loadFileInfo();
272       byte[] range = finfo.get("TIMERANGE".getBytes());
273       assertNotNull(range);
274 
275       // unmarshall and check values.
276       TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
277       Writables.copyWritable(range, timeRangeTracker);
278       LOG.info(timeRangeTracker.getMinimumTimestamp() +
279           "...." + timeRangeTracker.getMaximumTimestamp());
280       assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
281       assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
282       rd.close();
283     } finally {
284       if (writer != null && context != null) writer.close(context);
285       dir.getFileSystem(conf).delete(dir, true);
286     }
287   }
288 
289   /**
290    * Run small MR job.
291    */
292   @Test
293   public void testWritingPEData() throws Exception {
294     Configuration conf = util.getConfiguration();
295     Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
296     FileSystem fs = testDir.getFileSystem(conf);
297 
298     // Lower this value or we OOM in Eclipse.
299     conf.setInt("io.sort.mb", 20);
300     // Write a few files.
301     conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);
302 
303     Job job = new Job(conf, "testWritingPEData");
304     setupRandomGeneratorMapper(job);
305     // This partitioner doesn't work well for numeric keys, but we use it anyway
306     // just to demonstrate how to configure it.
307     byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
308     byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
309 
310     Arrays.fill(startKey, (byte)0);
311     Arrays.fill(endKey, (byte)0xff);
312 
313     job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
314     // Set start and end rows for partitioner.
315     SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
316     SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
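        // SimpleTotalOrderPartitioner splits the [startKey, endKey] byte range into
        // evenly sized slices, one per reduce task, so each reducer handles a
        // contiguous slice of the row-key space.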
317     job.setReducerClass(KeyValueSortReducer.class);
318     job.setOutputFormatClass(HFileOutputFormat.class);
319     job.setNumReduceTasks(4);
320     job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
321         MutationSerialization.class.getName(), ResultSerialization.class.getName(),
322         KeyValueSerialization.class.getName());
323 
324     FileOutputFormat.setOutputPath(job, testDir);
325     assertTrue(job.waitForCompletion(false));
326     FileStatus [] files = fs.listStatus(testDir);
327     assertTrue(files.length > 0);
328   }
329 
330   @Test
331   public void testJobConfiguration() throws Exception {
332     Configuration conf = new Configuration(this.util.getConfiguration());
333     conf.set("hbase.fs.tmp.dir", util.getDataTestDir("testJobConfiguration").toString());
334     Job job = new Job(conf);
335     job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
336     HTable table = Mockito.mock(HTable.class);
337     setupMockStartKeys(table);
338     setupMockTableName(table);
339     HFileOutputFormat.configureIncrementalLoad(job, table);
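        // configureIncrementalLoad sets one reduce task per region; the mocked table
        // has four start keys, hence the expected reducer count of 4.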
340     assertEquals(4, job.getNumReduceTasks());
341   }
342 
343   private byte [][] generateRandomStartKeys(int numKeys) {
344     Random random = new Random();
345     byte[][] ret = new byte[numKeys][];
346     // first region start key is always empty
347     ret[0] = HConstants.EMPTY_BYTE_ARRAY;
348     for (int i = 1; i < numKeys; i++) {
349       ret[i] = PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
350     }
351     return ret;
352   }
353 
354   @Test
355   public void testMRIncrementalLoad() throws Exception {
356     LOG.info("\nStarting test testMRIncrementalLoad\n");
357     doIncrementalLoadTest(false);
358   }
359 
360   @Test
361   public void testMRIncrementalLoadWithSplit() throws Exception {
362     LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
363     doIncrementalLoadTest(true);
364   }
365 
366   private void doIncrementalLoadTest(
367       boolean shouldChangeRegions) throws Exception {
368     util = new HBaseTestingUtility();
369     Configuration conf = util.getConfiguration();
370     byte[][] startKeys = generateRandomStartKeys(5);
371     HBaseAdmin admin = null;
372     try {
373       util.startMiniCluster();
374       Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
375       admin = new HBaseAdmin(conf);
376       HTable table = util.createTable(TABLE_NAME, FAMILIES);
377       assertEquals("Should start with empty table",
378           0, util.countRows(table));
379       int numRegions = util.createMultiRegions(
380           util.getConfiguration(), table, FAMILIES[0], startKeys);
381       assertEquals("Should make 5 regions", numRegions, 5);
382 
383       // Generate the bulk load files
384       util.startMiniMapReduceCluster();
385       runIncrementalPELoad(conf, table, testDir);
386       // This doesn't write into the table, just makes files
387       assertEquals("HFOF should not touch actual table",
388           0, util.countRows(table));
389 
390 
391       // Make sure that a directory was created for every CF
392       int dir = 0;
393       for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
394         for (byte[] family : FAMILIES) {
395           if (Bytes.toString(family).equals(f.getPath().getName())) {
396             ++dir;
397           }
398         }
399       }
400       assertEquals("Column family not found in FS.", FAMILIES.length, dir);
401 
402       // handle the split case
403       if (shouldChangeRegions) {
404         LOG.info("Changing regions in table");
405         admin.disableTable(table.getTableName());
406         while(util.getMiniHBaseCluster().getMaster().getAssignmentManager().
407             getRegionStates().isRegionsInTransition()) {
408           Threads.sleep(200);
409           LOG.info("Waiting on table to finish disabling");
410         }
411         byte[][] newStartKeys = generateRandomStartKeys(15);
412         util.createMultiRegions(
413             util.getConfiguration(), table, FAMILIES[0], newStartKeys);
414         admin.enableTable(table.getTableName());
415         while (table.getRegionLocations().size() != 15 ||
416             !admin.isTableAvailable(table.getTableName())) {
417           Thread.sleep(200);
418           LOG.info("Waiting for new region assignment to happen");
419         }
420       }
421 
422       // Perform the actual load
423       new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
424 
425       // Ensure data shows up
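          // NMapInputFormat runs a configurable number of map tasks over no real input;
          // each RandomKVGeneratingMapper task emits ROWSPERSPLIT rows, kept unique
          // across tasks by stamping the task id into the last key byte.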
426       int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
427       assertEquals("LoadIncrementalHFiles should put expected data in table",
428           expectedRows, util.countRows(table));
429       Scan scan = new Scan();
430       ResultScanner results = table.getScanner(scan);
431       for (Result res : results) {
432         assertEquals(FAMILIES.length, res.rawCells().length);
433         Cell first = res.rawCells()[0];
434         for (Cell kv : res.rawCells()) {
435           assertTrue(CellUtil.matchingRow(first, kv));
436           assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
437         }
438       }
439       results.close();
440       String tableDigestBefore = util.checksumRows(table);
441 
442       // Cause regions to reopen
443       admin.disableTable(TABLE_NAME);
444       while (!admin.isTableDisabled(TABLE_NAME)) {
445         Thread.sleep(200);
446         LOG.info("Waiting for table to disable");
447       }
448       admin.enableTable(TABLE_NAME);
449       util.waitTableAvailable(TABLE_NAME.getName());
450       assertEquals("Data should remain after reopening of regions",
451           tableDigestBefore, util.checksumRows(table));
452     } finally {
453       if (admin != null) admin.close();
454       util.shutdownMiniMapReduceCluster();
455       util.shutdownMiniCluster();
456     }
457   }
458 
459   private void runIncrementalPELoad(
460       Configuration conf, HTable table, Path outDir)
461   throws Exception {
462     Job job = new Job(conf, "testLocalMRIncrementalLoad");
463     job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
464     job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
465         MutationSerialization.class.getName(), ResultSerialization.class.getName(),
466         KeyValueSerialization.class.getName());
467     setupRandomGeneratorMapper(job);
468     HFileOutputFormat.configureIncrementalLoad(job, table);
469     FileOutputFormat.setOutputPath(job, outDir);
470 
471     Assert.assertFalse(util.getTestFileSystem().exists(outDir));
472 
473     assertEquals(table.getRegionLocations().size(), job.getNumReduceTasks());
474 
475     assertTrue(job.waitForCompletion(true));
476   }
477 
478   /**
479    * Test for {@link HFileOutputFormat#configureCompression(HTable,
480    * Configuration)} and {@link HFileOutputFormat#createFamilyCompressionMap
481    * (Configuration)}.
482    * Tests that the compression map is correctly serialized into
483    * and deserialized from configuration
484    *
485    * @throws IOException
486    */
487   @Test
488   public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
489     for (int numCfs = 0; numCfs <= 3; numCfs++) {
490       Configuration conf = new Configuration(this.util.getConfiguration());
491       Map<String, Compression.Algorithm> familyToCompression =
492           getMockColumnFamiliesForCompression(numCfs);
493       HTable table = Mockito.mock(HTable.class);
494       setupMockColumnFamiliesForCompression(table, familyToCompression);
495       HFileOutputFormat.configureCompression(table, conf);
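          // configureCompression serializes the per-family settings into a single
          // configuration property as URL-encoded family=value pairs;
          // createFamilyCompressionMap below parses that property back into a map.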
496 
497       // read back family specific compression setting from the configuration
498       Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat
499           .createFamilyCompressionMap(conf);
500 
501       // test that we have a value for all column families that matches with the
502       // used mock values
503       for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
504         assertEquals("Compression configuration incorrect for column family:"
505             + entry.getKey(), entry.getValue(),
506             retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
507       }
508     }
509   }
510 
511   private void setupMockColumnFamiliesForCompression(HTable table,
512       Map<String, Compression.Algorithm> familyToCompression) throws IOException {
513     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
514     for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
515       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
516           .setMaxVersions(1)
517           .setCompressionType(entry.getValue())
518           .setBlockCacheEnabled(false)
519           .setTimeToLive(0));
520     }
521     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
522   }
523 
524   /**
525    * @return a map from column family names to compression algorithms for
526    *         testing column family compression. Column family names have special characters
527    */
528   private Map<String, Compression.Algorithm>
529       getMockColumnFamiliesForCompression (int numCfs) {
530     Map<String, Compression.Algorithm> familyToCompression = new HashMap<String, Compression.Algorithm>();
531     // use column family names having special characters
532     if (numCfs-- > 0) {
533       familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
534     }
535     if (numCfs-- > 0) {
536       familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
537     }
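        // Note: the next entry reuses the same family name, so when numCfs >= 3 the
        // GZ mapping simply replaces the SNAPPY one above.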
538     if (numCfs-- > 0) {
539       familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
540     }
541     if (numCfs-- > 0) {
542       familyToCompression.put("Family3", Compression.Algorithm.NONE);
543     }
544     return familyToCompression;
545   }
546 
547 
548   /**
549    * Test for {@link HFileOutputFormat#configureBloomType(HTable,
550    * Configuration)} and {@link HFileOutputFormat#createFamilyBloomTypeMap
551    * (Configuration)}.
552    * Tests that the bloom filter type map is correctly serialized into
553    * and deserialized from configuration
554    *
555    * @throws IOException
556    */
557   @Test
558   public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
559     for (int numCfs = 0; numCfs <= 2; numCfs++) {
560       Configuration conf = new Configuration(this.util.getConfiguration());
561       Map<String, BloomType> familyToBloomType =
562           getMockColumnFamiliesForBloomType(numCfs);
563       HTable table = Mockito.mock(HTable.class);
564       setupMockColumnFamiliesForBloomType(table,
565           familyToBloomType);
566       HFileOutputFormat.configureBloomType(table, conf);
567 
568       // read back family specific bloom filter type settings from the
569       // configuration
570       Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
571           HFileOutputFormat
572               .createFamilyBloomTypeMap(conf);
573 
574       // test that we have a value for all column families that matches with the
575       // used mock values
576       for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
577         assertEquals("BloomType configuration incorrect for column family:"
578             + entry.getKey(), entry.getValue(),
579             retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes()));
580       }
581     }
582   }
583 
584   private void setupMockColumnFamiliesForBloomType(HTable table,
585       Map<String, BloomType> familyToBloomType) throws IOException {
586     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
587     for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
588       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
589           .setMaxVersions(1)
590           .setBloomFilterType(entry.getValue())
591           .setBlockCacheEnabled(false)
592           .setTimeToLive(0));
593     }
594     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
595   }
596 
597   /**
598    * @return a map from column family names to bloom filter types for
599    *         testing column family bloom filter settings. Column family names have special characters
600    */
601   private Map<String, BloomType>
602   getMockColumnFamiliesForBloomType (int numCfs) {
603     Map<String, BloomType> familyToBloomType =
604         new HashMap<String, BloomType>();
605     // use column family names having special characters
606     if (numCfs-- > 0) {
607       familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
608     }
609     if (numCfs-- > 0) {
610       familyToBloomType.put("Family2=asdads&!AASD",
611           BloomType.ROWCOL);
612     }
613     if (numCfs-- > 0) {
614       familyToBloomType.put("Family3", BloomType.NONE);
615     }
616     return familyToBloomType;
617   }
618 
619   /**
620    * Test for {@link HFileOutputFormat#configureBlockSize(HTable,
621    * Configuration)} and {@link HFileOutputFormat#createFamilyBlockSizeMap
622    * (Configuration)}.
623    * Tests that the block size map is correctly serialized into
624    * and deserialized from configuration
625    *
626    * @throws IOException
627    */
628   @Test
629   public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
630     for (int numCfs = 0; numCfs <= 3; numCfs++) {
631       Configuration conf = new Configuration(this.util.getConfiguration());
632       Map<String, Integer> familyToBlockSize =
633           getMockColumnFamiliesForBlockSize(numCfs);
634       HTable table = Mockito.mock(HTable.class);
635       setupMockColumnFamiliesForBlockSize(table,
636           familyToBlockSize);
637       HFileOutputFormat.configureBlockSize(table, conf);
638 
639       // read back family specific block size settings from the
640       // configuration
641       Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
642           HFileOutputFormat
643               .createFamilyBlockSizeMap(conf);
644 
645       // test that we have a value for all column families that matches with the
646       // used mock values
647       for (Entry<String, Integer> entry : familyToBlockSize.entrySet()
648           ) {
649         assertEquals("BlockSize configuration incorrect for column family:"
650             + entry.getKey(), entry.getValue(),
651             retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes()));
652       }
653     }
654   }
655 
656   private void setupMockColumnFamiliesForBlockSize(HTable table,
657       Map<String, Integer> familyToBlockSize) throws IOException {
658     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
659     for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
660       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
661           .setMaxVersions(1)
662           .setBlocksize(entry.getValue())
663           .setBlockCacheEnabled(false)
664           .setTimeToLive(0));
665     }
666     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
667   }
668 
669   /**
670    * @return a map from column family names to block sizes for
671    *         testing column family block size settings. Column family names have special characters
672    */
673   private Map<String, Integer>
674   getMockColumnFamiliesForBlockSize (int numCfs) {
675     Map<String, Integer> familyToBlockSize =
676         new HashMap<String, Integer>();
677     // use column family names having special characters
678     if (numCfs-- > 0) {
679       familyToBlockSize.put("Family1!@#!@#&", 1234);
680     }
681     if (numCfs-- > 0) {
682       familyToBlockSize.put("Family2=asdads&!AASD",
683           Integer.MAX_VALUE);
684     }
685     if (numCfs-- > 0) {
686       familyToBlockSize.put("Family2=asdads&!AASD",
687           Integer.MAX_VALUE);
688     }
689     if (numCfs-- > 0) {
690       familyToBlockSize.put("Family3", 0);
691     }
692     return familyToBlockSize;
693   }
694 
695   /**
696    * Test for {@link HFileOutputFormat#configureDataBlockEncoding(HTable,
697    * Configuration)} and {@link HFileOutputFormat#createFamilyDataBlockEncodingMap
698    * (Configuration)}.
699    * Tests that the data block encoding map is correctly serialized into
700    * and deserialized from configuration
701    *
702    * @throws IOException
703    */
704   @Test
705   public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
706     for (int numCfs = 0; numCfs <= 3; numCfs++) {
707       Configuration conf = new Configuration(this.util.getConfiguration());
708       Map<String, DataBlockEncoding> familyToDataBlockEncoding =
709           getMockColumnFamiliesForDataBlockEncoding(numCfs);
710       HTable table = Mockito.mock(HTable.class);
711       setupMockColumnFamiliesForDataBlockEncoding(table,
712           familyToDataBlockEncoding);
713       HFileOutputFormat.configureDataBlockEncoding(table, conf);
714 
715       // read back family specific data block encoding settings from the
716       // configuration
717       Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
718           HFileOutputFormat
719           .createFamilyDataBlockEncodingMap(conf);
720 
721       // test that we have a value for all column families that matches with the
722       // used mock values
723       for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
724         assertEquals("DataBlockEncoding configuration incorrect for column family:"
725             + entry.getKey(), entry.getValue(),
726             retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes()));
727       }
728     }
729   }
730 
731   private void setupMockColumnFamiliesForDataBlockEncoding(HTable table,
732       Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
733     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
734     for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
735       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
736           .setMaxVersions(1)
737           .setDataBlockEncoding(entry.getValue())
738           .setBlockCacheEnabled(false)
739           .setTimeToLive(0));
740     }
741     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
742   }
743 
744   /**
745    * @return a map from column family names to data block encodings for
746    *         testing column family data block encoding settings. Column family names have special characters
747    */
748   private Map<String, DataBlockEncoding>
749       getMockColumnFamiliesForDataBlockEncoding (int numCfs) {
750     Map<String, DataBlockEncoding> familyToDataBlockEncoding =
751         new HashMap<String, DataBlockEncoding>();
752     // use column family names having special characters
753     if (numCfs-- > 0) {
754       familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
755     }
756     if (numCfs-- > 0) {
757       familyToDataBlockEncoding.put("Family2=asdads&!AASD",
758           DataBlockEncoding.FAST_DIFF);
759     }
760     if (numCfs-- > 0) {
761       familyToDataBlockEncoding.put("Family2=asdads&!AASD",
762           DataBlockEncoding.PREFIX);
763     }
764     if (numCfs-- > 0) {
765       familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
766     }
767     return familyToDataBlockEncoding;
768   }
769 
770   private void setupMockStartKeys(HTable table) throws IOException {
771     byte[][] mockKeys = new byte[][] {
772         HConstants.EMPTY_BYTE_ARRAY,
773         Bytes.toBytes("aaa"),
774         Bytes.toBytes("ggg"),
775         Bytes.toBytes("zzz")
776     };
777     Mockito.doReturn(mockKeys).when(table).getStartKeys();
778   }
779 
780   private void setupMockTableName(HTable table) throws IOException {
781     TableName mockTableName = TableName.valueOf("mock_table");
782     Mockito.doReturn(mockTableName).when(table).getName();
783   }
784 
785   /**
786    * Test that {@link HFileOutputFormat} RecordWriter uses compression and
787    * bloom filter settings from the column family descriptor
788    */
789   @Test
790   public void testColumnFamilySettings() throws Exception {
791     Configuration conf = new Configuration(this.util.getConfiguration());
792     RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
793     TaskAttemptContext context = null;
794     Path dir = util.getDataTestDir("testColumnFamilySettings");
795 
796     // Setup table descriptor
797     HTable table = Mockito.mock(HTable.class);
798     HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
799     Mockito.doReturn(htd).when(table).getTableDescriptor();
800     for (HColumnDescriptor hcd: this.util.generateColumnDescriptors()) {
801       htd.addFamily(hcd);
802     }
803 
804     // set up the table to return some mock keys
805     setupMockStartKeys(table);
806 
807     try {
808       // partial map red setup to get an operational writer for testing
809       // We turn off the sequence file compression, because DefaultCodec
810       // pollutes the GZip codec pool with an incompatible compressor.
811       conf.set("io.seqfile.compression.type", "NONE");
812       conf.set("hbase.fs.tmp.dir", dir.toString());
813       // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
814       conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
815 
816       Job job = new Job(conf, "testLocalMRIncrementalLoad");
817       job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
818       setupRandomGeneratorMapper(job);
819       HFileOutputFormat.configureIncrementalLoad(job, table);
820       FileOutputFormat.setOutputPath(job, dir);
821       context = createTestTaskAttemptContext(job);
822       HFileOutputFormat hof = new HFileOutputFormat();
823       writer = hof.getRecordWriter(context);
824 
825       // write out random rows
826       writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
827       writer.close(context);
828 
829       // Make sure that a directory was created for every CF
830       FileSystem fs = dir.getFileSystem(conf);
831 
832       // commit so that the filesystem has one directory per column family
833       hof.getOutputCommitter(context).commitTask(context);
834       hof.getOutputCommitter(context).commitJob(context);
835       FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
836       assertEquals(htd.getFamilies().size(), families.length);
837       for (FileStatus f : families) {
838         String familyStr = f.getPath().getName();
839         HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
840         // verify that the compression on this file matches the configured
841         // compression
842         Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
843         Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
844         Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
845 
846         byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
847         if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
848         assertEquals("Incorrect bloom filter used for column family " + familyStr +
849           "(reader: " + reader + ")",
850           hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
851         assertEquals("Incorrect compression used for column family " + familyStr +
852           "(reader: " + reader + ")", hcd.getCompression(), reader.getFileContext().getCompression());
853       }
854     } finally {
855       dir.getFileSystem(conf).delete(dir, true);
856     }
857   }
858 
859   /**
860    * Write random values to the writer, emitting one KeyValue per row for each
861    * of the given column families
862    */
863   private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, KeyValue> writer,
864       TaskAttemptContext context, Set<byte[]> families, int numRows)
865       throws IOException, InterruptedException {
866     byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
867     int valLength = 10;
868     byte valBytes[] = new byte[valLength];
869 
870     int taskId = context.getTaskAttemptID().getTaskID().getId();
871     assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
872 
873     Random random = new Random();
874     for (int i = 0; i < numRows; i++) {
875 
876       Bytes.putInt(keyBytes, 0, i);
877       random.nextBytes(valBytes);
878       ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
879 
880       for (byte[] family : families) {
881         KeyValue kv = new KeyValue(keyBytes, family,
882             PerformanceEvaluation.QUALIFIER_NAME, valBytes);
883         writer.write(key, kv);
884       }
885     }
886   }
887 
888   /**
889    * Tests the scenario described in HBASE-6901:
890    * all files are bulk loaded and excluded from minor compaction.
891    * Without the fix for HBASE-6901, an ArrayIndexOutOfBoundsException
892    * would be thrown.
893    */
894   @Ignore ("Flakey: See HBASE-9051") @Test
895   public void testExcludeAllFromMinorCompaction() throws Exception {
896     Configuration conf = util.getConfiguration();
897     conf.setInt("hbase.hstore.compaction.min", 2);
898     generateRandomStartKeys(5);
899 
900     try {
901       util.startMiniCluster();
902       final FileSystem fs = util.getDFSCluster().getFileSystem();
903       HBaseAdmin admin = new HBaseAdmin(conf);
904       HTable table = util.createTable(TABLE_NAME, FAMILIES);
905       assertEquals("Should start with empty table", 0, util.countRows(table));
906 
907       // deep inspection: get the StoreFile dir
908       final Path storePath = HStore.getStoreHomedir(
909           FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
910           admin.getTableRegions(TABLE_NAME).get(0),
911           FAMILIES[0]);
912       assertEquals(0, fs.listStatus(storePath).length);
913 
914       // Generate two bulk load files
915       conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
916           true);
917       util.startMiniMapReduceCluster();
918 
919       for (int i = 0; i < 2; i++) {
920         Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
921         runIncrementalPELoad(conf, table, testDir);
922         // Perform the actual load
923         new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
924       }
925 
926       // Ensure data shows up
927       int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
928       assertEquals("LoadIncrementalHFiles should put expected data in table",
929           expectedRows, util.countRows(table));
930 
931       // should now have two StoreFiles, one per bulk load
932       assertEquals(2, fs.listStatus(storePath).length);
933 
934       // minor compactions shouldn't get rid of the file
935       admin.compact(TABLE_NAME.getName());
936       try {
937         quickPoll(new Callable<Boolean>() {
938           public Boolean call() throws Exception {
939             return fs.listStatus(storePath).length == 1;
940           }
941         }, 5000);
942         throw new IOException("SF# = " + fs.listStatus(storePath).length);
943       } catch (AssertionError ae) {
944         // this is expected behavior
945       }
946 
947       // a major compaction should work though
948       admin.majorCompact(TABLE_NAME.getName());
949       quickPoll(new Callable<Boolean>() {
950         public Boolean call() throws Exception {
951           return fs.listStatus(storePath).length == 1;
952         }
953       }, 5000);
954 
955     } finally {
956       util.shutdownMiniMapReduceCluster();
957       util.shutdownMiniCluster();
958     }
959   }
960 
961   @Test
962   public void testExcludeMinorCompaction() throws Exception {
963     Configuration conf = util.getConfiguration();
964     conf.setInt("hbase.hstore.compaction.min", 2);
965     generateRandomStartKeys(5);
966 
967     try {
968       util.startMiniCluster();
969       Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
970       final FileSystem fs = util.getDFSCluster().getFileSystem();
971       HBaseAdmin admin = new HBaseAdmin(conf);
972       HTable table = util.createTable(TABLE_NAME, FAMILIES);
973       assertEquals("Should start with empty table", 0, util.countRows(table));
974 
975       // deep inspection: get the StoreFile dir
976       final Path storePath = HStore.getStoreHomedir(
977           FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
978           admin.getTableRegions(TABLE_NAME).get(0),
979           FAMILIES[0]);
980       assertEquals(0, fs.listStatus(storePath).length);
981 
982       // put some data in it and flush to create a storefile
983       Put p = new Put(Bytes.toBytes("test"));
984       p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
985       table.put(p);
986       admin.flush(TABLE_NAME.getName());
987       assertEquals(1, util.countRows(table));
988       quickPoll(new Callable<Boolean>() {
989         public Boolean call() throws Exception {
990           return fs.listStatus(storePath).length == 1;
991         }
992       }, 5000);
993 
994       // Generate a bulk load file with more rows
995       conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
996           true);
997       util.startMiniMapReduceCluster();
998       runIncrementalPELoad(conf, table, testDir);
999 
1000       // Perform the actual load
1001       new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
1002 
1003       // Ensure data shows up
1004       int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
1005       assertEquals("LoadIncrementalHFiles should put expected data in table",
1006           expectedRows + 1, util.countRows(table));
1007 
1008       // should have a second StoreFile now
1009       assertEquals(2, fs.listStatus(storePath).length);
1010 
1011       // minor compactions shouldn't get rid of the file
1012       admin.compact(TABLE_NAME.getName());
1013       try {
1014         quickPoll(new Callable<Boolean>() {
1015           public Boolean call() throws Exception {
1016             return fs.listStatus(storePath).length == 1;
1017           }
1018         }, 5000);
1019         throw new IOException("SF# = " + fs.listStatus(storePath).length);
1020       } catch (AssertionError ae) {
1021         // this is expected behavior
1022       }
1023 
1024       // a major compaction should work though
1025       admin.majorCompact(TABLE_NAME.getName());
1026       quickPoll(new Callable<Boolean>() {
1027         public Boolean call() throws Exception {
1028           return fs.listStatus(storePath).length == 1;
1029         }
1030       }, 5000);
1031 
1032     } finally {
1033       util.shutdownMiniMapReduceCluster();
1034       util.shutdownMiniCluster();
1035     }
1036   }
1037 
1038   private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
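         // Poll the condition until it holds or waitMs elapses. On timeout fail()
         // throws an AssertionError, which some callers deliberately catch when the
         // condition is expected never to become true.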
1039     int sleepMs = 10;
1040     int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
1041     while (retries-- > 0) {
1042       if (c.call().booleanValue()) {
1043         return;
1044       }
1045       Thread.sleep(sleepMs);
1046     }
1047     fail();
1048   }
1049 
1050   public static void main(String args[]) throws Exception {
1051     new TestHFileOutputFormat().manualTest(args);
1052   }
1053 
1054   public void manualTest(String args[]) throws Exception {
1055     Configuration conf = HBaseConfiguration.create();
1056     util = new HBaseTestingUtility(conf);
1057     if ("newtable".equals(args[0])) {
1058       byte[] tname = args[1].getBytes();
1059       HTable table = util.createTable(tname, FAMILIES);
1060       HBaseAdmin admin = new HBaseAdmin(conf);
1061       admin.disableTable(tname);
1062       byte[][] startKeys = generateRandomStartKeys(5);
1063       util.createMultiRegions(conf, table, FAMILIES[0], startKeys);
1064       admin.enableTable(tname);
1065     } else if ("incremental".equals(args[0])) {
1066       byte[] tname = args[1].getBytes();
1067       HTable table = new HTable(conf, tname);
1068       Path outDir = new Path("incremental-out");
1069       runIncrementalPELoad(conf, table, outDir);
1070     } else {
1071       throw new RuntimeException(
1072           "usage: TestHFileOutputFormat newtable | incremental");
1073     }
1074   }
1075 
1076 }
1077