1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertNotSame;
25  import static org.junit.Assert.assertTrue;
26  import static org.junit.Assert.fail;
27  
28  import java.io.IOException;
29  import java.util.Arrays;
30  import java.util.HashMap;
31  import java.util.Map;
32  import java.util.Map.Entry;
33  import java.util.Random;
34  import java.util.Set;
35  import java.util.concurrent.Callable;
36  
37  import org.junit.Assert;
38  
39  import org.apache.commons.logging.Log;
40  import org.apache.commons.logging.LogFactory;
41  import org.apache.hadoop.conf.Configuration;
42  import org.apache.hadoop.fs.FileStatus;
43  import org.apache.hadoop.fs.FileSystem;
44  import org.apache.hadoop.fs.Path;
45  import org.apache.hadoop.hbase.Cell;
46  import org.apache.hadoop.hbase.CellUtil;
47  import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
48  import org.apache.hadoop.hbase.HBaseConfiguration;
49  import org.apache.hadoop.hbase.HBaseTestingUtility;
50  import org.apache.hadoop.hbase.HColumnDescriptor;
51  import org.apache.hadoop.hbase.HConstants;
52  import org.apache.hadoop.hbase.HTableDescriptor;
53  import org.apache.hadoop.hbase.HadoopShims;
54  import org.apache.hadoop.hbase.KeyValue;
55  import org.apache.hadoop.hbase.testclassification.LargeTests;
56  import org.apache.hadoop.hbase.PerformanceEvaluation;
57  import org.apache.hadoop.hbase.TableName;
58  import org.apache.hadoop.hbase.client.HBaseAdmin;
59  import org.apache.hadoop.hbase.client.HTable;
60  import org.apache.hadoop.hbase.client.Put;
61  import org.apache.hadoop.hbase.client.RegionLocator;
62  import org.apache.hadoop.hbase.client.Result;
63  import org.apache.hadoop.hbase.client.ResultScanner;
64  import org.apache.hadoop.hbase.client.Scan;
65  import org.apache.hadoop.hbase.client.Table;
66  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
67  import org.apache.hadoop.hbase.io.compress.Compression;
68  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
69  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
70  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
71  import org.apache.hadoop.hbase.io.hfile.HFile;
72  import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
73  import org.apache.hadoop.hbase.regionserver.BloomType;
74  import org.apache.hadoop.hbase.regionserver.HStore;
75  import org.apache.hadoop.hbase.regionserver.StoreFile;
76  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
77  import org.apache.hadoop.hbase.util.Bytes;
78  import org.apache.hadoop.hbase.util.FSUtils;
79  import org.apache.hadoop.hbase.util.Threads;
80  import org.apache.hadoop.hbase.util.Writables;
81  import org.apache.hadoop.io.NullWritable;
82  import org.apache.hadoop.mapreduce.Job;
83  import org.apache.hadoop.mapreduce.Mapper;
84  import org.apache.hadoop.mapreduce.RecordWriter;
85  import org.apache.hadoop.mapreduce.TaskAttemptContext;
86  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
87  import org.junit.Ignore;
88  import org.junit.Test;
89  import org.junit.experimental.categories.Category;
90  import org.mockito.Mockito;
91  
92  /**
93   * Simple test for {@link KeyValueSortReducer} and {@link HFileOutputFormat}.
94   * Sets up and runs a MapReduce job that writes HFile output.
95   * Uses {@link NMapInputFormat} for splits and an inner mapper that
96   * emits keys and values like those of {@link PerformanceEvaluation}.
97   */
98  @Category(LargeTests.class)
99  public class TestHFileOutputFormat  {
100   private final static int ROWSPERSPLIT = 1024;
101 
102   private static final byte[][] FAMILIES
103     = { Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A"))
104       , Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B"))};
105   private static final TableName TABLE_NAME =
106       TableName.valueOf("TestTable");
107 
108   private HBaseTestingUtility util = new HBaseTestingUtility();
109 
110   private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat.class);
111 
112   /**
113    * Simple mapper that makes KeyValue output.
114    */
115   static class RandomKVGeneratingMapper
116   extends Mapper<NullWritable, NullWritable,
117                  ImmutableBytesWritable, KeyValue> {
118 
119     private int keyLength;
120     private static final int KEYLEN_DEFAULT = 10;
121     private static final String KEYLEN_CONF = "randomkv.key.length";
122 
123     private int valLength;
124     private static final int VALLEN_DEFAULT = 10;
125     private static final String VALLEN_CONF = "randomkv.val.length";
126     private static final byte [] QUALIFIER = Bytes.toBytes("data");
127 
128     @Override
129     protected void setup(Context context) throws IOException,
130         InterruptedException {
131       super.setup(context);
132 
133       Configuration conf = context.getConfiguration();
134       keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
135       valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
136     }
137 
138     @Override
139     protected void map(NullWritable n1, NullWritable n2,
140         Mapper<NullWritable, NullWritable,
141                ImmutableBytesWritable, KeyValue>.Context context)
142         throws IOException, InterruptedException {
143       // Write ROWSPERSPLIT rows per map task; the trailing key byte is the task id.
144 
145       byte[] keyBytes = new byte[keyLength];
146       byte[] valBytes = new byte[valLength];
147 
148       int taskId = context.getTaskAttemptID().getTaskID().getId();
149       assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
150 
151       Random random = new Random();
152       for (int i = 0; i < ROWSPERSPLIT; i++) {
153 
154         random.nextBytes(keyBytes);
155         // Ensure that unique tasks generate unique keys
156         keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
157         random.nextBytes(valBytes);
158         ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
159 
160         for (byte[] family : TestHFileOutputFormat.FAMILIES) {
161           KeyValue kv = new KeyValue(keyBytes, family, QUALIFIER, valBytes);
162           context.write(key, kv);
163         }
164       }
165     }
166   }
167 
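      // Configure the job to use NMapInputFormat and the random KeyValue mapper above.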
168   private void setupRandomGeneratorMapper(Job job) {
169     job.setInputFormatClass(NMapInputFormat.class);
170     job.setMapperClass(RandomKVGeneratingMapper.class);
171     job.setMapOutputKeyClass(ImmutableBytesWritable.class);
172     job.setMapOutputValueClass(KeyValue.class);
173   }
174 
175   /**
176    * Test that {@link HFileOutputFormat} RecordWriter amends timestamps if
177    * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
178    * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
179    */
180   @Test
181   public void test_LATEST_TIMESTAMP_isReplaced()
182   throws Exception {
183     Configuration conf = new Configuration(this.util.getConfiguration());
184     RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
185     TaskAttemptContext context = null;
186     Path dir =
187       util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
188     try {
189       Job job = new Job(conf);
190       FileOutputFormat.setOutputPath(job, dir);
191       context = createTestTaskAttemptContext(job);
192       HFileOutputFormat hof = new HFileOutputFormat();
193       writer = hof.getRecordWriter(context);
194       final byte [] b = Bytes.toBytes("b");
195 
196       // Test 1.  Pass a KV that has a ts of LATEST_TIMESTAMP.  It should be
197       // changed by the call to write.  Check everything in the kv is unchanged except the ts.
198       KeyValue kv = new KeyValue(b, b, b);
199       KeyValue original = kv.clone();
200       writer.write(new ImmutableBytesWritable(), kv);
201       assertFalse(original.equals(kv));
202       assertTrue(Bytes.equals(original.getRow(), kv.getRow()));
203       assertTrue(CellUtil.matchingColumn(original, kv.getFamily(), kv.getQualifier()));
204       assertNotSame(original.getTimestamp(), kv.getTimestamp());
205       assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());
206 
207       // Test 2. Now pass a kv that has an explicit ts.  It should not be
208       // changed by the call to write.
209       kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
210       original = kv.clone();
211       writer.write(new ImmutableBytesWritable(), kv);
212       assertTrue(original.equals(kv));
213     } finally {
214       if (writer != null && context != null) writer.close(context);
215       dir.getFileSystem(conf).delete(dir, true);
216     }
217   }
218 
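      // Build a TaskAttemptContext through the Hadoop compatibility shim so the test
      // runs against different Hadoop versions.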
219   private TaskAttemptContext createTestTaskAttemptContext(final Job job)
220   throws Exception {
221     HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
222     TaskAttemptContext context = hadoop.createTestTaskAttemptContext(job, "attempt_200707121733_0001_m_000000_0");
223     return context;
224   }
225 
226   /**
227    * Test that {@link HFileOutputFormat} creates an HFile with TIMERANGE
228    * metadata used by time-restricted scans.
229    */
230   @Test
231   public void test_TIMERANGE() throws Exception {
232     Configuration conf = new Configuration(this.util.getConfiguration());
233     RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
234     TaskAttemptContext context = null;
235     Path dir =
236       util.getDataTestDir("test_TIMERANGE_present");
237     LOG.info("Writing TIMERANGE test output to dir: " + dir);
238     try {
239       // build a record writer using HFileOutputFormat
240       Job job = new Job(conf);
241       FileOutputFormat.setOutputPath(job, dir);
242       context = createTestTaskAttemptContext(job);
243       HFileOutputFormat hof = new HFileOutputFormat();
244       writer = hof.getRecordWriter(context);
245 
246       // Pass two key values with explicit timestamps
247       final byte [] b = Bytes.toBytes("b");
248 
249       // value 1 with timestamp 2000
250       KeyValue kv = new KeyValue(b, b, b, 2000, b);
251       KeyValue original = kv.clone();
252       writer.write(new ImmutableBytesWritable(), kv);
253       assertEquals(original, kv);
254 
255       // value 2 with timestamp 1000
256       kv = new KeyValue(b, b, b, 1000, b);
257       original = kv.clone();
258       writer.write(new ImmutableBytesWritable(), kv);
259       assertEquals(original, kv);
260 
261       // verify that the file has the proper FileInfo.
262       writer.close(context);
263 
264       // the generated file lives 1 directory down from the attempt directory
265       // and is the only file, e.g.
266       // _attempt__0000_r_000000_0/b/1979617994050536795
267       FileSystem fs = FileSystem.get(conf);
268       Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
269       FileStatus[] sub1 = fs.listStatus(attemptDirectory);
270       FileStatus[] file = fs.listStatus(sub1[0].getPath());
271 
272       // open as HFile Reader and pull out TIMERANGE FileInfo.
273       HFile.Reader rd = HFile.createReader(fs, file[0].getPath(),
274           new CacheConfig(conf), conf);
275       Map<byte[],byte[]> finfo = rd.loadFileInfo();
276       byte[] range = finfo.get("TIMERANGE".getBytes());
277       assertNotNull(range);
278 
279       // unmarshall and check values.
280       TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
281       Writables.copyWritable(range, timeRangeTracker);
282       LOG.info(timeRangeTracker.getMinimumTimestamp() +
283           "...." + timeRangeTracker.getMaximumTimestamp());
284       assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
285       assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
286       rd.close();
287     } finally {
288       if (writer != null && context != null) writer.close(context);
289       dir.getFileSystem(conf).delete(dir, true);
290     }
291   }
292 
293   /**
294    * Run small MR job.
295    */
296   @Test
297   public void testWritingPEData() throws Exception {
298     Configuration conf = util.getConfiguration();
299     Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
300     FileSystem fs = testDir.getFileSystem(conf);
301 
302     // Lower this value or we OOM in Eclipse.
303     conf.setInt("mapreduce.task.io.sort.mb", 20);
304     // Write a few files.
305     conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);
306 
307     Job job = new Job(conf, "testWritingPEData");
308     setupRandomGeneratorMapper(job);
309     // This partitioner doesn't work well for random keys, but we use it anyway
310     // just to demonstrate how to configure it.
311     byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
312     byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
313 
314     Arrays.fill(startKey, (byte)0);
315     Arrays.fill(endKey, (byte)0xff);
316 
317     job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
318     // Set start and end rows for partitioner.
319     SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
320     SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
321     job.setReducerClass(KeyValueSortReducer.class);
322     job.setOutputFormatClass(HFileOutputFormat.class);
323     job.setNumReduceTasks(4);
324     job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
325         MutationSerialization.class.getName(), ResultSerialization.class.getName(),
326         KeyValueSerialization.class.getName());
327 
328     FileOutputFormat.setOutputPath(job, testDir);
329     assertTrue(job.waitForCompletion(false));
330     FileStatus [] files = fs.listStatus(testDir);
331     assertTrue(files.length > 0);
332   }
333 
334   @Test
335   public void testJobConfiguration() throws Exception {
336     Job job = new Job(util.getConfiguration());
337     job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
338     HTable table = Mockito.mock(HTable.class);
339     setupMockStartKeys(table);
340     HFileOutputFormat.configureIncrementalLoad(job, table);
341     assertEquals(4, job.getNumReduceTasks());
342   }
343 
344   private byte [][] generateRandomStartKeys(int numKeys) {
345     Random random = new Random();
346     byte[][] ret = new byte[numKeys][];
347     // first region start key is always empty
348     ret[0] = HConstants.EMPTY_BYTE_ARRAY;
349     for (int i = 1; i < numKeys; i++) {
350       ret[i] =
351         PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
352     }
353     return ret;
354   }
355 
356   @Test
357   public void testMRIncrementalLoad() throws Exception {
358     LOG.info("\nStarting test testMRIncrementalLoad\n");
359     doIncrementalLoadTest(false);
360   }
361 
362   @Test
363   public void testMRIncrementalLoadWithSplit() throws Exception {
364     LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
365     doIncrementalLoadTest(true);
366   }
367 
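      // End-to-end incremental load: write HFiles with the MR job, optionally re-split
      // the table while the files sit on disk, then bulk load them and verify the contents.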
368   private void doIncrementalLoadTest(
369       boolean shouldChangeRegions) throws Exception {
370     util = new HBaseTestingUtility();
371     Configuration conf = util.getConfiguration();
372     byte[][] startKeys = generateRandomStartKeys(5);
373     HBaseAdmin admin = null;
374     try {
375       util.startMiniCluster();
376       Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
377       admin = new HBaseAdmin(conf);
378       HTable table = util.createTable(TABLE_NAME, FAMILIES);
379       assertEquals("Should start with empty table",
380           0, util.countRows(table));
381       int numRegions = util.createMultiRegions(
382           util.getConfiguration(), table, FAMILIES[0], startKeys);
383       assertEquals("Should make 5 regions", 5, numRegions);
384 
385       // Generate the bulk load files
386       util.startMiniMapReduceCluster();
387       runIncrementalPELoad(conf, table, testDir);
388       // This doesn't write into the table, just makes files
389       assertEquals("HFOF should not touch actual table",
390           0, util.countRows(table));
391 
392 
393       // Make sure that a directory was created for every CF
394       int dir = 0;
395       for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
396         for (byte[] family : FAMILIES) {
397           if (Bytes.toString(family).equals(f.getPath().getName())) {
398             ++dir;
399           }
400         }
401       }
402       assertEquals("Column family not found in FS.", FAMILIES.length, dir);
403 
404       // handle the split case
405       if (shouldChangeRegions) {
406         LOG.info("Changing regions in table");
407         admin.disableTable(table.getTableName());
408         while(util.getMiniHBaseCluster().getMaster().getAssignmentManager().
409             getRegionStates().isRegionsInTransition()) {
410           Threads.sleep(200);
411           LOG.info("Waiting on table to finish disabling");
412         }
413         byte[][] newStartKeys = generateRandomStartKeys(15);
414         util.createMultiRegions(
415             util.getConfiguration(), table, FAMILIES[0], newStartKeys);
416         admin.enableTable(table.getTableName());
417         while (table.getRegionLocations().size() != 15 ||
418             !admin.isTableAvailable(table.getTableName())) {
419           Thread.sleep(200);
420           LOG.info("Waiting for new region assignment to happen");
421         }
422       }
423 
424       // Perform the actual load
425       new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
426 
427       // Ensure data shows up
428       int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
429       assertEquals("LoadIncrementalHFiles should put expected data in table",
430           expectedRows, util.countRows(table));
431       Scan scan = new Scan();
432       ResultScanner results = table.getScanner(scan);
433       for (Result res : results) {
434         assertEquals(FAMILIES.length, res.rawCells().length);
435         Cell first = res.rawCells()[0];
436         for (Cell kv : res.rawCells()) {
437           assertTrue(CellUtil.matchingRow(first, kv));
438           assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
439         }
440       }
441       results.close();
442       String tableDigestBefore = util.checksumRows(table);
443 
444       // Cause regions to reopen
445       admin.disableTable(TABLE_NAME);
446       while (!admin.isTableDisabled(TABLE_NAME)) {
447         Thread.sleep(200);
448         LOG.info("Waiting for table to disable");
449       }
450       admin.enableTable(TABLE_NAME);
451       util.waitTableAvailable(TABLE_NAME);
452       assertEquals("Data should remain after reopening of regions",
453           tableDigestBefore, util.checksumRows(table));
454     } finally {
455       if (admin != null) admin.close();
456       util.shutdownMiniMapReduceCluster();
457       util.shutdownMiniCluster();
458     }
459   }
460 
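      // Configure and run the MapReduce job that writes HFiles for the given table into outDir
      // (one reduce task per region).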
461   private void runIncrementalPELoad(
462       Configuration conf, HTable table, Path outDir)
463   throws Exception {
464     Job job = new Job(conf, "testLocalMRIncrementalLoad");
465     job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
466     job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
467         MutationSerialization.class.getName(), ResultSerialization.class.getName(),
468         KeyValueSerialization.class.getName());
469     setupRandomGeneratorMapper(job);
470     HFileOutputFormat.configureIncrementalLoad(job, table);
471     FileOutputFormat.setOutputPath(job, outDir);
472 
473     Assert.assertFalse(util.getTestFileSystem().exists(outDir));
474 
475     assertEquals(table.getRegionLocations().size(), job.getNumReduceTasks());
476 
477     assertTrue(job.waitForCompletion(true));
478   }
479 
480   /**
481    * Test for {@link HFileOutputFormat#configureCompression(org.apache.hadoop.hbase.client.Table,
482    * Configuration)} and {@link HFileOutputFormat#createFamilyCompressionMap
483    * (Configuration)}.
484    * Tests that the compression map is correctly serialized into
485    * and deserialized from configuration
486    *
487    * @throws IOException
488    */
489   @Test
490   public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
491     for (int numCfs = 0; numCfs <= 3; numCfs++) {
492       Configuration conf = new Configuration(this.util.getConfiguration());
493       Map<String, Compression.Algorithm> familyToCompression =
494           getMockColumnFamiliesForCompression(numCfs);
495       Table table = Mockito.mock(HTable.class);
496       setupMockColumnFamiliesForCompression(table, familyToCompression);
497       HFileOutputFormat.configureCompression(table, conf);
498 
499       // read back family specific compression setting from the configuration
500       Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat
501           .createFamilyCompressionMap(conf);
502 
503       // test that we have a value for all column families that matches with the
504       // used mock values
505       for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
506         assertEquals("Compression configuration incorrect for column family:"
507             + entry.getKey(), entry.getValue(),
508             retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
509       }
510     }
511   }
512 
513   private void setupMockColumnFamiliesForCompression(Table table,
514       Map<String, Compression.Algorithm> familyToCompression) throws IOException {
515     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
516     for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
517       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
518           .setMaxVersions(1)
519           .setCompressionType(entry.getValue())
520           .setBlockCacheEnabled(false)
521           .setTimeToLive(0));
522     }
523     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
524   }
525 
526   /**
527    * @return a map from column family names to compression algorithms for
528    *         testing column family compression. Column family names have special characters
529    */
530   private Map<String, Compression.Algorithm>
531       getMockColumnFamiliesForCompression (int numCfs) {
532     Map<String, Compression.Algorithm> familyToCompression = new HashMap<String, Compression.Algorithm>();
533     // use column family names having special characters
534     if (numCfs-- > 0) {
535       familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
536     }
537     if (numCfs-- > 0) {
538       familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
539     }
540     if (numCfs-- > 0) {
541       familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
542     }
543     if (numCfs-- > 0) {
544       familyToCompression.put("Family3", Compression.Algorithm.NONE);
545     }
546     return familyToCompression;
547   }
548 
549 
550   /**
551    * Test for {@link HFileOutputFormat#configureBloomType(org.apache.hadoop.hbase.client.Table,
552    * Configuration)} and {@link HFileOutputFormat#createFamilyBloomTypeMap
553    * (Configuration)}.
554    * Tests that the bloom type map is correctly serialized into
555    * and deserialized from configuration
556    *
557    * @throws IOException
558    */
559   @Test
560   public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
561     for (int numCfs = 0; numCfs <= 2; numCfs++) {
562       Configuration conf = new Configuration(this.util.getConfiguration());
563       Map<String, BloomType> familyToBloomType =
564           getMockColumnFamiliesForBloomType(numCfs);
565       Table table = Mockito.mock(HTable.class);
566       setupMockColumnFamiliesForBloomType(table,
567           familyToBloomType);
568       HFileOutputFormat.configureBloomType(table, conf);
569 
570       // read back family specific bloom filter type settings from the
571       // configuration
572       Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
573           HFileOutputFormat
574               .createFamilyBloomTypeMap(conf);
575 
576       // test that we have a value for all column families that matches with the
577       // used mock values
578       for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
579         assertEquals("BloomType configuration incorrect for column family:"
580             + entry.getKey(), entry.getValue(),
581             retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes()));
582       }
583     }
584   }
585 
586   private void setupMockColumnFamiliesForBloomType(Table table,
587       Map<String, BloomType> familyToBloomType) throws IOException {
588     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
589     for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
590       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
591           .setMaxVersions(1)
592           .setBloomFilterType(entry.getValue())
593           .setBlockCacheEnabled(false)
594           .setTimeToLive(0));
595     }
596     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
597   }
598 
599   /**
600    * @return a map from column family names to bloom filter types for
601    *         testing column family bloom type configuration. Column family names have special characters
602    */
603   private Map<String, BloomType>
604   getMockColumnFamiliesForBloomType (int numCfs) {
605     Map<String, BloomType> familyToBloomType =
606         new HashMap<String, BloomType>();
607     // use column family names having special characters
608     if (numCfs-- > 0) {
609       familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
610     }
611     if (numCfs-- > 0) {
612       familyToBloomType.put("Family2=asdads&!AASD",
613           BloomType.ROWCOL);
614     }
615     if (numCfs-- > 0) {
616       familyToBloomType.put("Family3", BloomType.NONE);
617     }
618     return familyToBloomType;
619   }
620 
621   /**
622    * Test for {@link HFileOutputFormat#configureBlockSize(org.apache.hadoop.hbase.client.Table,
623    * Configuration)} and {@link HFileOutputFormat#createFamilyBlockSizeMap
624    * (Configuration)}.
625    * Tests that the block size map is correctly serialized into
626    * and deserialized from configuration
627    *
628    * @throws IOException
629    */
630   @Test
631   public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
632     for (int numCfs = 0; numCfs <= 3; numCfs++) {
633       Configuration conf = new Configuration(this.util.getConfiguration());
634       Map<String, Integer> familyToBlockSize =
635           getMockColumnFamiliesForBlockSize(numCfs);
636       Table table = Mockito.mock(HTable.class);
637       setupMockColumnFamiliesForBlockSize(table,
638           familyToBlockSize);
639       HFileOutputFormat.configureBlockSize(table, conf);
640 
641       // read back family specific block size settings from the
642       // configuration
643       Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
644           HFileOutputFormat
645               .createFamilyBlockSizeMap(conf);
646 
647       // test that we have a value for all column families that matches with the
648       // used mock values
649       for (Entry<String, Integer> entry : familyToBlockSize.entrySet()
650           ) {
651         assertEquals("BlockSize configuration incorrect for column family:"
652             + entry.getKey(), entry.getValue(),
653             retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes()));
654       }
655     }
656   }
657 
658   private void setupMockColumnFamiliesForBlockSize(Table table,
659       Map<String, Integer> familyToBlockSize) throws IOException {
660     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
661     for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
662       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
663           .setMaxVersions(1)
664           .setBlocksize(entry.getValue())
665           .setBlockCacheEnabled(false)
666           .setTimeToLive(0));
667     }
668     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
669   }
670 
671   /**
672    * @return a map from column family names to block sizes for
673    *         testing column family block size configuration. Column family names have special characters
674    */
675   private Map<String, Integer>
676   getMockColumnFamiliesForBlockSize (int numCfs) {
677     Map<String, Integer> familyToBlockSize =
678         new HashMap<String, Integer>();
679     // use column family names having special characters
680     if (numCfs-- > 0) {
681       familyToBlockSize.put("Family1!@#!@#&", 1234);
682     }
683     if (numCfs-- > 0) {
684       familyToBlockSize.put("Family2=asdads&!AASD",
685           Integer.MAX_VALUE);
686     }
687     if (numCfs-- > 0) {
688       familyToBlockSize.put("Family2=asdads&!AASD",
689           Integer.MAX_VALUE);
690     }
691     if (numCfs-- > 0) {
692       familyToBlockSize.put("Family3", 0);
693     }
694     return familyToBlockSize;
695   }
696 
697   /**
698    * Test for {@link HFileOutputFormat#configureDataBlockEncoding(org.apache.hadoop.hbase.client.Table,
699    * Configuration)} and {@link HFileOutputFormat#createFamilyDataBlockEncodingMap
700    * (Configuration)}.
701    * Tests that the data block encoding map is correctly serialized into
702    * and deserialized from configuration
703    *
704    * @throws IOException
705    */
706   @Test
707   public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
708     for (int numCfs = 0; numCfs <= 3; numCfs++) {
709       Configuration conf = new Configuration(this.util.getConfiguration());
710       Map<String, DataBlockEncoding> familyToDataBlockEncoding =
711           getMockColumnFamiliesForDataBlockEncoding(numCfs);
712       Table table = Mockito.mock(HTable.class);
713       setupMockColumnFamiliesForDataBlockEncoding(table,
714           familyToDataBlockEncoding);
715       HFileOutputFormat.configureDataBlockEncoding(table, conf);
716 
717       // read back family specific data block encoding settings from the
718       // configuration
719       Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
720           HFileOutputFormat
721           .createFamilyDataBlockEncodingMap(conf);
722 
723       // test that we have a value for all column families that matches with the
724       // used mock values
725       for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
726         assertEquals("DataBlockEncoding configuration incorrect for column family:"
727             + entry.getKey(), entry.getValue(),
728             retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes()));
729       }
730     }
731   }
732 
733   private void setupMockColumnFamiliesForDataBlockEncoding(Table table,
734       Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
735     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
736     for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
737       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
738           .setMaxVersions(1)
739           .setDataBlockEncoding(entry.getValue())
740           .setBlockCacheEnabled(false)
741           .setTimeToLive(0));
742     }
743     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
744   }
745 
746   /**
747    * @return a map from column family names to data block encodings for
748    *         testing column family data block encoding configuration. Column family names have special characters
749    */
750   private Map<String, DataBlockEncoding>
751       getMockColumnFamiliesForDataBlockEncoding (int numCfs) {
752     Map<String, DataBlockEncoding> familyToDataBlockEncoding =
753         new HashMap<String, DataBlockEncoding>();
754     // use column family names having special characters
755     if (numCfs-- > 0) {
756       familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
757     }
758     if (numCfs-- > 0) {
759       familyToDataBlockEncoding.put("Family2=asdads&!AASD",
760           DataBlockEncoding.FAST_DIFF);
761     }
762     if (numCfs-- > 0) {
763       familyToDataBlockEncoding.put("Family2=asdads&!AASD",
764           DataBlockEncoding.PREFIX);
765     }
766     if (numCfs-- > 0) {
767       familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
768     }
769     return familyToDataBlockEncoding;
770   }
771 
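      // Mock four region start keys so configureIncrementalLoad creates one reduce task per region.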
772   private void setupMockStartKeys(RegionLocator table) throws IOException {
773     byte[][] mockKeys = new byte[][] {
774         HConstants.EMPTY_BYTE_ARRAY,
775         Bytes.toBytes("aaa"),
776         Bytes.toBytes("ggg"),
777         Bytes.toBytes("zzz")
778     };
779     Mockito.doReturn(mockKeys).when(table).getStartKeys();
780   }
781 
782   /**
783    * Test that {@link HFileOutputFormat} RecordWriter uses compression and
784    * bloom filter settings from the column family descriptor
785    */
786   @Test
787   public void testColumnFamilySettings() throws Exception {
788     Configuration conf = new Configuration(this.util.getConfiguration());
789     RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
790     TaskAttemptContext context = null;
791     Path dir = util.getDataTestDir("testColumnFamilySettings");
792 
793     // Setup table descriptor
794     HTable table = Mockito.mock(HTable.class);
795     HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
796     Mockito.doReturn(htd).when(table).getTableDescriptor();
797     for (HColumnDescriptor hcd: this.util.generateColumnDescriptors()) {
798       htd.addFamily(hcd);
799     }
800 
801     // set up the table to return some mock keys
802     setupMockStartKeys(table);
803 
804     try {
805       // partial MapReduce setup to get an operational RecordWriter for testing
806       // We turn off the sequence file compression, because DefaultCodec
807       // pollutes the GZip codec pool with an incompatible compressor.
808       conf.set("io.seqfile.compression.type", "NONE");
809       Job job = new Job(conf, "testLocalMRIncrementalLoad");
810       job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
811       setupRandomGeneratorMapper(job);
812       HFileOutputFormat.configureIncrementalLoad(job, table);
813       FileOutputFormat.setOutputPath(job, dir);
814       context = createTestTaskAttemptContext(job);
815       HFileOutputFormat hof = new HFileOutputFormat();
816       writer = hof.getRecordWriter(context);
817 
818       // write out random rows
819       writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
820       writer.close(context);
821 
822       // Make sure that a directory was created for every CF
823       FileSystem fs = dir.getFileSystem(conf);
824 
825       // commit so that the filesystem has one directory per column family
826       hof.getOutputCommitter(context).commitTask(context);
827       hof.getOutputCommitter(context).commitJob(context);
828       FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
829       assertEquals(htd.getFamilies().size(), families.length);
830       for (FileStatus f : families) {
831         String familyStr = f.getPath().getName();
832         HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
833         // verify that the compression on this file matches the configured
834         // compression
835         Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
836         Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
837         Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
838 
839         byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
840         if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
841         assertEquals("Incorrect bloom filter used for column family " + familyStr +
842           " (reader: " + reader + ")",
843           hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
844         assertEquals("Incorrect compression used for column family " + familyStr +
845           " (reader: " + reader + ")", hcd.getCompression(), reader.getFileContext().getCompression());
846       }
847     } finally {
848       dir.getFileSystem(conf).delete(dir, true);
849     }
850   }
851 
852   /**
853    * Write random values to the writer, emitting one KeyValue per given column
854    * family for each row.
855    */
856   private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, KeyValue> writer,
857       TaskAttemptContext context, Set<byte[]> families, int numRows)
858       throws IOException, InterruptedException {
859     byte[] keyBytes = new byte[Bytes.SIZEOF_INT];
860     int valLength = 10;
861     byte[] valBytes = new byte[valLength];
862 
863     int taskId = context.getTaskAttemptID().getTaskID().getId();
864     assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
865     final byte [] qualifier = Bytes.toBytes("data");
866     Random random = new Random();
867     for (int i = 0; i < numRows; i++) {
868 
869       Bytes.putInt(keyBytes, 0, i);
870       random.nextBytes(valBytes);
871       ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
872 
873       for (byte[] family : families) {
874         KeyValue kv = new KeyValue(keyBytes, family, qualifier, valBytes);
875         writer.write(key, kv);
876       }
877     }
878   }
879 
880   /**
881    * This test covers the scenario reported in HBASE-6901:
882    * all files are bulk loaded and excluded from minor compaction.
883    * Without the fix for HBASE-6901, an ArrayIndexOutOfBoundsException
884    * would be thrown.
885    */
886   @Ignore ("Flakey: See HBASE-9051") @Test
887   public void testExcludeAllFromMinorCompaction() throws Exception {
888     Configuration conf = util.getConfiguration();
889     conf.setInt("hbase.hstore.compaction.min", 2);
890     generateRandomStartKeys(5);
891 
892     try {
893       util.startMiniCluster();
894       final FileSystem fs = util.getDFSCluster().getFileSystem();
895       HBaseAdmin admin = new HBaseAdmin(conf);
896       HTable table = util.createTable(TABLE_NAME, FAMILIES);
897       assertEquals("Should start with empty table", 0, util.countRows(table));
898 
899       // deep inspection: get the StoreFile dir
900       final Path storePath = HStore.getStoreHomedir(
901           FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
902           admin.getTableRegions(TABLE_NAME).get(0),
903           FAMILIES[0]);
904       assertEquals(0, fs.listStatus(storePath).length);
905 
906       // Generate two bulk load files
907       conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
908           true);
909       util.startMiniMapReduceCluster();
910 
911       for (int i = 0; i < 2; i++) {
912         Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
913         runIncrementalPELoad(conf, table, testDir);
914         // Perform the actual load
915         new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
916       }
917 
918       // Ensure data shows up
919       int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
920       assertEquals("LoadIncrementalHFiles should put expected data in table",
921           expectedRows, util.countRows(table));
922 
923       // should have a second StoreFile now
924       assertEquals(2, fs.listStatus(storePath).length);
925 
926       // minor compactions shouldn't get rid of the file
927       admin.compact(TABLE_NAME.getName());
928       try {
929         quickPoll(new Callable<Boolean>() {
930           public Boolean call() throws Exception {
931             return fs.listStatus(storePath).length == 1;
932           }
933         }, 5000);
934         throw new IOException("SF# = " + fs.listStatus(storePath).length);
935       } catch (AssertionError ae) {
936         // this is expected behavior
937       }
938 
939       // a major compaction should work though
940       admin.majorCompact(TABLE_NAME.getName());
941       quickPoll(new Callable<Boolean>() {
942         public Boolean call() throws Exception {
943           return fs.listStatus(storePath).length == 1;
944         }
945       }, 5000);
946 
947     } finally {
948       util.shutdownMiniMapReduceCluster();
949       util.shutdownMiniCluster();
950     }
951   }
952 
953   @Test
954   public void testExcludeMinorCompaction() throws Exception {
955     Configuration conf = util.getConfiguration();
956     conf.setInt("hbase.hstore.compaction.min", 2);
957     generateRandomStartKeys(5);
958 
959     try {
960       util.startMiniCluster();
961       Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
962       final FileSystem fs = util.getDFSCluster().getFileSystem();
963       HBaseAdmin admin = new HBaseAdmin(conf);
964       HTable table = util.createTable(TABLE_NAME, FAMILIES);
965       assertEquals("Should start with empty table", 0, util.countRows(table));
966 
967       // deep inspection: get the StoreFile dir
968       final Path storePath = HStore.getStoreHomedir(
969           FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
970           admin.getTableRegions(TABLE_NAME).get(0),
971           FAMILIES[0]);
972       assertEquals(0, fs.listStatus(storePath).length);
973 
974       // put some data in it and flush to create a storefile
975       Put p = new Put(Bytes.toBytes("test"));
976       p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
977       table.put(p);
978       admin.flush(TABLE_NAME.getName());
979       assertEquals(1, util.countRows(table));
980       quickPoll(new Callable<Boolean>() {
981         public Boolean call() throws Exception {
982           return fs.listStatus(storePath).length == 1;
983         }
984       }, 5000);
985 
986       // Generate a bulk load file with more rows
987       conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
988           true);
989       util.startMiniMapReduceCluster();
990       runIncrementalPELoad(conf, table, testDir);
991 
992       // Perform the actual load
993       new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
994 
995       // Ensure data shows up
996       int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
997       assertEquals("LoadIncrementalHFiles should put expected data in table",
998           expectedRows + 1, util.countRows(table));
999 
1000       // should have a second StoreFile now
1001       assertEquals(2, fs.listStatus(storePath).length);
1002 
1003       // minor compactions shouldn't get rid of the file
1004       admin.compact(TABLE_NAME.getName());
1005       try {
1006         quickPoll(new Callable<Boolean>() {
1007           public Boolean call() throws Exception {
1008             return fs.listStatus(storePath).length == 1;
1009           }
1010         }, 5000);
1011         throw new IOException("SF# = " + fs.listStatus(storePath).length);
1012       } catch (AssertionError ae) {
1013         // this is expected behavior
1014       }
1015 
1016       // a major compaction should work though
1017       admin.majorCompact(TABLE_NAME.getName());
1018       quickPoll(new Callable<Boolean>() {
1019         public Boolean call() throws Exception {
1020           return fs.listStatus(storePath).length == 1;
1021         }
1022       }, 5000);
1023 
1024     } finally {
1025       util.shutdownMiniMapReduceCluster();
1026       util.shutdownMiniCluster();
1027     }
1028   }
1029 
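      // Poll the callable roughly every 10 ms until it returns true or waitMs elapses, then fail().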
1030   private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
1031     int sleepMs = 10;
1032     int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
1033     while (retries-- > 0) {
1034       if (c.call().booleanValue()) {
1035         return;
1036       }
1037       Thread.sleep(sleepMs);
1038     }
1039     fail();
1040   }
1041 
1042   public static void main(String[] args) throws Exception {
1043     new TestHFileOutputFormat().manualTest(args);
1044   }
1045 
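      // Manual driver: "newtable <name>" creates a pre-split test table;
      // "incremental <name>" runs the HFile-writing job against an existing table.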
1046   public void manualTest(String[] args) throws Exception {
1047     Configuration conf = HBaseConfiguration.create();
1048     util = new HBaseTestingUtility(conf);
1049     if ("newtable".equals(args[0])) {
1050       TableName tname = TableName.valueOf(args[1]);
1051       HTable table = util.createTable(tname, FAMILIES);
1052       HBaseAdmin admin = new HBaseAdmin(conf);
1053       admin.disableTable(tname);
1054       byte[][] startKeys = generateRandomStartKeys(5);
1055       util.createMultiRegions(conf, table, FAMILIES[0], startKeys);
1056       admin.enableTable(tname);
1057     } else if ("incremental".equals(args[0])) {
1058       TableName tname = TableName.valueOf(args[1]);
1059       HTable table = new HTable(conf, tname);
1060       Path outDir = new Path("incremental-out");
1061       runIncrementalPELoad(conf, table, outDir);
1062     } else {
1063       throw new RuntimeException(
1064           "usage: TestHFileOutputFormat newtable | incremental");
1065     }
1066   }
1067 
1068 }
1069