/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HadoopShims;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;

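/**
 * Simple test for {@link HFileOutputFormat2}. Sets up and runs a mapreduce job that
 * writes HFile output, then validates the produced files and the bulk-load path.
 */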
@Category(LargeTests.class)
public class TestHFileOutputFormat2 {
  private final static int ROWSPERSPLIT = 1024;

  private static final byte[][] FAMILIES =
      { Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A")),
        Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B")) };
  private static final TableName TABLE_NAME = TableName.valueOf("TestTable");

  private HBaseTestingUtility util = new HBaseTestingUtility();

  private static Log LOG = LogFactory.getLog(TestHFileOutputFormat2.class);

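  /**
   * Simple mapper that emits ROWSPERSPLIT rows of random KeyValues, one cell per
   * column family, with the task id folded into the last byte of the row key so
   * rows from different tasks do not collide.
   */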
  static class RandomKVGeneratingMapper
      extends Mapper<NullWritable, NullWritable, ImmutableBytesWritable, Cell> {

    private int keyLength;
    private static final int KEYLEN_DEFAULT = 10;
    private static final String KEYLEN_CONF = "randomkv.key.length";

    private int valLength;
    private static final int VALLEN_DEFAULT = 10;
    private static final String VALLEN_CONF = "randomkv.val.length";
    private static final byte[] QUALIFIER = Bytes.toBytes("data");

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);

      Configuration conf = context.getConfiguration();
      keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
      valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
    }

    @Override
    protected void map(NullWritable n1, NullWritable n2,
        Mapper<NullWritable, NullWritable, ImmutableBytesWritable, Cell>.Context context)
        throws IOException, InterruptedException {

      byte[] keyBytes = new byte[keyLength];
      byte[] valBytes = new byte[valLength];

      int taskId = context.getTaskAttemptID().getTaskID().getId();
      assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

      Random random = new Random();
      for (int i = 0; i < ROWSPERSPLIT; i++) {
        random.nextBytes(keyBytes);
        // Ensure that different tasks generate different row keys.
        keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
        random.nextBytes(valBytes);
        ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

        for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
          Cell kv = new KeyValue(keyBytes, family, QUALIFIER, valBytes);
          context.write(key, kv);
        }
      }
    }
  }

  private void setupRandomGeneratorMapper(Job job) {
    job.setInputFormatClass(NMapInputFormat.class);
    job.setMapperClass(RandomKVGeneratingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
  }

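  /**
   * Tests that the {@link HFileOutputFormat2} RecordWriter replaces
   * {@link HConstants#LATEST_TIMESTAMP} with an actual timestamp when a cell is
   * written, while leaving cells with explicit timestamps untouched.
   */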
  @Test
  public void test_LATEST_TIMESTAMP_isReplaced() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);
      final byte[] b = Bytes.toBytes("b");

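      // Test 1: a KeyValue created without an explicit timestamp carries
      // LATEST_TIMESTAMP; the writer must stamp it with the current time.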
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
      assertNotEquals(original.getTimestamp(), kv.getTimestamp());
      assertNotEquals(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());

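      // Test 2: a KeyValue with an explicit timestamp must pass through unchanged.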
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  private TaskAttemptContext createTestTaskAttemptContext(final Job job)
      throws Exception {
    HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
    TaskAttemptContext context = hadoop.createTestTaskAttemptContext(
        job, "attempt_201402131733_0001_m_000000_0");
    return context;
  }

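  /**
   * Tests that the RecordWriter tracks the minimum and maximum timestamps of the
   * cells it writes and records them in the HFile's TIMERANGE file info.
   */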
  @Test
  public void test_TIMERANGE() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_TIMERANGE_present");
    LOG.info("Timerange dir writing to dir: " + dir);
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      final byte[] b = Bytes.toBytes("b");
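      // Write two cells with explicit timestamps 2000 and 1000; the writer must not
      // modify either of them.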
      KeyValue kv = new KeyValue(b, b, b, 2000, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      kv = new KeyValue(b, b, b, 1000, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      writer.close(context);

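      // The writer produced a single HFile under the task attempt directory; open it
      // and pull the TIMERANGE entry out of its file info.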
      FileSystem fs = FileSystem.get(conf);
      Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
      FileStatus[] sub1 = fs.listStatus(attemptDirectory);
      FileStatus[] file = fs.listStatus(sub1[0].getPath());

      HFile.Reader rd = HFile.createReader(fs, file[0].getPath(),
          new CacheConfig(conf), conf);
      Map<byte[], byte[]> finfo = rd.loadFileInfo();
      byte[] range = finfo.get("TIMERANGE".getBytes());
      assertNotNull(range);

      TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
      Writables.copyWritable(range, timeRangeTracker);
      LOG.info(timeRangeTracker.getMinimumTimestamp() +
          "...." + timeRangeTracker.getMaximumTimestamp());
      assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
      assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
      rd.close();
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

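  /**
   * Runs a small MapReduce job that writes PerformanceEvaluation-style random data
   * through {@link HFileOutputFormat2} and checks that output files were produced.
   */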
  @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

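    // Keep the map-side sort buffer and the region max file size small so the local
    // job stays lightweight but still rolls over to several HFiles.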
    conf.setInt("mapreduce.task.io.sort.mb", 20);
    conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job);

    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte) 0);
    Arrays.fill(endKey, (byte) 0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat2.class);
    job.setNumReduceTasks(4);
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus[] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }

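  /**
   * Tests that {@link HFileOutputFormat2#configureIncrementalLoad} sets the reducer
   * count to the table's region count; the mocked RegionLocator supplies four start
   * keys, so four reducers are expected.
   */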
  @Test
  public void testJobConfiguration() throws Exception {
    Job job = new Job(util.getConfiguration());
    job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
    HTable table = Mockito.mock(HTable.class);
    setupMockStartKeys(table);
    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
    assertEquals(4, job.getNumReduceTasks());
  }

  private byte[][] generateRandomStartKeys(int numKeys) {
    Random random = new Random();
    byte[][] ret = new byte[numKeys][];
    // The first region always starts with the empty key.
    ret[0] = HConstants.EMPTY_BYTE_ARRAY;
    for (int i = 1; i < numKeys; i++) {
      ret[i] =
          PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
    }
    return ret;
  }

  @Test
  public void testMRIncrementalLoad() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoad\n");
    doIncrementalLoadTest(false);
  }

  @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
    doIncrementalLoadTest(true);
  }

  private void doIncrementalLoadTest(boolean shouldChangeRegions) throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    byte[][] startKeys = generateRandomStartKeys(5);
    HBaseAdmin admin = null;
    try {
      util.startMiniCluster();
      Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
      admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));
      int numRegions = util.createMultiRegions(
          util.getConfiguration(), table, FAMILIES[0], startKeys);
      assertEquals("Should make 5 regions", 5, numRegions);

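      // Generate the bulk-load files with a local MapReduce job; HFileOutputFormat2
      // must never write to the live table.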
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);

      assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

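      // Make sure that a directory was created for every column family.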
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

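      // Optionally re-split the table before loading so LoadIncrementalHFiles has to
      // split the generated HFiles across the new region boundaries.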
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getTableName());
        while (util.getMiniHBaseCluster().getMaster().getAssignmentManager().
            getRegionStates().isRegionsInTransition()) {
          Threads.sleep(200);
          LOG.info("Waiting on table to finish disabling");
        }
        byte[][] newStartKeys = generateRandomStartKeys(15);
        util.createMultiRegions(
            util.getConfiguration(), table, FAMILIES[0], newStartKeys);
        admin.enableTable(table.getTableName());
        while (table.getRegionLocations().size() != 15 ||
            !admin.isTableAvailable(table.getTableName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

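      // Perform the actual bulk load.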
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

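      // Ensure the loaded data is visible: every row should carry one cell per column
      // family, and all cells in a row should share the same row key and value.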
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
      String tableDigestBefore = util.checksumRows(table);

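      // Force the regions to reopen by disabling and re-enabling the table; the data
      // must survive the reopen.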
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME);
      assertEquals("Data should remain after reopening of regions",
          tableDigestBefore, util.checksumRows(table));
    } finally {
      if (admin != null) admin.close();
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  private void runIncrementalPELoad(Configuration conf, HTable table, Path outDir)
      throws Exception {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    setupRandomGeneratorMapper(job);
    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
    FileOutputFormat.setOutputPath(job, outDir);

    assertFalse(util.getTestFileSystem().exists(outDir));

    assertEquals(table.getRegionLocations().size(), job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }

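  /**
   * Tests that the family-to-compression map is serialized into the job configuration
   * by {@link HFileOutputFormat2#configureCompression} and deserialized back by
   * {@link HFileOutputFormat2#createFamilyCompressionMap}, including family names that
   * contain special characters.
   */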
  @Test
  public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Compression.Algorithm> familyToCompression =
          getMockColumnFamiliesForCompression(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForCompression(table, familyToCompression);
      HFileOutputFormat2.configureCompression(table, conf);

      Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat2
          .createFamilyCompressionMap(conf);

      for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
        assertEquals("Compression configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForCompression(Table table,
      Map<String, Compression.Algorithm> familyToCompression) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setCompressionType(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

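  /**
   * @return a map from column family names to compression algorithms for testing;
   *         the family names deliberately include special characters.
   */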
  private Map<String, Compression.Algorithm>
      getMockColumnFamiliesForCompression(int numCfs) {
    Map<String, Compression.Algorithm> familyToCompression =
        new HashMap<String, Compression.Algorithm>();

    if (numCfs-- > 0) {
      familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family3", Compression.Algorithm.NONE);
    }
    return familyToCompression;
  }

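  /**
   * Tests that the family-to-bloom-type map is serialized into the job configuration
   * by {@link HFileOutputFormat2#configureBloomType} and deserialized back by
   * {@link HFileOutputFormat2#createFamilyBloomTypeMap}.
   */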
  @Test
  public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 2; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, BloomType> familyToBloomType =
          getMockColumnFamiliesForBloomType(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBloomType(table, familyToBloomType);
      HFileOutputFormat2.configureBloomType(table, conf);

      Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
          HFileOutputFormat2.createFamilyBloomTypeMap(conf);

      for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
        assertEquals("BloomType configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForBloomType(Table table,
      Map<String, BloomType> familyToBloomType) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setBloomFilterType(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to bloom filter types for testing.
   */
  private Map<String, BloomType> getMockColumnFamiliesForBloomType(int numCfs) {
    Map<String, BloomType> familyToBloomType = new HashMap<String, BloomType>();

    if (numCfs-- > 0) {
      familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
    }
    if (numCfs-- > 0) {
      familyToBloomType.put("Family2=asdads&!AASD", BloomType.ROWCOL);
    }
    if (numCfs-- > 0) {
      familyToBloomType.put("Family3", BloomType.NONE);
    }
    return familyToBloomType;
  }

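  /**
   * Tests that the family-to-block-size map is serialized into the job configuration
   * by {@link HFileOutputFormat2#configureBlockSize} and deserialized back by
   * {@link HFileOutputFormat2#createFamilyBlockSizeMap}.
   */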
  @Test
  public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Integer> familyToBlockSize =
          getMockColumnFamiliesForBlockSize(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBlockSize(table, familyToBlockSize);
      HFileOutputFormat2.configureBlockSize(table, conf);

      Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
          HFileOutputFormat2.createFamilyBlockSizeMap(conf);

      for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
        assertEquals("BlockSize configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForBlockSize(Table table,
      Map<String, Integer> familyToBlockSize) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setBlocksize(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to block sizes for testing.
   */
  private Map<String, Integer> getMockColumnFamiliesForBlockSize(int numCfs) {
    Map<String, Integer> familyToBlockSize = new HashMap<String, Integer>();

    if (numCfs-- > 0) {
      familyToBlockSize.put("Family1!@#!@#&", 1234);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD", Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD", Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family3", 0);
    }
    return familyToBlockSize;
  }

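  /**
   * Tests that the family-to-data-block-encoding map is serialized into the job
   * configuration by {@link HFileOutputFormat2#configureDataBlockEncoding} and
   * deserialized back by {@link HFileOutputFormat2#createFamilyDataBlockEncodingMap}.
   */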
  @Test
  public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, DataBlockEncoding> familyToDataBlockEncoding =
          getMockColumnFamiliesForDataBlockEncoding(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForDataBlockEncoding(table, familyToDataBlockEncoding);
      HFileOutputFormat2.configureDataBlockEncoding(table, conf);

      Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
          HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);

      for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
        assertEquals("DataBlockEncoding configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForDataBlockEncoding(Table table,
      Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setDataBlockEncoding(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to data block encodings for testing.
   */
  private Map<String, DataBlockEncoding>
      getMockColumnFamiliesForDataBlockEncoding(int numCfs) {
    Map<String, DataBlockEncoding> familyToDataBlockEncoding =
        new HashMap<String, DataBlockEncoding>();

    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD", DataBlockEncoding.FAST_DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD", DataBlockEncoding.PREFIX);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
    }
    return familyToDataBlockEncoding;
  }

  /** Mocks four region start keys on the given RegionLocator. */
  private void setupMockStartKeys(RegionLocator table) throws IOException {
    byte[][] mockKeys = new byte[][] {
      HConstants.EMPTY_BYTE_ARRAY,
      Bytes.toBytes("aaa"),
      Bytes.toBytes("ggg"),
      Bytes.toBytes("zzz")
    };
    Mockito.doReturn(mockKeys).when(table).getStartKeys();
  }

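  /**
   * Tests that the {@link HFileOutputFormat2} RecordWriter honors per-family settings
   * such as bloom filter type and compression from the table descriptor when writing
   * HFiles.
   */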
  @Test
  public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("testColumnFamilySettings");

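    // Set up a mock table whose descriptor carries one column family for every
    // generated combination of settings.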
    HTable table = Mockito.mock(HTable.class);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    Mockito.doReturn(htd).when(table).getTableDescriptor();
    for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
      htd.addFamily(hcd);
    }

    setupMockStartKeys(table);

    try {
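      // Do just enough MR setup to obtain a working RecordWriter; sequence file
      // compression is turned off for this job.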
      conf.set("io.seqfile.compression.type", "NONE");
      Job job = new Job(conf, "testLocalMRIncrementalLoad");
      job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
      setupRandomGeneratorMapper(job);
      HFileOutputFormat2.configureIncrementalLoad(job, table, table);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
      writer.close(context);

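      // Commit the task and job so one directory per column family appears, then
      // check each HFile's bloom filter type and compression against the descriptor.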
      FileSystem fs = dir.getFileSystem(conf);

      hof.getOutputCommitter(context).commitTask(context);
      hof.getOutputCommitter(context).commitJob(context);
      FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
      assertEquals(htd.getFamilies().size(), families.length);
      for (FileStatus f : families) {
        String familyStr = f.getPath().getName();
        HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));

        Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
        Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
        Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

        byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
        if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
        assertEquals("Incorrect bloom filter used for column family " + familyStr +
            "(reader: " + reader + ")",
            hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
        assertEquals("Incorrect compression used for column family " + familyStr +
            "(reader: " + reader + ")",
            hcd.getCompression(), reader.getFileContext().getCompression());
      }
    } finally {
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

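  /**
   * Writes {@code numRows} rows through the given writer, one random-valued cell per
   * column family per row.
   */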
  private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
      TaskAttemptContext context, Set<byte[]> families, int numRows)
      throws IOException, InterruptedException {
    byte[] keyBytes = new byte[Bytes.SIZEOF_INT];
    int valLength = 10;
    byte[] valBytes = new byte[valLength];

    int taskId = context.getTaskAttemptID().getTaskID().getId();
    assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
    final byte[] qualifier = Bytes.toBytes("data");
    Random random = new Random();
    for (int i = 0; i < numRows; i++) {
      Bytes.putInt(keyBytes, 0, i);
      random.nextBytes(valBytes);
      ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

      for (byte[] family : families) {
        Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
        writer.write(key, kv);
      }
    }
  }

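  /**
   * Verifies that HFiles bulk loaded with the compaction-exclude flag set are skipped
   * by minor compactions but are still compacted away by a major compaction.
   */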
  @Ignore("Flakey: See HBASE-9051")
  @Test
  public void testExcludeAllFromMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // Deep inspection: the store directory of the first region and family should
      // start out empty.
      final Path storePath = new Path(
          FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
          new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
              Bytes.toString(FAMILIES[0])));
      assertEquals(0, fs.listStatus(storePath).length);

      // Generate two bulk-load outputs, both flagged as excluded from compaction, and
      // load them into the table.
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);
      util.startMiniMapReduceCluster();

      for (int i = 0; i < 2; i++) {
        Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
        runIncrementalPELoad(conf, table, testDir);
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
      }

      // Ensure data shows up.
      int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));

      // Both bulk-loaded store files should still be present.
      assertEquals(2, fs.listStatus(storePath).length);

      // A minor compaction should not get rid of the excluded files.
      admin.compact(TABLE_NAME.getName());
      try {
        quickPoll(new Callable<Boolean>() {
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // Expected: the store file count never dropped to one, so quickPoll timed out.
      }

      // A major compaction should work though.
      admin.majorCompact(TABLE_NAME.getName());
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

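  /**
   * Verifies that a bulk-loaded HFile flagged for compaction exclusion is not removed
   * by a minor compaction even when an ordinary flushed store file is present, but is
   * removed by a major compaction.
   */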
  @Test
  public void testExcludeMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // Deep inspection: the store directory of the first region and family should
      // start out empty.
      final Path storePath = new Path(
          FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
          new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
              Bytes.toString(FAMILIES[0])));
      assertEquals(0, fs.listStatus(storePath).length);

      // Put some data in the table and flush it to create an ordinary store file.
      Put p = new Put(Bytes.toBytes("test"));
      p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
      table.put(p);
      admin.flush(TABLE_NAME.getName());
      assertEquals(1, util.countRows(table));
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

      // Generate a bulk-load output flagged as excluded from compaction and load it.
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);

      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up.
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows + 1, util.countRows(table));

      // There should now be a second store file.
      assertEquals(2, fs.listStatus(storePath).length);

      // A minor compaction should not get rid of the excluded file.
      admin.compact(TABLE_NAME.getName());
      try {
        quickPoll(new Callable<Boolean>() {
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // Expected: the store file count never dropped to one, so quickPoll timed out.
      }

      // A major compaction should work though.
      admin.majorCompact(TABLE_NAME.getName());
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  /**
   * Polls the callable roughly every 10ms until it returns true or {@code waitMs}
   * elapses, in which case the test fails.
   */
  private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
    int sleepMs = 10;
    int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
    while (retries-- > 0) {
      if (c.call().booleanValue()) {
        return;
      }
      Thread.sleep(sleepMs);
    }
    fail();
  }

  public static void main(String[] args) throws Exception {
    new TestHFileOutputFormat2().manualTest(args);
  }

  public void manualTest(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    util = new HBaseTestingUtility(conf);
    if ("newtable".equals(args[0])) {
      byte[] tname = args[1].getBytes();
      HTable table = util.createTable(tname, FAMILIES);
      HBaseAdmin admin = new HBaseAdmin(conf);
      admin.disableTable(tname);
      byte[][] startKeys = generateRandomStartKeys(5);
      util.createMultiRegions(conf, table, FAMILIES[0], startKeys);
      admin.enableTable(tname);
    } else if ("incremental".equals(args[0])) {
      TableName tname = TableName.valueOf(args[1]);
      HTable table = new HTable(conf, tname);
      Path outDir = new Path("incremental-out");
      runIncrementalPELoad(conf, table, outDir);
    } else {
      throw new RuntimeException(
          "usage: TestHFileOutputFormat2 newtable | incremental");
    }
  }

}