/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HadoopShims;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;
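
/**
 * Tests for {@link HFileOutputFormat2}. Sets up and runs mapreduce jobs that
 * write hfile output, then validates the files' structure and metadata and
 * exercises incremental (bulk) load behaviour.
 */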
@Category(LargeTests.class)
public class TestHFileOutputFormat2 {
  private final static int ROWSPERSPLIT = 1024;

  private static final byte[][] FAMILIES
    = { Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A"))
      , Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B"))};
  private static final TableName TABLE_NAME =
      TableName.valueOf("TestTable");

  private HBaseTestingUtility util = new HBaseTestingUtility();

  private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat2.class);
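
  /**
   * Simple mapper that makes KeyValue output.
   */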
  static class RandomKVGeneratingMapper
      extends Mapper<NullWritable, NullWritable,
                     ImmutableBytesWritable, Cell> {

    private int keyLength;
    private static final int KEYLEN_DEFAULT = 10;
    private static final String KEYLEN_CONF = "randomkv.key.length";

    private int valLength;
    private static final int VALLEN_DEFAULT = 10;
    private static final String VALLEN_CONF = "randomkv.val.length";

    @Override
    protected void setup(Context context) throws IOException,
        InterruptedException {
      super.setup(context);

      Configuration conf = context.getConfiguration();
      keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
      valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
    }

    @Override
    protected void map(
        NullWritable n1, NullWritable n2,
        Mapper<NullWritable, NullWritable,
            ImmutableBytesWritable, Cell>.Context context)
        throws IOException, InterruptedException {
      byte[] keyBytes = new byte[keyLength];
      byte[] valBytes = new byte[valLength];

      int taskId = context.getTaskAttemptID().getTaskID().getId();
      assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

      Random random = new Random();
      for (int i = 0; i < ROWSPERSPLIT; i++) {
        random.nextBytes(keyBytes);
        // Ensure that unique tasks generate unique keys
        keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
        random.nextBytes(valBytes);
        ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

        for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
          Cell kv = new KeyValue(keyBytes, family,
              PerformanceEvaluation.QUALIFIER_NAME, valBytes);
          context.write(key, kv);
        }
      }
    }
  }

  private void setupRandomGeneratorMapper(Job job) {
    job.setInputFormatClass(NMapInputFormat.class);
    job.setMapperClass(RandomKVGeneratingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
  }
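
  /**
   * Test that the {@link HFileOutputFormat2} RecordWriter replaces
   * {@link HConstants#LATEST_TIMESTAMP} with a concrete write-time timestamp,
   * but leaves explicitly supplied timestamps untouched.
   */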
  @Test
  public void test_LATEST_TIMESTAMP_isReplaced()
      throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir =
        util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);
      final byte[] b = Bytes.toBytes("b");

      // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be
      // changed by the call to write. Check everything in the kv is the same but the ts.
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
      assertNotSame(original.getTimestamp(), kv.getTimestamp());
      assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());

      // Test 2. Now pass a kv that has an explicit ts. It should not be
      // changed by the call to write.
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  private TaskAttemptContext createTestTaskAttemptContext(final Job job)
      throws Exception {
    HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
    TaskAttemptContext context = hadoop.createTestTaskAttemptContext(
        job, "attempt_201402131733_0001_m_000000_0");
    return context;
  }
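
  /**
   * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
   * metadata used by time-restricted scans.
   */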
  @Test
  public void test_TIMERANGE() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir =
        util.getDataTestDir("test_TIMERANGE_present");
    LOG.info("Timerange dir writing to dir: " + dir);
    try {
      // build a record writer using HFileOutputFormat2
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // Pass two key values with explicit timestamps
      final byte[] b = Bytes.toBytes("b");

      // value 1 with timestamp 2000
      KeyValue kv = new KeyValue(b, b, b, 2000, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // value 2 with timestamp 1000
      kv = new KeyValue(b, b, b, 1000, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // verify that the file has the proper FileInfo.
      writer.close(context);

      // the generated file lives one directory down from the attempt
      // directory and is the only file there.
      FileSystem fs = FileSystem.get(conf);
      Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
      FileStatus[] sub1 = fs.listStatus(attemptDirectory);
      FileStatus[] file = fs.listStatus(sub1[0].getPath());

      // open as HFile Reader and pull out the TIMERANGE FileInfo.
      HFile.Reader rd = HFile.createReader(fs, file[0].getPath(),
          new CacheConfig(conf), conf);
      Map<byte[], byte[]> finfo = rd.loadFileInfo();
      byte[] range = finfo.get("TIMERANGE".getBytes());
      assertNotNull(range);

      // unmarshall and check values.
      TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
      Writables.copyWritable(range, timeRangeTracker);
      LOG.info(timeRangeTracker.getMinimumTimestamp() +
          "...." + timeRangeTracker.getMaximumTimestamp());
      assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
      assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
      rd.close();
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }
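
  /**
   * Run a small MR job that writes PerformanceEvaluation-style random data
   * through {@link HFileOutputFormat2} and verify that hfiles were produced.
   */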
  @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

    // Set down this value or we OOME in eclipse.
    conf.setInt("io.sort.mb", 20);
    // Write a few files.
    conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job);
    // This partitioner doesn't work well for number keys but we use it anyway
    // just to demonstrate how to configure it.
    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte)0);
    Arrays.fill(endKey, (byte)0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
    // Set start and end rows for the partitioner.
    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat2.class);
    job.setNumReduceTasks(4);
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus[] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }

  @Test
  public void testJobConfiguration() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    conf.set("hbase.fs.tmp.dir", util.getDataTestDir("testJobConfiguration").toString());
    Job job = new Job(conf);
    job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
    HTable table = Mockito.mock(HTable.class);
    setupMockStartKeys(table);
    HFileOutputFormat2.configureIncrementalLoad(job, table);
    // one reducer per region: the mocked table has four start keys, hence four regions
    assertEquals(4, job.getNumReduceTasks());
  }

  private byte[][] generateRandomStartKeys(int numKeys) {
    Random random = new Random();
    byte[][] ret = new byte[numKeys][];
    // first region start key is always empty
    ret[0] = HConstants.EMPTY_BYTE_ARRAY;
    for (int i = 1; i < numKeys; i++) {
      ret[i] = PerformanceEvaluation.generateData(random, PerformanceEvaluation.VALUE_LENGTH);
    }
    return ret;
  }

  @Test
  public void testMRIncrementalLoad() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoad\n");
    doIncrementalLoadTest(false);
  }

  @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
    doIncrementalLoadTest(true);
  }

  private void doIncrementalLoadTest(
      boolean shouldChangeRegions) throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    byte[][] startKeys = generateRandomStartKeys(5);
    HBaseAdmin admin = null;
    try {
      util.startMiniCluster();
      Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
      admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table",
          0, util.countRows(table));
      int numRegions = util.createMultiRegions(
          util.getConfiguration(), table, FAMILIES[0], startKeys);
      assertEquals("Should make 5 regions", 5, numRegions);

      // Generate the bulk load files
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table",
          0, util.countRows(table));

      // Make sure that a directory was created for every CF
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // handle the split case
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getTableName());
        while (util.getMiniHBaseCluster().getMaster().getAssignmentManager().
            getRegionStates().isRegionsInTransition()) {
          Threads.sleep(200);
          LOG.info("Waiting on table to finish disabling");
        }
        byte[][] newStartKeys = generateRandomStartKeys(15);
        util.createMultiRegions(
            util.getConfiguration(), table, FAMILIES[0], newStartKeys);
        admin.enableTable(table.getTableName());
        while (table.getRegionLocations().size() != 15 ||
            !admin.isTableAvailable(table.getTableName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
      String tableDigestBefore = util.checksumRows(table);

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME.getName());
      assertEquals("Data should remain after reopening of regions",
          tableDigestBefore, util.checksumRows(table));
    } finally {
      if (admin != null) admin.close();
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  private void runIncrementalPELoad(
      Configuration conf, HTable table, Path outDir)
      throws Exception {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    setupRandomGeneratorMapper(job);
    HFileOutputFormat2.configureIncrementalLoad(job, table);
    FileOutputFormat.setOutputPath(job, outDir);

    assertFalse(util.getTestFileSystem().exists(outDir));

    assertEquals(table.getRegionLocations().size(), job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }
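
  /**
   * Tests that {@code HFileOutputFormat2.configureCompression} and
   * {@code createFamilyCompressionMap} correctly serialize the per-family
   * compression settings into and out of the job configuration.
   */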
  @Test
  public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Compression.Algorithm> familyToCompression =
          getMockColumnFamiliesForCompression(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForCompression(table, familyToCompression);
      HFileOutputFormat2.configureCompression(table, conf);

      // read back family specific compression settings from the configuration
      Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat2
          .createFamilyCompressionMap(conf);

      // test that we have a value for all column families that matches the
      // mock values used
      for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
        assertEquals("Compression configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForCompression(HTable table,
      Map<String, Compression.Algorithm> familyToCompression) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setCompressionType(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }
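
  /**
   * @return a map from column family names to compression algorithms for
   *         testing column family compression. Column family names have
   *         special characters on purpose.
   */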
  private Map<String, Compression.Algorithm>
      getMockColumnFamiliesForCompression(int numCfs) {
    Map<String, Compression.Algorithm> familyToCompression
        = new HashMap<String, Compression.Algorithm>();
    if (numCfs-- > 0) {
      familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family3", Compression.Algorithm.NONE);
    }
    return familyToCompression;
  }
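
  /**
   * Tests that {@code HFileOutputFormat2.configureBloomType} and
   * {@code createFamilyBloomTypeMap} correctly serialize the per-family
   * bloom filter type into and out of the job configuration.
   */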
  @Test
  public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 2; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, BloomType> familyToBloomType =
          getMockColumnFamiliesForBloomType(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBloomType(table,
          familyToBloomType);
      HFileOutputFormat2.configureBloomType(table, conf);

      // read back family specific bloom type settings from the configuration
      Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
          HFileOutputFormat2
              .createFamilyBloomTypeMap(conf);

      // test that we have a value for all column families that matches the
      // mock values used
      for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
        assertEquals("BloomType configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForBloomType(HTable table,
      Map<String, BloomType> familyToBloomType) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setBloomFilterType(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }
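
  /**
   * @return a map from column family names to bloom filter types for testing
   *         column family bloom type settings. Column family names have
   *         special characters on purpose.
   */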
  private Map<String, BloomType>
      getMockColumnFamiliesForBloomType(int numCfs) {
    Map<String, BloomType> familyToBloomType =
        new HashMap<String, BloomType>();
    if (numCfs-- > 0) {
      familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
    }
    if (numCfs-- > 0) {
      familyToBloomType.put("Family2=asdads&!AASD",
          BloomType.ROWCOL);
    }
    if (numCfs-- > 0) {
      familyToBloomType.put("Family3", BloomType.NONE);
    }
    return familyToBloomType;
  }
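
  /**
   * Tests that {@code HFileOutputFormat2.configureBlockSize} and
   * {@code createFamilyBlockSizeMap} correctly serialize the per-family
   * block size into and out of the job configuration.
   */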
  @Test
  public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Integer> familyToBlockSize =
          getMockColumnFamiliesForBlockSize(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBlockSize(table,
          familyToBlockSize);
      HFileOutputFormat2.configureBlockSize(table, conf);

      // read back family specific block size settings from the configuration
      Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
          HFileOutputFormat2
              .createFamilyBlockSizeMap(conf);

      // test that we have a value for all column families that matches the
      // mock values used
      for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
        assertEquals("BlockSize configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForBlockSize(HTable table,
      Map<String, Integer> familyToBlockSize) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setBlocksize(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }
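
  /**
   * @return a map from column family names to block sizes for testing
   *         column family block size settings. Column family names have
   *         special characters on purpose.
   */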
  private Map<String, Integer>
      getMockColumnFamiliesForBlockSize(int numCfs) {
    Map<String, Integer> familyToBlockSize =
        new HashMap<String, Integer>();
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family1!@#!@#&", 1234);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD",
          Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD",
          Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family3", 0);
    }
    return familyToBlockSize;
  }
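
  /**
   * Tests that {@code HFileOutputFormat2.configureDataBlockEncoding} and
   * {@code createFamilyDataBlockEncodingMap} correctly serialize the
   * per-family data block encoding into and out of the job configuration.
   */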
  @Test
  public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, DataBlockEncoding> familyToDataBlockEncoding =
          getMockColumnFamiliesForDataBlockEncoding(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForDataBlockEncoding(table,
          familyToDataBlockEncoding);
      HFileOutputFormat2.configureDataBlockEncoding(table, conf);

      // read back family specific data block encoding settings from the
      // configuration
      Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
          HFileOutputFormat2
              .createFamilyDataBlockEncodingMap(conf);

      // test that we have a value for all column families that matches the
      // mock values used
      for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
        assertEquals("DataBlockEncoding configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForDataBlockEncoding(HTable table,
      Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setDataBlockEncoding(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }
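
  /**
   * @return a map from column family names to data block encodings for testing
   *         column family data block encoding settings. Column family names
   *         have special characters on purpose.
   */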
  private Map<String, DataBlockEncoding>
      getMockColumnFamiliesForDataBlockEncoding(int numCfs) {
    Map<String, DataBlockEncoding> familyToDataBlockEncoding =
        new HashMap<String, DataBlockEncoding>();
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD",
          DataBlockEncoding.FAST_DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD",
          DataBlockEncoding.PREFIX);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
    }
    return familyToDataBlockEncoding;
  }

  private void setupMockStartKeys(HTable table) throws IOException {
    byte[][] mockKeys = new byte[][] {
        HConstants.EMPTY_BYTE_ARRAY,
        Bytes.toBytes("aaa"),
        Bytes.toBytes("ggg"),
        Bytes.toBytes("zzz")
    };
    Mockito.doReturn(mockKeys).when(table).getStartKeys();
  }
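
  /**
   * Test that the {@link HFileOutputFormat2} RecordWriter uses compression and
   * bloom filter settings from the column family descriptor.
   */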
  @Test
  public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("testColumnFamilySettings");

    // Setup table descriptor
    HTable table = Mockito.mock(HTable.class);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    Mockito.doReturn(htd).when(table).getTableDescriptor();
    for (HColumnDescriptor hcd: HBaseTestingUtility.generateColumnDescriptors()) {
      htd.addFamily(hcd);
    }

    // set up the table to return some mock keys
    setupMockStartKeys(table);

    try {
      // partial map reduce setup to get an operational writer for testing.
      // We turn off the sequence file compression, because DefaultCodec
      // pollutes the GZip codec pool with an incompatible compressor.
      conf.set("io.seqfile.compression.type", "NONE");
      conf.set("hbase.fs.tmp.dir", dir.toString());
      Job job = new Job(conf, "testLocalMRIncrementalLoad");
      job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
      setupRandomGeneratorMapper(job);
      HFileOutputFormat2.configureIncrementalLoad(job, table);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // write out random rows
      writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
      writer.close(context);

      // Make sure that a directory was created for every CF
      FileSystem fs = dir.getFileSystem(conf);

      // commit so that the filesystem has one directory per column family
      hof.getOutputCommitter(context).commitTask(context);
      hof.getOutputCommitter(context).commitJob(context);
      FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
      assertEquals(htd.getFamilies().size(), families.length);
      for (FileStatus f : families) {
        String familyStr = f.getPath().getName();
        HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
        // verify that the bloom filter and compression on this file match the
        // configured column family settings
        Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
        Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
        Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

        byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
        if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
        assertEquals("Incorrect bloom filter used for column family " + familyStr +
            "(reader: " + reader + ")",
            hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
        assertEquals("Incorrect compression used for column family " + familyStr +
            "(reader: " + reader + ")", hcd.getCompression(), reader.getFileContext().getCompression());
      }
    } finally {
      dir.getFileSystem(conf).delete(dir, true);
    }
  }
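
  /**
   * Write {@code numRows} rows with random values for each of the given
   * column families through the supplied writer.
   */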
  private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
      TaskAttemptContext context, Set<byte[]> families, int numRows)
      throws IOException, InterruptedException {
    byte[] keyBytes = new byte[Bytes.SIZEOF_INT];
    int valLength = 10;
    byte[] valBytes = new byte[valLength];

    int taskId = context.getTaskAttemptID().getTaskID().getId();
    assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

    Random random = new Random();
    for (int i = 0; i < numRows; i++) {
      Bytes.putInt(keyBytes, 0, i);
      random.nextBytes(valBytes);
      ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

      for (byte[] family : families) {
        Cell kv = new KeyValue(keyBytes, family,
            PerformanceEvaluation.QUALIFIER_NAME, valBytes);
        writer.write(key, kv);
      }
    }
  }
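
  /**
   * Bulk-load several rounds of hfiles that are all flagged for exclusion from
   * minor compaction, then verify that a minor compaction leaves the store
   * files alone while a major compaction rewrites them.
   */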
  @Ignore("Flakey: See HBASE-9051") @Test
  public void testExcludeAllFromMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath = new Path(
          FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
          new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
              Bytes.toString(FAMILIES[0])));
      assertEquals(0, fs.listStatus(storePath).length);

      // Generate two bulk load files
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
          true);
      util.startMiniMapReduceCluster();

      for (int i = 0; i < 2; i++) {
        Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
        runIncrementalPELoad(conf, table, testDir);
        // Perform the actual load
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
      }

      // Ensure data shows up
      int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // minor compactions shouldn't get rid of the file
      admin.compact(TABLE_NAME.getName());
      try {
        quickPoll(new Callable<Boolean>() {
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME.getName());
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  @Test
  public void testExcludeMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath = new Path(
          FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
          new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
              Bytes.toString(FAMILIES[0])));
      assertEquals(0, fs.listStatus(storePath).length);

      // put some data in it and flush to create a storefile
      Put p = new Put(Bytes.toBytes("test"));
      p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
      table.put(p);
      admin.flush(TABLE_NAME.getName());
      assertEquals(1, util.countRows(table));
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

      // Generate a bulk load file marked for exclusion from minor compaction
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
          true);
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows + 1, util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // minor compactions shouldn't get rid of the file
      admin.compact(TABLE_NAME.getName());
      try {
        quickPoll(new Callable<Boolean>() {
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME.getName());
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
    int sleepMs = 10;
    int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
    while (retries-- > 0) {
      if (c.call().booleanValue()) {
        return;
      }
      Thread.sleep(sleepMs);
    }
    fail();
  }

  public static void main(String[] args) throws Exception {
    new TestHFileOutputFormat2().manualTest(args);
  }

  public void manualTest(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    util = new HBaseTestingUtility(conf);
    if ("newtable".equals(args[0])) {
      byte[] tname = args[1].getBytes();
      HTable table = util.createTable(tname, FAMILIES);
      HBaseAdmin admin = new HBaseAdmin(conf);
      admin.disableTable(tname);
      byte[][] startKeys = generateRandomStartKeys(5);
      util.createMultiRegions(conf, table, FAMILIES[0], startKeys);
      admin.enableTable(tname);
    } else if ("incremental".equals(args[0])) {
      byte[] tname = args[1].getBytes();
      HTable table = new HTable(conf, tname);
      Path outDir = new Path("incremental-out");
      runIncrementalPELoad(conf, table, outDir);
    } else {
      throw new RuntimeException(
          "usage: TestHFileOutputFormat2 newtable | incremental");
    }
  }

}