/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HadoopShims;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;
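/**
 * Simple test for {@link HFileOutputFormat}.
 * Sets up and runs a mapreduce job that writes hfile output, and verifies
 * both the HFile metadata it produces and a bulk load of the generated
 * files into a live table.
 */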
@Category(LargeTests.class)
public class TestHFileOutputFormat {
  private final static int ROWSPERSPLIT = 1024;

  private static final byte[][] FAMILIES
    = { Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A"))
      , Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B"))};
  private static final TableName TABLE_NAME =
      TableName.valueOf("TestTable");

  private HBaseTestingUtility util = new HBaseTestingUtility();

  private static Log LOG = LogFactory.getLog(TestHFileOutputFormat.class);
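  /**
   * Simple mapper that makes KeyValue output.
   */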
  static class RandomKVGeneratingMapper
      extends Mapper<NullWritable, NullWritable,
                     ImmutableBytesWritable, KeyValue> {

    private int keyLength;
    private static final int KEYLEN_DEFAULT=10;
    private static final String KEYLEN_CONF="randomkv.key.length";

    private int valLength;
    private static final int VALLEN_DEFAULT=10;
    private static final String VALLEN_CONF="randomkv.val.length";

    @Override
    protected void setup(Context context) throws IOException,
        InterruptedException {
      super.setup(context);

      Configuration conf = context.getConfiguration();
      keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
      valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
    }

    @Override
    protected void map(
        NullWritable n1, NullWritable n2,
        Mapper<NullWritable, NullWritable,
               ImmutableBytesWritable, KeyValue>.Context context)
        throws IOException, InterruptedException {
      byte keyBytes[] = new byte[keyLength];
      byte valBytes[] = new byte[valLength];

      int taskId = context.getTaskAttemptID().getTaskID().getId();
      assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

      Random random = new Random();
      for (int i = 0; i < ROWSPERSPLIT; i++) {
        random.nextBytes(keyBytes);
        // Ensure that different tasks generate different keys by embedding
        // the task id in the last byte of the key.
        keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
        random.nextBytes(valBytes);
        ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

        for (byte[] family : TestHFileOutputFormat.FAMILIES) {
          KeyValue kv = new KeyValue(keyBytes, family,
              PerformanceEvaluation.QUALIFIER_NAME, valBytes);
          context.write(key, kv);
        }
      }
    }
  }

  private void setupRandomGeneratorMapper(Job job) {
    job.setInputFormatClass(NMapInputFormat.class);
    job.setMapperClass(RandomKVGeneratingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
  }
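  /**
   * Test that the {@link HFileOutputFormat} RecordWriter amends timestamps if
   * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
   */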
  @Test
  public void test_LATEST_TIMESTAMP_isReplaced()
      throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
    TaskAttemptContext context = null;
    Path dir =
        util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat hof = new HFileOutputFormat();
      writer = hof.getRecordWriter(context);
      final byte [] b = Bytes.toBytes("b");

      // Test 1. Pass a KV that has a timestamp of LATEST_TIMESTAMP. The write
      // should change it; everything in the KV except the timestamp stays the same.
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(original.getRow(), kv.getRow()));
      assertTrue(original.matchingColumn(kv.getFamily(), kv.getQualifier()));
      assertNotSame(original.getTimestamp(), kv.getTimestamp());
      assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());

      // Test 2. Now pass a KV that has an explicit timestamp. It should not be
      // changed by the call to write.
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  private TaskAttemptContext createTestTaskAttemptContext(final Job job)
      throws Exception {
    HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
    TaskAttemptContext context = hadoop.createTestTaskAttemptContext(
        job, "attempt_200707121733_0001_m_000000_0");
    return context;
  }
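  /**
   * Test that the {@link HFileOutputFormat} RecordWriter records a TIMERANGE
   * entry in the HFile metadata that covers the timestamps of the cells written.
   */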
  @Test
  public void test_TIMERANGE() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
    TaskAttemptContext context = null;
    Path dir =
        util.getDataTestDir("test_TIMERANGE_present");
    LOG.info("Timerange dir writing to dir: " + dir);
    try {
      // build a record writer using HFileOutputFormat
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat hof = new HFileOutputFormat();
      writer = hof.getRecordWriter(context);

      // Pass two key values with explicit timestamps
      final byte [] b = Bytes.toBytes("b");

      // value 1 with timestamp 2000
      KeyValue kv = new KeyValue(b, b, b, 2000, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // value 2 with timestamp 1000
      kv = new KeyValue(b, b, b, 1000, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // verify that the written file has the proper FileInfo
      writer.close(context);

      // the generated file lives one directory down from the attempt
      // directory, under the column family name
      FileSystem fs = FileSystem.get(conf);
      Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
      FileStatus[] sub1 = fs.listStatus(attemptDirectory);
      FileStatus[] file = fs.listStatus(sub1[0].getPath());

      // open as HFile Reader and pull out the TIMERANGE FileInfo
      HFile.Reader rd = HFile.createReader(fs, file[0].getPath(),
          new CacheConfig(conf), conf);
      Map<byte[],byte[]> finfo = rd.loadFileInfo();
      byte[] range = finfo.get("TIMERANGE".getBytes());
      assertNotNull(range);

      // unmarshall and check values
      TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
      Writables.copyWritable(range, timeRangeTracker);
      LOG.info(timeRangeTracker.getMinimumTimestamp() +
          "...." + timeRangeTracker.getMaximumTimestamp());
      assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
      assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
      rd.close();
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }
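  /**
   * Run a small MapReduce job that uses {@link RandomKVGeneratingMapper} to
   * write PerformanceEvaluation-style data through {@link HFileOutputFormat}
   * and verify that output files are produced.
   */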
  @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

    // keep the MapReduce sort buffer small for this test
    conf.setInt("io.sort.mb", 20);
    // roll over to new hfiles frequently so the job writes more than one file
    conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job);

    // partition the random keys across the reduce tasks
    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte)0);
    Arrays.fill(endKey, (byte)0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);

    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setNumReduceTasks(4);
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus [] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }

  /**
   * Test that {@link HFileOutputFormat#configureIncrementalLoad(Job, HTable)}
   * sets the number of reduce tasks to the number of regions of the table
   * (the mocked table has four start keys, hence four regions).
   */
  @Test
  public void testJobConfiguration() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    conf.set("hbase.fs.tmp.dir", util.getDataTestDir("testJobConfiguration").toString());
    Job job = new Job(conf);
    job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
    HTable table = Mockito.mock(HTable.class);
    setupMockStartKeys(table);
    setupMockTableName(table);
    HFileOutputFormat.configureIncrementalLoad(job, table);
    assertEquals(job.getNumReduceTasks(), 4);
  }

  private byte [][] generateRandomStartKeys(int numKeys) {
    Random random = new Random();
    byte[][] ret = new byte[numKeys][];
    // the first region always starts with the empty key
    ret[0] = HConstants.EMPTY_BYTE_ARRAY;
    for (int i = 1; i < numKeys; i++) {
      ret[i] = PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
    }
    return ret;
  }

  @Test
  public void testMRIncrementalLoad() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoad\n");
    doIncrementalLoadTest(false);
  }

  @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
    doIncrementalLoadTest(true);
  }

  private void doIncrementalLoadTest(
      boolean shouldChangeRegions) throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    byte[][] startKeys = generateRandomStartKeys(5);
    HBaseAdmin admin = null;
    try {
      util.startMiniCluster();
      Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
      admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table",
          0, util.countRows(table));
      int numRegions = util.createMultiRegions(
          util.getConfiguration(), table, FAMILIES[0], startKeys);
      assertEquals("Should make 5 regions", numRegions, 5);

      // Generate the bulk load files
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);
      // the job only writes files; it should not touch the table itself
      assertEquals("HFOF should not touch actual table",
          0, util.countRows(table));

      // Make sure that a directory was created for every column family
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // optionally change the region layout before loading
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getTableName());
        while(util.getMiniHBaseCluster().getMaster().getAssignmentManager().
            getRegionStates().isRegionsInTransition()) {
          Threads.sleep(200);
          LOG.info("Waiting on table to finish disabling");
        }
        byte[][] newStartKeys = generateRandomStartKeys(15);
        util.createMultiRegions(
            util.getConfiguration(), table, FAMILIES[0], newStartKeys);
        admin.enableTable(table.getTableName());
        while (table.getRegionLocations().size() != 15 ||
            !admin.isTableAvailable(table.getTableName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
      String tableDigestBefore = util.checksumRows(table);

      // Cause regions to reopen and verify the data survives
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME.getName());
      assertEquals("Data should remain after reopening of regions",
          tableDigestBefore, util.checksumRows(table));
    } finally {
      if (admin != null) admin.close();
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  private void runIncrementalPELoad(
      Configuration conf, HTable table, Path outDir)
      throws Exception {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    setupRandomGeneratorMapper(job);
    HFileOutputFormat.configureIncrementalLoad(job, table);
    FileOutputFormat.setOutputPath(job, outDir);

    assertFalse(util.getTestFileSystem().exists(outDir));

    assertEquals(table.getRegionLocations().size(), job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }
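  /**
   * Test for {@link HFileOutputFormat#configureCompression(HTable, Configuration)}
   * and {@link HFileOutputFormat#createFamilyCompressionMap(Configuration)}.
   * Tests that the family-to-compression map is correctly serialized into
   * and deserialized from configuration.
   */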
  @Test
  public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Compression.Algorithm> familyToCompression =
          getMockColumnFamiliesForCompression(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForCompression(table, familyToCompression);
      HFileOutputFormat.configureCompression(table, conf);

      // read back family specific compression settings from the configuration
      Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat
          .createFamilyCompressionMap(conf);

      // test that we have a value for all column families that we specified
      for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
        assertEquals("Compression configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForCompression(HTable table,
      Map<String, Compression.Algorithm> familyToCompression) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setCompressionType(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to compression algorithms for
   *         testing. Column family names have special characters.
   */
  private Map<String, Compression.Algorithm>
      getMockColumnFamiliesForCompression(int numCfs) {
    Map<String, Compression.Algorithm> familyToCompression =
        new HashMap<String, Compression.Algorithm>();

    if (numCfs-- > 0) {
      familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family3", Compression.Algorithm.NONE);
    }
    return familyToCompression;
  }
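  /**
   * Test for {@link HFileOutputFormat#configureBloomType(HTable, Configuration)}
   * and {@link HFileOutputFormat#createFamilyBloomTypeMap(Configuration)}.
   * Tests that the family-to-bloom-type map is correctly serialized into
   * and deserialized from configuration.
   */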
  @Test
  public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 2; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, BloomType> familyToBloomType =
          getMockColumnFamiliesForBloomType(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBloomType(table,
          familyToBloomType);
      HFileOutputFormat.configureBloomType(table, conf);

      // read back family specific bloom type settings from the configuration
      Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
          HFileOutputFormat
              .createFamilyBloomTypeMap(conf);

      // test that we have a value for all column families that we specified
      for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
        assertEquals("BloomType configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForBloomType(HTable table,
      Map<String, BloomType> familyToBloomType) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setBloomFilterType(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to bloom filter types for
   *         testing. Column family names have special characters.
   */
  private Map<String, BloomType>
      getMockColumnFamiliesForBloomType(int numCfs) {
    Map<String, BloomType> familyToBloomType =
        new HashMap<String, BloomType>();

    if (numCfs-- > 0) {
      familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
    }
    if (numCfs-- > 0) {
      familyToBloomType.put("Family2=asdads&!AASD",
          BloomType.ROWCOL);
    }
    if (numCfs-- > 0) {
      familyToBloomType.put("Family3", BloomType.NONE);
    }
    return familyToBloomType;
  }
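  /**
   * Test for {@link HFileOutputFormat#configureBlockSize(HTable, Configuration)}
   * and {@link HFileOutputFormat#createFamilyBlockSizeMap(Configuration)}.
   * Tests that the family-to-block-size map is correctly serialized into
   * and deserialized from configuration.
   */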
  @Test
  public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Integer> familyToBlockSize =
          getMockColumnFamiliesForBlockSize(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBlockSize(table,
          familyToBlockSize);
      HFileOutputFormat.configureBlockSize(table, conf);

      // read back family specific block size settings from the configuration
      Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
          HFileOutputFormat
              .createFamilyBlockSizeMap(conf);

      // test that we have a value for all column families that we specified
      for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
        assertEquals("BlockSize configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForBlockSize(HTable table,
      Map<String, Integer> familyToBlockSize) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setBlocksize(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to block sizes for
   *         testing. Column family names have special characters.
   */
  private Map<String, Integer>
      getMockColumnFamiliesForBlockSize(int numCfs) {
    Map<String, Integer> familyToBlockSize =
        new HashMap<String, Integer>();

    if (numCfs-- > 0) {
      familyToBlockSize.put("Family1!@#!@#&", 1234);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD",
          Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD",
          Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family3", 0);
    }
    return familyToBlockSize;
  }
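  /**
   * Test for {@link HFileOutputFormat#configureDataBlockEncoding(HTable, Configuration)}
   * and {@link HFileOutputFormat#createFamilyDataBlockEncodingMap(Configuration)}.
   * Tests that the family-to-data-block-encoding map is correctly serialized
   * into and deserialized from configuration.
   */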
  @Test
  public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, DataBlockEncoding> familyToDataBlockEncoding =
          getMockColumnFamiliesForDataBlockEncoding(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForDataBlockEncoding(table,
          familyToDataBlockEncoding);
      HFileOutputFormat.configureDataBlockEncoding(table, conf);

      // read back family specific data block encoding settings from the configuration
      Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
          HFileOutputFormat
              .createFamilyDataBlockEncodingMap(conf);

      // test that we have a value for all column families that we specified
      for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
        assertEquals("DataBlockEncoding configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForDataBlockEncoding(HTable table,
      Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setDataBlockEncoding(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to data block encodings for
   *         testing. Column family names have special characters.
   */
  private Map<String, DataBlockEncoding>
      getMockColumnFamiliesForDataBlockEncoding(int numCfs) {
    Map<String, DataBlockEncoding> familyToDataBlockEncoding =
        new HashMap<String, DataBlockEncoding>();

    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD",
          DataBlockEncoding.FAST_DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD",
          DataBlockEncoding.PREFIX);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
    }
    return familyToDataBlockEncoding;
  }

  private void setupMockStartKeys(HTable table) throws IOException {
    byte[][] mockKeys = new byte[][] {
        HConstants.EMPTY_BYTE_ARRAY,
        Bytes.toBytes("aaa"),
        Bytes.toBytes("ggg"),
        Bytes.toBytes("zzz")
    };
    Mockito.doReturn(mockKeys).when(table).getStartKeys();
  }

  private void setupMockTableName(HTable table) throws IOException {
    TableName mockTableName = TableName.valueOf("mock_table");
    Mockito.doReturn(mockTableName).when(table).getName();
  }
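  /**
   * Test that the {@link HFileOutputFormat} RecordWriter uses the compression
   * and bloom filter settings from each column family descriptor.
   */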
  @Test
  public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("testColumnFamilySettings");

    // Setup table descriptor
    HTable table = Mockito.mock(HTable.class);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    Mockito.doReturn(htd).when(table).getTableDescriptor();
    for (HColumnDescriptor hcd: this.util.generateColumnDescriptors()) {
      htd.addFamily(hcd);
    }

    // make the mocked table return some region start keys
    setupMockStartKeys(table);

    try {
      // partial map reduce setup, just enough to get an operational writer
      conf.set("io.seqfile.compression.type", "NONE");
      conf.set("hbase.fs.tmp.dir", dir.toString());
      // turn locality off so the writer does not try to look up region locations
      conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);

      Job job = new Job(conf, "testLocalMRIncrementalLoad");
      job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
      setupRandomGeneratorMapper(job);
      HFileOutputFormat.configureIncrementalLoad(job, table);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat hof = new HFileOutputFormat();
      writer = hof.getRecordWriter(context);

      // write out random rows for every column family
      writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
      writer.close(context);

      FileSystem fs = dir.getFileSystem(conf);

      // commit the task and job so that one directory per column family shows up
      hof.getOutputCommitter(context).commitTask(context);
      hof.getOutputCommitter(context).commitJob(context);
      FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
      assertEquals(htd.getFamilies().size(), families.length);
      for (FileStatus f : families) {
        String familyStr = f.getPath().getName();
        HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));

        // verify that the bloom filter type and compression of each written
        // HFile match the settings of its column family descriptor
        Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
        Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
        Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

        byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
        if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
        assertEquals("Incorrect bloom filter used for column family " + familyStr +
            "(reader: " + reader + ")",
            hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
        assertEquals("Incorrect compression used for column family " + familyStr +
            "(reader: " + reader + ")", hcd.getCompression(), reader.getFileContext().getCompression());
      }
    } finally {
      dir.getFileSystem(conf).delete(dir, true);
    }
  }
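  /**
   * Write random values to the writer, one KeyValue per given column family
   * for each of {@code numRows} rows.
   */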
  private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, KeyValue> writer,
      TaskAttemptContext context, Set<byte[]> families, int numRows)
      throws IOException, InterruptedException {
    byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
    int valLength = 10;
    byte valBytes[] = new byte[valLength];

    int taskId = context.getTaskAttemptID().getTaskID().getId();
    assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

    Random random = new Random();
    for (int i = 0; i < numRows; i++) {
      Bytes.putInt(keyBytes, 0, i);
      random.nextBytes(valBytes);
      ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

      for (byte[] family : families) {
        KeyValue kv = new KeyValue(keyBytes, family,
            PerformanceEvaluation.QUALIFIER_NAME, valBytes);
        writer.write(key, kv);
      }
    }
  }
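  /**
   * Test that bulk-loaded files are marked to be excluded from minor
   * compaction: after two incremental loads a minor compaction should leave
   * both store files in place, while a major compaction collapses them to one.
   */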
  @Ignore("Flakey: See HBASE-9051") @Test
  public void testExcludeAllFromMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath = HStore.getStoreHomedir(
          FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
          admin.getTableRegions(TABLE_NAME).get(0),
          FAMILIES[0]);
      assertEquals(0, fs.listStatus(storePath).length);

      // Generate two bulk load files, excluding them from minor compaction
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
          true);
      util.startMiniMapReduceCluster();

      for (int i = 0; i < 2; i++) {
        Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
        runIncrementalPELoad(conf, table, testDir);
        // Perform the actual load
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
      }

      // Ensure data shows up
      int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // a minor compaction should not get rid of the file
      admin.compact(TABLE_NAME.getName());
      try {
        quickPoll(new Callable<Boolean>() {
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME.getName());
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  @Test
  public void testExcludeMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath = HStore.getStoreHomedir(
          FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
          admin.getTableRegions(TABLE_NAME).get(0),
          FAMILIES[0]);
      assertEquals(0, fs.listStatus(storePath).length);

      // put some data in it and flush to create a storefile
      Put p = new Put(Bytes.toBytes("test"));
      p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
      table.put(p);
      admin.flush(TABLE_NAME.getName());
      assertEquals(1, util.countRows(table));
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

      // Generate a bulk load file with more rows, excluded from minor compaction
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
          true);
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows + 1, util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // a minor compaction should not get rid of the file
      admin.compact(TABLE_NAME.getName());
      try {
        quickPoll(new Callable<Boolean>() {
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME.getName());
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
    int sleepMs = 10;
    int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
    while (retries-- > 0) {
      if (c.call().booleanValue()) {
        return;
      }
      Thread.sleep(sleepMs);
    }
    fail();
  }

  public static void main(String args[]) throws Exception {
    new TestHFileOutputFormat().manualTest(args);
  }

  public void manualTest(String args[]) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    util = new HBaseTestingUtility(conf);
    if ("newtable".equals(args[0])) {
      byte[] tname = args[1].getBytes();
      HTable table = util.createTable(tname, FAMILIES);
      HBaseAdmin admin = new HBaseAdmin(conf);
      admin.disableTable(tname);
      byte[][] startKeys = generateRandomStartKeys(5);
      util.createMultiRegions(conf, table, FAMILIES[0], startKeys);
      admin.enableTable(tname);
    } else if ("incremental".equals(args[0])) {
      byte[] tname = args[1].getBytes();
      HTable table = new HTable(conf, tname);
      Path outDir = new Path("incremental-out");
      runIncrementalPELoad(conf, table, outDir);
    } else {
      throw new RuntimeException(
          "usage: TestHFileOutputFormat newtable | incremental");
    }
  }

}