/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HadoopShims;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;
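
/**
 * Simple test for {@link HFileOutputFormat2}. Sets up and runs a mapreduce job
 * that writes HFile output, then verifies timestamp handling, TIMERANGE file
 * metadata, per-family settings, and incremental (bulk) loading of the results
 * back into a live table.
 */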
@Category(LargeTests.class)
public class TestHFileOutputFormat2 {
  private final static int ROWSPERSPLIT = 1024;

  private static final byte[][] FAMILIES = {
      Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A")),
      Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B")) };
  private static final TableName TABLE_NAME = TableName.valueOf("TestTable");

  private HBaseTestingUtility util = new HBaseTestingUtility();

  private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat2.class);
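
  /**
   * Simple mapper that makes {@link KeyValue} output: emits ROWSPERSPLIT random
   * rows per task, one cell per column family in FAMILIES.
   */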
  static class RandomKVGeneratingMapper
      extends Mapper<NullWritable, NullWritable, ImmutableBytesWritable, Cell> {

    private int keyLength;
    private static final int KEYLEN_DEFAULT = 10;
    private static final String KEYLEN_CONF = "randomkv.key.length";

    private int valLength;
    private static final int VALLEN_DEFAULT = 10;
    private static final String VALLEN_CONF = "randomkv.val.length";

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);

      Configuration conf = context.getConfiguration();
      keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
      valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
    }

    @Override
    protected void map(NullWritable n1, NullWritable n2, Context context)
        throws IOException, InterruptedException {

      byte keyBytes[] = new byte[keyLength];
      byte valBytes[] = new byte[valLength];

      int taskId = context.getTaskAttemptID().getTaskID().getId();
      assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

      Random random = new Random();
      for (int i = 0; i < ROWSPERSPLIT; i++) {
        random.nextBytes(keyBytes);
        // Ensure that different tasks generate different keys.
        keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
        random.nextBytes(valBytes);
        ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

        for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
          Cell kv = new KeyValue(keyBytes, family,
              PerformanceEvaluation.QUALIFIER_NAME, valBytes);
          context.write(key, kv);
        }
      }
    }
  }

  private void setupRandomGeneratorMapper(Job job) {
    job.setInputFormatClass(NMapInputFormat.class);
    job.setMapperClass(RandomKVGeneratingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
  }
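
  /**
   * Test that the {@link HFileOutputFormat2} RecordWriter replaces
   * {@link HConstants#LATEST_TIMESTAMP} with a real write time while leaving
   * cells that carry an explicit timestamp untouched.
   */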
  @Test
  public void test_LATEST_TIMESTAMP_isReplaced() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);
      final byte [] b = Bytes.toBytes("b");

      // Test 1. Pass a KV whose timestamp is LATEST_TIMESTAMP. The write call
      // should replace the timestamp; everything else in the kv stays the same.
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
      assertNotEquals(original.getTimestamp(), kv.getTimestamp());
      assertNotEquals(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());

      // Test 2. Pass a KV that has an explicit timestamp. It should not be changed.
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  private TaskAttemptContext createTestTaskAttemptContext(final Job job)
      throws Exception {
    HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
    TaskAttemptContext context = hadoop.createTestTaskAttemptContext(
        job, "attempt_201402131733_0001_m_000000_0");
    return context;
  }
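
  /**
   * Test that the {@link HFileOutputFormat2} RecordWriter writes TIMERANGE file
   * metadata covering the timestamps of the cells it has written.
   */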
  @Test
  public void test_TIMERANGE() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_TIMERANGE_present");
    LOG.info("Writing to dir: " + dir);
    try {
      // Build a record writer using HFileOutputFormat2.
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // Pass two key values with explicit timestamps.
      final byte [] b = Bytes.toBytes("b");

      KeyValue kv = new KeyValue(b, b, b, 2000, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      kv = new KeyValue(b, b, b, 1000, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // Close the writer and locate the file under the attempt directory.
      writer.close(context);

      FileSystem fs = FileSystem.get(conf);
      Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
      FileStatus[] sub1 = fs.listStatus(attemptDirectory);
      FileStatus[] file = fs.listStatus(sub1[0].getPath());

      // Open as an HFile Reader and verify it has the TIMERANGE file info.
      HFile.Reader rd = HFile.createReader(fs, file[0].getPath(),
          new CacheConfig(conf), conf);
      Map<byte[], byte[]> finfo = rd.loadFileInfo();
      byte[] range = finfo.get("TIMERANGE".getBytes());
      assertNotNull(range);

      // Unmarshall and check the timestamp values.
      TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
      Writables.copyWritable(range, timeRangeTracker);
      LOG.info(timeRangeTracker.getMinimumTimestamp() +
          "...." + timeRangeTracker.getMaximumTimestamp());
      assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
      assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
      rd.close();
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }
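
  /**
   * Run a small MR job that writes PerformanceEvaluation-style random data
   * through {@link HFileOutputFormat2} and check that output files are produced.
   */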
  @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

    // Keep the sort buffer small so the local MR job stays within test heap limits.
    conf.setInt("io.sort.mb", 20);
    // Keep the max file size small so the writer rolls over several HFiles.
    conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job);

    // Partition the full key space evenly between the reducers.
    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte) 0);
    Arrays.fill(endKey, (byte) 0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
    // Set start and end rows for the partitioner.
    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat2.class);
    job.setNumReduceTasks(4);
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus[] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }

  @Test
  public void testJobConfiguration() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    conf.set(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
        util.getDataTestDir("testJobConfiguration").toString());
    Job job = new Job(conf);
    job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
    HTable table = Mockito.mock(HTable.class);
    setupMockStartKeys(table);
    setupMockTableName(table);
    HFileOutputFormat2.configureIncrementalLoad(job, table);
    // One reducer per mocked region (four start keys).
    assertEquals(4, job.getNumReduceTasks());
  }

  private byte [][] generateRandomStartKeys(int numKeys) {
    Random random = new Random();
    byte[][] ret = new byte[numKeys][];
    // The first region always starts with an empty key.
    ret[0] = HConstants.EMPTY_BYTE_ARRAY;
    for (int i = 1; i < numKeys; i++) {
      ret[i] =
          PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
    }
    return ret;
  }

  @Test
  public void testMRIncrementalLoad() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoad\n");
    doIncrementalLoadTest(false, false);
  }

  @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
    doIncrementalLoadTest(true, false);
  }
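
  /**
   * Test for HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY = true. Runs the
   * incremental load both with and without a region change and verifies that
   * the resulting store files are local to the hosting region servers.
   */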
  @Test
  public void testMRIncrementalLoadWithLocality() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithLocality\n");
    doIncrementalLoadTest(false, true);
    doIncrementalLoadTest(true, true);
  }

  private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality)
      throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
    int hostCount = 1;
    int regionNum = 5;
    if (shouldKeepLocality) {
      // Use multiple hosts and more regions when checking block locality.
      hostCount = 3;
      regionNum = 20;
    }

    byte[][] startKeys = generateRandomStartKeys(regionNum);
    String[] hostnames = new String[hostCount];
    for (int i = 0; i < hostCount; ++i) {
      hostnames[i] = "datanode_" + i;
    }

    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    HBaseAdmin admin = null;
    try {
      util.startMiniCluster(1, hostCount, hostnames);
      admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));
      int numRegions =
          util.createMultiRegions(util.getConfiguration(), table, FAMILIES[0], startKeys);
      assertEquals("Should make " + regionNum + " regions", regionNum, numRegions);

      // Generate the bulk load files.
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);
      // This doesn't write into the table, it just makes files.
      assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

      // Make sure that a directory was created for every column family.
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // Handle the region-change case.
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getTableName());
        while (util.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
            .isRegionsInTransition()) {
          Threads.sleep(200);
          LOG.info("Waiting on table to finish disabling");
        }
        byte[][] newStartKeys = generateRandomStartKeys(15);
        util.createMultiRegions(util.getConfiguration(), table, FAMILIES[0], newStartKeys);
        admin.enableTable(table.getTableName());
        while (table.getRegionLocations().size() != 15
            || !admin.isTableAvailable(table.getTableName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load.
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure the data shows up.
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
          util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
      String tableDigestBefore = util.checksumRows(table);

      // Check region locality.
      HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
      for (HRegion region : util.getHBaseCluster().getRegions(TABLE_NAME)) {
        hbd.add(region.getHDFSBlocksDistribution());
      }
      for (String hostname : hostnames) {
        float locality = hbd.getBlockLocalityIndex(hostname);
        LOG.info("locality of [" + hostname + "]: " + locality);
        assertEquals(100, (int) (locality * 100));
      }

      // Cause regions to reopen and make sure the data survives.
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME.getName());
      assertEquals("Data should remain after reopening of regions", tableDigestBefore,
          util.checksumRows(table));
    } finally {
      util.deleteTable(TABLE_NAME);
      testDir.getFileSystem(conf).delete(testDir, true);
      if (admin != null) admin.close();
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  private void runIncrementalPELoad(Configuration conf, HTable table, Path outDir)
      throws Exception {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    setupRandomGeneratorMapper(job);
    HFileOutputFormat2.configureIncrementalLoad(job, table);
    FileOutputFormat.setOutputPath(job, outDir);

    assertFalse(util.getTestFileSystem().exists(outDir));

    assertEquals(table.getRegionLocations().size(), job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }
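
  /**
   * Test for {@link HFileOutputFormat2#configureCompression} and
   * {@link HFileOutputFormat2#createFamilyCompressionMap}. Tests that the
   * family-to-compression map is correctly serialized into and deserialized
   * from the job configuration.
   */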
  @Test
  public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Compression.Algorithm> familyToCompression =
          getMockColumnFamiliesForCompression(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForCompression(table, familyToCompression);
      HFileOutputFormat2.configureCompression(table, conf);

      // read back the family-specific compression settings from the configuration
      Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat2
          .createFamilyCompressionMap(conf);

      // test that we have a value for every column family that matters
      for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
        assertEquals("Compression configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForCompression(HTable table,
      Map<String, Compression.Algorithm> familyToCompression) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setCompressionType(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }
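
  /**
   * @return a map from column family names to compression algorithms for
   *         testing column family compression. Family names include special characters.
   */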
  private Map<String, Compression.Algorithm>
      getMockColumnFamiliesForCompression(int numCfs) {
    Map<String, Compression.Algorithm> familyToCompression =
        new HashMap<String, Compression.Algorithm>();

    if (numCfs-- > 0) {
      familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family3", Compression.Algorithm.NONE);
    }
    return familyToCompression;
  }
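
  /**
   * Test for {@link HFileOutputFormat2#configureBloomType} and
   * {@link HFileOutputFormat2#createFamilyBloomTypeMap}. Tests that the
   * family-to-bloom-type map is correctly serialized into and deserialized
   * from the job configuration.
   */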
  @Test
  public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 2; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, BloomType> familyToBloomType =
          getMockColumnFamiliesForBloomType(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBloomType(table, familyToBloomType);
      HFileOutputFormat2.configureBloomType(table, conf);

      // read back the family-specific bloom type settings from the configuration
      Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
          HFileOutputFormat2.createFamilyBloomTypeMap(conf);

      // test that we have a value for every column family that matters
      for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
        assertEquals("BloomType configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForBloomType(HTable table,
      Map<String, BloomType> familyToBloomType) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setBloomFilterType(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }
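
  /**
   * @return a map from column family names to bloom filter types for testing
   *         column family bloom type settings. Family names include special characters.
   */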
  private Map<String, BloomType>
      getMockColumnFamiliesForBloomType(int numCfs) {
    Map<String, BloomType> familyToBloomType =
        new HashMap<String, BloomType>();

    if (numCfs-- > 0) {
      familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
    }
    if (numCfs-- > 0) {
      familyToBloomType.put("Family2=asdads&!AASD", BloomType.ROWCOL);
    }
    if (numCfs-- > 0) {
      familyToBloomType.put("Family3", BloomType.NONE);
    }
    return familyToBloomType;
  }
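
  /**
   * Test for {@link HFileOutputFormat2#configureBlockSize} and
   * {@link HFileOutputFormat2#createFamilyBlockSizeMap}. Tests that the
   * family-to-block-size map is correctly serialized into and deserialized
   * from the job configuration.
   */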
  @Test
  public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Integer> familyToBlockSize =
          getMockColumnFamiliesForBlockSize(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBlockSize(table, familyToBlockSize);
      HFileOutputFormat2.configureBlockSize(table, conf);

      // read back the family-specific block size settings from the configuration
      Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
          HFileOutputFormat2.createFamilyBlockSizeMap(conf);

      // test that we have a value for every column family that matters
      for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
        assertEquals("BlockSize configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForBlockSize(HTable table,
      Map<String, Integer> familyToBlockSize) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setBlocksize(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }
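
  /**
   * @return a map from column family names to block sizes for testing
   *         column family block size settings. Family names include special characters.
   */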
  private Map<String, Integer>
      getMockColumnFamiliesForBlockSize(int numCfs) {
    Map<String, Integer> familyToBlockSize =
        new HashMap<String, Integer>();

    if (numCfs-- > 0) {
      familyToBlockSize.put("Family1!@#!@#&", 1234);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD", Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD", Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family3", 0);
    }
    return familyToBlockSize;
  }
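
  /**
   * Test for {@link HFileOutputFormat2#configureDataBlockEncoding} and
   * {@link HFileOutputFormat2#createFamilyDataBlockEncodingMap}. Tests that
   * the family-to-encoding map is correctly serialized into and deserialized
   * from the job configuration.
   */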
  @Test
  public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, DataBlockEncoding> familyToDataBlockEncoding =
          getMockColumnFamiliesForDataBlockEncoding(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForDataBlockEncoding(table, familyToDataBlockEncoding);
      HFileOutputFormat2.configureDataBlockEncoding(table, conf);

      // read back the family-specific data block encoding settings from the configuration
      Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
          HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);

      // test that we have a value for every column family that matters
      for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
        assertEquals("DataBlockEncoding configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForDataBlockEncoding(HTable table,
      Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setDataBlockEncoding(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }
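
  /**
   * @return a map from column family names to data block encodings for testing
   *         column family encoding settings. Family names include special characters.
   */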
  private Map<String, DataBlockEncoding>
      getMockColumnFamiliesForDataBlockEncoding(int numCfs) {
    Map<String, DataBlockEncoding> familyToDataBlockEncoding =
        new HashMap<String, DataBlockEncoding>();

    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD", DataBlockEncoding.FAST_DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD", DataBlockEncoding.PREFIX);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
    }
    return familyToDataBlockEncoding;
  }

  private void setupMockStartKeys(HTable table) throws IOException {
    byte[][] mockKeys = new byte[][] {
        HConstants.EMPTY_BYTE_ARRAY,
        Bytes.toBytes("aaa"),
        Bytes.toBytes("ggg"),
        Bytes.toBytes("zzz")
    };
    Mockito.doReturn(mockKeys).when(table).getStartKeys();
  }

  private void setupMockTableName(HTable table) throws IOException {
    TableName mockTableName = TableName.valueOf("mock_table");
    Mockito.doReturn(mockTableName).when(table).getName();
  }
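
  /**
   * Test that the {@link HFileOutputFormat2} RecordWriter uses the compression
   * and bloom filter settings from each column family descriptor.
   */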
  @Test
  public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("testColumnFamilySettings");

    // Set up a table descriptor with generated column families.
    HTable table = Mockito.mock(HTable.class);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    Mockito.doReturn(htd).when(table).getTableDescriptor();
    for (HColumnDescriptor hcd: HBaseTestingUtility.generateColumnDescriptors()) {
      htd.addFamily(hcd);
    }

    // Set up the mocked table to return some start keys.
    setupMockStartKeys(table);

    try {
      // Partial map-reduce setup to get an operational writer for testing.
      // Turn off sequence file compression for the MR spill files.
      conf.set("io.seqfile.compression.type", "NONE");
      conf.set("hbase.fs.tmp.dir", dir.toString());
      // Turn locality off so the record writer does not look up region locations.
      conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);

      Job job = new Job(conf, "testLocalMRIncrementalLoad");
      job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
      setupRandomGeneratorMapper(job);
      HFileOutputFormat2.configureIncrementalLoad(job, table);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // Write out random rows.
      writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
      writer.close(context);

      FileSystem fs = dir.getFileSystem(conf);

      // Commit so that the filesystem has one directory per column family.
      hof.getOutputCommitter(context).commitTask(context);
      hof.getOutputCommitter(context).commitJob(context);
      FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
      assertEquals(htd.getFamilies().size(), families.length);
      for (FileStatus f : families) {
        String familyStr = f.getPath().getName();
        HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));

        // Verify that the bloom filter and compression on each file match the
        // column family configuration.
        Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
        Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
        Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

        byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
        if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
        assertEquals("Incorrect bloom filter used for column family " + familyStr +
            "(reader: " + reader + ")",
            hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
        assertEquals("Incorrect compression used for column family " + familyStr +
            "(reader: " + reader + ")",
            hcd.getCompression(), reader.getFileContext().getCompression());
      }
    } finally {
      dir.getFileSystem(conf).delete(dir, true);
    }
  }
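
  /**
   * Write random key/values for each of the given column families to the
   * supplied record writer.
   */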
  private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
      TaskAttemptContext context, Set<byte[]> families, int numRows)
      throws IOException, InterruptedException {
    byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
    int valLength = 10;
    byte valBytes[] = new byte[valLength];

    int taskId = context.getTaskAttemptID().getTaskID().getId();
    assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

    Random random = new Random();
    for (int i = 0; i < numRows; i++) {
      Bytes.putInt(keyBytes, 0, i);
      random.nextBytes(valBytes);
      ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

      for (byte[] family : families) {
        Cell kv = new KeyValue(keyBytes, family,
            PerformanceEvaluation.QUALIFIER_NAME, valBytes);
        writer.write(key, kv);
      }
    }
  }
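
  /**
   * Bulk-loaded HFiles are flagged to be excluded from minor compactions.
   * Verify that after two bulk loads a requested (minor) compaction leaves
   * both store files in place and only a major compaction collapses them.
   */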
  @Ignore("Flakey: See HBASE-9051")
  @Test
  public void testExcludeAllFromMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // Deep inspection: locate the store file directory of the first region.
      final Path storePath = new Path(
          FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
          new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
              Bytes.toString(FAMILIES[0])));
      assertEquals(0, fs.listStatus(storePath).length);

      // Generate two bulk load files, flagged to be excluded from minor compaction.
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);
      util.startMiniMapReduceCluster();

      for (int i = 0; i < 2; i++) {
        Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
        runIncrementalPELoad(conf, table, testDir);
        // Perform the actual load.
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
      }

      // Ensure the data shows up.
      int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));

      // There should be two store files now.
      assertEquals(2, fs.listStatus(storePath).length);

      // A minor compaction shouldn't get rid of either file.
      admin.compact(TABLE_NAME.getName());
      try {
        quickPoll(new Callable<Boolean>() {
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
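        // Expected: the store file count never dropped to one because
        // bulk-loaded files are excluded from minor compaction.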
      }

      // A major compaction should work though.
      admin.majorCompact(TABLE_NAME.getName());
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  @Test
  public void testExcludeMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // Deep inspection: locate the store file directory of the first region.
      final Path storePath = new Path(
          FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
          new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
              Bytes.toString(FAMILIES[0])));
      assertEquals(0, fs.listStatus(storePath).length);

      // Put some data in the table and flush to create a store file.
      Put p = new Put(Bytes.toBytes("test"));
      p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
      table.put(p);
      admin.flush(TABLE_NAME.getName());
      assertEquals(1, util.countRows(table));
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

      // Generate a bulk load file with more rows, excluded from minor compaction.
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);

      // Perform the actual load.
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure the data shows up.
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows + 1, util.countRows(table));

      // There should be a second store file now.
      assertEquals(2, fs.listStatus(storePath).length);

      // A minor compaction shouldn't get rid of the bulk-loaded file.
      admin.compact(TABLE_NAME.getName());
      try {
        quickPoll(new Callable<Boolean>() {
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
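        // Expected: the store file count never dropped to one because the
        // bulk-loaded file is excluded from minor compaction.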
      }

      // A major compaction should work though.
      admin.majorCompact(TABLE_NAME.getName());
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
    int sleepMs = 10;
    int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
    while (retries-- > 0) {
      if (c.call().booleanValue()) {
        return;
      }
      Thread.sleep(sleepMs);
    }
    fail();
  }

  public static void main(String args[]) throws Exception {
    new TestHFileOutputFormat2().manualTest(args);
  }

  public void manualTest(String args[]) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    util = new HBaseTestingUtility(conf);
    if ("newtable".equals(args[0])) {
      byte[] tname = args[1].getBytes();
      HTable table = util.createTable(tname, FAMILIES);
      HBaseAdmin admin = new HBaseAdmin(conf);
      admin.disableTable(tname);
      byte[][] startKeys = generateRandomStartKeys(5);
      util.createMultiRegions(conf, table, FAMILIES[0], startKeys);
      admin.enableTable(tname);
    } else if ("incremental".equals(args[0])) {
      byte[] tname = args[1].getBytes();
      HTable table = new HTable(conf, tname);
      Path outDir = new Path("incremental-out");
      runIncrementalPELoad(conf, table, outDir);
    } else {
      throw new RuntimeException(
          "usage: TestHFileOutputFormat2 newtable | incremental");
    }
  }

}