package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

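/**
 * Tests {@link HFileBlockIndex}: writing and reading block indexes, binary
 * search and entry lookup within non-root index blocks, block index chunk
 * bookkeeping, and the heap size estimate of the block index reader.
 */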
@RunWith(Parameterized.class)
@Category(MediumTests.class)
public class TestHFileBlockIndex {

  @Parameters
  public static Collection<Object[]> compressionAlgorithms() {
    return HBaseTestingUtility.COMPRESSION_ALGORITHMS_PARAMETERIZED;
  }

  public TestHFileBlockIndex(Compression.Algorithm compr) {
    this.compr = compr;
  }

  private static final Log LOG = LogFactory.getLog(TestHFileBlockIndex.class);

  private static final int NUM_DATA_BLOCKS = 1000;
  private static final HBaseTestingUtility TEST_UTIL =
      new HBaseTestingUtility();

  private static final int SMALL_BLOCK_SIZE = 4096;
  private static final int NUM_KV = 10000;

  private static FileSystem fs;
  private Path path;
  private Random rand;
  private long rootIndexOffset;
  private int numRootEntries;
  private int numLevels;
  private static final List<byte[]> keys = new ArrayList<byte[]>();
  private final Compression.Algorithm compr;
  private byte[] firstKeyInFile;
  private Configuration conf;

  private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 };
  private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 };
  private static final int[] UNCOMPRESSED_INDEX_SIZES =
      { 19187, 21813, 23086 };

  private static final boolean includesMemstoreTS = true;

  static {
    assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length;
    assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length;
  }

  @Before
  public void setUp() throws IOException {
    keys.clear();
    rand = new Random(2389757);
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();

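    // Multi-level block indexes are an HFile format v2 feature, so run the
    // tests with the highest supported format version.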
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);

    fs = HFileSystem.get(conf);
  }

  @Test
  public void testBlockIndex() throws IOException {
    path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr);
    writeWholeIndex();
    readIndex();
  }

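  /**
   * A wrapper around the real block reader that remembers only the most
   * recently read block. A repeated request for the same block (same offset,
   * on-disk size, and pread flag) is counted as a hit and served from that
   * single-block "cache"; anything else is counted as a miss and delegated to
   * the underlying {@link HFileBlock.FSReader}.
   */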
  private static class BlockReaderWrapper implements HFile.CachingBlockReader {

    private HFileBlock.FSReader realReader;
    private long prevOffset;
    private long prevOnDiskSize;
    private boolean prevPread;
    private HFileBlock prevBlock;

    public int hitCount = 0;
    public int missCount = 0;

    public BlockReaderWrapper(HFileBlock.FSReader realReader) {
      this.realReader = realReader;
    }

    @Override
    public HFileBlock readBlock(long offset, long onDiskSize,
        boolean cacheBlock, boolean pread, boolean isCompaction,
        BlockType expectedBlockType)
        throws IOException {
      if (offset == prevOffset && onDiskSize == prevOnDiskSize &&
          pread == prevPread) {
        hitCount += 1;
        return prevBlock;
      }

      missCount += 1;
      prevBlock = realReader.readBlockData(offset, onDiskSize,
          -1, pread);
      prevOffset = offset;
      prevOnDiskSize = onDiskSize;
      prevPread = pread;

      return prevBlock;
    }
  }

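  /**
   * Reads back the root-level index of the file written by
   * {@link #writeWholeIndex()} and seeks to every generated key, checking that
   * keys smaller than the first indexed key resolve to no block and that the
   * wrapper's hit/miss counts match whether the seek landed in the same block
   * as the previous key.
   */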
  public void readIndex() throws IOException {
    long fileSize = fs.getFileStatus(path).getLen();
    LOG.info("Size of " + path + ": " + fileSize);

    FSDataInputStream istream = fs.open(path);
    HFileBlock.FSReader blockReader = new HFileBlock.FSReaderV2(istream,
        compr, fs.getFileStatus(path).getLen());

    BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
    HFileBlockIndex.BlockIndexReader indexReader =
        new HFileBlockIndex.BlockIndexReader(
            KeyValue.RAW_COMPARATOR, numLevels, brw);

    indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
        fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);

    long prevOffset = -1;
    int i = 0;
    int expectedHitCount = 0;
    int expectedMissCount = 0;
    LOG.info("Total number of keys: " + keys.size());
    for (byte[] key : keys) {
      assertTrue(key != null);
      assertTrue(indexReader != null);
      HFileBlock b = indexReader.seekToDataBlock(key, 0, key.length, null,
          true, true, false);
      if (Bytes.BYTES_RAWCOMPARATOR.compare(key, firstKeyInFile) < 0) {
        assertTrue(b == null);
        ++i;
        continue;
      }

      String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key);

      assertTrue("seekToDataBlock failed for " + keyStr, b != null);

      if (prevOffset == b.getOffset()) {
        assertEquals(++expectedHitCount, brw.hitCount);
      } else {
        LOG.info("First key in a new block: " + keyStr + ", block offset: "
            + b.getOffset());
        assertTrue(b.getOffset() > prevOffset);
        assertEquals(++expectedMissCount, brw.missCount);
        prevOffset = b.getOffset();
      }
      ++i;
    }

    istream.close();
  }

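  /**
   * Writes {@link #NUM_DATA_BLOCKS} small data blocks, generating 16 random
   * ordered keys per block (one of which becomes the block's index key),
   * letting the index writer emit inline index blocks whenever it decides one
   * is due, and finally writes the multi-level index, recording the root
   * index offset, the number of levels, and the number of root entries.
   */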
  private void writeWholeIndex() throws IOException {
    assertEquals(0, keys.size());
    HFileBlock.Writer hbw = new HFileBlock.Writer(compr, null,
        includesMemstoreTS, HFile.DEFAULT_CHECKSUM_TYPE,
        HFile.DEFAULT_BYTES_PER_CHECKSUM);
    FSDataOutputStream outputStream = fs.create(path);
    HFileBlockIndex.BlockIndexWriter biw =
        new HFileBlockIndex.BlockIndexWriter(hbw, null, null);

    for (int i = 0; i < NUM_DATA_BLOCKS; ++i) {
      hbw.startWriting(BlockType.DATA).write(
          String.valueOf(rand.nextInt(1000)).getBytes());
      long blockOffset = outputStream.getPos();
      hbw.writeHeaderAndData(outputStream);

      byte[] firstKey = null;
      for (int j = 0; j < 16; ++j) {
        byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i * 16 + j);
        keys.add(k);
        if (j == 8)
          firstKey = k;
      }
      assertTrue(firstKey != null);
      if (firstKeyInFile == null)
        firstKeyInFile = firstKey;
      biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader());

      writeInlineBlocks(hbw, outputStream, biw, false);
    }
    writeInlineBlocks(hbw, outputStream, biw, true);
    rootIndexOffset = biw.writeIndexBlocks(outputStream);
    outputStream.close();

    numLevels = biw.getNumLevels();
    numRootEntries = biw.getNumRootEntries();

    LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" +
        numRootEntries + ", rootIndexOffset=" + rootIndexOffset);
  }

  private void writeInlineBlocks(HFileBlock.Writer hbw,
      FSDataOutputStream outputStream, HFileBlockIndex.BlockIndexWriter biw,
      boolean isClosing) throws IOException {
    while (biw.shouldWriteBlock(isClosing)) {
      long offset = outputStream.getPos();
      biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType()));
      hbw.writeHeaderAndData(outputStream);
      biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(),
          hbw.getUncompressedSizeWithoutHeader());
      LOG.info("Wrote an inline index block at " + offset + ", size " +
          hbw.getOnDiskSizeWithHeader());
    }
  }

  private static final long getDummyFileOffset(int i) {
    return i * 185 + 379;
  }

  private static final int getDummyOnDiskSize(int i) {
    return i * i * 37 + i * 19 + 13;
  }

  @Test
  public void testSecondaryIndexBinarySearch() throws IOException {
    int numTotalKeys = 99;
    assertTrue(numTotalKeys % 2 == 1);

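    // Only every other key (the odd-indexed ones) is written into the
    // simulated non-root index block; the even-indexed keys are used as
    // probes that fall before, between, or after the stored entries.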
    int numSearchedKeys = (numTotalKeys - 1) / 2;

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);

    dos.writeInt(numSearchedKeys);
    int curAllEntriesSize = 0;
    int numEntriesAdded = 0;

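    // For each key, remember the relative offset of its secondary index
    // entry, or -1 if the key is not stored in the block.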
    int[] secondaryIndexEntries = new int[numTotalKeys];

    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i * 2);
      keys.add(k);
      String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): ";
      StringBuilder padding = new StringBuilder();
      while (msgPrefix.length() + padding.length() < 70)
        padding.append(' ');
      msgPrefix += padding;
      if (i % 2 == 1) {
        dos.writeInt(curAllEntriesSize);
        secondaryIndexEntries[i] = curAllEntriesSize;
        LOG.info(msgPrefix + "secondary index entry #" + ((i - 1) / 2) +
            ", offset " + curAllEntriesSize);
        curAllEntriesSize += k.length
            + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
        ++numEntriesAdded;
      } else {
        secondaryIndexEntries[i] = -1;
        LOG.info(msgPrefix + "not in the searched array");
      }
    }

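    // Sanity check: the generated keys must be strictly increasing.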
    for (int i = 0; i < keys.size() - 1; ++i)
      assertTrue(Bytes.BYTES_RAWCOMPARATOR.compare(keys.get(i),
          keys.get(i + 1)) < 0);

    dos.writeInt(curAllEntriesSize);
    assertEquals(numSearchedKeys, numEntriesAdded);
    int secondaryIndexOffset = dos.size();
    assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2),
        secondaryIndexOffset);

    for (int i = 1; i <= numTotalKeys - 1; i += 2) {
      assertEquals(dos.size(),
          secondaryIndexOffset + secondaryIndexEntries[i]);
      long dummyFileOffset = getDummyFileOffset(i);
      int dummyOnDiskSize = getDummyOnDiskSize(i);
      LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" +
          dummyOnDiskSize + " at offset " + dos.size());
      dos.writeLong(dummyFileOffset);
      dos.writeInt(dummyOnDiskSize);
      LOG.debug("Stored key " + ((i - 1) / 2) + " at offset " + dos.size());
      dos.write(keys.get(i));
    }

    dos.writeInt(curAllEntriesSize);

    ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray());
    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] searchKey = keys.get(i);
      byte[] arrayHoldingKey = new byte[searchKey.length +
          searchKey.length / 2];

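      // To exercise the offset/length handling of the search, place the key
      // being looked up at a non-zero offset inside a larger array.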
      System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2,
          searchKey.length);

      int searchResult = BlockIndexReader.binarySearchNonRootIndex(
          arrayHoldingKey, searchKey.length / 2, searchKey.length, nonRootIndex,
          KeyValue.RAW_COMPARATOR);
      String lookupFailureMsg = "Failed to look up key #" + i + " ("
          + Bytes.toStringBinary(searchKey) + ")";

      int expectedResult;
      int referenceItem;

      if (i % 2 == 1) {
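        // Odd-indexed keys were stored in the block, so the binary search
        // must find this exact key as element (i - 1) / 2.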
        expectedResult = (i - 1) / 2;
        referenceItem = i;
      } else {
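        // Even-indexed keys were not stored: the search should return the
        // index of the preceding stored key, or -1 for the very first key.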
        expectedResult = i / 2 - 1;
        referenceItem = i - 1;
      }

      assertEquals(lookupFailureMsg, expectedResult, searchResult);

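      // Now verify the higher-level API: locateNonRootIndexEntry should
      // position the buffer at the matching entry's file offset and on-disk
      // size, or report failure when the key precedes all stored entries.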
      boolean locateBlockResult =
          (BlockIndexReader.locateNonRootIndexEntry(nonRootIndex, arrayHoldingKey,
          searchKey.length / 2, searchKey.length, KeyValue.RAW_COMPARATOR) != -1);

      if (i == 0) {
        assertFalse(locateBlockResult);
      } else {
        assertTrue(locateBlockResult);
        String errorMsg = "i=" + i + ", position=" + nonRootIndex.position();
        assertEquals(errorMsg, getDummyFileOffset(referenceItem),
            nonRootIndex.getLong());
        assertEquals(errorMsg, getDummyOnDiskSize(referenceItem),
            nonRootIndex.getInt());
      }
    }
  }

  @Test
  public void testBlockIndexChunk() throws IOException {
    BlockIndexChunk c = new BlockIndexChunk();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    int N = 1000;
    int[] numSubEntriesAt = new int[N];
    int numSubEntries = 0;
    for (int i = 0; i < N; ++i) {
      baos.reset();
      DataOutputStream dos = new DataOutputStream(baos);
      c.writeNonRoot(dos);
      assertEquals(c.getNonRootSize(), dos.size());

      baos.reset();
      dos = new DataOutputStream(baos);
      c.writeRoot(dos);
      assertEquals(c.getRootSize(), dos.size());

      byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i);
      numSubEntries += rand.nextInt(5) + 1;
      // Record the cumulative sub-entry count at entry i; the lookup check
      // below relies on it, and without this assignment that check is a no-op.
      numSubEntriesAt[i] = numSubEntries;
      keys.add(k);
      c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries);
    }

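    // Test the ability to look up, for every sub-entry ordinal, the chunk
    // entry that contains it (the mapping that mid-key calculation relies on).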
    for (int i = 0; i < N; ++i) {
      for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1];
          j < numSubEntriesAt[i];
          ++j) {
        assertEquals(i, c.getEntryBySubEntry(j));
      }
    }
  }

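  /** Checks that the heap size estimate of {@link BlockIndexReader} is sane. */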
  @Test
  public void testHeapSizeForBlockIndex() throws IOException {
    Class<HFileBlockIndex.BlockIndexReader> cl =
        HFileBlockIndex.BlockIndexReader.class;
    long expected = ClassSize.estimateBase(cl, false);

    HFileBlockIndex.BlockIndexReader bi =
        new HFileBlockIndex.BlockIndexReader(KeyValue.RAW_COMPARATOR, 1);
    long actual = bi.heapSize();

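    // The reader holds three arrays (block keys, block offsets, and on-disk
    // block sizes) that are still null here and therefore do not show up in
    // heapSize(), so subtract their base cost from the estimate.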
    expected -= ClassSize.align(3 * ClassSize.ARRAY);

    if (expected != actual) {
      ClassSize.estimateBase(cl, true);
      assertEquals(expected, actual);
    }
  }

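  /**
   * Tests the block index through the HFile writer/reader APIs: for each
   * configured index chunk size, writes an HFile with many random keys,
   * verifies the expected number of index levels, seeks to every key in both
   * directions, cross-checks leaf-level index keys against the written keys,
   * and validates the mid-key and the uncompressed index size recorded in the
   * trailer.
   */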
  @Test
  public void testHFileWriterAndReader() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
        "hfile_for_block_index");
    CacheConfig cacheConf = new CacheConfig(conf);
    BlockCache blockCache = cacheConf.getBlockCache();

    for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) {
      int indexBlockSize = INDEX_CHUNK_SIZES[testI];
      int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
      LOG.info("Index block size: " + indexBlockSize + ", compression: "
          + compr);

      blockCache.evictBlocksByHfileName(hfilePath.getName());

      conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
      Set<String> keyStrSet = new HashSet<String>();
      byte[][] keys = new byte[NUM_KV][];
      byte[][] values = new byte[NUM_KV][];

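      // Write the HFile: NUM_KV random ordered keys with random values, using
      // small data blocks so that a multi-level index is produced.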
      {
        HFile.Writer writer =
            HFile.getWriterFactory(conf, cacheConf)
                .withPath(fs, hfilePath)
                .withBlockSize(SMALL_BLOCK_SIZE)
                .withCompression(compr)
                .create();
        Random rand = new Random(19231737);

        for (int i = 0; i < NUM_KV; ++i) {
          byte[] row = TestHFileWriterV2.randomOrderedKey(rand, i);

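          // Build a KeyValue-format key on the row (with empty family and
          // qualifier) so it sorts correctly under KeyValue.COMPARATOR.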
          byte[] k = KeyValue.createFirstOnRow(row, 0, row.length, row, 0, 0,
              row, 0, 0).getKey();

          byte[] v = TestHFileWriterV2.randomValue(rand);
          writer.append(k, v);
          keys[i] = k;
          values[i] = v;
          keyStrSet.add(Bytes.toStringBinary(k));

          if (i > 0) {
            assertTrue(KeyValue.COMPARATOR.compareFlatKey(keys[i - 1],
                keys[i]) < 0);
          }
        }

        writer.close();
      }

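      // Read the HFile back and verify the index depth, the first and last
      // keys, and that every key can be found by seeking in both directions.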
      HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf);
      assertEquals(expectedNumLevels,
          reader.getTrailer().getNumDataIndexLevels());

      assertTrue(Bytes.equals(keys[0], reader.getFirstKey()));
      assertTrue(Bytes.equals(keys[NUM_KV - 1], reader.getLastKey()));
      LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1]));

      for (boolean pread : new boolean[] { false, true }) {
        HFileScanner scanner = reader.getScanner(true, pread);
        for (int i = 0; i < NUM_KV; ++i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(),
              scanner.getValue());
        }
        assertTrue(scanner.seekTo());
        for (int i = NUM_KV - 1; i >= 0; --i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(),
              scanner.getValue());
        }
      }

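      // Manually walk the blocks before the load-on-open section, collecting
      // the keys stored in leaf-level index blocks so the mid-key can be
      // cross-checked against them.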
      HFileReaderV2 reader2 = (HFileReaderV2) reader;
      HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader();

      HFileBlock.BlockIterator iter = fsReader.blockRange(0,
          reader.getTrailer().getLoadOnOpenDataOffset());
      HFileBlock block;
      List<byte[]> blockKeys = new ArrayList<byte[]>();
      while ((block = iter.nextBlock()) != null) {
        if (block.getBlockType() != BlockType.LEAF_INDEX)
          return;
        ByteBuffer b = block.getBufferReadOnly();
        int n = b.getInt();
        int entriesOffset = Bytes.SIZEOF_INT * (n + 2);

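        // Extract every key stored in this leaf-level index block.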
        for (int i = 0; i < n; ++i) {
          int keyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 1));
          int nextKeyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 2));
          // Each entry stores a file offset and on-disk size before the key,
          // so subtract that overhead to get the key length.
          int keyLen = nextKeyRelOffset - keyRelOffset
              - HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
          int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset +
              HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
          byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset
              + keyLen);
          String blockKeyStr = Bytes.toString(blockKey);
          blockKeys.add(blockKey);

          assertTrue("Invalid block key from leaf-level block: " + blockKeyStr,
              keyStrSet.contains(blockKeyStr));
        }
      }

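      // The mid-key reported by the reader must match the middle key of the
      // leaf-level index blocks collected above.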
      assertEquals(
          Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)),
          Bytes.toStringBinary(reader.midkey()));

      assertEquals(UNCOMPRESSED_INDEX_SIZES[testI],
          reader.getTrailer().getUncompressedDataIndexSize());

      reader.close();
      reader2.close();
    }
  }

  private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i)
      throws IOException {
    assertEquals("Failed to seek to key #" + i + " ("
        + Bytes.toStringBinary(keys[i]) + ")", 0, scanner.seekTo(keys[i]));
  }

  private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr,
      ByteBuffer buf) {
    assertEquals(msgPrefix + ": expected " + Bytes.toStringBinary(arr)
        + ", actual " + Bytes.toStringBinary(buf), 0, Bytes.compareTo(arr, 0,
        arr.length, buf.array(), buf.arrayOffset(), buf.limit()));
  }

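  /** Checks a key/value pair after it has been read back through the scanner. */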
  private void checkKeyValue(String msgPrefix, byte[] expectedKey,
      byte[] expectedValue, ByteBuffer keyRead, ByteBuffer valueRead) {
    if (!msgPrefix.isEmpty())
      msgPrefix += ". ";

    assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead);
    assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue,
        valueRead);
  }

}