/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */

package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

@RunWith(Parameterized.class)
@Category(MediumTests.class)
public class TestHFileBlockIndex {

  @Parameters
  public static Collection<Object[]> compressionAlgorithms() {
    return HBaseTestingUtility.COMPRESSION_ALGORITHMS_PARAMETERIZED;
  }

  public TestHFileBlockIndex(Compression.Algorithm compr) {
    this.compr = compr;
  }

  private static final Log LOG = LogFactory.getLog(TestHFileBlockIndex.class);

  private static final int NUM_DATA_BLOCKS = 1000;
  private static final HBaseTestingUtility TEST_UTIL =
      new HBaseTestingUtility();

  private static final int SMALL_BLOCK_SIZE = 4096;
  private static final int NUM_KV = 10000;

  private static FileSystem fs;
  private Path path;
  private Random rand;
  private long rootIndexOffset;
  private int numRootEntries;
  private int numLevels;
  private static final List<byte[]> keys = new ArrayList<byte[]>();
  private final Compression.Algorithm compr;
  private byte[] firstKeyInFile;
  private Configuration conf;

  private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 };
  private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 };
  private static final int[] UNCOMPRESSED_INDEX_SIZES =
      { 19187, 21813, 23086 };

  private static final boolean includesMemstoreTS = true;

  static {
    assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length;
    assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length;
  }

  @Before
  public void setUp() throws IOException {
    keys.clear();
    rand = new Random(2389757);
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();

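    // These tests exercise the version-2 multi-level block index, so use the
    // highest supported HFile format version.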
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);

    fs = HFileSystem.get(conf);
  }

  @Test
  public void testBlockIndex() throws IOException {
    path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr);
    writeWholeIndex();
    readIndex();
  }

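  /**
   * A wrapper around an HFileBlock.FSReader that remembers the result of the
   * most recent read, so the test can count how many index lookups are served
   * from the previously read block versus actual reads. Not thread-safe.
   */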
  private static class BlockReaderWrapper implements HFile.CachingBlockReader {

    private HFileBlock.FSReader realReader;
    private long prevOffset;
    private long prevOnDiskSize;
    private boolean prevPread;
    private HFileBlock prevBlock;

    public int hitCount = 0;
    public int missCount = 0;

    public BlockReaderWrapper(HFileBlock.FSReader realReader) {
      this.realReader = realReader;
    }

    @Override
    public HFileBlock readBlock(long offset, long onDiskSize,
        boolean cacheBlock, boolean pread, boolean isCompaction,
        BlockType expectedBlockType)
        throws IOException {
      if (offset == prevOffset && onDiskSize == prevOnDiskSize &&
          pread == prevPread) {
        hitCount += 1;
        return prevBlock;
      }

      missCount += 1;
      prevBlock = realReader.readBlockData(offset, onDiskSize,
          -1, pread);
      prevOffset = offset;
      prevOnDiskSize = onDiskSize;
      prevPread = pread;

      return prevBlock;
    }
  }

  public void readIndex() throws IOException {
    long fileSize = fs.getFileStatus(path).getLen();
    LOG.info("Size of " + path + ": " + fileSize);

    FSDataInputStream istream = fs.open(path);
    HFileBlock.FSReader blockReader = new HFileBlock.FSReaderV2(istream,
        compr, fileSize);

    BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
    HFileBlockIndex.BlockIndexReader indexReader =
        new HFileBlockIndex.BlockIndexReader(
            Bytes.BYTES_RAWCOMPARATOR, numLevels, brw);

    indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
        fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);

    long prevOffset = -1;
    int i = 0;
    int expectedHitCount = 0;
    int expectedMissCount = 0;
    LOG.info("Total number of keys: " + keys.size());
    for (byte[] key : keys) {
      assertTrue(key != null);
      assertTrue(indexReader != null);
      HFileBlock b = indexReader.seekToDataBlock(key, 0, key.length, null,
          true, true, false);
      if (Bytes.BYTES_RAWCOMPARATOR.compare(key, firstKeyInFile) < 0) {
        assertTrue(b == null);
        ++i;
        continue;
      }

      String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key);

      assertTrue("seekToDataBlock failed for " + keyStr, b != null);

      if (prevOffset == b.getOffset()) {
        assertEquals(++expectedHitCount, brw.hitCount);
      } else {
        LOG.info("First key in a new block: " + keyStr + ", block offset: "
            + b.getOffset());
        assertTrue(b.getOffset() > prevOffset);
        assertEquals(++expectedMissCount, brw.missCount);
        prevOffset = b.getOffset();
      }
      ++i;
    }

    istream.close();
  }

  private void writeWholeIndex() throws IOException {
    assertEquals(0, keys.size());
    HFileBlock.Writer hbw = new HFileBlock.Writer(compr, null,
        includesMemstoreTS,
        1,
        HFile.DEFAULT_CHECKSUM_TYPE,
        HFile.DEFAULT_BYTES_PER_CHECKSUM);
    FSDataOutputStream outputStream = fs.create(path);
    HFileBlockIndex.BlockIndexWriter biw =
        new HFileBlockIndex.BlockIndexWriter(hbw, null, null);

    for (int i = 0; i < NUM_DATA_BLOCKS; ++i) {
      hbw.startWriting(BlockType.DATA).write(
          String.valueOf(rand.nextInt(1000)).getBytes());
      long blockOffset = outputStream.getPos();
      hbw.writeHeaderAndData(outputStream);

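      // Generate 16 keys per block but index the block under the middle one.
      // Keys earlier in the batch sort before the block's index entry, so
      // lookups for them must resolve to the previous block, or to no block
      // at all for keys before the first indexed key.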
      byte[] firstKey = null;
      for (int j = 0; j < 16; ++j) {
        byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i * 16 + j);
        keys.add(k);
        if (j == 8)
          firstKey = k;
      }
      assertTrue(firstKey != null);
      if (firstKeyInFile == null)
        firstKeyInFile = firstKey;
      biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader());

      writeInlineBlocks(hbw, outputStream, biw, false);
    }
    writeInlineBlocks(hbw, outputStream, biw, true);
    rootIndexOffset = biw.writeIndexBlocks(outputStream);
    outputStream.close();

    numLevels = biw.getNumLevels();
    numRootEntries = biw.getNumRootEntries();

    LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" +
        numRootEntries + ", rootIndexOffset=" + rootIndexOffset);
  }

  private void writeInlineBlocks(HFileBlock.Writer hbw,
      FSDataOutputStream outputStream, HFileBlockIndex.BlockIndexWriter biw,
      boolean isClosing) throws IOException {
    while (biw.shouldWriteBlock(isClosing)) {
      long offset = outputStream.getPos();
      biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType()));
      hbw.writeHeaderAndData(outputStream);
      biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(),
          hbw.getUncompressedSizeWithoutHeader());
      LOG.info("Wrote an inline index block at " + offset + ", size " +
          hbw.getOnDiskSizeWithHeader());
    }
  }

  private static final long getDummyFileOffset(int i) {
    return i * 185 + 379;
  }

  private static final int getDummyOnDiskSize(int i) {
    return i * i * 37 + i * 19 + 13;
  }

  @Test
  public void testSecondaryIndexBinarySearch() throws IOException {
    int numTotalKeys = 99;
    assertTrue(numTotalKeys % 2 == 1);

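    // Only every other key goes into the array we binary-search; the
    // remaining keys fall between (or outside) the searched entries.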
    int numSearchedKeys = (numTotalKeys - 1) / 2;

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);

    dos.writeInt(numSearchedKeys);
    int curAllEntriesSize = 0;
    int numEntriesAdded = 0;

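    // For each key, the relative offset of its secondary-index entry, or -1
    // if the key was not put into the searched array.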
    int secondaryIndexEntries[] = new int[numTotalKeys];

    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i * 2);
      keys.add(k);
      String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): ";
      StringBuilder padding = new StringBuilder();
      while (msgPrefix.length() + padding.length() < 70)
        padding.append(' ');
      msgPrefix += padding;
      if (i % 2 == 1) {
        dos.writeInt(curAllEntriesSize);
        secondaryIndexEntries[i] = curAllEntriesSize;
        LOG.info(msgPrefix + "secondary index entry #" + ((i - 1) / 2) +
            ", offset " + curAllEntriesSize);
        curAllEntriesSize += k.length
            + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
        ++numEntriesAdded;
      } else {
        secondaryIndexEntries[i] = -1;
        LOG.info(msgPrefix + "not in the searched array");
      }
    }

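    // Sanity check: the generated keys must be strictly increasing.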
    for (int i = 0; i < keys.size() - 1; ++i)
      assertTrue(Bytes.BYTES_RAWCOMPARATOR.compare(keys.get(i),
          keys.get(i + 1)) < 0);

    dos.writeInt(curAllEntriesSize);
    assertEquals(numSearchedKeys, numEntriesAdded);
    int secondaryIndexOffset = dos.size();
    assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2),
        secondaryIndexOffset);

    for (int i = 1; i <= numTotalKeys - 1; i += 2) {
      assertEquals(dos.size(),
          secondaryIndexOffset + secondaryIndexEntries[i]);
      long dummyFileOffset = getDummyFileOffset(i);
      int dummyOnDiskSize = getDummyOnDiskSize(i);
      LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" +
          dummyOnDiskSize + " at offset " + dos.size());
      dos.writeLong(dummyFileOffset);
      dos.writeInt(dummyOnDiskSize);
      LOG.debug("Stored key " + ((i - 1) / 2) + " at offset " + dos.size());
      dos.write(keys.get(i));
    }

    dos.writeInt(curAllEntriesSize);

    ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray());
    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] searchKey = keys.get(i);
      byte[] arrayHoldingKey = new byte[searchKey.length +
          searchKey.length / 2];
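      // Store the key at a non-zero offset in a larger array to exercise
      // the offset/length arguments of the search methods.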
      System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2,
          searchKey.length);

      int searchResult = BlockIndexReader.binarySearchNonRootIndex(
          arrayHoldingKey, searchKey.length / 2, searchKey.length, nonRootIndex,
          Bytes.BYTES_RAWCOMPARATOR);
      String lookupFailureMsg = "Failed to look up key #" + i + " ("
          + Bytes.toStringBinary(searchKey) + ")";

      int expectedResult;
      int referenceItem;

      if (i % 2 == 1) {
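        // This key is in the searched array as entry (i - 1) / 2.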
        expectedResult = (i - 1) / 2;
        referenceItem = i;
      } else {
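        // This key is not in the searched array, so the search should return
        // the preceding entry, which is -1 for the very first key.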
        expectedResult = i / 2 - 1;
        referenceItem = i - 1;
      }

      assertEquals(lookupFailureMsg, expectedResult, searchResult);

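      // The higher-level lookup should find the entry and leave the buffer
      // positioned at its file offset and on-disk size fields.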
      boolean locateBlockResult =
          (BlockIndexReader.locateNonRootIndexEntry(nonRootIndex, arrayHoldingKey,
          searchKey.length / 2, searchKey.length, Bytes.BYTES_RAWCOMPARATOR) != -1);

      if (i == 0) {
        assertFalse(locateBlockResult);
      } else {
        assertTrue(locateBlockResult);
        String errorMsg = "i=" + i + ", position=" + nonRootIndex.position();
        assertEquals(errorMsg, getDummyFileOffset(referenceItem),
            nonRootIndex.getLong());
        assertEquals(errorMsg, getDummyOnDiskSize(referenceItem),
            nonRootIndex.getInt());
      }
    }
  }

  @Test
  public void testBlockIndexChunk() throws IOException {
    BlockIndexChunk c = new BlockIndexChunk();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    int N = 1000;
    int[] numSubEntriesAt = new int[N];
    int numSubEntries = 0;
    for (int i = 0; i < N; ++i) {
      baos.reset();
      DataOutputStream dos = new DataOutputStream(baos);
      c.writeNonRoot(dos);
      assertEquals(c.getNonRootSize(), dos.size());

      baos.reset();
      dos = new DataOutputStream(baos);
      c.writeRoot(dos);
      assertEquals(c.getRootSize(), dos.size());

      byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i);
      numSubEntries += rand.nextInt(5) + 1;
      keys.add(k);
      c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries);
      // Record the cumulative sub-entry count; without this assignment the
      // verification loop below has nothing to check.
      numSubEntriesAt[i] = numSubEntries;
    }
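    // Check the ability to look up the entry that contains a given
    // "sub-entry" (a deeper-level index block's entry), assuming a global
    // zero-based ordering of sub-entries; mid-key calculation relies on this.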
    for (int i = 0; i < N; ++i) {
      for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1];
          j < numSubEntriesAt[i];
          ++j) {
        assertEquals(i, c.getEntryBySubEntry(j));
      }
    }
  }

  @Test
  public void testHeapSizeForBlockIndex() throws IOException {
    Class<HFileBlockIndex.BlockIndexReader> cl =
        HFileBlockIndex.BlockIndexReader.class;
    long expected = ClassSize.estimateBase(cl, false);

    HFileBlockIndex.BlockIndexReader bi =
        new HFileBlockIndex.BlockIndexReader(Bytes.BYTES_RAWCOMPARATOR, 1);
    long actual = bi.heapSize();

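    // The reader's three entry arrays (block keys, offsets, and on-disk
    // sizes) are still null, so they do not show up in heapSize(); subtract
    // their aligned cost from the class-based estimate.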
    expected -= ClassSize.align(3 * ClassSize.ARRAY);

    if (expected != actual) {
      ClassSize.estimateBase(cl, true);
      assertEquals(expected, actual);
    }
  }

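  /**
   * Exercises the block index through the HFile writer/reader API: the index
   * block size is set through the configuration, producing multi-level
   * indexes, which are then validated by reading the file back.
   */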
  @Test
  public void testHFileWriterAndReader() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
        "hfile_for_block_index");
    CacheConfig cacheConf = new CacheConfig(conf);
    BlockCache blockCache = cacheConf.getBlockCache();

    for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) {
      int indexBlockSize = INDEX_CHUNK_SIZES[testI];
      int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
      LOG.info("Index block size: " + indexBlockSize + ", compression: "
          + compr);

      blockCache.evictBlocksByHfileName(hfilePath.getName());

      conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
      Set<String> keyStrSet = new HashSet<String>();
      byte[][] keys = new byte[NUM_KV][];
      byte[][] values = new byte[NUM_KV][];

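      // Write the HFile.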
      {
        HFile.Writer writer =
            HFile.getWriterFactory(conf, cacheConf)
                .withPath(fs, hfilePath)
                .withBlockSize(SMALL_BLOCK_SIZE)
                .withCompression(compr)
                .withComparator(KeyValue.KEY_COMPARATOR)
                .create();
        Random rand = new Random(19231737);

        for (int i = 0; i < NUM_KV; ++i) {
          byte[] row = TestHFileWriterV2.randomOrderedKey(rand, i);
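          // Build the first possible full key on this row (empty family
          // and qualifier).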
          byte[] k = KeyValue.createFirstOnRow(row, 0, row.length, row, 0, 0,
              row, 0, 0).getKey();

          byte[] v = TestHFileWriterV2.randomValue(rand);
          writer.append(k, v);
          keys[i] = k;
          values[i] = v;
          keyStrSet.add(Bytes.toStringBinary(k));

          if (i > 0) {
            assertTrue(KeyValue.KEY_COMPARATOR.compare(keys[i - 1],
                keys[i]) < 0);
          }
        }

        writer.close();
      }

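      // Read the HFile back and validate the index structure.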
      HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf);
      assertEquals(expectedNumLevels,
          reader.getTrailer().getNumDataIndexLevels());

      assertTrue(Bytes.equals(keys[0], reader.getFirstKey()));
      assertTrue(Bytes.equals(keys[NUM_KV - 1], reader.getLastKey()));
      LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1]));

      for (boolean pread : new boolean[] { false, true }) {
        HFileScanner scanner = reader.getScanner(true, pread);
        for (int i = 0; i < NUM_KV; ++i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(),
              scanner.getValue());
        }
        assertTrue(scanner.seekTo());
        for (int i = NUM_KV - 1; i >= 0; --i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(),
              scanner.getValue());
        }
      }

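      // Iterate over the file's blocks manually and collect the keys stored
      // in all leaf-level index blocks.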
      HFileReaderV2 reader2 = (HFileReaderV2) reader;
      HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader();

      HFileBlock.BlockIterator iter = fsReader.blockRange(0,
          reader.getTrailer().getLoadOnOpenDataOffset());
      HFileBlock block;
      List<byte[]> blockKeys = new ArrayList<byte[]>();
      while ((block = iter.nextBlock()) != null) {
        // Only leaf-level index blocks are examined; skip data and
        // intermediate-level index blocks instead of bailing out of the test.
        if (block.getBlockType() != BlockType.LEAF_INDEX)
          continue;
        ByteBuffer b = block.getBufferReadOnly();
        int n = b.getInt();
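        // Entries start after one int holding the entry count and n + 1
        // secondary-index offsets.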
        int entriesOffset = Bytes.SIZEOF_INT * (n + 2);

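        // Extract each entry's key and make sure it is one of the keys that
        // were actually written.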
        for (int i = 0; i < n; ++i) {
          int keyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 1));
          int nextKeyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 2));
          int keyLen = nextKeyRelOffset - keyRelOffset;
          int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset +
              HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
          byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset
              + keyLen);
          String blockKeyStr = Bytes.toString(blockKey);
          blockKeys.add(blockKey);

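          // A key missing from the written set means the non-root index
          // block format is being parsed incorrectly.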
          assertTrue("Invalid block key from leaf-level block: " + blockKeyStr,
              keyStrSet.contains(blockKeyStr));
        }
      }

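      // The reader's mid-key must match the middle leaf-level index key.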
      assertEquals(
          Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)),
          Bytes.toStringBinary(reader.midkey()));

      assertEquals(UNCOMPRESSED_INDEX_SIZES[testI],
          reader.getTrailer().getUncompressedDataIndexSize());

      reader.close();
    }
  }

  private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i)
      throws IOException {
    assertEquals("Failed to seek to key #" + i + " ("
        + Bytes.toStringBinary(keys[i]) + ")", 0, scanner.seekTo(keys[i]));
  }

  private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr,
      ByteBuffer buf) {
    assertEquals(msgPrefix + ": expected " + Bytes.toStringBinary(arr)
        + ", actual " + Bytes.toStringBinary(buf), 0, Bytes.compareTo(arr, 0,
        arr.length, buf.array(), buf.arrayOffset(), buf.limit()));
  }

  private void checkKeyValue(String msgPrefix, byte[] expectedKey,
      byte[] expectedValue, ByteBuffer keyRead, ByteBuffer valueRead) {
    if (!msgPrefix.isEmpty())
      msgPrefix += ". ";

    assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead);
    assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue,
        valueRead);
  }

  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
      new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}