1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.io.hfile;
21
22 import static org.junit.Assert.assertEquals;
23 import static org.junit.Assert.assertFalse;
24 import static org.junit.Assert.assertTrue;
25
26 import java.io.ByteArrayOutputStream;
27 import java.io.DataOutputStream;
28 import java.io.IOException;
29 import java.nio.ByteBuffer;
30 import java.util.ArrayList;
31 import java.util.Arrays;
32 import java.util.Collection;
33 import java.util.HashSet;
34 import java.util.List;
35 import java.util.Random;
36 import java.util.Set;
37
38 import org.apache.commons.logging.Log;
39 import org.apache.commons.logging.LogFactory;
40 import org.apache.hadoop.conf.Configuration;
41 import org.apache.hadoop.fs.FSDataInputStream;
42 import org.apache.hadoop.fs.FSDataOutputStream;
43 import org.apache.hadoop.fs.FileSystem;
44 import org.apache.hadoop.fs.Path;
45 import org.apache.hadoop.hbase.*;
46 import org.apache.hadoop.hbase.fs.HFileSystem;
47 import org.apache.hadoop.hbase.io.compress.Compression;
48 import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
49 import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
50 import org.apache.hadoop.hbase.util.Bytes;
51 import org.apache.hadoop.hbase.util.ClassSize;
52 import org.junit.Before;
53 import org.junit.Test;
54 import org.junit.experimental.categories.Category;
55 import org.junit.runner.RunWith;
56 import org.junit.runners.Parameterized;
57 import org.junit.runners.Parameterized.Parameters;
58
59 @RunWith(Parameterized.class)
60 @Category(MediumTests.class)
61 public class TestHFileBlockIndex {
62
63 @Parameters
64 public static Collection<Object[]> compressionAlgorithms() {
65 return HBaseTestingUtility.COMPRESSION_ALGORITHMS_PARAMETERIZED;
66 }
67
/**
 * Creates a test instance bound to one compression algorithm.
 *
 * @param compr the compression algorithm every test in this instance uses
 */
public TestHFileBlockIndex(Compression.Algorithm compr) {
  super();
  this.compr = compr;
}
71
72 private static final Log LOG = LogFactory.getLog(TestHFileBlockIndex.class);
73
74 private static final int NUM_DATA_BLOCKS = 1000;
75 private static final HBaseTestingUtility TEST_UTIL =
76 new HBaseTestingUtility();
77
78 private static final int SMALL_BLOCK_SIZE = 4096;
79 private static final int NUM_KV = 10000;
80
81 private static FileSystem fs;
82 private Path path;
83 private Random rand;
84 private long rootIndexOffset;
85 private int numRootEntries;
86 private int numLevels;
87 private static final List<byte[]> keys = new ArrayList<byte[]>();
88 private final Compression.Algorithm compr;
89 private byte[] firstKeyInFile;
90 private Configuration conf;
91
92 private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 };
93 private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 };
94 private static final int[] UNCOMPRESSED_INDEX_SIZES =
95 { 19187, 21813, 23086 };
96
97 private static final boolean includesMemstoreTS = true;
98
99 static {
100 assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length;
101 assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length;
102 }
103
104 @Before
105 public void setUp() throws IOException {
106 keys.clear();
107 rand = new Random(2389757);
108 firstKeyInFile = null;
109 conf = TEST_UTIL.getConfiguration();
110
111
112 conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
113
114 fs = HFileSystem.get(conf);
115 }
116
117 @Test
118 public void testBlockIndex() throws IOException {
119 path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr);
120 writeWholeIndex();
121 readIndex();
122 }
123
124
125
126
127
128 private static class BlockReaderWrapper implements HFile.CachingBlockReader {
129
130 private HFileBlock.FSReader realReader;
131 private long prevOffset;
132 private long prevOnDiskSize;
133 private boolean prevPread;
134 private HFileBlock prevBlock;
135
136 public int hitCount = 0;
137 public int missCount = 0;
138
139 public BlockReaderWrapper(HFileBlock.FSReader realReader) {
140 this.realReader = realReader;
141 }
142
143 @Override
144 public HFileBlock readBlock(long offset, long onDiskSize,
145 boolean cacheBlock, boolean pread, boolean isCompaction,
146 BlockType expectedBlockType)
147 throws IOException {
148 if (offset == prevOffset && onDiskSize == prevOnDiskSize &&
149 pread == prevPread) {
150 hitCount += 1;
151 return prevBlock;
152 }
153
154 missCount += 1;
155 prevBlock = realReader.readBlockData(offset, onDiskSize,
156 -1, pread);
157 prevOffset = offset;
158 prevOnDiskSize = onDiskSize;
159 prevPread = pread;
160
161 return prevBlock;
162 }
163 }
164
165 public void readIndex() throws IOException {
166 long fileSize = fs.getFileStatus(path).getLen();
167 LOG.info("Size of " + path + ": " + fileSize);
168
169 FSDataInputStream istream = fs.open(path);
170 HFileBlock.FSReader blockReader = new HFileBlock.FSReaderV2(istream,
171 compr, fs.getFileStatus(path).getLen());
172
173 BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
174 HFileBlockIndex.BlockIndexReader indexReader =
175 new HFileBlockIndex.BlockIndexReader(
176 Bytes.BYTES_RAWCOMPARATOR, numLevels, brw);
177
178 indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
179 fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);
180
181 long prevOffset = -1;
182 int i = 0;
183 int expectedHitCount = 0;
184 int expectedMissCount = 0;
185 LOG.info("Total number of keys: " + keys.size());
186 for (byte[] key : keys) {
187 assertTrue(key != null);
188 assertTrue(indexReader != null);
189 HFileBlock b = indexReader.seekToDataBlock(key, 0, key.length, null,
190 true, true, false);
191 if (Bytes.BYTES_RAWCOMPARATOR.compare(key, firstKeyInFile) < 0) {
192 assertTrue(b == null);
193 ++i;
194 continue;
195 }
196
197 String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key);
198
199 assertTrue("seekToDataBlock failed for " + keyStr, b != null);
200
201 if (prevOffset == b.getOffset()) {
202 assertEquals(++expectedHitCount, brw.hitCount);
203 } else {
204 LOG.info("First key in a new block: " + keyStr + ", block offset: "
205 + b.getOffset() + ")");
206 assertTrue(b.getOffset() > prevOffset);
207 assertEquals(++expectedMissCount, brw.missCount);
208 prevOffset = b.getOffset();
209 }
210 ++i;
211 }
212
213 istream.close();
214 }
215
216 private void writeWholeIndex() throws IOException {
217 assertEquals(0, keys.size());
218 HFileBlock.Writer hbw = new HFileBlock.Writer(compr, null,
219 includesMemstoreTS, HFile.DEFAULT_CHECKSUM_TYPE,
220 HFile.DEFAULT_BYTES_PER_CHECKSUM);
221 FSDataOutputStream outputStream = fs.create(path);
222 HFileBlockIndex.BlockIndexWriter biw =
223 new HFileBlockIndex.BlockIndexWriter(hbw, null, null);
224
225 for (int i = 0; i < NUM_DATA_BLOCKS; ++i) {
226 hbw.startWriting(BlockType.DATA).write(
227 String.valueOf(rand.nextInt(1000)).getBytes());
228 long blockOffset = outputStream.getPos();
229 hbw.writeHeaderAndData(outputStream);
230
231 byte[] firstKey = null;
232 for (int j = 0; j < 16; ++j) {
233 byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i * 16 + j);
234 keys.add(k);
235 if (j == 8)
236 firstKey = k;
237 }
238 assertTrue(firstKey != null);
239 if (firstKeyInFile == null)
240 firstKeyInFile = firstKey;
241 biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader());
242
243 writeInlineBlocks(hbw, outputStream, biw, false);
244 }
245 writeInlineBlocks(hbw, outputStream, biw, true);
246 rootIndexOffset = biw.writeIndexBlocks(outputStream);
247 outputStream.close();
248
249 numLevels = biw.getNumLevels();
250 numRootEntries = biw.getNumRootEntries();
251
252 LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" +
253 numRootEntries + ", rootIndexOffset=" + rootIndexOffset);
254 }
255
256 private void writeInlineBlocks(HFileBlock.Writer hbw,
257 FSDataOutputStream outputStream, HFileBlockIndex.BlockIndexWriter biw,
258 boolean isClosing) throws IOException {
259 while (biw.shouldWriteBlock(isClosing)) {
260 long offset = outputStream.getPos();
261 biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType()));
262 hbw.writeHeaderAndData(outputStream);
263 biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(),
264 hbw.getUncompressedSizeWithoutHeader());
265 LOG.info("Wrote an inline index block at " + offset + ", size " +
266 hbw.getOnDiskSizeWithHeader());
267 }
268 }
269
270 private static final long getDummyFileOffset(int i) {
271 return i * 185 + 379;
272 }
273
274 private static final int getDummyOnDiskSize(int i) {
275 return i * i * 37 + i * 19 + 13;
276 }
277
278 @Test
279 public void testSecondaryIndexBinarySearch() throws IOException {
280 int numTotalKeys = 99;
281 assertTrue(numTotalKeys % 2 == 1);
282
283
284 int numSearchedKeys = (numTotalKeys - 1) / 2;
285
286 ByteArrayOutputStream baos = new ByteArrayOutputStream();
287 DataOutputStream dos = new DataOutputStream(baos);
288
289 dos.writeInt(numSearchedKeys);
290 int curAllEntriesSize = 0;
291 int numEntriesAdded = 0;
292
293
294
295 int secondaryIndexEntries[] = new int[numTotalKeys];
296
297 for (int i = 0; i < numTotalKeys; ++i) {
298 byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i * 2);
299 keys.add(k);
300 String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): ";
301 StringBuilder padding = new StringBuilder();
302 while (msgPrefix.length() + padding.length() < 70)
303 padding.append(' ');
304 msgPrefix += padding;
305 if (i % 2 == 1) {
306 dos.writeInt(curAllEntriesSize);
307 secondaryIndexEntries[i] = curAllEntriesSize;
308 LOG.info(msgPrefix + "secondary index entry #" + ((i - 1) / 2) +
309 ", offset " + curAllEntriesSize);
310 curAllEntriesSize += k.length
311 + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
312 ++numEntriesAdded;
313 } else {
314 secondaryIndexEntries[i] = -1;
315 LOG.info(msgPrefix + "not in the searched array");
316 }
317 }
318
319
320 for (int i = 0; i < keys.size() - 1; ++i)
321 assertTrue(Bytes.BYTES_RAWCOMPARATOR.compare(keys.get(i),
322 keys.get(i + 1)) < 0);
323
324 dos.writeInt(curAllEntriesSize);
325 assertEquals(numSearchedKeys, numEntriesAdded);
326 int secondaryIndexOffset = dos.size();
327 assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2),
328 secondaryIndexOffset);
329
330 for (int i = 1; i <= numTotalKeys - 1; i += 2) {
331 assertEquals(dos.size(),
332 secondaryIndexOffset + secondaryIndexEntries[i]);
333 long dummyFileOffset = getDummyFileOffset(i);
334 int dummyOnDiskSize = getDummyOnDiskSize(i);
335 LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" +
336 dummyOnDiskSize + " at offset " + dos.size());
337 dos.writeLong(dummyFileOffset);
338 dos.writeInt(dummyOnDiskSize);
339 LOG.debug("Stored key " + ((i - 1) / 2) +" at offset " + dos.size());
340 dos.write(keys.get(i));
341 }
342
343 dos.writeInt(curAllEntriesSize);
344
345 ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray());
346 for (int i = 0; i < numTotalKeys; ++i) {
347 byte[] searchKey = keys.get(i);
348 byte[] arrayHoldingKey = new byte[searchKey.length +
349 searchKey.length / 2];
350
351
352
353 System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2,
354 searchKey.length);
355
356 int searchResult = BlockIndexReader.binarySearchNonRootIndex(
357 arrayHoldingKey, searchKey.length / 2, searchKey.length, nonRootIndex,
358 Bytes.BYTES_RAWCOMPARATOR);
359 String lookupFailureMsg = "Failed to look up key #" + i + " ("
360 + Bytes.toStringBinary(searchKey) + ")";
361
362 int expectedResult;
363 int referenceItem;
364
365 if (i % 2 == 1) {
366
367
368 expectedResult = (i - 1) / 2;
369 referenceItem = i;
370 } else {
371
372
373
374 expectedResult = i / 2 - 1;
375 referenceItem = i - 1;
376 }
377
378 assertEquals(lookupFailureMsg, expectedResult, searchResult);
379
380
381
382 boolean locateBlockResult =
383 (BlockIndexReader.locateNonRootIndexEntry(nonRootIndex, arrayHoldingKey,
384 searchKey.length / 2, searchKey.length, Bytes.BYTES_RAWCOMPARATOR) != -1);
385
386 if (i == 0) {
387 assertFalse(locateBlockResult);
388 } else {
389 assertTrue(locateBlockResult);
390 String errorMsg = "i=" + i + ", position=" + nonRootIndex.position();
391 assertEquals(errorMsg, getDummyFileOffset(referenceItem),
392 nonRootIndex.getLong());
393 assertEquals(errorMsg, getDummyOnDiskSize(referenceItem),
394 nonRootIndex.getInt());
395 }
396 }
397
398 }
399
400 @Test
401 public void testBlockIndexChunk() throws IOException {
402 BlockIndexChunk c = new BlockIndexChunk();
403 ByteArrayOutputStream baos = new ByteArrayOutputStream();
404 int N = 1000;
405 int[] numSubEntriesAt = new int[N];
406 int numSubEntries = 0;
407 for (int i = 0; i < N; ++i) {
408 baos.reset();
409 DataOutputStream dos = new DataOutputStream(baos);
410 c.writeNonRoot(dos);
411 assertEquals(c.getNonRootSize(), dos.size());
412
413 baos.reset();
414 dos = new DataOutputStream(baos);
415 c.writeRoot(dos);
416 assertEquals(c.getRootSize(), dos.size());
417
418 byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i);
419 numSubEntries += rand.nextInt(5) + 1;
420 keys.add(k);
421 c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries);
422 }
423
424
425
426
427 for (int i = 0; i < N; ++i) {
428 for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1];
429 j < numSubEntriesAt[i];
430 ++j) {
431 assertEquals(i, c.getEntryBySubEntry(j));
432 }
433 }
434 }
435
436
437 @Test
438 public void testHeapSizeForBlockIndex() throws IOException {
439 Class<HFileBlockIndex.BlockIndexReader> cl =
440 HFileBlockIndex.BlockIndexReader.class;
441 long expected = ClassSize.estimateBase(cl, false);
442
443 HFileBlockIndex.BlockIndexReader bi =
444 new HFileBlockIndex.BlockIndexReader(Bytes.BYTES_RAWCOMPARATOR, 1);
445 long actual = bi.heapSize();
446
447
448
449
450 expected -= ClassSize.align(3 * ClassSize.ARRAY);
451
452 if (expected != actual) {
453 ClassSize.estimateBase(cl, true);
454 assertEquals(expected, actual);
455 }
456 }
457
458
459
460
461
462
463
464
465 @Test
466 public void testHFileWriterAndReader() throws IOException {
467 Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
468 "hfile_for_block_index");
469 CacheConfig cacheConf = new CacheConfig(conf);
470 BlockCache blockCache = cacheConf.getBlockCache();
471
472 for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) {
473 int indexBlockSize = INDEX_CHUNK_SIZES[testI];
474 int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
475 LOG.info("Index block size: " + indexBlockSize + ", compression: "
476 + compr);
477
478 blockCache.evictBlocksByHfileName(hfilePath.getName());
479
480 conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
481 Set<String> keyStrSet = new HashSet<String>();
482 byte[][] keys = new byte[NUM_KV][];
483 byte[][] values = new byte[NUM_KV][];
484
485
486 {
487 HFile.Writer writer =
488 HFile.getWriterFactory(conf, cacheConf)
489 .withPath(fs, hfilePath)
490 .withBlockSize(SMALL_BLOCK_SIZE)
491 .withCompression(compr)
492 .withComparator(KeyValue.KEY_COMPARATOR)
493 .create();
494 Random rand = new Random(19231737);
495
496 for (int i = 0; i < NUM_KV; ++i) {
497 byte[] row = TestHFileWriterV2.randomOrderedKey(rand, i);
498
499
500 byte[] k = KeyValue.createFirstOnRow(row, 0, row.length, row, 0, 0,
501 row, 0, 0).getKey();
502
503 byte[] v = TestHFileWriterV2.randomValue(rand);
504 writer.append(k, v);
505 keys[i] = k;
506 values[i] = v;
507 keyStrSet.add(Bytes.toStringBinary(k));
508
509 if (i > 0) {
510 assertTrue(KeyValue.KEY_COMPARATOR.compare(keys[i - 1],
511 keys[i]) < 0);
512 }
513 }
514
515 writer.close();
516 }
517
518
519 HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf);
520 assertEquals(expectedNumLevels,
521 reader.getTrailer().getNumDataIndexLevels());
522
523 assertTrue(Bytes.equals(keys[0], reader.getFirstKey()));
524 assertTrue(Bytes.equals(keys[NUM_KV - 1], reader.getLastKey()));
525 LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1]));
526
527 for (boolean pread : new boolean[] { false, true }) {
528 HFileScanner scanner = reader.getScanner(true, pread);
529 for (int i = 0; i < NUM_KV; ++i) {
530 checkSeekTo(keys, scanner, i);
531 checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(),
532 scanner.getValue());
533 }
534 assertTrue(scanner.seekTo());
535 for (int i = NUM_KV - 1; i >= 0; --i) {
536 checkSeekTo(keys, scanner, i);
537 checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(),
538 scanner.getValue());
539 }
540 }
541
542
543 HFileReaderV2 reader2 = (HFileReaderV2) reader;
544 HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader();
545
546 HFileBlock.BlockIterator iter = fsReader.blockRange(0,
547 reader.getTrailer().getLoadOnOpenDataOffset());
548 HFileBlock block;
549 List<byte[]> blockKeys = new ArrayList<byte[]>();
550 while ((block = iter.nextBlock()) != null) {
551 if (block.getBlockType() != BlockType.LEAF_INDEX)
552 return;
553 ByteBuffer b = block.getBufferReadOnly();
554 int n = b.getInt();
555
556 int entriesOffset = Bytes.SIZEOF_INT * (n + 2);
557
558
559 for (int i = 0; i < n; ++i) {
560 int keyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 1));
561 int nextKeyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 2));
562 int keyLen = nextKeyRelOffset - keyRelOffset;
563 int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset +
564 HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
565 byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset
566 + keyLen);
567 String blockKeyStr = Bytes.toString(blockKey);
568 blockKeys.add(blockKey);
569
570
571
572 assertTrue("Invalid block key from leaf-level block: " + blockKeyStr,
573 keyStrSet.contains(blockKeyStr));
574 }
575 }
576
577
578 assertEquals(
579 Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)),
580 Bytes.toStringBinary(reader.midkey()));
581
582 assertEquals(UNCOMPRESSED_INDEX_SIZES[testI],
583 reader.getTrailer().getUncompressedDataIndexSize());
584
585 reader.close();
586 reader2.close();
587 }
588 }
589
590 private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i)
591 throws IOException {
592 assertEquals("Failed to seek to key #" + i + " ("
593 + Bytes.toStringBinary(keys[i]) + ")", 0, scanner.seekTo(keys[i]));
594 }
595
596 private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr,
597 ByteBuffer buf) {
598 assertEquals(msgPrefix + ": expected " + Bytes.toStringBinary(arr)
599 + ", actual " + Bytes.toStringBinary(buf), 0, Bytes.compareTo(arr, 0,
600 arr.length, buf.array(), buf.arrayOffset(), buf.limit()));
601 }
602
603
604 private void checkKeyValue(String msgPrefix, byte[] expectedKey,
605 byte[] expectedValue, ByteBuffer keyRead, ByteBuffer valueRead) {
606 if (!msgPrefix.isEmpty())
607 msgPrefix += ". ";
608
609 assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead);
610 assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue,
611 valueRead);
612 }
613
614
615 }
616