/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFile.CachingBlockReader;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.util.StringUtils;

/**
 * Provides functionality to write ({@link BlockIndexWriter}) and read
 * ({@link BlockIndexReader}) single-level and multi-level block indexes.
 */
@InterfaceAudience.Private
public class HFileBlockIndex {

  private static final Log LOG = LogFactory.getLog(HFileBlockIndex.class);

  /** The maximum index block size guideline used if none is configured: 128 KB. */
  static final int DEFAULT_MAX_CHUNK_SIZE = 128 * 1024;

  /**
   * The maximum size guideline for index blocks (leaf, intermediate, and
   * root). If not specified, {@link #DEFAULT_MAX_CHUNK_SIZE} is used.
   */
  public static final String MAX_CHUNK_SIZE_KEY = "hfile.index.block.max.size";

  /**
   * The number of bytes stored in each "secondary index" entry in addition to
   * the key bytes in the non-root index block format. The long is the file
   * offset of the deeper-level block the entry points to, and the int is that
   * block's on-disk size, not including the header.
   */
  static final int SECONDARY_INDEX_ENTRY_OVERHEAD = Bytes.SIZEOF_INT
      + Bytes.SIZEOF_LONG;

  /** Error message when trying to use the inline block API in single-level mode. */
  private static final String INLINE_BLOCKS_NOT_ALLOWED =
      "Inline blocks are not allowed in the single-level-only mode";

  /**
   * The size of the metadata record used for finding the mid-key in a
   * multi-level index. Consists of the middle leaf-level index block offset
   * (long), its on-disk size without the header (int), and the mid-key
   * entry's zero-based index in that leaf index block (int).
   */
  private static final int MID_KEY_METADATA_SIZE = Bytes.SIZEOF_LONG +
      2 * Bytes.SIZEOF_INT;

  /**
   * The reader will always hold the root-level index in memory. Index blocks
   * at all other levels will be cached in the LRU block cache in practice,
   * although this API does not enforce that.
   *
   * All non-root (leaf and intermediate) index blocks contain what we call a
   * "secondary index": an array of offsets to the entries within the block.
   * This allows us to do a binary search for the entry corresponding to the
   * given key without having to deserialize the block.
   */
  public static class BlockIndexReader implements HeapSize {
    /** Needed for doing lookups on blocks. */
    private final KVComparator comparator;

    // Root-level data.
    private byte[][] blockKeys;
    private long[] blockOffsets;
    private int[] blockDataSizes;
    private int rootCount = 0;

    // Mid-key metadata.
    private long midLeafBlockOffset = -1;
    private int midLeafBlockOnDiskSize = -1;
    private int midKeyEntry = -1;

    /** Pre-computed mid-key. */
    private AtomicReference<byte[]> midKey = new AtomicReference<byte[]>();

    /**
     * The number of levels in the block index tree. One if there is only the
     * root level, two for root and leaf levels, etc.
     */
    private int searchTreeLevel;

    /** A way to read {@link HFile} blocks at a given offset. */
    private CachingBlockReader cachingBlockReader;

    public BlockIndexReader(final KVComparator c, final int treeLevel,
        final CachingBlockReader cachingBlockReader) {
      this(c, treeLevel);
      this.cachingBlockReader = cachingBlockReader;
    }

    public BlockIndexReader(final KVComparator c, final int treeLevel) {
      comparator = c;
      searchTreeLevel = treeLevel;
    }

    /**
     * @return true if the block index is empty.
     */
    public boolean isEmpty() {
      return blockKeys.length == 0;
    }

    /**
     * Verifies that the block index is non-empty and throws an
     * {@link IllegalStateException} otherwise.
     */
    public void ensureNonEmpty() {
      if (blockKeys.length == 0) {
        throw new IllegalStateException("Block index is empty or not loaded");
      }
    }

    /**
     * Returns the data block which contains the given key.
     *
     * @param key the key we are looking for
     * @param currentBlock the current block, to avoid re-reading the same block
     * @param cacheBlocks whether data blocks should be cached
     * @param pread whether to use positional read
     * @param isCompaction whether this read is part of a compaction
     * @param expectedDataBlockEncoding the data block encoding the caller is
     *          expecting the data block to be in, or null to not perform this
     *          check and return the block irrespective of the encoding
     * @return the data block containing the key, or null if the key is not in
     *         the range covered by this index
     * @throws IOException
     */
    public HFileBlock seekToDataBlock(final Cell key, HFileBlock currentBlock, boolean cacheBlocks,
        boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
        throws IOException {
      BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, currentBlock,
          cacheBlocks, pread, isCompaction, expectedDataBlockEncoding);
      if (blockWithScanInfo == null) {
        return null;
      } else {
        return blockWithScanInfo.getHFileBlock();
      }
    }
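
    // A minimal usage sketch (illustrative only; the variable names below are
    // assumptions, not part of this class): a scanner positioning itself for
    // a seek would typically do something like
    //
    //   HFileBlock dataBlock = index.seekToDataBlock(seekKey, curBlock,
    //       true /* cacheBlocks */, false /* pread */,
    //       false /* isCompaction */, null /* accept any encoding */);
    //   if (dataBlock == null) {
    //     // seekKey precedes the first key of the file.
    //   }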

    /**
     * Returns a BlockWithScanInfo carrying the data block which contains the
     * given key, together with scan info such as the key that precedes the
     * first key of the next indexed block ("nextIndexedKey").
     *
     * @param key the key we are looking for
     * @param currentBlock the current block, to avoid re-reading the same block
     * @param cacheBlocks whether data blocks should be cached
     * @param pread whether to use positional read
     * @param isCompaction whether this read is part of a compaction
     * @param expectedDataBlockEncoding the data block encoding the caller is
     *          expecting the data block to be in, or null to not perform this
     *          check and return the block irrespective of the encoding
     * @return the BlockWithScanInfo containing the data block and the
     *         nextIndexedKey, or null if the key is not in the range covered
     *         by this index
     * @throws IOException
     */
    public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,
        boolean cacheBlocks,
        boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
        throws IOException {
      int rootLevelIndex = rootBlockContainingKey(key);
      if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
        return null;
      }

      // The key that precedes the first key of the next indexed block.
      Cell nextIndexedKey = null;

      // Read the next-level (intermediate or leaf) index block.
      long currentOffset = blockOffsets[rootLevelIndex];
      int currentOnDiskSize = blockDataSizes[rootLevelIndex];

      if (rootLevelIndex < blockKeys.length - 1) {
        nextIndexedKey = new KeyValue.KeyOnlyKeyValue(blockKeys[rootLevelIndex + 1]);
      } else {
        nextIndexedKey = HConstants.NO_NEXT_INDEXED_KEY;
      }

      int lookupLevel = 1; // How many levels deep we are in our lookup.
      int index = -1;

      HFileBlock block;
      while (true) {
        if (currentBlock != null && currentBlock.getOffset() == currentOffset) {
          // Avoid reading the same block again, even with caching turned off.
          // This is crucial for compaction-type workloads which might have
          // caching turned off. This is like a one-block cache inside the
          // scanner.
          block = currentBlock;
        } else {
          // Call HFile's caching block reader API. We always cache index
          // blocks, otherwise we might get terrible performance.
          boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);
          BlockType expectedBlockType;
          if (lookupLevel < searchTreeLevel - 1) {
            expectedBlockType = BlockType.INTERMEDIATE_INDEX;
          } else if (lookupLevel == searchTreeLevel - 1) {
            expectedBlockType = BlockType.LEAF_INDEX;
          } else {
            // This also accounts for ENCODED_DATA.
            expectedBlockType = BlockType.DATA;
          }
          block = cachingBlockReader.readBlock(currentOffset,
              currentOnDiskSize, shouldCache, pread, isCompaction, true,
              expectedBlockType, expectedDataBlockEncoding);
        }

        if (block == null) {
          throw new IOException("Failed to read block at offset " +
              currentOffset + ", onDiskSize=" + currentOnDiskSize);
        }

        // Found a data block: break the loop and check our level in the tree.
        if (block.getBlockType().isData()) {
          break;
        }

        // Not a data block. This must be a leaf-level or intermediate-level
        // index block. We don't allow going deeper than searchTreeLevel.
        if (++lookupLevel > searchTreeLevel) {
          throw new IOException("Search Tree Level overflow: lookupLevel=" +
              lookupLevel + ", searchTreeLevel=" + searchTreeLevel);
        }

        // Locate the entry corresponding to the given key in the non-root
        // (leaf or intermediate-level) index block.
        ByteBuffer buffer = block.getBufferWithoutHeader();
        index = locateNonRootIndexEntry(buffer, key, comparator);
        if (index == -1) {
          KeyValue kv = KeyValueUtil.ensureKeyValue(key);
          throw new IOException("The key "
              + Bytes.toStringBinary(kv.getKey(), kv.getKeyOffset(), kv.getKeyLength())
              + " is before the first key of the non-root index block "
              + block);
        }

        currentOffset = buffer.getLong();
        currentOnDiskSize = buffer.getInt();

        // Only update nextIndexedKey if there is a next indexed key in the
        // current level.
        byte[] tmpNextIndexedKey = getNonRootIndexedKey(buffer, index + 1);
        if (tmpNextIndexedKey != null) {
          nextIndexedKey = new KeyValue.KeyOnlyKeyValue(tmpNextIndexedKey);
        }
      }

      if (lookupLevel != searchTreeLevel) {
        throw new IOException("Reached a data block at level " + lookupLevel +
            " but the number of levels is " + searchTreeLevel);
      }

      // Pass the next indexed key along with the data block.
      return new BlockWithScanInfo(block, nextIndexedKey);
    }
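
    // Illustrative walk (not from the source): with searchTreeLevel = 3 the
    // loop above touches one block per level. The root index (held in memory)
    // points at an intermediate index block, whose matching entry points at a
    // leaf index block, whose matching entry points at the data block that
    // terminates the loop with lookupLevel == 3. With searchTreeLevel = 1 the
    // root entry points directly at the data block and the loop body runs
    // only once.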

    /**
     * An approximation to the {@link HFile}'s mid-key. Operates on block
     * boundaries, and does not go inside blocks. In other words, returns the
     * first key of the middle block of the file.
     *
     * @return the first key of the middle block
     */
    public byte[] midkey() throws IOException {
      if (rootCount == 0)
        throw new IOException("HFile empty");

      byte[] targetMidKey = this.midKey.get();
      if (targetMidKey != null) {
        return targetMidKey;
      }

      if (midLeafBlockOffset >= 0) {
        if (cachingBlockReader == null) {
          throw new IOException("Have to read the middle leaf block but " +
              "no block reader available");
        }

        // Caching, using pread, assuming this is not a compaction.
        HFileBlock midLeafBlock = cachingBlockReader.readBlock(
            midLeafBlockOffset, midLeafBlockOnDiskSize, true, true, false, true,
            BlockType.LEAF_INDEX, null);

        ByteBuffer b = midLeafBlock.getBufferWithoutHeader();
        int numDataBlocks = b.getInt();
        int keyRelOffset = b.getInt(Bytes.SIZEOF_INT * (midKeyEntry + 1));
        // Consecutive secondary index elements differ by the size of a whole
        // (offset, onDiskSize, key) tuple, so subtract the entry overhead to
        // get the key length, as in getNonRootIndexedKey().
        int keyLen = b.getInt(Bytes.SIZEOF_INT * (midKeyEntry + 2)) -
            keyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD;
        int keyOffset = Bytes.SIZEOF_INT * (numDataBlocks + 2) + keyRelOffset
            + SECONDARY_INDEX_ENTRY_OVERHEAD;
        targetMidKey = ByteBufferUtils.toBytes(b, keyOffset, keyLen);
      } else {
        // The middle of the root-level index.
        targetMidKey = blockKeys[rootCount / 2];
      }

      this.midKey.set(targetMidKey);
      return targetMidKey;
    }

    /**
     * @param i index of the root-level block, from 0 to
     *          {@link #getRootBlockCount()} - 1
     * @return the first key stored for the i-th root-level block
     */
    public byte[] getRootBlockKey(int i) {
      return blockKeys[i];
    }

    /**
     * @param i index of the root-level block
     * @return the file offset of the i-th root-level block
     */
    public long getRootBlockOffset(int i) {
      return blockOffsets[i];
    }

    /**
     * @param i index of the root-level block
     * @return the on-disk size of the i-th root-level block
     */
    public int getRootBlockDataSize(int i) {
      return blockDataSizes[i];
    }

    /**
     * @return the number of root-level blocks in this block index
     */
    public int getRootBlockCount() {
      return rootCount;
    }

    /**
     * Finds the root-level index block containing the given key.
     *
     * @param key the key to find
     * @param offset the offset of the key in its byte array
     * @param length the length of the key
     * @return the index of the root-level block containing the key (between 0
     *         and the number of blocks - 1), or -1 if the key is before the
     *         first block's key
     */
    public int rootBlockContainingKey(final byte[] key, int offset, int length) {
      int pos = Bytes.binarySearch(blockKeys, key, offset, length, comparator);
      // pos is between -(blockKeys.length + 1) and blockKeys.length - 1, see
      // the javadoc of Bytes.binarySearch.

      if (pos >= 0) {
        // This means this is an exact match with an element of blockKeys.
        assert pos < blockKeys.length;
        return pos;
      }

      // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
      // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
      // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
      // key < blockKeys[0], meaning the file does not contain the given key.

      int i = -pos - 1;
      assert 0 <= i && i <= blockKeys.length;
      return i - 1;
    }

    /**
     * Finds the root-level index block containing the given key.
     *
     * @param key the key to find
     * @return the index of the root-level block containing the key, or -1 if
     *         the key is before the first block's key
     */
    public int rootBlockContainingKey(final Cell key) {
      int pos = Bytes.binarySearch(blockKeys, key, comparator);
      // pos is between -(blockKeys.length + 1) and blockKeys.length - 1, see
      // the javadoc of Bytes.binarySearch.

      if (pos >= 0) {
        // This means this is an exact match with an element of blockKeys.
        assert pos < blockKeys.length;
        return pos;
      }

      // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
      // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
      // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
      // key < blockKeys[0], meaning the file does not contain the given key.

      int i = -pos - 1;
      assert 0 <= i && i <= blockKeys.length;
      return i - 1;
    }
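
    // Worked example (illustrative): with root-level first keys
    // {"c", "g", "m"}, looking up "h" makes Bytes.binarySearch return
    // -(2 + 1) = -3, so i = 2 and we return 1, i.e. the block whose first key
    // is "g" and which covers ["g", "m"). Looking up "a" returns -1: the key
    // precedes the first block of the file.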

    /**
     * Adds a new entry to the root-level index. Only used when reading the
     * index.
     *
     * @param key the first key of the block
     * @param offset the file offset where the block is stored
     * @param dataSize the on-disk size of the block
     */
    private void add(final byte[] key, final long offset, final int dataSize) {
      blockOffsets[rootCount] = offset;
      blockKeys[rootCount] = key;
      blockDataSizes[rootCount] = dataSize;
      rootCount++;
    }

    /**
     * The indexed key at the i-th position in the given non-root index block.
     * The position starts at 0.
     *
     * @param nonRootIndex the non-root index block buffer, without header
     * @param i the zero-based position
     * @return the key at the i-th position, or null if i is out of range
     */
    private byte[] getNonRootIndexedKey(ByteBuffer nonRootIndex, int i) {
      int numEntries = nonRootIndex.getInt(0);
      if (i < 0 || i >= numEntries) {
        return null;
      }

      // Entries start after the number of entries and the secondary index.
      // The secondary index takes numEntries + 1 ints.
      int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);
      // The target key's offset relative to the end of the secondary index.
      int targetKeyRelOffset = nonRootIndex.getInt(
          Bytes.SIZEOF_INT * (i + 1));

      // The offset of the target key in the blockIndex buffer.
      int targetKeyOffset = entriesOffset     // Skip secondary index
          + targetKeyRelOffset                // Skip all entries until i
          + SECONDARY_INDEX_ENTRY_OVERHEAD;   // Skip offset and on-disk-size

      // We subtract the two consecutive secondary index elements, which gives
      // us the size of the whole (offset, onDiskSize, key) tuple. We then need
      // to subtract the overhead of offset and onDiskSize.
      int targetKeyLength = nonRootIndex.getInt(Bytes.SIZEOF_INT * (i + 2)) -
          targetKeyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD;

      return ByteBufferUtils.toBytes(nonRootIndex, targetKeyOffset, targetKeyLength);
    }
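
    // Non-root index block layout, as read above and written by
    // BlockIndexChunk#writeNonRoot:
    //
    //   int numEntries
    //   int secondaryIndex[numEntries + 1]  // entry offsets relative to the
    //                                       // end of the secondary index; the
    //                                       // extra trailing element is the
    //                                       // total entries size, so entry i
    //                                       // spans secondaryIndex[i + 1] -
    //                                       // secondaryIndex[i] bytes
    //   entries: { long offset, int onDiskSize, byte[] firstKey } * numEntries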

    /**
     * Performs a binary search over a non-root level index block. Utilizes the
     * secondary index, which records the offsets of (offset, onDiskSize,
     * firstKey) tuples of all entries.
     *
     * @param key the key we are searching for
     * @param nonRootIndex the non-root index block buffer, starting with the
     *          secondary index. The position is ignored.
     * @param comparator the comparator used to order the keys
     * @return the index i in [0, numEntries - 1] such that keys[i] <= key <
     *         keys[i + 1], if keys is the array of all keys being searched, or
     *         -1 if key < keys[0]
     */
    static int binarySearchNonRootIndex(Cell key, ByteBuffer nonRootIndex,
        KVComparator comparator) {

      int numEntries = nonRootIndex.getInt(0);
      int low = 0;
      int high = numEntries - 1;
      int mid = 0;

      // Entries start after the number of entries and the secondary index.
      // The secondary index takes numEntries + 1 ints.
      int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);

      // If we imagine a key at position -1 that is less than any key in the
      // block and a key at position numEntries that is greater than any key,
      // we maintain the invariant keys[low - 1] < key < keys[high + 1] while
      // narrowing down the range.
      KeyValue.KeyOnlyKeyValue nonRootIndexKV = new KeyValue.KeyOnlyKeyValue();
      while (low <= high) {
        mid = (low + high) >>> 1;

        // The mid-key's offset relative to the end of the secondary index.
        int midKeyRelOffset = nonRootIndex.getInt(
            Bytes.SIZEOF_INT * (mid + 1));

        // The offset of the middle key in the blockIndex buffer.
        int midKeyOffset = entriesOffset       // Skip secondary index
            + midKeyRelOffset                  // Skip all entries until mid
            + SECONDARY_INDEX_ENTRY_OVERHEAD;  // Skip offset and on-disk-size

        // We subtract the two consecutive secondary index elements, which
        // gives us the size of the whole (offset, onDiskSize, key) tuple. We
        // then need to subtract the overhead of offset and onDiskSize.
        int midLength = nonRootIndex.getInt(Bytes.SIZEOF_INT * (mid + 2)) -
            midKeyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD;

        // Compare the key we are searching for against the middle key.
        nonRootIndexKV.setKey(nonRootIndex.array(),
            nonRootIndex.arrayOffset() + midKeyOffset, midLength);
        int cmp = comparator.compareOnlyKeyPortion(key, nonRootIndexKV);

        // key lives above the midpoint
        if (cmp > 0)
          low = mid + 1; // Maintain the invariant that keys[low - 1] < key
        // key lives below the midpoint
        else if (cmp < 0)
          high = mid - 1; // Maintain the invariant that key < keys[high + 1]
        else
          return mid; // exact match
      }

      // As per our invariant, keys[low - 1] < key < keys[low], meaning that we
      // are searching for the key between low - 1 and low, inclusive.
      if (low != high + 1) {
        throw new IllegalStateException("Binary search broken: low=" + low
            + " instead of " + (high + 1));
      }

      // OK, our invariant says that keys[low - 1] < key < keys[low]. We need
      // to return the index of the last key that is less than or equal to the
      // given key, i.e. low - 1.
      int i = low - 1;

      // Some extra validation on the result.
      if (i < -1 || i >= numEntries) {
        throw new IllegalStateException("Binary search broken: result is " +
            i + " but expected to be between -1 and (numEntries - 1) = " +
            (numEntries - 1));
      }

      return i;
    }

    /**
     * Searches for the last key that is less than or equal to the given key,
     * using the secondary index of a non-root block. In case of success,
     * positions the provided buffer at the entry of interest, where the file
     * offset and the on-disk size can be read.
     *
     * @param nonRootBlock a non-root block buffer, without header. The initial
     *          position does not matter.
     * @param key the key to search for
     * @param comparator the comparator used to order the keys
     * @return the index position where the given key was found, or -1 if the
     *         given key is before the first key of the block
     */
    static int locateNonRootIndexEntry(ByteBuffer nonRootBlock, Cell key,
        KVComparator comparator) {
      int entryIndex = binarySearchNonRootIndex(key, nonRootBlock, comparator);

      if (entryIndex != -1) {
        int numEntries = nonRootBlock.getInt(0);

        // The end of the secondary index and the beginning of the entries.
        int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);

        // The offset of the entry we are interested in, relative to the end of
        // the secondary index.
        int entryRelOffset = nonRootBlock.getInt(Bytes.SIZEOF_INT * (1 + entryIndex));

        nonRootBlock.position(entriesOffset + entryRelOffset);
      }

      return entryIndex;
    }

    /**
     * Read in the root-level index from the given input stream. Must match
     * what was written into the root level by
     * {@link BlockIndexWriter#writeIndexBlocks(FSDataOutputStream)} at the
     * offset that function returned.
     *
     * @param in the buffered input stream or wrapped byte input stream
     * @param numEntries the number of root-level index entries
     * @throws IOException
     */
    public void readRootIndex(DataInput in, final int numEntries)
        throws IOException {
      blockOffsets = new long[numEntries];
      blockKeys = new byte[numEntries][];
      blockDataSizes = new int[numEntries];

      // If the index size is zero, no index was written.
      if (numEntries > 0) {
        for (int i = 0; i < numEntries; ++i) {
          long offset = in.readLong();
          int dataSize = in.readInt();
          byte[] key = Bytes.readByteArray(in);
          add(key, offset, dataSize);
        }
      }
    }
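
    // On-disk layout of each root-level entry, matching
    // BlockIndexChunk#writeRoot on the writer side:
    //
    //   long offset                  // file offset of the referenced block
    //   int dataSize                 // its on-disk size
    //   vInt keyLength, byte[] key   // written by Bytes.writeByteArray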

    /**
     * Read in the root-level index from the given block, wrapping its byte
     * stream. Must match what was written by
     * {@link BlockIndexWriter#writeIndexBlocks(FSDataOutputStream)}.
     *
     * @param blk the HFile block
     * @param numEntries the number of root-level index entries
     * @return the input stream, positioned right after the root-level index
     * @throws IOException
     */
    public DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {
      DataInputStream in = blk.getByteStream();
      readRootIndex(in, numEntries);
      return in;
    }

    /**
     * Read the root-level metadata of a multi-level block index. Based on
     * {@link #readRootIndex(DataInput, int)}, but also reads metadata
     * necessary to compute the mid-key in a multi-level index.
     *
     * @param blk the HFile block
     * @param numEntries the number of root-level index entries
     * @throws IOException
     */
    public void readMultiLevelIndexRoot(HFileBlock blk,
        final int numEntries) throws IOException {
      DataInputStream in = readRootIndex(blk, numEntries);
      // After reading the root index, the checksum bytes have to be
      // subtracted to know whether the mid-key metadata is present.
      int checkSumBytes = blk.totalChecksumBytes();
      if ((in.available() - checkSumBytes) < MID_KEY_METADATA_SIZE) {
        // No mid-key metadata available.
        return;
      }
      midLeafBlockOffset = in.readLong();
      midLeafBlockOnDiskSize = in.readInt();
      midKeyEntry = in.readInt();
    }
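
    // Mid-key metadata layout (MID_KEY_METADATA_SIZE bytes), as produced by
    // BlockIndexChunk#getMidKeyMetadata:
    //
    //   long midLeafBlockOffset      // offset of the middle leaf index block
    //   int midLeafBlockOnDiskSize   // its on-disk size, without header
    //   int midKeyEntry              // zero-based entry index in that block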

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append("size=").append(rootCount).append("\n");
      for (int i = 0; i < rootCount; i++) {
        sb.append("key=").append(KeyValue.keyToString(blockKeys[i]))
            .append("\n offset=").append(blockOffsets[i])
            .append(", dataSize=").append(blockDataSizes[i]).append("\n");
      }
      return sb.toString();
    }

    @Override
    public long heapSize() {
      // Object header, six references (comparator, blockKeys, blockOffsets,
      // blockDataSizes, midKey, cachingBlockReader) and two ints (rootCount,
      // searchTreeLevel).
      long heapSize = ClassSize.align(6 * ClassSize.REFERENCE +
          2 * Bytes.SIZEOF_INT + ClassSize.OBJECT);

      // Mid-key metadata.
      heapSize += MID_KEY_METADATA_SIZE;

      // Calculating the size of blockKeys.
      if (blockKeys != null) {
        // Adding array + references overhead.
        heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length
            * ClassSize.REFERENCE);

        // Adding the byte arrays of the keys themselves.
        for (byte[] key : blockKeys) {
          heapSize += ClassSize.align(ClassSize.ARRAY + key.length);
        }
      }

      if (blockOffsets != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockOffsets.length
            * Bytes.SIZEOF_LONG);
      }

      if (blockDataSizes != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockDataSizes.length
            * Bytes.SIZEOF_INT);
      }

      return ClassSize.align(heapSize);
    }

  }

  /**
   * Writes the block index into the output stream, generating the tree from
   * bottom up. The leaf level is written to disk as a sequence of inline
   * blocks, if it is larger than a certain number of bytes. If the leaf level
   * is not large enough, we write all entries to the root level instead.
   *
   * After all leaf blocks have been written, we end up with an index
   * referencing the resulting leaf index blocks. If that index is larger than
   * the allowed root index size, the writer will break it up into
   * reasonable-size intermediate-level index block chunks, write those chunks
   * out, and create another index referencing those chunks. This is repeated
   * until the remaining index is small enough to become the root index.
   * However, in most practical cases we will only have leaf-level blocks and
   * the root index, or just the root index.
   */
  public static class BlockIndexWriter implements InlineBlockWriter {
    /**
     * While the index is being written, this represents the current block
     * index referencing all leaf blocks, with one exception. If the file is
     * being closed and there are not enough blocks to complete even a single
     * leaf block, no leaf blocks get written and this contains the entire
     * block index. After all levels of the index were written by
     * {@link #writeIndexBlocks(FSDataOutputStream)}, this contains the final
     * root-level index.
     */
    private BlockIndexChunk rootChunk = new BlockIndexChunk();

    /**
     * Current leaf-level chunk. New entries referencing data blocks get added
     * to this chunk until it grows large enough to be written to disk.
     */
    private BlockIndexChunk curInlineChunk = new BlockIndexChunk();

    /**
     * The number of block index levels. This is one if there is only the root
     * level (even empty), two if there is a leaf level and a root level, and
     * higher if there are intermediate levels. This is only final after
     * {@link #writeIndexBlocks(FSDataOutputStream)} has been called. The
     * initial value accounts for the root level, and is increased to two as
     * soon as we find out there is a leaf level in
     * {@link #blockWritten(long, int, int)}.
     */
    private int numLevels = 1;

    private HFileBlock.Writer blockWriter;

    /** The first key of the current inline chunk, for the parent-level index. */
    private byte[] firstKey = null;

    /**
     * The total number of leaf-level entries, i.e. entries referencing data
     * blocks. For the data block index this is equal to the number of data
     * blocks.
     */
    private long totalNumEntries;

    /** Total compressed size of all index blocks. */
    private long totalBlockOnDiskSize;

    /** Total uncompressed size of all index blocks. */
    private long totalBlockUncompressedSize;

    /** The maximum size guideline of all multi-level index blocks. */
    private int maxChunkSize;

    /** Whether we require this block index to always be single-level. */
    private boolean singleLevelOnly;

    /** The cache configuration, or null if cache-on-write is disabled. */
    private CacheConfig cacheConf;

    /** The name of the file this index is being written for, used for caching. */
    private String nameForCaching;

    /** Creates a single-level block index writer. */
    public BlockIndexWriter() {
      this(null, null, null);
      singleLevelOnly = true;
    }

    /**
     * Creates a multi-level block index writer.
     *
     * @param blockWriter the block writer to use to write index blocks
     * @param cacheConf used to determine when and how a block should be
     *          cached-on-write; may be null together with nameForCaching
     * @param nameForCaching the file name to use when caching index blocks
     */
    public BlockIndexWriter(HFileBlock.Writer blockWriter,
        CacheConfig cacheConf, String nameForCaching) {
      if ((cacheConf == null) != (nameForCaching == null)) {
        throw new IllegalArgumentException("Block cache and file name for " +
            "caching must be both specified or both null");
      }

      this.blockWriter = blockWriter;
      this.cacheConf = cacheConf;
      this.nameForCaching = nameForCaching;
      this.maxChunkSize = HFileBlockIndex.DEFAULT_MAX_CHUNK_SIZE;
    }

    public void setMaxChunkSize(int maxChunkSize) {
      if (maxChunkSize <= 0) {
        throw new IllegalArgumentException("Invalid maximum index block size");
      }
      this.maxChunkSize = maxChunkSize;
    }
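
    // A minimal configuration sketch (illustrative; dataBlockWriter,
    // cacheConf, fileName and conf are assumed to come from the surrounding
    // HFile writer, they are not defined in this class):
    //
    //   BlockIndexWriter idxWriter =
    //       new BlockIndexWriter(dataBlockWriter, cacheConf, fileName);
    //   idxWriter.setMaxChunkSize(HFileBlockIndex.getMaxChunkSize(conf));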

    /**
     * Writes the root level and intermediate levels of the block index into
     * the output stream, generating the tree from bottom up. Assumes that the
     * leaf level has been inline-written to the data stream if there is any
     * data for more than one leaf-level block.
     *
     * @param out FSDataOutputStream
     * @return position at which we entered the root-level index
     * @throws IOException
     */
    public long writeIndexBlocks(FSDataOutputStream out) throws IOException {
      if (curInlineChunk != null && curInlineChunk.getNumEntries() != 0) {
        throw new IOException("Trying to write a multi-level block index, " +
            "but there are " + curInlineChunk.getNumEntries() + " entries in " +
            "the last inline chunk.");
      }

      // We need to get the mid-key metadata before we create intermediate
      // indexes and overwrite the root chunk.
      byte[] midKeyMetadata = numLevels > 1 ? rootChunk.getMidKeyMetadata()
          : null;

      if (curInlineChunk != null) {
        while (rootChunk.getRootSize() > maxChunkSize) {
          rootChunk = writeIntermediateLevel(out, rootChunk);
          numLevels += 1;
        }
      }

      // Write the root level.
      long rootLevelIndexPos = out.getPos();

      {
        DataOutput blockStream =
            blockWriter.startWriting(BlockType.ROOT_INDEX);
        rootChunk.writeRoot(blockStream);
        if (midKeyMetadata != null)
          blockStream.write(midKeyMetadata);
        blockWriter.writeHeaderAndData(out);
      }

      // Add the root index block size.
      totalBlockOnDiskSize += blockWriter.getOnDiskSizeWithoutHeader();
      totalBlockUncompressedSize +=
          blockWriter.getUncompressedSizeWithoutHeader();

      if (LOG.isTraceEnabled()) {
        LOG.trace("Wrote a " + numLevels + "-level index with root level at pos "
            + rootLevelIndexPos + ", " + rootChunk.getNumEntries()
            + " root-level entries, " + totalNumEntries + " total entries, "
            + StringUtils.humanReadableInt(this.totalBlockOnDiskSize) +
            " on-disk size, "
            + StringUtils.humanReadableInt(totalBlockUncompressedSize) +
            " total uncompressed size.");
      }
      return rootLevelIndexPos;
    }
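
    // Typical write sequence (a sketch; the steps are driven by the
    // surrounding HFile writer, and the exact call sites are assumptions):
    //
    //   1. addEntry(firstKeyOfBlock, blockOffset, blockOnDiskSize) once per
    //      data block;
    //   2. the inline-block machinery periodically calls shouldWriteBlock(),
    //      writeInlineBlock(out) and blockWritten(...) to flush leaf chunks;
    //   3. at close time, writeIndexBlocks(out) writes intermediate levels
    //      (if any) and the root level, returning the root index position
    //      for the caller to record.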

    /**
     * Writes the block index data as a single level only. Does not do any
     * block framing.
     *
     * @param out the buffered output stream to write the index to. Typically a
     *          stream writing into an {@link HFile} block.
     * @param description a short description of the index being written, used
     *          in a log message
     * @throws IOException
     */
    public void writeSingleLevelIndex(DataOutput out, String description)
        throws IOException {
      expectNumLevels(1);

      if (!singleLevelOnly)
        throw new IOException("Single-level mode is turned off");

      if (rootChunk.getNumEntries() > 0)
        throw new IOException("Root-level entries already added in " +
            "single-level mode");

      rootChunk = curInlineChunk;
      curInlineChunk = new BlockIndexChunk();

      if (LOG.isTraceEnabled()) {
        LOG.trace("Wrote a single-level " + description + " index with "
            + rootChunk.getNumEntries() + " entries, " + rootChunk.getRootSize()
            + " bytes");
      }
      rootChunk.writeRoot(out);
    }

    /**
     * Splits the current level of the block index into intermediate index
     * blocks of permitted size and writes those blocks to disk. Returns the
     * next level of the block index referencing those intermediate-level
     * blocks.
     *
     * @param out the output stream to write the intermediate blocks to
     * @param currentLevel the current level of the block index, such as the
     *          chunk referencing all leaf-level index blocks
     * @return the parent-level block index, which becomes the root index after
     *         a few (usually zero) iterations
     * @throws IOException
     */
    private BlockIndexChunk writeIntermediateLevel(FSDataOutputStream out,
        BlockIndexChunk currentLevel) throws IOException {
      // The parent level to be built from the given level.
      BlockIndexChunk parent = new BlockIndexChunk();

      // The intermediate-level chunk currently being accumulated.
      BlockIndexChunk curChunk = new BlockIndexChunk();

      for (int i = 0; i < currentLevel.getNumEntries(); ++i) {
        curChunk.add(currentLevel.getBlockKey(i),
            currentLevel.getBlockOffset(i), currentLevel.getOnDiskDataSize(i));

        if (curChunk.getRootSize() >= maxChunkSize)
          writeIntermediateBlock(out, parent, curChunk);
      }

      if (curChunk.getNumEntries() > 0) {
        writeIntermediateBlock(out, parent, curChunk);
      }

      return parent;
    }

    private void writeIntermediateBlock(FSDataOutputStream out,
        BlockIndexChunk parent, BlockIndexChunk curChunk) throws IOException {
      long beginOffset = out.getPos();
      DataOutputStream dos = blockWriter.startWriting(
          BlockType.INTERMEDIATE_INDEX);
      curChunk.writeNonRoot(dos);
      byte[] curFirstKey = curChunk.getBlockKey(0);
      blockWriter.writeHeaderAndData(out);

      if (cacheConf != null) {
        HFileBlock blockForCaching = blockWriter.getBlockForCaching(cacheConf);
        cacheConf.getBlockCache().cacheBlock(new BlockCacheKey(nameForCaching,
            beginOffset), blockForCaching);
      }

      // Add the intermediate index block size.
      totalBlockOnDiskSize += blockWriter.getOnDiskSizeWithoutHeader();
      totalBlockUncompressedSize +=
          blockWriter.getUncompressedSizeWithoutHeader();

      // OFFSET is the beginning offset of the chunk of block index entries.
      // SIZE is the total byte size of the chunk of block index entries
      // plus the secondary index size.
      // FIRST_KEY is the first key in the chunk of block index entries.
      parent.add(curFirstKey, beginOffset,
          blockWriter.getOnDiskSizeWithHeader());

      // Clear the current block index chunk.
      curChunk.clear();
      curFirstKey = null;
    }

    /**
     * @return how many block index entries there are in the root level
     */
    public final int getNumRootEntries() {
      return rootChunk.getNumEntries();
    }

    /**
     * @return the number of levels in this block index
     */
    public int getNumLevels() {
      return numLevels;
    }

    private void expectNumLevels(int expectedNumLevels) {
      if (numLevels != expectedNumLevels) {
        throw new IllegalStateException("Number of block index levels is "
            + numLevels + " but is expected to be " + expectedNumLevels);
      }
    }

    /**
     * Whether there is an inline block ready to be written. In general, we
     * write a leaf-level index block as an inline block as soon as its size as
     * serialized in the non-root format reaches a certain threshold.
     */
    @Override
    public boolean shouldWriteBlock(boolean closing) {
      if (singleLevelOnly) {
        throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);
      }

      if (curInlineChunk == null) {
        throw new IllegalStateException("curInlineChunk is null; has shouldWriteBlock been " +
            "called with closing=true and then called again?");
      }

      if (curInlineChunk.getNumEntries() == 0) {
        return false;
      }

      // We do have some entries in the current inline chunk.
      if (closing) {
        if (rootChunk.getNumEntries() == 0) {
          // We did not add any leaf-level blocks yet. Instead of creating a
          // leaf level with one block, move these entries to the root
          // level.
          expectNumLevels(1);
          rootChunk = curInlineChunk;
          curInlineChunk = null;  // Disallow adding any more index entries.
          return false;
        }

        return true;
      } else {
        return curInlineChunk.getNonRootSize() >= maxChunkSize;
      }
    }

    /**
     * Writes out the current inline index block. Inline blocks are non-root
     * blocks, so the non-root index format is used.
     *
     * @param out the output stream to write the inline block to
     */
    @Override
    public void writeInlineBlock(DataOutput out) throws IOException {
      if (singleLevelOnly)
        throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);

      // Write the inline block index to the output stream in the non-root
      // index block format.
      curInlineChunk.writeNonRoot(out);

      // Save the first key of the inline block so that we can add it to the
      // parent-level index.
      firstKey = curInlineChunk.getBlockKey(0);

      // Start a new inline index block.
      curInlineChunk.clear();
    }

    /**
     * Called after an inline block has been written so that we can add an
     * entry referring to that block to the parent-level index.
     */
    @Override
    public void blockWritten(long offset, int onDiskSize, int uncompressedSize) {
      // Add the leaf index block size.
      totalBlockOnDiskSize += onDiskSize;
      totalBlockUncompressedSize += uncompressedSize;

      if (singleLevelOnly)
        throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);

      if (firstKey == null) {
        throw new IllegalStateException("Trying to add second-level index " +
            "entry with offset=" + offset + " and onDiskSize=" + onDiskSize +
            " but the first key was not set in writeInlineBlock");
      }

      if (rootChunk.getNumEntries() == 0) {
        // Before the first leaf-level index entry the index is single-level;
        // adding the first leaf entry makes it two-level.
        expectNumLevels(1);
        numLevels = 2;
      }

      // Add another entry to the second-level index. Include the number of
      // entries in all previous leaf-level chunks for mid-key calculation.
      rootChunk.add(firstKey, offset, onDiskSize, totalNumEntries);
      firstKey = null;
    }

    @Override
    public BlockType getInlineBlockType() {
      return BlockType.LEAF_INDEX;
    }

    /**
     * Adds one index entry to the current leaf-level block. When the
     * leaf-level block gets large enough, it will be flushed to disk as an
     * inline block.
     *
     * @param firstKey the first key of the data block
     * @param blockOffset the offset of the data block
     * @param blockDataSize the on-disk size of the data block ({@link HFile}
     *          format version 2), or the uncompressed size of the data block
     *          ({@link HFile} format version 1)
     */
    public void addEntry(byte[] firstKey, long blockOffset, int blockDataSize) {
      curInlineChunk.add(firstKey, blockOffset, blockDataSize);
      ++totalNumEntries;
    }
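
    // Note on sizing (illustrative arithmetic, not from the source): each
    // addEntry() call grows the current leaf chunk by
    // SECONDARY_INDEX_ENTRY_OVERHEAD + firstKey.length bytes in the non-root
    // format, so with the default 128 KB chunk size and ~50-byte keys a leaf
    // index block holds on the order of two thousand entries before
    // shouldWriteBlock(false) asks for it to be flushed.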

    /**
     * @throws IOException if we happened to write a multi-level index
     */
    public void ensureSingleLevel() throws IOException {
      if (numLevels > 1) {
        throw new IOException("Wrote a " + numLevels + "-level index with " +
            rootChunk.getNumEntries() + " root-level entries, but " +
            "this is expected to be a single-level block index.");
      }
    }

    /**
     * @return true if we are using cache-on-write. This is configured by the
     *         caller of the constructor, by passing either a valid block cache
     *         or null.
     */
    @Override
    public boolean getCacheOnWrite() {
      return cacheConf != null && cacheConf.shouldCacheIndexesOnWrite();
    }

    /**
     * The total uncompressed size of the root index block, intermediate-level
     * index blocks, and leaf-level index blocks.
     *
     * @return the total uncompressed size of all index blocks
     */
    public long getTotalUncompressedSize() {
      return totalBlockUncompressedSize;
    }

  }

  /**
   * A single chunk of the block index in the process of writing. The data in
   * this chunk can become a leaf-level, intermediate-level, or root index
   * block.
   */
  static class BlockIndexChunk {

    /** First keys of the key range corresponding to each index entry. */
    private final List<byte[]> blockKeys = new ArrayList<byte[]>();

    /** Block offset in the backing stream. */
    private final List<Long> blockOffsets = new ArrayList<Long>();

    /** On-disk data sizes of lower-level data or index blocks. */
    private final List<Integer> onDiskDataSizes = new ArrayList<Integer>();

    /**
     * The cumulative number of sub-entries, i.e. entries on deeper levels,
     * corresponding to each entry. Used to find the entry containing a given
     * sub-entry when computing the mid-key.
     */
    private final List<Long> numSubEntriesAt = new ArrayList<Long>();

    /**
     * The offset of the next entry to be added, relative to the end of the
     * "secondary index" in the "non-root" format representation of this index
     * chunk. This is the next value to be added to the secondary index.
     */
    private int curTotalNonRootEntrySize = 0;

    /**
     * The accumulated size of this chunk if stored in the root index format.
     */
    private int curTotalRootSize = 0;

    /**
     * The "secondary index" used for binary search over variable-length
     * records in a "non-root" format block. These offsets are relative to the
     * end of this secondary index.
     */
    private final List<Integer> secondaryIndexOffsetMarks =
        new ArrayList<Integer>();

    /**
     * Adds a new entry to this block index chunk.
     *
     * @param firstKey the first key in the block pointed to by this entry
     * @param blockOffset the offset of the next-level block pointed to by this
     *          entry
     * @param onDiskDataSize the on-disk data size of the block pointed to by
     *          this entry, including header
     * @param curTotalNumSubEntries if this chunk is the root index chunk under
     *          construction, this specifies the current total number of
     *          sub-entries in all leaf-level chunks, including the one
     *          corresponding to the second-level entry being added; pass -1
     *          if sub-entry stats are not being tracked
     */
    void add(byte[] firstKey, long blockOffset, int onDiskDataSize,
        long curTotalNumSubEntries) {
      // Record the offset for the secondary index.
      secondaryIndexOffsetMarks.add(curTotalNonRootEntrySize);
      curTotalNonRootEntrySize += SECONDARY_INDEX_ENTRY_OVERHEAD
          + firstKey.length;

      curTotalRootSize += Bytes.SIZEOF_LONG + Bytes.SIZEOF_INT
          + WritableUtils.getVIntSize(firstKey.length) + firstKey.length;

      blockKeys.add(firstKey);
      blockOffsets.add(blockOffset);
      onDiskDataSizes.add(onDiskDataSize);

      if (curTotalNumSubEntries != -1) {
        numSubEntriesAt.add(curTotalNumSubEntries);

        // Make sure the parallel arrays are in sync.
        if (numSubEntriesAt.size() != blockKeys.size()) {
          throw new IllegalStateException("Only have key/value count " +
              "stats for " + numSubEntriesAt.size() + " block index " +
              "entries out of " + blockKeys.size());
        }
      }
    }

    /**
     * The same as {@link #add(byte[], long, int, long)} but does not take the
     * key/value count into account. Used for single-level indexes.
     *
     * @see #add(byte[], long, int, long)
     */
    public void add(byte[] firstKey, long blockOffset, int onDiskDataSize) {
      add(firstKey, blockOffset, onDiskDataSize, -1);
    }

    public void clear() {
      blockKeys.clear();
      blockOffsets.clear();
      onDiskDataSizes.clear();
      secondaryIndexOffsetMarks.clear();
      numSubEntriesAt.clear();
      curTotalNonRootEntrySize = 0;
      curTotalRootSize = 0;
    }

    /**
     * Finds the entry corresponding to the deeper-level index block containing
     * the given deeper-level entry (a "sub-entry"), assuming a global 0-based
     * ordering of sub-entries.
     *
     * <p><i>Implementation note.</i> We are looking for i such that
     * numSubEntriesAt[i - 1] <= k < numSubEntriesAt[i], because a deeper-level
     * block #i (0-based) contains sub-entries numSubEntriesAt[i - 1] through
     * numSubEntriesAt[i] - 1, assuming a global 0-based ordering of
     * sub-entries. i is by definition the insertion point of k in
     * numSubEntriesAt.
     *
     * @param k sub-entry index, from 0 to the total number of sub-entries - 1
     * @return the 0-based index of the entry corresponding to the given
     *         sub-entry
     */
    public int getEntryBySubEntry(long k) {
      // We define the mid-key as the key corresponding to the k'th sub-entry
      // (0-based).
      int i = Collections.binarySearch(numSubEntriesAt, k);

      // Exact match: numSubEntriesAt[i] = k. This means chunks #0 through #i
      // contain exactly k sub-entries, and the sub-entry #k (0-based) is in
      // the (i + 1)'th chunk.
      if (i >= 0)
        return i + 1;

      // Inexact match. Return the insertion point.
      return -i - 1;
    }
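
    // Worked example (illustrative): if the leaf chunks hold 4, 3 and 5
    // entries, numSubEntriesAt = [4, 7, 12]. For k = 5, binarySearch returns
    // -(1 + 1) = -2, so the insertion point 1 is returned: sub-entries 4..6
    // live in chunk #1. For k = 7, binarySearch finds an exact match at
    // index 1, so we return 2: chunks #0..#1 contain exactly 7 sub-entries,
    // and sub-entry #7 is the first entry of chunk #2.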

    /**
     * Used when writing the root block index of a multi-level block index.
     * Serializes additional information allowing the reader to efficiently
     * identify the mid-key.
     *
     * @return a few serialized fields for finding the mid-key
     * @throws IOException if the mid-key metadata could not be created
     */
    public byte[] getMidKeyMetadata() throws IOException {
      ByteArrayOutputStream baos = new ByteArrayOutputStream(
          MID_KEY_METADATA_SIZE);
      DataOutputStream baosDos = new DataOutputStream(baos);
      long totalNumSubEntries = numSubEntriesAt.get(blockKeys.size() - 1);
      if (totalNumSubEntries == 0) {
        throw new IOException("No leaf-level entries, mid-key unavailable");
      }
      long midKeySubEntry = (totalNumSubEntries - 1) / 2;
      int midKeyEntry = getEntryBySubEntry(midKeySubEntry);

      baosDos.writeLong(blockOffsets.get(midKeyEntry));
      baosDos.writeInt(onDiskDataSizes.get(midKeyEntry));

      long numSubEntriesBefore = midKeyEntry > 0
          ? numSubEntriesAt.get(midKeyEntry - 1) : 0;
      long subEntryWithinEntry = midKeySubEntry - numSubEntriesBefore;
      if (subEntryWithinEntry < 0 || subEntryWithinEntry > Integer.MAX_VALUE) {
        throw new IOException("Could not identify mid-key index within the "
            + "leaf-level block containing mid-key: out of range ("
            + subEntryWithinEntry + ", numSubEntriesBefore="
            + numSubEntriesBefore + ", midKeySubEntry=" + midKeySubEntry
            + ")");
      }

      baosDos.writeInt((int) subEntryWithinEntry);

      if (baosDos.size() != MID_KEY_METADATA_SIZE) {
        throw new IOException("Could not write mid-key metadata: size=" +
            baosDos.size() + ", correct size: " + MID_KEY_METADATA_SIZE);
      }

      // Close just to be good citizens, although this has no effect.
      baos.close();

      return baos.toByteArray();
    }

    /**
     * Writes the block index chunk in the non-root index block format. This
     * format contains the number of entries, an index of integer offsets for
     * binary search over variable-length records, and tuples of block offset,
     * on-disk block size, and the first key for each entry.
     *
     * @param out the output stream to write the chunk to
     * @throws IOException
     */
    void writeNonRoot(DataOutput out) throws IOException {
      // The number of entries in the block.
      out.writeInt(blockKeys.size());

      if (secondaryIndexOffsetMarks.size() != blockKeys.size()) {
        throw new IOException("Corrupted block index chunk writer: " +
            blockKeys.size() + " entries but " +
            secondaryIndexOffsetMarks.size() + " secondary index items");
      }

      // For each entry, write a "secondary index" of relative offsets to the
      // entries from the end of the secondary index. This works because at
      // read time we read the number of entries and know where the secondary
      // index ends.
      for (int currentSecondaryIndex : secondaryIndexOffsetMarks)
        out.writeInt(currentSecondaryIndex);

      // We include one more element in the secondary index to calculate the
      // size of each entry more easily by subtracting consecutive secondary
      // index elements.
      out.writeInt(curTotalNonRootEntrySize);

      for (int i = 0; i < blockKeys.size(); ++i) {
        out.writeLong(blockOffsets.get(i));
        out.writeInt(onDiskDataSizes.get(i));
        out.write(blockKeys.get(i));
      }
    }
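
    // Worked example (illustrative): two entries with 3-byte and 5-byte first
    // keys produce the secondary index [0, 15, 32]: entry #0 starts at
    // relative offset 0 and occupies 12 + 3 = 15 bytes
    // (SECONDARY_INDEX_ENTRY_OVERHEAD + key length), entry #1 starts at 15
    // and occupies 12 + 5 = 17 bytes, and the final element (32) is the total
    // entries size, letting the reader compute any entry's length by
    // subtracting consecutive elements.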

    /**
     * @return the size of this chunk if stored in the non-root index block
     *         format
     */
    int getNonRootSize() {
      return Bytes.SIZEOF_INT                          // Number of entries
          + Bytes.SIZEOF_INT * (blockKeys.size() + 1)  // Secondary index
          + curTotalNonRootEntrySize;                  // All entries
    }

    /**
     * Writes this chunk into the given output stream in the root block index
     * format. This format is similar to the {@link HFile} version 1 block
     * index format, except that we store the on-disk size of the block instead
     * of its uncompressed size.
     *
     * @param out the data output stream to write the block index to. Typically
     *          a stream writing into an {@link HFile} block.
     * @throws IOException
     */
    void writeRoot(DataOutput out) throws IOException {
      for (int i = 0; i < blockKeys.size(); ++i) {
        out.writeLong(blockOffsets.get(i));
        out.writeInt(onDiskDataSizes.get(i));
        Bytes.writeByteArray(out, blockKeys.get(i));
      }
    }

    /**
     * @return the size of this chunk if stored in the root index block format
     */
    int getRootSize() {
      return curTotalRootSize;
    }

    /**
     * @return the number of entries in this block index chunk
     */
    public int getNumEntries() {
      return blockKeys.size();
    }

    public byte[] getBlockKey(int i) {
      return blockKeys.get(i);
    }

    public long getBlockOffset(int i) {
      return blockOffsets.get(i);
    }

    public int getOnDiskDataSize(int i) {
      return onDiskDataSizes.get(i);
    }

    public long getCumulativeNumKV(int i) {
      if (i < 0)
        return 0;
      return numSubEntriesAt.get(i);
    }

  }

  public static int getMaxChunkSize(Configuration conf) {
    return conf.getInt(MAX_CHUNK_SIZE_KEY, DEFAULT_MAX_CHUNK_SIZE);
  }
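
  // Example (illustrative): raising the index chunk size guideline to 256 KB
  // through the configuration:
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, 256 * 1024);
  //   int chunkSize = HFileBlockIndex.getMaxChunkSize(conf);  // 262144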
}