/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFile.CachingBlockReader;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.CompoundBloomFilterWriter;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.util.StringUtils;

/**
 * Provides functionality to write ({@link BlockIndexWriter}) and read
 * ({@link BlockIndexReader}) single-level and multi-level block indexes.
 *
 * Examples of how to use the block index writer can be found in
 * {@link CompoundBloomFilterWriter} and HFileWriterV2. Examples of how to
 * use the reader can be found in HFileReaderV2 and the block index tests.
 */
@InterfaceAudience.Private
public class HFileBlockIndex {

  private static final Log LOG = LogFactory.getLog(HFileBlockIndex.class);

  static final int DEFAULT_MAX_CHUNK_SIZE = 128 * 1024;

  /**
   * The maximum size guideline for index blocks (both leaf, intermediate,
   * and root). If not specified, {@link #DEFAULT_MAX_CHUNK_SIZE} is used.
   */
  public static final String MAX_CHUNK_SIZE_KEY = "hfile.index.block.max.size";

  /**
   * The number of bytes stored in each "secondary index" entry in addition
   * to the key bytes in the non-root index block format: a long for the
   * file offset of the deeper-level block the entry points to, and an int
   * for that block's on-disk size.
   */
  static final int SECONDARY_INDEX_ENTRY_OVERHEAD = Bytes.SIZEOF_INT
      + Bytes.SIZEOF_LONG;

  /**
   * Error message when trying to use the inline-block API in single-level
   * mode.
   */
  private static final String INLINE_BLOCKS_NOT_ALLOWED =
      "Inline blocks are not allowed in the single-level-only mode";

  /**
   * The size of a meta-data record used for finding the mid-key in a
   * multi-level index: the offset of the middle leaf-level index block
   * (long), its on-disk size without header (int), and the mid-key entry's
   * zero-based index in that leaf index block (int).
   */
  private static final int MID_KEY_METADATA_SIZE = Bytes.SIZEOF_LONG +
      2 * Bytes.SIZEOF_INT;

  /**
   * The reader will always hold the root level index in memory. Index
   * blocks at all other levels will be cached in the LRU cache in practice,
   * although this API does not enforce that.
   *
   * All non-root (leaf and intermediate) index blocks contain what we call a
   * "secondary index": an array of offsets to the entries within the block.
   * This allows us to do a binary search for the entry corresponding to the
   * given key without having to deserialize the block.
   */
  public static class BlockIndexReader implements HeapSize {
    /** Needed doing lookup on blocks. */
    private final RawComparator<byte[]> comparator;

    // Root-level data.
    private byte[][] blockKeys;
    private long[] blockOffsets;
    private int[] blockDataSizes;
    private int rootByteSize = 0;
    private int rootCount = 0;

    // Mid-key metadata.
    private long midLeafBlockOffset = -1;
    private int midLeafBlockOnDiskSize = -1;
    private int midKeyEntry = -1;

    /** Pre-computed mid-key, set once it has been requested. */
    private AtomicReference<byte[]> midKey = new AtomicReference<byte[]>();

    /**
     * The number of levels in the block index tree. One if there is only the
     * root level, two for root and leaf levels, etc.
     */
    private int searchTreeLevel;

    /** A way to read {@link HFile} blocks at a given offset. */
    private CachingBlockReader cachingBlockReader;

    public BlockIndexReader(final RawComparator<byte[]> c, final int treeLevel,
        final CachingBlockReader cachingBlockReader) {
      this(c, treeLevel);
      this.cachingBlockReader = cachingBlockReader;
    }

    public BlockIndexReader(final RawComparator<byte[]> c, final int treeLevel)
    {
      comparator = c;
      searchTreeLevel = treeLevel;
    }

    /**
     * @return true if the block index is empty.
     */
    public boolean isEmpty() {
      return blockKeys.length == 0;
    }

    /**
     * Verifies that the block index is non-empty and throws an
     * {@link IllegalStateException} otherwise. Helps to catch the cases when
     * the index is requested before it has been loaded.
     */
    public void ensureNonEmpty() {
      if (blockKeys.length == 0) {
        throw new IllegalStateException("Block index is empty or not loaded");
      }
    }

    /**
     * Returns the data block which contains the given key.
     *
     * @param key the key we are looking for
     * @param keyOffset the offset of the key in its byte array
     * @param keyLength the length of the key
     * @param currentBlock the current block, to avoid re-reading the same
     *          block
     * @param cacheBlocks whether to cache the block on read
     * @param pread whether to use positional read
     * @param isCompaction whether this read is on behalf of a compaction
     * @return the data block containing the key, or null if the key is
     *         before the first key of the file
     * @throws IOException
     */
    public HFileBlock seekToDataBlock(final byte[] key, int keyOffset,
        int keyLength, HFileBlock currentBlock, boolean cacheBlocks,
        boolean pread, boolean isCompaction)
        throws IOException {
      BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, keyOffset, keyLength,
          currentBlock, cacheBlocks, pread, isCompaction);
      if (blockWithScanInfo == null) {
        return null;
      } else {
        return blockWithScanInfo.getHFileBlock();
      }
    }
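
    // A minimal caller sketch for seekToDataBlock (illustrative only; the
    // "indexReader" and "key" names and the flag values are hypothetical):
    //
    //   HFileBlock dataBlock = indexReader.seekToDataBlock(key, 0, key.length,
    //       null,    // no current block to reuse
    //       true,    // cacheBlocks
    //       false,   // pread
    //       false);  // isCompaction
    //   if (dataBlock == null) {
    //     // the key is before the first key of the file
    //   }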

    /**
     * Returns the BlockWithScanInfo which contains the data block with other
     * scan info such as the next indexed key.
     *
     * @param key the key we are looking for
     * @param keyOffset the offset of the key in its byte array
     * @param keyLength the length of the key
     * @param currentBlock the current block, to avoid re-reading the same
     *          block
     * @param cacheBlocks whether to cache the block on read
     * @param pread whether to use positional read
     * @param isCompaction whether this read is on behalf of a compaction
     * @return the BlockWithScanInfo which contains the data block with scan
     *         info such as the next indexed key, or null if the key is
     *         before the first key of the file
     * @throws IOException
     */
    public BlockWithScanInfo loadDataBlockWithScanInfo(final byte[] key, int keyOffset,
        int keyLength, HFileBlock currentBlock, boolean cacheBlocks,
        boolean pread, boolean isCompaction)
        throws IOException {
      int rootLevelIndex = rootBlockContainingKey(key, keyOffset, keyLength);
      if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
        return null;
      }

      // The next indexed key after the data block containing the given key,
      // used to bound the scan within that block.
      byte[] nextIndexedKey = null;

      // Read the next-level (intermediate or leaf) index block.
      long currentOffset = blockOffsets[rootLevelIndex];
      int currentOnDiskSize = blockDataSizes[rootLevelIndex];

      if (rootLevelIndex < blockKeys.length - 1) {
        nextIndexedKey = blockKeys[rootLevelIndex + 1];
      } else {
        nextIndexedKey = HConstants.NO_NEXT_INDEXED_KEY;
      }

      int lookupLevel = 1; // How many levels deep we are in our lookup.
      int index = -1;

      HFileBlock block;
      while (true) {

        if (currentBlock != null && currentBlock.getOffset() == currentOffset)
        {
          // Avoid reading the same block again, even with caching turned
          // off. This is crucial for compaction-type workloads which might
          // have caching turned off. This also saves on queries to the
          // block cache.
          block = currentBlock;
        } else {
          // Call HFile's caching block reader API. We always cache index
          // blocks, otherwise we might get terrible performance.
          boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);
          BlockType expectedBlockType;
          if (lookupLevel < searchTreeLevel - 1) {
            expectedBlockType = BlockType.INTERMEDIATE_INDEX;
          } else if (lookupLevel == searchTreeLevel - 1) {
            expectedBlockType = BlockType.LEAF_INDEX;
          } else {
            // This also accounts for ENCODED_DATA.
            expectedBlockType = BlockType.DATA;
          }
          block = cachingBlockReader.readBlock(currentOffset,
              currentOnDiskSize, shouldCache, pread, isCompaction,
              expectedBlockType);
        }

        if (block == null) {
          throw new IOException("Failed to read block at offset " +
              currentOffset + ", onDiskSize=" + currentOnDiskSize);
        }

        // Found a data block, break the loop and check our level in the tree.
        if (block.getBlockType().equals(BlockType.DATA) ||
            block.getBlockType().equals(BlockType.ENCODED_DATA)) {
          break;
        }

        // Not a data block. This must be a leaf-level or intermediate-level
        // index block. We don't allow going deeper than searchTreeLevel.
        if (++lookupLevel > searchTreeLevel) {
          throw new IOException("Search Tree Level overflow: lookupLevel="+
              lookupLevel + ", searchTreeLevel=" + searchTreeLevel);
        }

        // Locate the entry corresponding to the given key in the non-root
        // (leaf or intermediate level) index block.
        ByteBuffer buffer = block.getBufferWithoutHeader();
        index = locateNonRootIndexEntry(buffer, key, keyOffset, keyLength, comparator);
        if (index == -1) {
          throw new IOException("The key "
              + Bytes.toStringBinary(key, keyOffset, keyLength)
              + " is before the first key of the non-root index block "
              + block);
        }

        currentOffset = buffer.getLong();
        currentOnDiskSize = buffer.getInt();

        // Only update the next indexed key if there is one at the current
        // level.
        byte[] tmpNextIndexedKey = getNonRootIndexedKey(buffer, index + 1);
        if (tmpNextIndexedKey != null) {
          nextIndexedKey = tmpNextIndexedKey;
        }
      }

      if (lookupLevel != searchTreeLevel) {
        throw new IOException("Reached a data block at level " + lookupLevel +
            " but the number of levels is " + searchTreeLevel);
      }

      // Pass the next indexed key along with the data block.
      return new BlockWithScanInfo(block, nextIndexedKey);
    }

    /**
     * An approximation to the {@link HFile}'s mid-key. Operates on block
     * boundaries, and does not go inside blocks. In other words, returns the
     * first key of the middle block of the file.
     *
     * @return the first key of the middle block
     */
    public byte[] midkey() throws IOException {
      if (rootCount == 0)
        throw new IOException("HFile empty");

      byte[] targetMidKey = this.midKey.get();
      if (targetMidKey != null) {
        return targetMidKey;
      }

      if (midLeafBlockOffset >= 0) {
        if (cachingBlockReader == null) {
          throw new IOException("Have to read the middle leaf block but " +
              "no block reader available");
        }

        // Caching, using pread, assuming this is not a compaction.
        HFileBlock midLeafBlock = cachingBlockReader.readBlock(
            midLeafBlockOffset, midLeafBlockOnDiskSize, true, true, false,
            BlockType.LEAF_INDEX);

        ByteBuffer b = midLeafBlock.getBufferWithoutHeader();
        int numDataBlocks = b.getInt();
        int keyRelOffset = b.getInt(Bytes.SIZEOF_INT * (midKeyEntry + 1));
        int keyLen = b.getInt(Bytes.SIZEOF_INT * (midKeyEntry + 2)) -
            keyRelOffset;
        int keyOffset = b.arrayOffset() +
            Bytes.SIZEOF_INT * (numDataBlocks + 2) + keyRelOffset +
            SECONDARY_INDEX_ENTRY_OVERHEAD;
        targetMidKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset + keyLen);
      } else {
        // The middle of the root-level index.
        targetMidKey = blockKeys[rootCount / 2];
      }

      this.midKey.set(targetMidKey);
      return targetMidKey;
    }

    /**
     * @param i from 0 to {@link #getRootBlockCount()} - 1
     */
    public byte[] getRootBlockKey(int i) {
      return blockKeys[i];
    }

    /**
     * @param i from 0 to {@link #getRootBlockCount()} - 1
     */
    public long getRootBlockOffset(int i) {
      return blockOffsets[i];
    }

    /**
     * @param i zero-based index of a root-level block
     * @return the on-disk size of the root-level block for version 2, or the
     *         uncompressed size for version 1
     */
    public int getRootBlockDataSize(int i) {
      return blockDataSizes[i];
    }

    /**
     * @return the number of root-level blocks in this block index
     */
    public int getRootBlockCount() {
      return rootCount;
    }

    /**
     * Finds the root-level index block containing the given key.
     *
     * @param key Key to find
     * @param offset the offset of the key in its byte array
     * @param length the length of the key
     * @return the index of the root-level block containing the key (between
     *         0 and the number of blocks - 1), or -1 if the key is before
     *         the first block's key
     */
    public int rootBlockContainingKey(final byte[] key, int offset,
        int length) {
      int pos = Bytes.binarySearch(blockKeys, key, offset, length,
          comparator);
      // pos is between -(blockKeys.length + 1) and blockKeys.length - 1, see
      // binarySearch's javadoc.

      if (pos >= 0) {
        // This means this is an exact match with an element of blockKeys.
        assert pos < blockKeys.length;
        return pos;
      }

      // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key <
      // blockKeys[i], and i is in [0, blockKeys.length]. We are returning
      // i - 1 such that blockKeys[i - 1] <= key < blockKeys[i]. In
      // particular, i - 1 is -1 if the given key is less than blockKeys[0],
      // and we return -1 in that case.
      int i = -pos - 1;
      assert 0 <= i && i <= blockKeys.length;
      return i - 1;
    }
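
    // Worked example of the contract above: with root-level keys
    // {"bbb", "ddd", "fff"}, looking up "ccc" returns 0 (the "bbb" block),
    // "ddd" returns 1 (exact match), and "aaa" returns -1 (before the first
    // block). This mirrors Bytes.binarySearch, which returns the matching
    // index on an exact hit and (-(insertionPoint) - 1) otherwise.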

    /**
     * Adds a new entry to this block index. Only used while reading the
     * root-level index.
     *
     * @param key the index key for the block
     * @param offset the file offset at which the block is stored
     * @param dataSize the on-disk size of the block
     */
    private void add(final byte[] key, final long offset, final int dataSize) {
      blockOffsets[rootCount] = offset;
      blockKeys[rootCount] = key;
      blockDataSizes[rootCount] = dataSize;

      rootCount++;
      rootByteSize += SECONDARY_INDEX_ENTRY_OVERHEAD + key.length;
    }

    /**
     * The indexed key at the ith position in the given non-root index block.
     * The position starts at 0.
     *
     * @param nonRootIndex a non-root index block buffer without header
     * @param i the zero-based position of the entry
     * @return the indexed key at the ith position, or null if i is out of
     *         range
     */
    private byte[] getNonRootIndexedKey(ByteBuffer nonRootIndex, int i) {
      int numEntries = nonRootIndex.getInt(0);
      if (i < 0 || i >= numEntries) {
        return null;
      }

      // Entries start after the number of entries and the secondary index.
      // The secondary index takes numEntries + 1 ints.
      int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);

      // The target key's offset relative to the end of the secondary index.
      int targetKeyRelOffset = nonRootIndex.getInt(
          Bytes.SIZEOF_INT * (i + 1));

      // The offset of the target key in the block index buffer.
      int targetKeyOffset = entriesOffset     // Skip secondary index
          + targetKeyRelOffset                // Skip preceding entries
          + SECONDARY_INDEX_ENTRY_OVERHEAD;   // Skip offset and on-disk size

      // We subtract the two consecutive secondary index elements, which
      // gives us the size of the whole (offset, onDiskSize, key) tuple. We
      // then need to subtract the overhead of offset and onDiskSize.
      int targetKeyLength = nonRootIndex.getInt(Bytes.SIZEOF_INT * (i + 2)) -
          targetKeyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD;

      int from = nonRootIndex.arrayOffset() + targetKeyOffset;
      int to = from + targetKeyLength;
      return Arrays.copyOfRange(nonRootIndex.array(), from, to);
    }
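
    // For reference, the non-root (leaf/intermediate) index block layout
    // assumed by getNonRootIndexedKey above and the binary search below
    // (offsets relative to the start of the block data, header excluded):
    //
    //   int                 numEntries
    //   int[numEntries + 1] secondary index: each entry's offset relative to
    //                       the end of the secondary index, plus one trailing
    //                       mark holding the total size of all entries
    //   then, back to back for each entry:
    //     long offset, int onDiskSize, byte[] firstKey
    //
    // Hence entry i's key starts at
    //   SIZEOF_INT * (numEntries + 2) + relOffset(i) + SECONDARY_INDEX_ENTRY_OVERHEAD,
    // which is exactly the arithmetic used here and the format produced by
    // BlockIndexChunk.writeNonRoot.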

    /**
     * Performs a binary search over a non-root level index block. Utilizes
     * the secondary index, which records the offsets of (offset, onDiskSize,
     * firstKey) tuples of all entries.
     *
     * @param key the key we are searching for
     * @param keyOffset the offset of the key in its byte array
     * @param keyLength the length of the key
     * @param nonRootIndex the non-root index block buffer, starting with the
     *          secondary index. The position is ignored.
     * @return the index i in [0, numEntries - 1] such that keys[i] <= key <
     *         keys[i + 1], if keys is the array of all keys being searched,
     *         or -1 otherwise
     */
    static int binarySearchNonRootIndex(byte[] key, int keyOffset,
        int keyLength, ByteBuffer nonRootIndex,
        RawComparator<byte[]> comparator) {

      int numEntries = nonRootIndex.getInt(0);
      int low = 0;
      int high = numEntries - 1;
      int mid = 0;

      // Entries start after the number of entries and the secondary index.
      // The secondary index takes numEntries + 1 ints.
      int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);

      // If we imagine an array of blocks that covers the entire key space in
      // the file, this loop maintains the invariant that the requested key
      // is always above blocks[low - 1] and below blocks[high + 1].
      while (low <= high) {
        mid = (low + high) >>> 1;

        // The mid key's offset relative to the end of the secondary index.
        int midKeyRelOffset = nonRootIndex.getInt(
            Bytes.SIZEOF_INT * (mid + 1));

        // The offset of the middle key in the block index buffer.
        int midKeyOffset = entriesOffset       // Skip secondary index
            + midKeyRelOffset                  // Skip entries until mid
            + SECONDARY_INDEX_ENTRY_OVERHEAD;  // Skip offset and on-disk size

        // We subtract the two consecutive secondary index elements, which
        // gives us the size of the whole (offset, onDiskSize, key) tuple. We
        // then need to subtract the overhead of offset and onDiskSize.
        int midLength = nonRootIndex.getInt(Bytes.SIZEOF_INT * (mid + 2)) -
            midKeyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD;

        int cmp = comparator.compare(key, keyOffset, keyLength,
            nonRootIndex.array(), nonRootIndex.arrayOffset() + midKeyOffset,
            midLength);

        // key lives above the midpoint
        if (cmp > 0)
          low = mid + 1; // Maintain the invariant that keys[low - 1] < key
        // key lives below the midpoint
        else if (cmp < 0)
          high = mid - 1; // Maintain the invariant that key < keys[high + 1]
        else
          return mid; // exact match
      }

      // As per our invariant, keys[low - 1] < key < keys[high + 1], meaning
      // that low - 1 < high + 1 and (low - high) <= 1. As per the loop break
      // condition, low >= high + 1. Therefore, low = high + 1.

      if (low != high + 1) {
        throw new IllegalStateException("Binary search broken: low=" + low
            + " instead of " + (high + 1));
      }

      // OK, our invariant says that keys[low - 1] < key < keys[low]. We need
      // to return i = low - 1.
      int i = low - 1;

      // Some extra validation on the result.
      if (i < -1 || i >= numEntries) {
        throw new IllegalStateException("Binary search broken: result is " +
            i + " but expected to be between -1 and (numEntries - 1) = " +
            (numEntries - 1));
      }

      return i;
    }

    /**
     * Searches for one key using the secondary index in a non-root block. In
     * case of success, positions the provided buffer at the entry of
     * interest, where the file offset and the on-disk size can be read.
     *
     * @param nonRootBlock a non-root block without header. Initial position
     *          does not matter.
     * @param key the byte array containing the key
     * @param keyOffset the offset of the key in its byte array
     * @param keyLength the length of the key
     * @return the index position where the key was found, or -1 if the given
     *         key is before the first key of the block
     */
    static int locateNonRootIndexEntry(ByteBuffer nonRootBlock, byte[] key,
        int keyOffset, int keyLength, RawComparator<byte[]> comparator) {
      int entryIndex = binarySearchNonRootIndex(key, keyOffset, keyLength,
          nonRootBlock, comparator);

      if (entryIndex != -1) {
        int numEntries = nonRootBlock.getInt(0);

        // The end of the secondary index and the beginning of the entries
        // themselves.
        int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);

        // The offset of the entry we are interested in, relative to the end
        // of the secondary index.
        int entryRelOffset = nonRootBlock.getInt(Bytes.SIZEOF_INT
            * (1 + entryIndex));

        nonRootBlock.position(entriesOffset + entryRelOffset);
      }

      return entryIndex;
    }

    /**
     * Reads in the root-level index from the given input stream. Must match
     * what was written into the root level by
     * {@link BlockIndexWriter#writeIndexBlocks(FSDataOutputStream)} at the
     * offset that function returned.
     *
     * @param in the buffered input stream or wrapped byte input stream
     * @param numEntries the number of root-level index entries
     * @throws IOException
     */
    public void readRootIndex(DataInput in, final int numEntries)
        throws IOException {
      blockOffsets = new long[numEntries];
      blockKeys = new byte[numEntries][];
      blockDataSizes = new int[numEntries];

      // If the index size is zero, no index was written.
      if (numEntries > 0) {
        for (int i = 0; i < numEntries; ++i) {
          long offset = in.readLong();
          int dataSize = in.readInt();
          byte[] key = Bytes.readByteArray(in);
          add(key, offset, dataSize);
        }
      }
    }
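
    // A sketch of the root-level entry serialization this method consumes;
    // it must mirror BlockIndexChunk.writeRoot. The writer side, per entry
    // (stand-alone illustration, not a method of this class):
    //
    //   out.writeLong(blockOffset);           // file offset of the block
    //   out.writeInt(onDiskDataSize);         // block's on-disk size
    //   Bytes.writeByteArray(out, firstKey);  // vint-prefixed key bytes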

    /**
     * Reads in the root-level index from the given block. Must match what
     * was written into the root level by
     * {@link BlockIndexWriter#writeIndexBlocks(FSDataOutputStream)} at the
     * offset that function returned.
     *
     * @param blk the HFile block
     * @param numEntries the number of root-level index entries
     * @return the stream positioned right after the root-level index, which
     *         may be followed by mid-key metadata
     * @throws IOException
     */
    public DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {
      DataInputStream in = blk.getByteStream();
      readRootIndex(in, numEntries);
      return in;
    }

    /**
     * Reads the root-level metadata of a multi-level block index. Based on
     * {@link #readRootIndex(DataInput, int)}, but also reads metadata
     * necessary to compute the mid-key in a multi-level index.
     *
     * @param blk the HFile block
     * @param numEntries the number of root-level index entries
     * @throws IOException
     */
    public void readMultiLevelIndexRoot(HFileBlock blk,
        final int numEntries) throws IOException {
      DataInputStream in = readRootIndex(blk, numEntries);
      // After reading the root index, the checksum bytes have to be
      // subtracted to know whether the mid-key metadata exists.
      int checkSumBytes = blk.totalChecksumBytes();
      if ((in.available() - checkSumBytes) < MID_KEY_METADATA_SIZE) {
        // No mid-key metadata available.
        return;
      }
      midLeafBlockOffset = in.readLong();
      midLeafBlockOnDiskSize = in.readInt();
      midKeyEntry = in.readInt();
    }

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append("size=").append(rootCount).append("\n");
      for (int i = 0; i < rootCount; i++) {
        sb.append("key=").append(KeyValue.keyToString(blockKeys[i]))
            .append("\n offset=").append(blockOffsets[i])
            .append(", dataSize=").append(blockDataSizes[i]).append("\n");
      }
      return sb.toString();
    }

    @Override
    public long heapSize() {
      long heapSize = ClassSize.align(6 * ClassSize.REFERENCE +
          3 * Bytes.SIZEOF_INT + ClassSize.OBJECT);

      // Mid-key metadata.
      heapSize += MID_KEY_METADATA_SIZE;

      // Calculating the size of blockKeys
      if (blockKeys != null) {
        // Adding array + references overhead
        heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length
            * ClassSize.REFERENCE);

        // Adding byte[] overhead for each key
        for (byte[] key : blockKeys) {
          heapSize += ClassSize.align(ClassSize.ARRAY + key.length);
        }
      }

      if (blockOffsets != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockOffsets.length
            * Bytes.SIZEOF_LONG);
      }

      if (blockDataSizes != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockDataSizes.length
            * Bytes.SIZEOF_INT);
      }

      return ClassSize.align(heapSize);
    }

  }
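
  // A minimal sketch of the BlockIndexWriter lifecycle, assuming an
  // enclosing HFile writer similar to HFileWriterV2 (the "hfileBlockWriter",
  // "conf" and "outputStream" names and the data block loop are
  // hypothetical):
  //
  //   BlockIndexWriter idx = new BlockIndexWriter(hfileBlockWriter,
  //       null, null);  // cache-on-write disabled
  //   idx.setMaxChunkSize(HFileBlockIndex.getMaxChunkSize(conf));
  //   for (/* each data block written */) {
  //     idx.addEntry(firstKeyOfBlock, blockOffset, blockOnDiskSize);
  //     // The inline-block protocol (shouldWriteBlock / writeInlineBlock /
  //     // blockWritten) is normally driven by the enclosing HFile writer.
  //   }
  //   long rootIndexOffset = idx.writeIndexBlocks(outputStream);
  //   // rootIndexOffset is what later goes into the HFile trailer.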

  /**
   * Writes the block index into the output stream. Generates the tree from
   * bottom up. The leaf level is written to disk as a sequence of inline
   * blocks, if it is larger than a certain number of bytes. If the leaf
   * level is not large enough, we write all entries to the root level
   * instead.
   *
   * After all leaf blocks have been written, we end up with an index
   * referencing the resulting leaf index blocks. If that index is larger
   * than the allowed root index size, the writer will break it up into
   * reasonable-size intermediate-level index block chunks, write those
   * chunks out, and create another index referencing those chunks. This will
   * be repeated until the remaining index is small enough to become the root
   * index. However, in most practical cases we will only have leaf-level
   * blocks and the root index, or just the root index.
   */
  public static class BlockIndexWriter implements InlineBlockWriter {
    /**
     * While the index is being written, this represents the current block
     * index referencing all leaf blocks, with one exception. If the file is
     * being closed and there are not enough blocks to complete even a single
     * leaf block, no leaf blocks get written and this contains the entire
     * block index. After all levels of the index were written by
     * {@link #writeIndexBlocks(FSDataOutputStream)}, this contains the final
     * root-level index.
     */
    private BlockIndexChunk rootChunk = new BlockIndexChunk();

    /**
     * Current leaf-level chunk. New entries referencing data blocks get
     * added to this chunk until it grows large enough to be written to disk.
     */
    private BlockIndexChunk curInlineChunk = new BlockIndexChunk();

    /**
     * The number of block index levels. This is one if there is only the
     * root level (even empty), two if there is a leaf level and a root
     * level, and higher if there are intermediate levels. This is only final
     * after {@link #writeIndexBlocks(FSDataOutputStream)} has been called.
     * The initial value accounts for the root level, and will be increased
     * to two as soon as we find out there is a leaf level in
     * {@link #blockWritten(long, int, int)}.
     */
    private int numLevels = 1;

    private HFileBlock.Writer blockWriter;
    private byte[] firstKey = null;

    /**
     * The total number of leaf-level entries, i.e. the number of entries in
     * the leaf-level blocks. For the data block index this is equal to the
     * number of data blocks.
     */
    private long totalNumEntries;

    /** Total compressed size of all index blocks. */
    private long totalBlockOnDiskSize;

    /** Total uncompressed size of all index blocks. */
    private long totalBlockUncompressedSize;

    /** The maximum size guideline of all multi-level index blocks. */
    private int maxChunkSize;

    /** Whether we require this block index to always be single-level. */
    private boolean singleLevelOnly;

    /** Block cache, or null if cache-on-write is disabled. */
    private BlockCache blockCache;

    /** Name to use for computing cache keys. */
    private String nameForCaching;

    /** Creates a single-level block index writer. */
    public BlockIndexWriter() {
      this(null, null, null);
      singleLevelOnly = true;
    }

    /**
     * Creates a multi-level block index writer.
     *
     * @param blockWriter the block writer to use to write index blocks
     * @param blockCache if this is not null, index blocks will be cached on
     *          write into this block cache
     * @param nameForCaching the file name to use when computing cache keys
     */
    public BlockIndexWriter(HFileBlock.Writer blockWriter,
        BlockCache blockCache, String nameForCaching) {
      if ((blockCache == null) != (nameForCaching == null)) {
        throw new IllegalArgumentException("Block cache and file name for " +
            "caching must be both specified or both null");
      }

      this.blockWriter = blockWriter;
      this.blockCache = blockCache;
      this.nameForCaching = nameForCaching;
      this.maxChunkSize = HFileBlockIndex.DEFAULT_MAX_CHUNK_SIZE;
    }

    public void setMaxChunkSize(int maxChunkSize) {
      if (maxChunkSize <= 0) {
        throw new IllegalArgumentException("Invalid maximum index block size");
      }
      this.maxChunkSize = maxChunkSize;
    }
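
    // For example, an enclosing writer would typically wire this up from the
    // configuration (see getMaxChunkSize at the bottom of this file), so
    // that "hfile.index.block.max.size" can override the 128 KB default:
    //
    //   indexWriter.setMaxChunkSize(HFileBlockIndex.getMaxChunkSize(conf));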

    /**
     * Writes the root level and intermediate levels of the block index into
     * the output stream, generating the tree from bottom up. Assumes that
     * the leaf level has been inline-written to the disk if there is enough
     * data for more than one leaf block. We iterate by breaking the current
     * level of the block index, starting with the index of all leaf-level
     * blocks, into chunks small enough to be written to disk, and generate
     * its parent level, until we end up with a level small enough to become
     * the root level.
     *
     * If the leaf level is not large enough, there is no inline block index
     * anymore, so we only write that level of block index to disk as the
     * root level.
     *
     * @param out the output stream to write the index blocks to
     * @return the position at which the root-level index starts
     * @throws IOException
     */
    public long writeIndexBlocks(FSDataOutputStream out) throws IOException {
      if (curInlineChunk != null && curInlineChunk.getNumEntries() != 0) {
        throw new IOException("Trying to write a multi-level block index, " +
            "but there are " + curInlineChunk.getNumEntries() + " entries " +
            "in the last inline chunk.");
      }

      // We need to get the mid-key metadata before we create intermediate
      // indexes and overwrite the root chunk.
      byte[] midKeyMetadata = numLevels > 1 ? rootChunk.getMidKeyMetadata()
          : null;

      if (curInlineChunk != null) {
        while (rootChunk.getRootSize() > maxChunkSize) {
          rootChunk = writeIntermediateLevel(out, rootChunk);
          numLevels += 1;
        }
      }

      // Write the root level.
      long rootLevelIndexPos = out.getPos();

      {
        DataOutput blockStream =
            blockWriter.startWriting(BlockType.ROOT_INDEX);
        rootChunk.writeRoot(blockStream);
        if (midKeyMetadata != null)
          blockStream.write(midKeyMetadata);
        blockWriter.writeHeaderAndData(out);
      }

      // Add root index block size.
      totalBlockOnDiskSize += blockWriter.getOnDiskSizeWithoutHeader();
      totalBlockUncompressedSize +=
          blockWriter.getUncompressedSizeWithoutHeader();

      if (LOG.isTraceEnabled()) {
        LOG.trace("Wrote a " + numLevels + "-level index with root level at pos "
            + rootLevelIndexPos + ", " + rootChunk.getNumEntries()
            + " root-level entries, " + totalNumEntries + " total entries, "
            + StringUtils.humanReadableInt(this.totalBlockOnDiskSize) +
            " on-disk size, "
            + StringUtils.humanReadableInt(totalBlockUncompressedSize) +
            " total uncompressed size.");
      }
      return rootLevelIndexPos;
    }

    /**
     * Writes the block index data as a single-level block index. Does not do
     * any block framing.
     *
     * @param out the buffered output stream to write the index to. Typically
     *          a stream writing into an {@link HFile} block.
     * @param description a short description of the index being written.
     *          Used in a log message.
     * @throws IOException
     */
    public void writeSingleLevelIndex(DataOutput out, String description)
        throws IOException {
      expectNumLevels(1);

      if (!singleLevelOnly)
        throw new IOException("Single-level mode is turned off");

      if (rootChunk.getNumEntries() > 0)
        throw new IOException("Root-level entries already added in " +
            "single-level mode");

      rootChunk = curInlineChunk;
      curInlineChunk = new BlockIndexChunk();

      if (LOG.isTraceEnabled()) {
        LOG.trace("Wrote a single-level " + description + " index with "
            + rootChunk.getNumEntries() + " entries, " + rootChunk.getRootSize()
            + " bytes");
      }
      rootChunk.writeRoot(out);
    }

    /**
     * Splits the current level of the block index into intermediate index
     * blocks of permitted size and writes those blocks to disk. Returns the
     * next level of the block index referencing those intermediate-level
     * blocks.
     *
     * @param out the output stream to write the intermediate blocks to
     * @param currentLevel the current level of the block index, such as the
     *          chunk referencing all leaf-level index blocks
     * @return the parent-level block index, which becomes the root index
     *         after a few (usually zero) iterations
     * @throws IOException
     */
    private BlockIndexChunk writeIntermediateLevel(FSDataOutputStream out,
        BlockIndexChunk currentLevel) throws IOException {
      // Entries referencing the intermediate-level blocks we are about to
      // create.
      BlockIndexChunk parent = new BlockIndexChunk();

      // The current intermediate-level block index chunk.
      BlockIndexChunk curChunk = new BlockIndexChunk();

      for (int i = 0; i < currentLevel.getNumEntries(); ++i) {
        curChunk.add(currentLevel.getBlockKey(i),
            currentLevel.getBlockOffset(i), currentLevel.getOnDiskDataSize(i));

        if (curChunk.getRootSize() >= maxChunkSize)
          writeIntermediateBlock(out, parent, curChunk);
      }

      if (curChunk.getNumEntries() > 0) {
        writeIntermediateBlock(out, parent, curChunk);
      }

      return parent;
    }

    private void writeIntermediateBlock(FSDataOutputStream out,
        BlockIndexChunk parent, BlockIndexChunk curChunk) throws IOException {
      long beginOffset = out.getPos();
      DataOutputStream dos = blockWriter.startWriting(
          BlockType.INTERMEDIATE_INDEX);
      curChunk.writeNonRoot(dos);
      byte[] curFirstKey = curChunk.getBlockKey(0);
      blockWriter.writeHeaderAndData(out);

      if (blockCache != null) {
        HFileBlock blockForCaching = blockWriter.getBlockForCaching();
        blockCache.cacheBlock(new BlockCacheKey(nameForCaching,
            beginOffset, DataBlockEncoding.NONE,
            blockForCaching.getBlockType()), blockForCaching);
      }

      // Add intermediate index block size.
      totalBlockOnDiskSize += blockWriter.getOnDiskSizeWithoutHeader();
      totalBlockUncompressedSize +=
          blockWriter.getUncompressedSizeWithoutHeader();

      // OFFSET is the beginning offset of the chunk of block index entries.
      // SIZE is the total byte size of the chunk of block index entries
      // plus the secondary index size.
      // FIRST_KEY is the first key in the chunk of block index entries.
      parent.add(curFirstKey, beginOffset,
          blockWriter.getOnDiskSizeWithHeader());

      // Clear the current block index chunk.
      curChunk.clear();
      curFirstKey = null;
    }

    /**
     * @return the number of entries in the root level of the block index
     */
    public final int getNumRootEntries() {
      return rootChunk.getNumEntries();
    }

    /**
     * @return the number of levels in this block index
     */
    public int getNumLevels() {
      return numLevels;
    }

    private void expectNumLevels(int expectedNumLevels) {
      if (numLevels != expectedNumLevels) {
        throw new IllegalStateException("Number of block index levels is "
            + numLevels + " but is expected to be " + expectedNumLevels);
      }
    }

    /**
     * Whether there is an inline block ready to be written. In general, we
     * write a leaf-level index block as an inline block as soon as its size
     * as serialized in the non-root format reaches a certain threshold.
     */
    @Override
    public boolean shouldWriteBlock(boolean closing) {
      if (singleLevelOnly) {
        throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);
      }

      if (curInlineChunk == null) {
        throw new IllegalStateException("curInlineChunk is null; has shouldWriteBlock been " +
            "called with closing=true and then called again?");
      }

      if (curInlineChunk.getNumEntries() == 0) {
        return false;
      }

      // We do have some entries in the current inline chunk.
      if (closing) {
        if (rootChunk.getNumEntries() == 0) {
          // We did not add any leaf-level blocks yet. Instead of creating a
          // leaf level with one block, move these entries to the root level.
          expectNumLevels(1);
          rootChunk = curInlineChunk;
          curInlineChunk = null;  // Disallow adding any more index entries.
          return false;
        }

        return true;
      } else {
        return curInlineChunk.getNonRootSize() >= maxChunkSize;
      }
    }

    /**
     * Writes out the current inline index block. Inline blocks are non-root
     * blocks, so the non-root index format is used.
     *
     * @param out the output stream to write the inline block to
     */
    @Override
    public void writeInlineBlock(DataOutput out) throws IOException {
      if (singleLevelOnly)
        throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);

      // Write the inline block index to the output stream in the non-root
      // index block format.
      curInlineChunk.writeNonRoot(out);

      // Save the first key of the inline block so that we can add it to the
      // parent-level index.
      firstKey = curInlineChunk.getBlockKey(0);

      // Start a new inline index block.
      curInlineChunk.clear();
    }

    /**
     * Called after an inline block has been written so that we can add an
     * entry referring to that block to the parent-level index.
     */
    @Override
    public void blockWritten(long offset, int onDiskSize, int uncompressedSize)
    {
      // Add leaf index block size.
      totalBlockOnDiskSize += onDiskSize;
      totalBlockUncompressedSize += uncompressedSize;

      if (singleLevelOnly)
        throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);

      if (firstKey == null) {
        throw new IllegalStateException("Trying to add second-level index " +
            "entry with offset=" + offset + " and onDiskSize=" + onDiskSize +
            " but the first key was not set in writeInlineBlock");
      }

      if (rootChunk.getNumEntries() == 0) {
        // We are writing the first leaf block, so increase the index level.
        expectNumLevels(1);
        numLevels = 2;
      }

      // Add another entry to the second-level index. Include the number of
      // entries in all previous leaf-level chunks for mid-key calculation.
      rootChunk.add(firstKey, offset, onDiskSize, totalNumEntries);
      firstKey = null;
    }

    @Override
    public BlockType getInlineBlockType() {
      return BlockType.LEAF_INDEX;
    }

    /**
     * Adds one index entry to the current leaf-level block. When the
     * leaf-level block gets large enough, it will be flushed to disk as an
     * inline block.
     *
     * @param firstKey the first key of the data block
     * @param blockOffset the offset of the data block
     * @param blockDataSize the on-disk size of the data block ({@link HFile}
     *          format version 2), or the uncompressed size of the data block
     *          ({@link HFile} format version 1)
     */
    public void addEntry(byte[] firstKey, long blockOffset, int blockDataSize)
    {
      curInlineChunk.add(firstKey, blockOffset, blockDataSize);
      ++totalNumEntries;
    }

    /**
     * @throws IOException if we happened to write a multi-level index
     */
    public void ensureSingleLevel() throws IOException {
      if (numLevels > 1) {
        throw new IOException("Wrote a " + numLevels + "-level index with " +
            rootChunk.getNumEntries() + " root-level entries, but " +
            "this is expected to be a single-level block index.");
      }
    }

    /**
     * @return true if we are using cache-on-write. This is configured by the
     *         caller of the constructor by either passing a valid block
     *         cache or null.
     */
    @Override
    public boolean getCacheOnWrite() {
      return blockCache != null;
    }

    /**
     * The total uncompressed size of the root index block, intermediate-level
     * index blocks, and leaf-level index blocks.
     *
     * @return the total uncompressed size of all index blocks
     */
    public long getTotalUncompressedSize() {
      return totalBlockUncompressedSize;
    }

  }

  /**
   * A single chunk of the block index in the process of writing. The data
   * in this chunk can become a leaf-level, intermediate-level, or root index
   * block.
   */
  static class BlockIndexChunk {

    /** First keys of the key range corresponding to each index entry. */
    private final List<byte[]> blockKeys = new ArrayList<byte[]>();

    /** Block offset in the backing stream. */
    private final List<Long> blockOffsets = new ArrayList<Long>();

    /** On-disk data sizes of lower-level data or index blocks. */
    private final List<Integer> onDiskDataSizes = new ArrayList<Integer>();

    /**
     * The cumulative number of sub-entries, i.e. entries on deeper-level
     * block index entries. numSubEntriesAt[i] is the number of sub-entries
     * in the blocks corresponding to this chunk's entries #0 through #i
     * inclusively.
     */
    private final List<Long> numSubEntriesAt = new ArrayList<Long>();

    /**
     * The offset of the next entry to be added, relative to the end of the
     * "secondary index" in the "non-root" format representation of this
     * index chunk. This is the next value to be added to the secondary
     * index.
     */
    private int curTotalNonRootEntrySize = 0;

    /**
     * The accumulated size of this chunk if stored in the root index format.
     */
    private int curTotalRootSize = 0;

    /**
     * The "secondary index" used for binary search over variable-length
     * records in a "non-root" format block. These are offsets from the end
     * of the secondary index (i.e. the beginning of the "entries" part),
     * not from the beginning of the block.
     */
    private final List<Integer> secondaryIndexOffsetMarks =
        new ArrayList<Integer>();

    /**
     * Adds a new entry to this block index chunk.
     *
     * @param firstKey the first key in the block pointed to by this entry
     * @param blockOffset the offset of the next-level block pointed to by
     *          this entry
     * @param onDiskDataSize the on-disk data of the block pointed to by this
     *          entry, including header
     * @param curTotalNumSubEntries if this chunk is the root index chunk
     *          under construction, this specifies the current total number
     *          of sub-entries in all leaf-level chunks, including the one
     *          corresponding to the second-level entry being added
     */
    void add(byte[] firstKey, long blockOffset, int onDiskDataSize,
        long curTotalNumSubEntries) {
      // Record the offset for the secondary index.
      secondaryIndexOffsetMarks.add(curTotalNonRootEntrySize);
      curTotalNonRootEntrySize += SECONDARY_INDEX_ENTRY_OVERHEAD
          + firstKey.length;

      curTotalRootSize += Bytes.SIZEOF_LONG + Bytes.SIZEOF_INT
          + WritableUtils.getVIntSize(firstKey.length) + firstKey.length;

      blockKeys.add(firstKey);
      blockOffsets.add(blockOffset);
      onDiskDataSizes.add(onDiskDataSize);

      if (curTotalNumSubEntries != -1) {
        numSubEntriesAt.add(curTotalNumSubEntries);

        // Make sure the parallel arrays are in sync.
        if (numSubEntriesAt.size() != blockKeys.size()) {
          throw new IllegalStateException("Only have key/value count " +
              "stats for " + numSubEntriesAt.size() + " block index " +
              "entries out of " + blockKeys.size());
        }
      }
    }

    /**
     * The same as {@link #add(byte[], long, int, long)} but does not take
     * the key/value count into account. Used for single-level indexes.
     *
     * @see #add(byte[], long, int, long)
     */
    public void add(byte[] firstKey, long blockOffset, int onDiskDataSize) {
      add(firstKey, blockOffset, onDiskDataSize, -1);
    }

    public void clear() {
      blockKeys.clear();
      blockOffsets.clear();
      onDiskDataSizes.clear();
      secondaryIndexOffsetMarks.clear();
      numSubEntriesAt.clear();
      curTotalNonRootEntrySize = 0;
      curTotalRootSize = 0;
    }

    /**
     * Finds the entry corresponding to the deeper-level index block
     * containing the given deeper-level entry (a "sub-entry"), assuming a
     * global 0-based ordering of sub-entries.
     *
     * <p>
     * <i>Implementation note.</i> We are looking for i such that
     * numSubEntriesAt[i - 1] <= k < numSubEntriesAt[i], because a
     * deeper-level block #i (0-based) contains sub-entries
     * #numSubEntriesAt[i - 1] through #numSubEntriesAt[i] - 1, assuming a
     * global 0-based ordering of sub-entries. i is by definition the
     * insertion point of k in numSubEntriesAt.
     *
     * @param k sub-entry index, from 0 to the total number of sub-entries - 1
     * @return the 0-based index of the entry corresponding to the given
     *         sub-entry
     */
    public int getEntryBySubEntry(long k) {
      // We define the mid-key as the key corresponding to the k'th sub-entry
      // (0-based).

      int i = Collections.binarySearch(numSubEntriesAt, k);

      // Exact match: numSubEntriesAt[i] = k. This means chunks #0 through
      // #i contain exactly k sub-entries, and the sub-entry #k (0-based) is
      // in the (i + 1)'th chunk.
      if (i >= 0)
        return i + 1;

      // Inexact match. Return the insertion point.
      return -i - 1;
    }

    /**
     * Used when writing the root block index of a multi-level block index.
     * Serializes additional information allowing to efficiently identify the
     * mid-key.
     *
     * @return a few serialized fields for finding the mid-key
     * @throws IOException if the metadata for computing the mid-key could
     *           not be created
     */
    public byte[] getMidKeyMetadata() throws IOException {
      ByteArrayOutputStream baos = new ByteArrayOutputStream(
          MID_KEY_METADATA_SIZE);
      DataOutputStream baosDos = new DataOutputStream(baos);
      long totalNumSubEntries = numSubEntriesAt.get(blockKeys.size() - 1);
      if (totalNumSubEntries == 0) {
        throw new IOException("No leaf-level entries, mid-key unavailable");
      }
      long midKeySubEntry = (totalNumSubEntries - 1) / 2;
      int midKeyEntry = getEntryBySubEntry(midKeySubEntry);

      baosDos.writeLong(blockOffsets.get(midKeyEntry));
      baosDos.writeInt(onDiskDataSizes.get(midKeyEntry));

      long numSubEntriesBefore = midKeyEntry > 0
          ? numSubEntriesAt.get(midKeyEntry - 1) : 0;
      long subEntryWithinEntry = midKeySubEntry - numSubEntriesBefore;
      if (subEntryWithinEntry < 0 || subEntryWithinEntry > Integer.MAX_VALUE)
      {
        throw new IOException("Could not identify mid-key index within the "
            + "leaf-level block containing mid-key: out of range ("
            + subEntryWithinEntry + ", numSubEntriesBefore="
            + numSubEntriesBefore + ", midKeySubEntry=" + midKeySubEntry
            + ")");
      }

      baosDos.writeInt((int) subEntryWithinEntry);

      if (baosDos.size() != MID_KEY_METADATA_SIZE) {
        throw new IOException("Could not write mid-key metadata: size=" +
            baosDos.size() + ", correct size: " + MID_KEY_METADATA_SIZE);
      }

      // Close just to be a good citizen; this has no effect on a
      // ByteArrayOutputStream.
      baos.close();

      return baos.toByteArray();
    }
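
    // The metadata produced above is exactly MID_KEY_METADATA_SIZE bytes:
    // a long (8 bytes, mid leaf block offset) + an int (4 bytes, its on-disk
    // size) + an int (4 bytes, the mid-key sub-entry index) = 16 bytes.
    // BlockIndexReader.readMultiLevelIndexRoot reads these three fields back
    // in the same order.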

    /**
     * Writes the block index chunk in the non-root index block format. This
     * format contains the number of entries, an index of integer offsets
     * for quick binary search on variable-length records, and tuples of
     * block offset, on-disk block size, and the first key for each entry.
     *
     * @param out the data output stream to write the block index to
     * @throws IOException
     */
    void writeNonRoot(DataOutput out) throws IOException {
      // The number of entries in the block.
      out.writeInt(blockKeys.size());

      if (secondaryIndexOffsetMarks.size() != blockKeys.size()) {
        throw new IOException("Corrupted block index chunk writer: " +
            blockKeys.size() + " entries but " +
            secondaryIndexOffsetMarks.size() + " secondary index items");
      }

      // For each entry, write a "secondary index" of relative offsets to the
      // entries from the end of the secondary index. This works, because at
      // read time we read the number of entries and know where the secondary
      // index ends.
      for (int currentSecondaryIndex : secondaryIndexOffsetMarks)
        out.writeInt(currentSecondaryIndex);

      // We include one more element in the secondary index to be able to
      // calculate the size of each entry.
      out.writeInt(curTotalNonRootEntrySize);

      for (int i = 0; i < blockKeys.size(); ++i) {
        out.writeLong(blockOffsets.get(i));
        out.writeInt(onDiskDataSizes.get(i));
        out.write(blockKeys.get(i));
      }
    }

    /**
     * @return the size of this chunk if stored in the non-root index block
     *         format
     */
    int getNonRootSize() {
      return Bytes.SIZEOF_INT                            // Number of entries
          + Bytes.SIZEOF_INT * (blockKeys.size() + 1)    // Secondary index
          + curTotalNonRootEntrySize;                    // All entries
    }
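
    // Worked example: a chunk with three entries whose keys are 10, 20, and
    // 30 bytes long serializes in the non-root format to
    //   4                                    (entry count)
    //   + 4 * (3 + 1)                        (secondary index + trailing mark)
    //   + (12 + 10) + (12 + 20) + (12 + 30)  (entries; 12 = offset + size)
    //   = 116 bytes,
    // which is what getNonRootSize() returns and what shouldWriteBlock
    // compares against maxChunkSize.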

    /**
     * Writes this chunk into the given output stream in the root block index
     * format. This format is similar to the {@link HFile} version 1 block
     * index format, except that we store the on-disk size of the block
     * instead of its uncompressed size.
     *
     * @param out the data output stream to write the block index to.
     *          Typically a stream writing into an {@link HFile} block.
     * @throws IOException
     */
    void writeRoot(DataOutput out) throws IOException {
      for (int i = 0; i < blockKeys.size(); ++i) {
        out.writeLong(blockOffsets.get(i));
        out.writeInt(onDiskDataSizes.get(i));
        Bytes.writeByteArray(out, blockKeys.get(i));
      }
    }

    /**
     * @return the size of this chunk if stored in the root index block format
     */
    int getRootSize() {
      return curTotalRootSize;
    }

    /**
     * @return the number of entries in this block index chunk
     */
    public int getNumEntries() {
      return blockKeys.size();
    }

    public byte[] getBlockKey(int i) {
      return blockKeys.get(i);
    }

    public long getBlockOffset(int i) {
      return blockOffsets.get(i);
    }

    public int getOnDiskDataSize(int i) {
      return onDiskDataSizes.get(i);
    }

    public long getCumulativeNumKV(int i) {
      if (i < 0)
        return 0;
      return numSubEntriesAt.get(i);
    }

  }

  public static int getMaxChunkSize(Configuration conf) {
    return conf.getInt(MAX_CHUNK_SIZE_KEY, DEFAULT_MAX_CHUNK_SIZE);
  }
}