package org.apache.hadoop.hbase.io.hfile;

import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFile.CachingBlockReader;
import org.apache.hadoop.hbase.regionserver.metrics.SchemaConfigured;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.CompoundBloomFilterWriter;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.util.StringUtils;

/**
 * Provides functionality to write the block index on file close and to read
 * it back at open time. The block index maps the first key of each block to
 * the block's offset and on-disk size. A small index fits entirely into the
 * root index block; a large one becomes a multi-level tree in which only the
 * root level is kept in memory, while leaf and intermediate index blocks are
 * read on demand and cached in the block cache. The block index writer is
 * also reused for other inline block structures, e.g. by
 * {@link CompoundBloomFilterWriter} to index Bloom filter chunks.
 */
public class HFileBlockIndex {

  private static final Log LOG = LogFactory.getLog(HFileBlockIndex.class);

  /** Default maximum size of a leaf- or intermediate-level index block. */
  static final int DEFAULT_MAX_CHUNK_SIZE = 128 * 1024;

  /**
   * Configuration key that overrides {@link #DEFAULT_MAX_CHUNK_SIZE}, the
   * maximum size of a single leaf- or intermediate-level index block.
   */
  public static final String MAX_CHUNK_SIZE_KEY = "hfile.index.block.max.size";

  /**
   * The number of bytes stored in each "secondary index" entry in addition
   * to the key bytes themselves: the 8-byte block offset and the 4-byte
   * on-disk block size.
   */
  static final int SECONDARY_INDEX_ENTRY_OVERHEAD = Bytes.SIZEOF_INT
      + Bytes.SIZEOF_LONG;

  /** Error message for operations that are unavailable in single-level mode. */
  private static final String INLINE_BLOCKS_NOT_ALLOWED =
      "Inline blocks are not allowed in the single-level-only mode";

  /**
   * The size of the mid-key metadata appended to the root index block of a
   * multi-level index: the offset (long) and on-disk size (int) of the leaf
   * index block containing the mid-key, and the mid-key's entry index (int)
   * within that block.
   */
  private static final int MID_KEY_METADATA_SIZE = Bytes.SIZEOF_LONG +
      2 * Bytes.SIZEOF_INT;

  /**
   * The reader always holds the root level of the index in memory. Index
   * blocks at all other levels are read on demand during key searches and
   * cached in the block cache as non-root index blocks.
   */
  public static class BlockIndexReader implements HeapSize {

    /** Used for comparing keys in root-level and non-root-level blocks. */
    private final RawComparator<byte[]> comparator;

    // Root-level index data: one entry per root-level block.
    private byte[][] blockKeys;
    private long[] blockOffsets;
    private int[] blockDataSizes;
    private int rootByteSize = 0;
    private int rootCount = 0;

    // Mid-key metadata of a multi-level index, read from the root index block.
    private long midLeafBlockOffset = -1;
    private int midLeafBlockOnDiskSize = -1;
    private int midKeyEntry = -1;

    /** The mid-key, computed lazily and cached on first access. */
    private AtomicReference<byte[]> midKey = new AtomicReference<byte[]>();

    /**
     * The number of levels in the block index tree: one if there is only the
     * root level, two for root and leaf levels, and so on.
     */
    private int searchTreeLevel;

    /** A way to read (and cache) index and data blocks at a given offset. */
    private CachingBlockReader cachingBlockReader;

    public BlockIndexReader(final RawComparator<byte[]> c, final int treeLevel,
        final CachingBlockReader cachingBlockReader) {
      this(c, treeLevel);
      this.cachingBlockReader = cachingBlockReader;
    }

    public BlockIndexReader(final RawComparator<byte[]> c, final int treeLevel)
    {
      comparator = c;
      searchTreeLevel = treeLevel;
    }

    /**
     * @return true if the block index is empty.
     */
    public boolean isEmpty() {
      return blockKeys.length == 0;
    }

    /**
     * Verifies that the block index is non-empty and throws an
     * {@link IllegalStateException} otherwise.
     */
    public void ensureNonEmpty() {
      if (blockKeys.length == 0) {
        throw new IllegalStateException("Block index is empty or not loaded");
      }
    }

    /**
     * Returns the data block which contains the given key. Used by both
     * multi-level and single-level block indexes.
     *
     * @param key the key we are looking for
     * @param keyOffset the offset of the key in its byte array
     * @param keyLength the length of the key
     * @param currentBlock the current block, to avoid re-reading the same
     *          block from cache or disk
     * @param cacheBlocks whether to cache the blocks read on this seek
     * @param pread whether to use positional read
     * @param isCompaction whether this read is on behalf of a compaction
     * @return the data block containing the given key, or null if the key is
     *         before the first key of the file
     */
    public HFileBlock seekToDataBlock(final byte[] key, int keyOffset,
        int keyLength, HFileBlock currentBlock, boolean cacheBlocks,
        boolean pread, boolean isCompaction)
        throws IOException {
      BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, keyOffset, keyLength,
          currentBlock, cacheBlocks, pread, isCompaction);
      if (blockWithScanInfo == null) {
        return null;
      } else {
        return blockWithScanInfo.getHFileBlock();
      }
    }
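
    /*
     * A minimal usage sketch (not part of this class): given an open block
     * index reader, a seek for an arbitrary key might look like the
     * following. The names "index" and "key" are hypothetical.
     *
     *   byte[] key = Bytes.toBytes("row-42");
     *   HFileBlock block = index.seekToDataBlock(key, 0, key.length,
     *       null,   // no current block to reuse
     *       true,   // cache the blocks read along the way
     *       false,  // regular (non-positional) read
     *       false); // not a compaction read
     *   if (block == null) {
     *     // the key sorts before the first key of the file
     *   }
     */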

    /**
     * Returns the BlockWithScanInfo which contains the data block containing
     * the given key, plus the scan metadata (the key of the next indexed
     * entry). Used by both multi-level and single-level block indexes.
     *
     * @param key the key we are looking for
     * @param keyOffset the offset of the key in its byte array
     * @param keyLength the length of the key
     * @param currentBlock the current block, to avoid re-reading the same
     *          block from cache or disk
     * @param cacheBlocks whether to cache the blocks read on this seek
     * @param pread whether to use positional read
     * @param isCompaction whether this read is on behalf of a compaction
     * @return the BlockWithScanInfo, or null if the key is before the first
     *         key of the file
     */
    public BlockWithScanInfo loadDataBlockWithScanInfo(final byte[] key, int keyOffset,
        int keyLength, HFileBlock currentBlock, boolean cacheBlocks,
        boolean pread, boolean isCompaction)
        throws IOException {
      int rootLevelIndex = rootBlockContainingKey(key, keyOffset, keyLength);
      if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
        return null;
      }

      // The first key of the next indexed block, used to stop a scan early.
      byte[] nextIndexedKey = null;

      // Where the next-level (possibly non-root) index block lives.
      long currentOffset = blockOffsets[rootLevelIndex];
      int currentOnDiskSize = blockDataSizes[rootLevelIndex];

      if (rootLevelIndex < blockKeys.length - 1) {
        nextIndexedKey = blockKeys[rootLevelIndex + 1];
      } else {
        nextIndexedKey = HConstants.NO_NEXT_INDEXED_KEY;
      }

      int lookupLevel = 1; // How many levels deep we are in our lookup.
      int index = -1;

      HFileBlock block;
      while (true) {
        if (currentBlock != null && currentBlock.getOffset() == currentOffset)
        {
          // Avoid reading the same block again, even with caching turned
          // off. This is crucial for compaction-type workloads which might
          // have caching turned off. This also saves on the cache lookup.
          block = currentBlock;
        } else {
          // Call HFile's caching block reader API. We always cache index
          // blocks, otherwise we might get terrible performance.
          boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);
          BlockType expectedBlockType;
          if (lookupLevel < searchTreeLevel - 1) {
            expectedBlockType = BlockType.INTERMEDIATE_INDEX;
          } else if (lookupLevel == searchTreeLevel - 1) {
            expectedBlockType = BlockType.LEAF_INDEX;
          } else {
            // this also accounts for ENCODED_DATA
            expectedBlockType = BlockType.DATA;
          }
          block = cachingBlockReader.readBlock(currentOffset,
              currentOnDiskSize, shouldCache, pread, isCompaction,
              expectedBlockType);
        }

        if (block == null) {
          throw new IOException("Failed to read block at offset " +
              currentOffset + ", onDiskSize=" + currentOnDiskSize);
        }

        // Found a data block: break the loop and check our level in the tree.
        if (block.getBlockType().equals(BlockType.DATA) ||
            block.getBlockType().equals(BlockType.ENCODED_DATA)) {
          break;
        }

        // Not a data block. This must be a leaf-level or intermediate-level
        // index block. We don't allow going deeper than searchTreeLevel.
        if (++lookupLevel > searchTreeLevel) {
          throw new IOException("Search Tree Level overflow: lookupLevel=" +
              lookupLevel + ", searchTreeLevel=" + searchTreeLevel);
        }

        // Locate the entry corresponding to the given key in the non-root
        // (leaf or intermediate-level) index block.
        ByteBuffer buffer = block.getBufferWithoutHeader();
        index = locateNonRootIndexEntry(buffer, key, keyOffset, keyLength, comparator);
        if (index == -1) {
          throw new IOException("The key "
              + Bytes.toStringBinary(key, keyOffset, keyLength)
              + " is before the first key of the non-root index block "
              + block);
        }

        currentOffset = buffer.getLong();
        currentOnDiskSize = buffer.getInt();

        // Only update nextIndexedKey if there is a next indexed entry.
        byte[] tmpNextIndexedKey = getNonRootIndexedKey(buffer, index + 1);
        if (tmpNextIndexedKey != null) {
          nextIndexedKey = tmpNextIndexedKey;
        }
      }

      if (lookupLevel != searchTreeLevel) {
        throw new IOException("Reached a data block at level " + lookupLevel +
            " but the number of levels is " + searchTreeLevel);
      }

      // Return the data block together with the next indexed key.
      BlockWithScanInfo blockWithScanInfo = new BlockWithScanInfo(block, nextIndexedKey);
      return blockWithScanInfo;
    }
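
    /*
     * For orientation: with searchTreeLevel == 3 the loop above visits one
     * block per level, e.g.
     *
     *   root (in memory) -> INTERMEDIATE_INDEX -> LEAF_INDEX -> DATA
     *
     * lookupLevel starts at 1 after the root lookup and must equal
     * searchTreeLevel by the time a DATA (or ENCODED_DATA) block is reached;
     * anything else indicates a corrupt index.
     */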

    /**
     * An approximation to the {@link HFile}'s mid-key. Operates on block
     * boundaries, and does not go inside blocks. In other words, returns the
     * first key of the middle block of the file.
     *
     * @return the first key of the middle block
     */
    public byte[] midkey() throws IOException {
      if (rootCount == 0)
        throw new IOException("HFile empty");

      byte[] midKey = this.midKey.get();
      if (midKey != null)
        return midKey;

      if (midLeafBlockOffset >= 0) {
        if (cachingBlockReader == null) {
          throw new IOException("Have to read the middle leaf block but " +
              "no block reader available");
        }

        // Caching, using pread, assuming this is not a compaction.
        HFileBlock midLeafBlock = cachingBlockReader.readBlock(
            midLeafBlockOffset, midLeafBlockOnDiskSize, true, true, false,
            BlockType.LEAF_INDEX);

        ByteBuffer b = midLeafBlock.getBufferWithoutHeader();
        int numDataBlocks = b.getInt();
        int keyRelOffset = b.getInt(Bytes.SIZEOF_INT * (midKeyEntry + 1));
        int keyLen = b.getInt(Bytes.SIZEOF_INT * (midKeyEntry + 2)) -
            keyRelOffset;
        int keyOffset = b.arrayOffset() +
            Bytes.SIZEOF_INT * (numDataBlocks + 2) + keyRelOffset +
            SECONDARY_INDEX_ENTRY_OVERHEAD;
        midKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset + keyLen);
      } else {
        // The middle of the root-level index.
        midKey = blockKeys[rootCount / 2];
      }

      this.midKey.set(midKey);
      return midKey;
    }

    /**
     * @param i zero-based index of the root-level block
     * @return the first key stored in the i-th root-level block
     */
    public byte[] getRootBlockKey(int i) {
      return blockKeys[i];
    }

    /**
     * @param i zero-based index of the root-level block
     * @return the offset of the i-th root-level block in the file
     */
    public long getRootBlockOffset(int i) {
      return blockOffsets[i];
    }

    /**
     * @param i zero-based index of the root-level block
     * @return the on-disk size of the i-th root-level block
     */
    public int getRootBlockDataSize(int i) {
      return blockDataSizes[i];
    }

    /**
     * @return the number of root-level blocks in this block index
     */
    public int getRootBlockCount() {
      return rootCount;
    }

    /**
     * Finds the root-level index block containing the given key.
     *
     * @param key the key to find
     * @param offset the offset of the key in its byte array
     * @param length the length of the key
     * @return the index of the root-level block containing the key, i.e. the
     *         greatest i such that blockKeys[i] is less than or equal to the
     *         key, or -1 if the key is smaller than the first key in the
     *         index
     */
    public int rootBlockContainingKey(final byte[] key, int offset,
        int length) {
      int pos = Bytes.binarySearch(blockKeys, key, offset, length,
          comparator);
      // pos is between -(blockKeys.length + 1) and blockKeys.length - 1; see
      // binarySearch's javadoc.

      if (pos >= 0) {
        // This means this is an exact match with an element of blockKeys.
        assert pos < blockKeys.length;
        return pos;
      }

      // Otherwise, pos = -(i + 1), where i is the insertion point: the index
      // of the first element greater than the key. We want to return i - 1,
      // the index of the last element less than or equal to the key. In
      // particular, i is 0 if the key is smaller than blockKeys[0], in which
      // case we return -1.
      int i = -pos - 1;
      assert 0 <= i && i <= blockKeys.length;
      return i - 1;
    }
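
    /*
     * Worked example of the return convention above, with hypothetical
     * single-letter keys: given blockKeys = { "c", "k", "s" },
     *
     *   rootBlockContainingKey("a") == -1  (before the first block)
     *   rootBlockContainingKey("k") ==  1  (exact match)
     *   rootBlockContainingKey("m") ==  1  ("k" <= "m" < "s")
     *   rootBlockContainingKey("z") ==  2  (falls into the last block)
     */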

    /**
     * Adds a new entry to the root-level index.
     *
     * @param key the first key in the block pointed to by this entry
     * @param offset the offset of the block pointed to by this entry
     * @param dataSize the on-disk size of the block pointed to by this entry
     */
    private void add(final byte[] key, final long offset, final int dataSize) {
      blockOffsets[rootCount] = offset;
      blockKeys[rootCount] = key;
      blockDataSizes[rootCount] = dataSize;

      rootCount++;
      rootByteSize += SECONDARY_INDEX_ENTRY_OVERHEAD + key.length;
    }

    /**
     * The indexed key at the i-th position in the given non-root index
     * block. The position starts at 0.
     *
     * @param nonRootIndex buffer of the non-root index block
     * @param i the zero-based entry position
     * @return the indexed key at the i-th position, or null if i is out of
     *         range
     */
    private byte[] getNonRootIndexedKey(ByteBuffer nonRootIndex, int i) {
      int numEntries = nonRootIndex.getInt(0);
      if (i < 0 || i >= numEntries) {
        return null;
      }

      // Entries start after the number of entries and the secondary index.
      // The secondary index takes numEntries + 1 ints.
      int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);

      // The target key's offset relative to the end of the secondary index.
      int targetKeyRelOffset = nonRootIndex.getInt(
          Bytes.SIZEOF_INT * (i + 1));

      // The offset of the target key in the block index buffer.
      int targetKeyOffset = entriesOffset       // Skip secondary index
          + targetKeyRelOffset                  // Skip preceding entries
          + SECONDARY_INDEX_ENTRY_OVERHEAD;     // Skip offset and on-disk size

      // We subtract two consecutive secondary index elements, which gives us
      // the size of the whole (offset, onDiskSize, key) tuple, and then
      // subtract the overhead of the offset and onDiskSize fields.
      int targetKeyLength = nonRootIndex.getInt(Bytes.SIZEOF_INT * (i + 2)) -
          targetKeyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD;

      int from = nonRootIndex.arrayOffset() + targetKeyOffset;
      int to = from + targetKeyLength;
      return Arrays.copyOfRange(nonRootIndex.array(), from, to);
    }
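
    /*
     * Offset arithmetic example for the method above, assuming a non-root
     * block with numEntries = 3 and 10-byte keys. The secondary index stores
     * numEntries + 1 ints, so entry data begins at entriesOffset =
     * 4 * (3 + 2) = 20 bytes. For i = 1, targetKeyRelOffset is the second
     * secondary-index value (22 = one 12-byte overhead plus one 10-byte
     * key), so the key itself starts at 20 + 22 + 12 = 54, and its length is
     * 44 - 22 - 12 = 10.
     */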

    /**
     * Performs a binary search over a non-root level index block. Utilizes
     * the secondary index, which records the relative offsets of the
     * (offset, onDiskSize, firstKey) tuples of all entries.
     *
     * @param key the key we are searching for
     * @param keyOffset the offset of the key in its byte array
     * @param keyLength the length of the key
     * @param nonRootIndex the non-root index block buffer, starting with the
     *          number of entries followed by the secondary index. The
     *          buffer's position is ignored.
     * @return the index i in [0, numEntries - 1] of the entry closest to the
     *         given key from below (i.e. the entry's key is less than or
     *         equal to the given key), or -1 if the given key is smaller
     *         than the first key in the block
     */
    static int binarySearchNonRootIndex(byte[] key, int keyOffset,
        int keyLength, ByteBuffer nonRootIndex,
        RawComparator<byte[]> comparator) {

      int numEntries = nonRootIndex.getInt(0);
      int low = 0;
      int high = numEntries - 1;
      int mid = 0;

      // Entries start after the number of entries and the secondary index.
      // The secondary index takes numEntries + 1 ints.
      int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);

      // If we imagine an array of entry keys that we are searching in, then
      // low is the first index to consider and high is the last.
      while (low <= high) {
        mid = (low + high) >>> 1;

        // The mid key's offset relative to the end of the secondary index.
        int midKeyRelOffset = nonRootIndex.getInt(
            Bytes.SIZEOF_INT * (mid + 1));

        // The offset of the middle key in the block index buffer.
        int midKeyOffset = entriesOffset        // Skip secondary index
            + midKeyRelOffset                   // Skip preceding entries
            + SECONDARY_INDEX_ENTRY_OVERHEAD;   // Skip offset and on-disk size

        // We subtract two consecutive secondary index elements, which gives
        // us the size of the whole (offset, onDiskSize, key) tuple, and then
        // subtract the overhead of the offset and onDiskSize fields.
        int midLength = nonRootIndex.getInt(Bytes.SIZEOF_INT * (mid + 2)) -
            midKeyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD;

        // Compare the key being searched against the middle entry's key.
        int cmp = comparator.compare(key, keyOffset, keyLength,
            nonRootIndex.array(), nonRootIndex.arrayOffset() + midKeyOffset,
            midLength);

        // key lives above the midpoint
        if (cmp > 0)
          low = mid + 1; // Maintain the invariant that keys[low - 1] < key
        // key lives below the midpoint
        else if (cmp < 0)
          high = mid - 1; // Maintain the invariant that key < keys[high + 1]
        else
          return mid; // exact match
      }

      // As per our invariants: keys[low - 1] < key < keys[high + 1], and by
      // the loop exit condition the two bounds must have crossed by exactly
      // one position.
      if (low != high + 1) {
        throw new IllegalStateException("Binary search broken: low=" + low
            + " instead of " + (high + 1));
      }

      // OK, our invariant says that keys[low - 1] < key < keys[low]. We need
      // to return the index of the entry whose key is <= the given key,
      // i.e. low - 1.
      int i = low - 1;

      // Some extra validation on the result.
      if (i < -1 || i >= numEntries) {
        throw new IllegalStateException("Binary search broken: result is " +
            i + " but expected to be between -1 and (numEntries - 1) = " +
            (numEntries - 1));
      }

      return i;
    }

    /**
     * Searches for the last key that is less than or equal to the given key
     * in the given non-root index block, and positions the buffer at the
     * located entry's (offset, onDiskSize) pair so the caller can read those
     * fields sequentially.
     *
     * @param nonRootBlock the non-root index block buffer
     * @param key the byte array containing the key
     * @param keyOffset the offset of the key in its byte array
     * @param keyLength the length of the key
     * @return the index of the entry found, or -1 if the key is smaller than
     *         the first key in the block
     */
    static int locateNonRootIndexEntry(ByteBuffer nonRootBlock, byte[] key,
        int keyOffset, int keyLength, RawComparator<byte[]> comparator) {
      int entryIndex = binarySearchNonRootIndex(key, keyOffset, keyLength,
          nonRootBlock, comparator);

      if (entryIndex != -1) {
        int numEntries = nonRootBlock.getInt(0);

        // The end of the secondary index and the beginning of the entries.
        int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);

        // The offset of the entry we are interested in, relative to the end
        // of the secondary index.
        int entryRelOffset = nonRootBlock.getInt(Bytes.SIZEOF_INT
            * (1 + entryIndex));

        nonRootBlock.position(entriesOffset + entryRelOffset);
      }

      return entryIndex;
    }

    /**
     * Reads a root-level index block. Root-level index entries are stored in
     * "root format": a long block offset, an int on-disk block size, and a
     * vint-prefixed key, with no secondary index.
     *
     * @param in the input stream, positioned at the start of the root index
     * @param numEntries the number of root-level index entries
     * @throws IOException
     */
    public void readRootIndex(DataInput in, final int numEntries)
        throws IOException {
      blockOffsets = new long[numEntries];
      blockKeys = new byte[numEntries][];
      blockDataSizes = new int[numEntries];

      // If index size is zero, no index was written.
      if (numEntries > 0) {
        for (int i = 0; i < numEntries; ++i) {
          long offset = in.readLong();
          int dataSize = in.readInt();
          byte[] key = Bytes.readByteArray(in);
          add(key, offset, dataSize);
        }
      }
    }
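
    /*
     * On-disk sketch of the root format read above (and produced by
     * BlockIndexChunk#writeRoot), for a single entry:
     *
     *   +---------------+------------------+----------------+-----------+
     *   | offset (long) | onDiskSize (int) | key len (vint) | key bytes |
     *   +---------------+------------------+----------------+-----------+
     *
     * Bytes.readByteArray handles the vint length prefix written by
     * Bytes.writeByteArray.
     */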

    /**
     * Reads a root-level index block of a multi-level or single-level block
     * index from the given HFile block.
     *
     * @param blk the HFile block containing the root index
     * @param numEntries the number of root-level index entries
     * @return the data input stream positioned just after the root index, so
     *         the caller can continue reading any metadata that follows
     * @throws IOException
     */
    public DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {
      DataInputStream in = blk.getByteStream();
      readRootIndex(in, numEntries);
      return in;
    }

    /**
     * Reads the root index block of a multi-level block index, including the
     * optional mid-key metadata that follows the root-level entries.
     *
     * @param blk the HFile block containing the root index
     * @param numEntries the number of root-level index entries
     * @throws IOException
     */
    public void readMultiLevelIndexRoot(HFileBlock blk,
        final int numEntries) throws IOException {
      DataInputStream in = readRootIndex(blk, numEntries);
      // The checksum bytes have to be subtracted from what remains in the
      // stream to determine whether the mid-key metadata is present.
      int checkSumBytes = blk.totalChecksumBytes();
      if ((in.available() - checkSumBytes) < MID_KEY_METADATA_SIZE) {
        // No mid-key metadata available.
        return;
      }
      midLeafBlockOffset = in.readLong();
      midLeafBlockOnDiskSize = in.readInt();
      midKeyEntry = in.readInt();
    }

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append("size=" + rootCount).append("\n");
      for (int i = 0; i < rootCount; i++) {
        sb.append("key=").append(KeyValue.keyToString(blockKeys[i]))
            .append("\n offset=").append(blockOffsets[i])
            .append(", dataSize=" + blockDataSizes[i]).append("\n");
      }
      return sb.toString();
    }

    @Override
    public long heapSize() {
      // The object itself: header, references, and primitive fields.
      long heapSize = ClassSize.align(6 * ClassSize.REFERENCE +
          3 * Bytes.SIZEOF_INT + ClassSize.OBJECT);

      // Mid-key metadata.
      heapSize += MID_KEY_METADATA_SIZE;

      // Calculating the size of blockKeys
      if (blockKeys != null) {
        // Adding array + references overhead
        heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length
            * ClassSize.REFERENCE);

        // Adding the key bytes themselves
        for (byte[] key : blockKeys) {
          heapSize += ClassSize.align(ClassSize.ARRAY + key.length);
        }
      }

      if (blockOffsets != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockOffsets.length
            * Bytes.SIZEOF_LONG);
      }

      if (blockDataSizes != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockDataSizes.length
            * Bytes.SIZEOF_INT);
      }

      return ClassSize.align(heapSize);
    }

  }

  /**
   * Writes the block index into the output stream, generating the tree from
   * bottom up. Leaf-level index blocks are written as "inline" blocks as
   * they fill up, interleaved with data blocks. The root level, together
   * with any intermediate levels needed to keep the root small enough, is
   * written at the end of the file.
   *
   * Examples of how to use the block index writer can be found in
   * {@link CompoundBloomFilterWriter} and {@link HFileWriterV2}.
   */
  public static class BlockIndexWriter extends SchemaConfigured
      implements InlineBlockWriter {

    /**
     * While the index is being written out as a series of leaf-level index
     * blocks, this chunk accumulates the root-level entries pointing to
     * those blocks. Once all leaf-level blocks are written, it is promoted
     * through intermediate levels (if necessary) and written out as the
     * root.
     */
    private BlockIndexChunk rootChunk = new BlockIndexChunk();

    /**
     * The current leaf-level chunk. New entries referencing data blocks are
     * added to this chunk until it grows large enough to be written to disk
     * as an inline block.
     */
    private BlockIndexChunk curInlineChunk = new BlockIndexChunk();

    /**
     * The number of block index levels: one if there is only the root level
     * (even if empty), two if there are a leaf level and a root level, and
     * higher if there are intermediate levels. This is only final after
     * {@link #writeIndexBlocks(FSDataOutputStream)} has been called. The
     * initial value accounts for the root level and is increased to two as
     * soon as the first leaf-level block is reported in
     * {@link #blockWritten(long, int, int)}.
     */
    private int numLevels = 1;

    private HFileBlock.Writer blockWriter;
    private byte[] firstKey = null;

    /**
     * The total number of leaf-level entries, i.e. entries referenced by
     * leaf-level blocks. For the data block index this equals the number of
     * data blocks.
     */
    private long totalNumEntries;

    /** Total compressed size of all index blocks. */
    private long totalBlockOnDiskSize;

    /** Total uncompressed size of all index blocks. */
    private long totalBlockUncompressedSize;

    /** The maximum size guideline for leaf and intermediate index blocks. */
    private int maxChunkSize;

    /** Whether we require this block index to always be single-level. */
    private boolean singleLevelOnly;

    /** Block cache, or null if cache-on-write is disabled. */
    private BlockCache blockCache;

    /** The file name to use as part of block cache keys. */
    private String nameForCaching;

    /** Creates a single-level block index writer. */
    public BlockIndexWriter() {
      this(null, null, null);
      singleLevelOnly = true;
    }

    /**
     * Creates a multi-level block index writer.
     *
     * @param blockWriter the block writer to use to write index blocks
     * @param blockCache if not null, index blocks are cached on write into
     *          this block cache
     * @param nameForCaching the file name to use as part of block cache keys
     */
    public BlockIndexWriter(HFileBlock.Writer blockWriter,
        BlockCache blockCache, String nameForCaching) {
      if ((blockCache == null) != (nameForCaching == null)) {
        throw new IllegalArgumentException("Block cache and file name for " +
            "caching must be both specified or both null");
      }

      this.blockWriter = blockWriter;
      this.blockCache = blockCache;
      this.nameForCaching = nameForCaching;
      this.maxChunkSize = HFileBlockIndex.DEFAULT_MAX_CHUNK_SIZE;
    }

    public void setMaxChunkSize(int maxChunkSize) {
      if (maxChunkSize <= 0) {
        throw new IllegalArgumentException("Invalid maximum index block size");
      }
      this.maxChunkSize = maxChunkSize;
    }

    /**
     * Writes the root level and intermediate levels of the block index into
     * the output stream, generating the tree from bottom up. Assumes that
     * the leaf level has already been inline-written to the data stream, and
     * that the root chunk's entries currently point to the leaf-level
     * blocks. Intermediate levels are written as needed until the root chunk
     * fits within {@link #maxChunkSize}, and the mid-key metadata is
     * appended after the root-level entries.
     *
     * @param out the stream to write the root and intermediate levels to
     * @return the position at which the root-level index starts
     * @throws IOException
     */
    public long writeIndexBlocks(FSDataOutputStream out) throws IOException {
      if (curInlineChunk != null && curInlineChunk.getNumEntries() != 0) {
        throw new IOException("Trying to write a multi-level block index, " +
            "but there are " + curInlineChunk.getNumEntries() +
            " entries in the last inline chunk.");
      }

      // We need to get the mid-key metadata before we create intermediate
      // index levels and overwrite the root chunk.
      byte[] midKeyMetadata = numLevels > 1 ? rootChunk.getMidKeyMetadata()
          : null;

      if (curInlineChunk != null) {
        while (rootChunk.getRootSize() > maxChunkSize) {
          rootChunk = writeIntermediateLevel(out, rootChunk);
          numLevels += 1;
        }
      }

      // Write the root level.
      long rootLevelIndexPos = out.getPos();

      {
        DataOutput blockStream =
            blockWriter.startWriting(BlockType.ROOT_INDEX);
        rootChunk.writeRoot(blockStream);
        if (midKeyMetadata != null)
          blockStream.write(midKeyMetadata);
        blockWriter.writeHeaderAndData(out);
      }

      // Add root index block size.
      totalBlockOnDiskSize += blockWriter.getOnDiskSizeWithoutHeader();
      totalBlockUncompressedSize +=
          blockWriter.getUncompressedSizeWithoutHeader();

      if (LOG.isTraceEnabled()) {
        LOG.trace("Wrote a " + numLevels + "-level index with root level at pos "
            + rootLevelIndexPos + ", " + rootChunk.getNumEntries()
            + " root-level entries, " + totalNumEntries + " total entries, "
            + StringUtils.humanReadableInt(this.totalBlockOnDiskSize) +
            " on-disk size, "
            + StringUtils.humanReadableInt(totalBlockUncompressedSize) +
            " total uncompressed size.");
      }
      return rootLevelIndexPos;
    }
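
    /*
     * Shape of the resulting file for a three-level index, sketched from the
     * write path above (leaf index blocks were written inline earlier,
     * interleaved with data blocks):
     *
     *   ... data | leaf | data | leaf | ... | intermediate(s) | root (+midkey)
     *
     * Each pass of writeIntermediateLevel replaces the root chunk with a
     * one-level-higher chunk whose entries point at the intermediate blocks
     * just written, until the root fits within maxChunkSize.
     */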

    /**
     * Writes the block index data as a single-level block index, verifying
     * that the index is, in fact, single-level.
     *
     * @param out an output stream to write the root index block to
     * @param description a short description of the index being written,
     *          used in a log message
     * @throws IOException
     */
    public void writeSingleLevelIndex(DataOutput out, String description)
        throws IOException {
      expectNumLevels(1);

      if (!singleLevelOnly)
        throw new IOException("Single-level mode is turned off");

      if (rootChunk.getNumEntries() > 0)
        throw new IOException("Root-level entries already added in " +
            "single-level mode");

      rootChunk = curInlineChunk;
      curInlineChunk = new BlockIndexChunk();

      if (LOG.isTraceEnabled()) {
        LOG.trace("Wrote a single-level " + description + " index with "
            + rootChunk.getNumEntries() + " entries, " + rootChunk.getRootSize()
            + " bytes");
      }
      rootChunk.writeRoot(out);
    }

    /**
     * Splits the current level of the block index into intermediate index
     * blocks of permitted size and writes those blocks to disk.
     *
     * @param out the stream to write the intermediate-level index blocks to
     * @param currentLevel the current level of the block index, such as the
     *          root level
     * @return a new block index chunk referencing the intermediate-level
     *         blocks just written, to be used as the next-higher level
     * @throws IOException
     */
    private BlockIndexChunk writeIntermediateLevel(FSDataOutputStream out,
        BlockIndexChunk currentLevel) throws IOException {
      // Entries referencing the intermediate-level blocks we are creating.
      BlockIndexChunk parent = new BlockIndexChunk();

      // The current intermediate-level index block chunk.
      BlockIndexChunk curChunk = new BlockIndexChunk();

      for (int i = 0; i < currentLevel.getNumEntries(); ++i) {
        curChunk.add(currentLevel.getBlockKey(i),
            currentLevel.getBlockOffset(i), currentLevel.getOnDiskDataSize(i));

        if (curChunk.getRootSize() >= maxChunkSize)
          writeIntermediateBlock(out, parent, curChunk);
      }

      if (curChunk.getNumEntries() > 0) {
        writeIntermediateBlock(out, parent, curChunk);
      }

      return parent;
    }

    private void writeIntermediateBlock(FSDataOutputStream out,
        BlockIndexChunk parent, BlockIndexChunk curChunk) throws IOException {
      long beginOffset = out.getPos();
      DataOutputStream dos = blockWriter.startWriting(
          BlockType.INTERMEDIATE_INDEX);
      curChunk.writeNonRoot(dos);
      byte[] curFirstKey = curChunk.getBlockKey(0);
      blockWriter.writeHeaderAndData(out);

      if (blockCache != null) {
        HFileBlock blockForCaching = blockWriter.getBlockForCaching();
        passSchemaMetricsTo(blockForCaching);
        blockCache.cacheBlock(new BlockCacheKey(nameForCaching,
            beginOffset, DataBlockEncoding.NONE,
            blockForCaching.getBlockType()), blockForCaching);
      }

      // Add intermediate index block size.
      totalBlockOnDiskSize += blockWriter.getOnDiskSizeWithoutHeader();
      totalBlockUncompressedSize +=
          blockWriter.getUncompressedSizeWithoutHeader();

      // The parent entry records the beginning offset of this chunk of
      // block index entries, its total on-disk size (including the block
      // header), and the first key in the chunk.
      parent.add(curFirstKey, beginOffset,
          blockWriter.getOnDiskSizeWithHeader());

      // Clear the current block index chunk.
      curChunk.clear();
      curFirstKey = null;
    }

    /**
     * @return the number of block index entries in the root level
     */
    public final int getNumRootEntries() {
      return rootChunk.getNumEntries();
    }

    /**
     * @return the number of levels in this block index
     */
    public int getNumLevels() {
      return numLevels;
    }

    private void expectNumLevels(int expectedNumLevels) {
      if (numLevels != expectedNumLevels) {
        throw new IllegalStateException("Number of block index levels is "
            + numLevels + " but is expected to be " + expectedNumLevels);
      }
    }

    /**
     * Whether there is an inline block ready to be written. In the case of a
     * multi-level block index, the current inline index block must be
     * written to disk before any more data blocks are written.
     *
     * {@inheritDoc}
     */
    @Override
    public boolean shouldWriteBlock(boolean closing) {
      if (singleLevelOnly) {
        throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);
      }

      if (curInlineChunk == null) {
        throw new IllegalStateException("curInlineChunk is null; has shouldWriteBlock been " +
            "called with closing=true and then called again?");
      }

      if (curInlineChunk.getNumEntries() == 0) {
        return false;
      }

      // We do have some entries in the current inline chunk.
      if (closing) {
        if (rootChunk.getNumEntries() == 0) {
          // We did not add any leaf-level blocks yet. Instead of creating a
          // leaf level with one block, move these entries to the root
          // level.
          expectNumLevels(1);
          rootChunk = curInlineChunk;
          curInlineChunk = null;
          return false;
        }

        return true;
      } else {
        return curInlineChunk.getNonRootSize() >= maxChunkSize;
      }
    }

    /**
     * Writes the current inline index block (a leaf-level index block) to
     * the output stream in the non-root format, and remembers its first key
     * so a parent-level entry can be added once the block's offset and size
     * are known in {@link #blockWritten(long, int, int)}.
     *
     * {@inheritDoc}
     */
    @Override
    public void writeInlineBlock(DataOutput out) throws IOException {
      if (singleLevelOnly)
        throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);

      // Write the inline block index to the output stream in the non-root
      // index block format.
      curInlineChunk.writeNonRoot(out);

      // Save the first key of the inline block so that we can add it to the
      // parent-level index.
      firstKey = curInlineChunk.getBlockKey(0);

      // Start a new inline index block.
      curInlineChunk.clear();
    }

    /**
     * Called after an inline block has been written so that we can add an
     * entry referring to that block to the parent-level index.
     */
    @Override
    public void blockWritten(long offset, int onDiskSize, int uncompressedSize)
    {
      // Add leaf index block size.
      totalBlockOnDiskSize += onDiskSize;
      totalBlockUncompressedSize += uncompressedSize;

      if (singleLevelOnly)
        throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);

      if (firstKey == null) {
        throw new IllegalStateException("Trying to add second-level index " +
            "entry with offset=" + offset + " and onDiskSize=" + onDiskSize +
            " but the first key was not set in writeInlineBlock");
      }

      if (rootChunk.getNumEntries() == 0) {
        // Once we add the first block index entry, we know the tree has at
        // least a leaf level and a root level.
        expectNumLevels(1);
        numLevels = 2;
      }

      // Add another entry to the second-level index. Include the number of
      // entries added so far, to be able to find the mid-key later.
      rootChunk.add(firstKey, offset, onDiskSize, totalNumEntries);
      firstKey = null;
    }

    @Override
    public BlockType getInlineBlockType() {
      return BlockType.LEAF_INDEX;
    }

    /**
     * Adds a new entry to the current inline (leaf-level) index chunk.
     *
     * @param firstKey the first key of the data block
     * @param blockOffset the offset of the data block
     * @param blockDataSize the on-disk size of the data block
     */
    public void addEntry(byte[] firstKey, long blockOffset, int blockDataSize)
    {
      curInlineChunk.add(firstKey, blockOffset, blockDataSize);
      ++totalNumEntries;
    }

    /**
     * Throws an exception if the block index is not single-level.
     */
    public void ensureSingleLevel() throws IOException {
      if (numLevels > 1) {
        throw new IOException("Wrote a " + numLevels + "-level index with " +
            rootChunk.getNumEntries() + " root-level entries, but " +
            "this is expected to be a single-level block index.");
      }
    }

    /**
     * @return true if we are using cache-on-write, i.e. a non-null block
     *         cache was passed to the constructor
     */
    @Override
    public boolean cacheOnWrite() {
      return blockCache != null;
    }

    /**
     * The total uncompressed size of the root index block,
     * intermediate-level index blocks, and leaf-level index blocks.
     *
     * @return the total uncompressed size of all index blocks
     */
    public long getTotalUncompressedSize() {
      return totalBlockUncompressedSize;
    }

  }

  /**
   * A single chunk of the block index in the process of writing. The data in
   * this chunk can become a leaf-level, intermediate-level, or root index
   * block.
   */
  static class BlockIndexChunk {

    /** First keys of the key range corresponding to each index entry. */
    private final List<byte[]> blockKeys = new ArrayList<byte[]>();

    /** Block offset in the backing stream. */
    private final List<Long> blockOffsets = new ArrayList<Long>();

    /** On-disk data sizes of lower-level data or index blocks. */
    private final List<Integer> onDiskDataSizes = new ArrayList<Integer>();

    /**
     * The cumulative number of sub-entries, i.e. entries on deeper-level
     * index blocks. numSubEntriesAt.get(i) is the number of sub-entries in
     * the blocks corresponding to this chunk's entries #0 through #i
     * inclusively.
     */
    private final List<Long> numSubEntriesAt = new ArrayList<Long>();

    /**
     * The offset of the next entry to be added, relative to the end of the
     * "secondary index" in the "non-root" format representation of this
     * index chunk. This is the next value to be added to the secondary
     * index.
     */
    private int curTotalNonRootEntrySize = 0;

    /**
     * The accumulated size of this chunk if stored in the root index format.
     */
    private int curTotalRootSize = 0;

    /**
     * The "secondary index" used for binary search over variable-length
     * records in a "non-root" format block. These offsets are relative to
     * the end of this secondary index.
     */
    private final List<Integer> secondaryIndexOffsetMarks =
        new ArrayList<Integer>();

    /**
     * Adds a new entry to this block index chunk.
     *
     * @param firstKey the first key in the block pointed to by this entry
     * @param blockOffset the offset of the next-level block pointed to by
     *          this entry
     * @param onDiskDataSize the on-disk size of the block pointed to by this
     *          entry
     * @param curTotalNumSubEntries if this chunk is the root index chunk,
     *          the total number of sub-entries in its children so far, or
     *          -1 if this is a leaf-level chunk
     */
    void add(byte[] firstKey, long blockOffset, int onDiskDataSize,
        long curTotalNumSubEntries) {
      // Record the offset for the secondary index.
      secondaryIndexOffsetMarks.add(curTotalNonRootEntrySize);
      curTotalNonRootEntrySize += SECONDARY_INDEX_ENTRY_OVERHEAD
          + firstKey.length;

      curTotalRootSize += Bytes.SIZEOF_LONG + Bytes.SIZEOF_INT
          + WritableUtils.getVIntSize(firstKey.length) + firstKey.length;

      blockKeys.add(firstKey);
      blockOffsets.add(blockOffset);
      onDiskDataSizes.add(onDiskDataSize);

      if (curTotalNumSubEntries != -1) {
        numSubEntriesAt.add(curTotalNumSubEntries);

        // Make sure the cumulative sub-entry counts stay in sync with the
        // entries: either every entry has a count, or none of them do.
        if (numSubEntriesAt.size() != blockKeys.size()) {
          throw new IllegalStateException("Only have key/value count " +
              "stats for " + numSubEntriesAt.size() + " block index " +
              "entries out of " + blockKeys.size());
        }
      }
    }

    /**
     * The same as {@link #add(byte[], long, int, long)} but does not track
     * sub-entry counts. Used for leaf-level chunks and single-level indexes.
     *
     * @see #add(byte[], long, int, long)
     */
    public void add(byte[] firstKey, long blockOffset, int onDiskDataSize) {
      add(firstKey, blockOffset, onDiskDataSize, -1);
    }

    public void clear() {
      blockKeys.clear();
      blockOffsets.clear();
      onDiskDataSizes.clear();
      secondaryIndexOffsetMarks.clear();
      numSubEntriesAt.clear();
      curTotalNonRootEntrySize = 0;
      curTotalRootSize = 0;
    }

    /**
     * Finds the entry corresponding to the deeper-level index block
     * containing the given deeper-level entry (a "sub-entry"), assuming a
     * global 0-based ordering of sub-entries.
     *
     * <p>
     * <i>Implementation note.</i> We are looking for i such that
     * numSubEntriesAt[i - 1] &lt;= k &lt; numSubEntriesAt[i], because a
     * deeper-level block #i (0-based) contains sub-entries numbered
     * numSubEntriesAt[i - 1] .. numSubEntriesAt[i] - 1. By definition i is
     * the insertion point of k in numSubEntriesAt.
     *
     * @param k sub-entry index, from 0 to the total number of sub-entries - 1
     * @return the 0-based index of the entry corresponding to the given
     *         sub-entry
     */
    public int getEntryBySubEntry(long k) {
      // We define the mid-key as the key corresponding to the k'th sub-entry
      // (0-based).

      int i = Collections.binarySearch(numSubEntriesAt, k);

      // Exact match: numSubEntriesAt[i] == k. This means entries #0 through
      // #i contain exactly k sub-entries, and sub-entry #k (0-based) is in
      // the (i + 1)'th entry.
      if (i >= 0)
        return i + 1;

      // Inexact match. Return the insertion point.
      return -i - 1;
    }

    /**
     * Used when writing the root block index of a multi-level block index.
     * Serializes additional information that allows the mid-key to be
     * located efficiently without loading the whole index.
     *
     * @return the serialized mid-key metadata fields
     * @throws IOException if the mid-key metadata could not be created
     */
    public byte[] getMidKeyMetadata() throws IOException {
      ByteArrayOutputStream baos = new ByteArrayOutputStream(
          MID_KEY_METADATA_SIZE);
      DataOutputStream baosDos = new DataOutputStream(baos);
      long totalNumSubEntries = numSubEntriesAt.get(blockKeys.size() - 1);
      if (totalNumSubEntries == 0) {
        throw new IOException("No leaf-level entries, mid-key unavailable");
      }
      long midKeySubEntry = (totalNumSubEntries - 1) / 2;
      int midKeyEntry = getEntryBySubEntry(midKeySubEntry);

      baosDos.writeLong(blockOffsets.get(midKeyEntry));
      baosDos.writeInt(onDiskDataSizes.get(midKeyEntry));

      long numSubEntriesBefore = midKeyEntry > 0
          ? numSubEntriesAt.get(midKeyEntry - 1) : 0;
      long subEntryWithinEntry = midKeySubEntry - numSubEntriesBefore;
      if (subEntryWithinEntry < 0 || subEntryWithinEntry > Integer.MAX_VALUE)
      {
        throw new IOException("Could not identify mid-key index within the "
            + "leaf-level block containing mid-key: out of range ("
            + subEntryWithinEntry + ", numSubEntriesBefore="
            + numSubEntriesBefore + ", midKeySubEntry=" + midKeySubEntry
            + ")");
      }

      baosDos.writeInt((int) subEntryWithinEntry);

      if (baosDos.size() != MID_KEY_METADATA_SIZE) {
        throw new IOException("Could not write mid-key metadata: size=" +
            baosDos.size() + ", correct size: " + MID_KEY_METADATA_SIZE);
      }

      // Close just to be a good citizen; this has no effect on a
      // ByteArrayOutputStream.
      baos.close();

      return baos.toByteArray();
    }
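
    /*
     * Worked example for the mid-key metadata above, with hypothetical
     * numbers: suppose the index has three leaf blocks with cumulative
     * sub-entry counts numSubEntriesAt = [40, 80, 100]. Then
     * totalNumSubEntries = 100 and midKeySubEntry = (100 - 1) / 2 = 49,
     * which falls in leaf block #1 (sub-entries 40..79). So midKeyEntry = 1,
     * and the index written within that block is 49 - 40 = 9.
     */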

    /**
     * Writes this chunk in the non-root index block format. This format
     * contains the number of entries, a secondary index of integer offsets
     * enabling binary search over variable-length records, and a tuple of
     * (block offset, on-disk block size, first key) for each entry.
     *
     * @throws IOException
     */
    void writeNonRoot(DataOutput out) throws IOException {
      // The number of entries in the block.
      out.writeInt(blockKeys.size());

      if (secondaryIndexOffsetMarks.size() != blockKeys.size()) {
        throw new IOException("Corrupted block index chunk writer: " +
            blockKeys.size() + " entries but " +
            secondaryIndexOffsetMarks.size() + " secondary index items");
      }

      // For each entry, write a "secondary index" of offsets relative to the
      // end of the secondary index. This works because at read time we read
      // the number of entries and therefore know where the secondary index
      // ends.
      for (int currentSecondaryIndex : secondaryIndexOffsetMarks)
        out.writeInt(currentSecondaryIndex);

      // We include one extra element in the secondary index so that the size
      // of each entry can be computed by subtracting consecutive secondary
      // index elements.
      out.writeInt(curTotalNonRootEntrySize);

      for (int i = 0; i < blockKeys.size(); ++i) {
        out.writeLong(blockOffsets.get(i));
        out.writeInt(onDiskDataSizes.get(i));
        out.write(blockKeys.get(i));
      }
    }
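
    /*
     * Resulting non-root block layout, as read back by
     * binarySearchNonRootIndex and getNonRootIndexedKey (n = numEntries):
     *
     *   int: n
     *   int x (n + 1): secondary index; entry i's offset relative to the end
     *                  of this index, plus a final total-size element
     *   n times: { long: block offset, int: on-disk size, raw key bytes }
     *
     * Keys here carry no length prefix; an entry's key length is recovered
     * by subtracting consecutive secondary index values and the 12-byte
     * (offset + size) overhead.
     */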

    /**
     * @return the size of this chunk if stored in the non-root index block
     *         format
     */
    int getNonRootSize() {
      return Bytes.SIZEOF_INT                          // Number of entries
          + Bytes.SIZEOF_INT * (blockKeys.size() + 1)  // Secondary index
          + curTotalNonRootEntrySize;                  // All entries
    }

    /**
     * Writes this chunk in the root index block format: for each entry, the
     * block offset (long), the on-disk block size (int), and the
     * vint-prefixed first key.
     *
     * @param out the data output stream to write the block index to,
     *          typically a stream writing into an {@link HFile} block
     * @throws IOException
     */
    void writeRoot(DataOutput out) throws IOException {
      for (int i = 0; i < blockKeys.size(); ++i) {
        out.writeLong(blockOffsets.get(i));
        out.writeInt(onDiskDataSizes.get(i));
        Bytes.writeByteArray(out, blockKeys.get(i));
      }
    }

    /**
     * @return the size of this chunk if stored in the root index block format
     */
    int getRootSize() {
      return curTotalRootSize;
    }

    /**
     * @return the number of entries in this block index chunk
     */
    public int getNumEntries() {
      return blockKeys.size();
    }

    public byte[] getBlockKey(int i) {
      return blockKeys.get(i);
    }

    public long getBlockOffset(int i) {
      return blockOffsets.get(i);
    }

    public int getOnDiskDataSize(int i) {
      return onDiskDataSizes.get(i);
    }

    public long getCumulativeNumKV(int i) {
      if (i < 0)
        return 0;
      return numSubEntriesAt.get(i);
    }

  }

  /**
   * Returns the maximum index block (chunk) size from the configuration,
   * falling back to {@link #DEFAULT_MAX_CHUNK_SIZE} (128 KB).
   */
  public static int getMaxChunkSize(Configuration conf) {
    return conf.getInt(MAX_CHUNK_SIZE_KEY, DEFAULT_MAX_CHUNK_SIZE);
  }
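
  /*
   * Configuration sketch: to shrink index chunks to 64 KB, a client could
   * set the key before opening a writer (values are illustrative):
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, 64 * 1024);
   *   // ... index writers then pick it up via:
   *   int maxChunkSize = HFileBlockIndex.getMaxChunkSize(conf);
   */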
}