/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KeyComparator;
import org.apache.hadoop.hbase.io.HbaseMapWritable;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.util.BloomFilter;
import org.apache.hadoop.hbase.util.ByteBloomFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.CompressionTest;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;

/**
 * File format for hbase.
 * A file of sorted key/value pairs. Both keys and values are byte arrays.
 * <p>
 * The memory footprint of an HFile includes the following (below is taken from the
 * <a
 * href="https://issues.apache.org/jira/browse/HADOOP-3315">TFile</a> documentation
 * but applies also to HFile):
 * <ul>
 * <li>Some constant overhead of reading or writing a compressed block.
 * <ul>
 * <li>Each compressed block requires one compression/decompression codec for
 * I/O.
 * <li>Temporary space to buffer the key.
 * <li>Temporary space to buffer the value.
 * </ul>
 * <li>HFile index, which is proportional to the total number of Data Blocks.
 * The total amount of memory needed to hold the index can be estimated as
 * (56+AvgKeySize)*NumBlocks.
 * </ul>
 * Suggestions on performance optimization.
 * <ul>
 * <li>Minimum block size. We recommend a setting of minimum block size between
 * 8KB and 1MB for general usage. A larger block size is preferred if files are
 * primarily for sequential access. However, it leads to inefficient random
 * access (because there is more data to decompress). Smaller blocks are good
 * for random access, but require more memory to hold the block index, and may
 * be slower to create (because we must flush the compressor stream at the
 * conclusion of each data block, which leads to an FS I/O flush). Further, due
 * to the internal caching in the compression codec, the smallest possible block
 * size is around 20KB-30KB.
 * <li>The current implementation does not offer true multi-threading for
 * reading. The implementation uses FSDataInputStream seek()+read(), which is
 * shown to be much faster than the positioned-read call in single-threaded mode.
 * However, it also means that if multiple threads attempt to access the same
 * HFile (using multiple scanners) simultaneously, the actual I/O is carried out
 * sequentially even if they access different DFS blocks (Reexamine! pread seems
 * to be 10% faster than seek+read in my testing -- stack).
 * <li>Compression codec. Use "none" if the data is not very compressible (by
 * compressible, I mean a compression ratio of at least 2:1). Generally, use "lzo"
 * as the starting point for experimenting. "gz" offers a slightly better
 * compression ratio than "lzo" but requires 4x the CPU to compress and 2x the
 * CPU to decompress.
 * </ul>
 *
 * For more on the background behind HFile, see <a
 * href="https://issues.apache.org/jira/browse/HBASE-61">HBASE-61</a>.
 * <p>
 * The file is made of data blocks followed by meta data blocks (if any), a
 * fileinfo block, a data block index, a meta data block index, and a fixed size
 * trailer which records the offsets at which the file changes content type.
 * <pre>&lt;data blocks>&lt;meta blocks>&lt;fileinfo>&lt;data index>&lt;meta index>&lt;trailer></pre>
 * Each block has a bit of magic at its start.  Blocks are made up of
 * key/values.  In data blocks, both are byte arrays.  In metadata blocks, the
 * key is a String and the value is a byte array.  An empty file looks like this:
 * <pre>&lt;fileinfo>&lt;trailer></pre>.  That is, neither data nor meta
 * blocks are present.
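 * <p>
 * A minimal round-trip sketch (the filesystem setup, path and keys below are
 * illustrative assumptions, not part of this class's contract):
 * <pre>
 * FileSystem fs = FileSystem.get(new HBaseConfiguration());
 * Path p = new Path("/tmp/example.hfile");      // hypothetical path
 * HFile.Writer w = new HFile.Writer(fs, p);     // defaults: 64k blocks, no compression
 * w.append(Bytes.toBytes("key1"), Bytes.toBytes("value1"));
 * w.append(Bytes.toBytes("key2"), Bytes.toBytes("value2"));
 * w.close();
 * HFile.Reader r = new HFile.Reader(fs, p, null, false);
 * r.loadFileInfo();                             // must precede any reads
 * HFileScanner s = r.getScanner(false, false);
 * if (s.seekTo()) {                             // position at first key/value
 *   do {
 *     ByteBuffer key = s.getKey();
 *     ByteBuffer value = s.getValue();
 *   } while (s.next());
 * }
 * r.close();
 * </pre>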
 * <p>
 * TODO: Do scanners need to be able to take a start and end row?
 * TODO: Should BlockIndex know the name of its file?  Should it have a Path
 * that points at its file say for the case where an index lives apart from
 * an HFile instance?
 */
public class HFile {
  static final Log LOG = LogFactory.getLog(HFile.class);

  /* These values are more or less arbitrary, and they are used as a
   * form of check to make sure the file isn't completely corrupt.
   */
  final static byte [] DATABLOCKMAGIC =
    {'D', 'A', 'T', 'A', 'B', 'L', 'K', 42 };
  final static byte [] INDEXBLOCKMAGIC =
    { 'I', 'D', 'X', 'B', 'L', 'K', 41, 43 };
  final static byte [] METABLOCKMAGIC =
    { 'M', 'E', 'T', 'A', 'B', 'L', 'K', 99 };
  final static byte [] TRAILERBLOCKMAGIC =
    { 'T', 'R', 'A', 'B', 'L', 'K', 34, 36 };

  /**
   * Maximum length of key in HFile.
   */
  public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;

  /**
   * Default blocksize for hfile.
   */
  public final static int DEFAULT_BLOCKSIZE = 64 * 1024;

  /**
   * Default compression: none.
   */
  public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
    Compression.Algorithm.NONE;
  /** Default compression name: none. */
  public final static String DEFAULT_COMPRESSION =
    DEFAULT_COMPRESSION_ALGORITHM.getName();

  // For measuring latency of "typical" reads and writes
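  // Note: the getters below return the accumulated value and reset the
  // counter; the read-then-reset is not atomic, so under concurrent access
  // these figures are best-effort rather than exact.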
  private static volatile long readOps;
  private static volatile long readTime;
  private static volatile long writeOps;
  private static volatile long writeTime;

  public static final long getReadOps() {
    long ret = readOps;
    readOps = 0;
    return ret;
  }

  public static final long getReadTime() {
    long ret = readTime;
    readTime = 0;
    return ret;
  }

  public static final long getWriteOps() {
    long ret = writeOps;
    writeOps = 0;
    return ret;
  }

  public static final long getWriteTime() {
    long ret = writeTime;
    writeTime = 0;
    return ret;
  }

  /**
   * HFile Writer.
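   * <p>
   * Keys must be appended in an order that agrees with the comparator passed
   * on construction; appending a key that sorts before the previous one makes
   * {@link #append(byte[], byte[])} throw an IOException.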
   */
  public static class Writer implements Closeable {
    // FileSystem stream to write on.
    private FSDataOutputStream outputStream;
    // True if we opened the <code>outputStream</code> (and so will close it).
    private boolean closeOutputStream;

    // Name for this object used when logging or in toString.  Is either
    // the result of a toString on stream or else toString of passed file Path.
    protected String name;

    // Total uncompressed bytes, maybe calculate a compression ratio later.
    private long totalBytes = 0;

    // Total # of key/value entries, i.e. how many times append() was called.
    private int entryCount = 0;

    // Used for calculating the average key and value lengths.
    private long keylength = 0;
    private long valuelength = 0;

    // Used to ensure we write in order.
    private final RawComparator<byte []> comparator;

    // A stream made per block written.
    private DataOutputStream out;

    // Number of uncompressed bytes per block.  Reinitialized when we start
    // a new block.
    private int blocksize;

    // Offset where the current block began.
    private long blockBegin;

    // First key in a block (not the first key in the file).
    private byte [] firstKey = null;

    // Key previously appended.  Becomes the last key in the file.
    private byte [] lastKeyBuffer = null;
    private int lastKeyOffset = -1;
    private int lastKeyLength = -1;

    // See {@link BlockIndex}. The below three fields are used to write the
    // block index.
    ArrayList<byte[]> blockKeys = new ArrayList<byte[]>();
    // Block offset in backing stream.
    ArrayList<Long> blockOffsets = new ArrayList<Long>();
    // Raw (decompressed) data size.
    ArrayList<Integer> blockDataSizes = new ArrayList<Integer>();

    // Meta block system.
    private ArrayList<byte []> metaNames = new ArrayList<byte []>();
    private ArrayList<Writable> metaData = new ArrayList<Writable>();

    // Used compression.  Used even if no compression -- 'none'.
    private final Compression.Algorithm compressAlgo;
    private Compressor compressor;

    // Special data structure to hold fileinfo.
    private FileInfo fileinfo = new FileInfo();

    // May be null if we were passed a stream.
    private Path path = null;

    /**
     * Constructor that uses all defaults for compression and block size.
     * @param fs
     * @param path
     * @throws IOException
     */
    public Writer(FileSystem fs, Path path)
    throws IOException {
      this(fs, path, DEFAULT_BLOCKSIZE, (Compression.Algorithm) null, null);
    }

    /**
     * Constructor that takes a Path.
     * @param fs
     * @param path
     * @param blocksize
     * @param compress
     * @param comparator
     * @throws IOException
     */
    public Writer(FileSystem fs, Path path, int blocksize,
      String compress, final KeyComparator comparator)
    throws IOException {
      this(fs, path, blocksize,
        compress == null? DEFAULT_COMPRESSION_ALGORITHM:
          Compression.getCompressionAlgorithmByName(compress),
        comparator);
    }

    /**
     * Constructor that takes a Path.
     * @param fs
     * @param path
     * @param blocksize
     * @param compress
     * @param comparator
     * @throws IOException
     */
    public Writer(FileSystem fs, Path path, int blocksize,
      Compression.Algorithm compress,
      final KeyComparator comparator)
    throws IOException {
      this(fs.create(path), blocksize, compress, comparator);
      this.closeOutputStream = true;
      this.name = path.toString();
      this.path = path;
    }

    /**
     * Constructor that takes a stream.
     * @param ostream Stream to use.
     * @param blocksize
     * @param compress
     * @param c KeyComparator to use.
     * @throws IOException
     */
    public Writer(final FSDataOutputStream ostream, final int blocksize,
      final String compress, final KeyComparator c)
    throws IOException {
      this(ostream, blocksize,
        Compression.getCompressionAlgorithmByName(compress), c);
    }

    /**
     * Constructor that takes a stream.
     * @param ostream Stream to use.
     * @param blocksize
     * @param compress
     * @param c
     * @throws IOException
     */
    public Writer(final FSDataOutputStream ostream, final int blocksize,
      final Compression.Algorithm compress, final KeyComparator c)
    throws IOException {
      this.outputStream = ostream;
      this.closeOutputStream = false;
      this.blocksize = blocksize;
      this.comparator = c == null? Bytes.BYTES_RAWCOMPARATOR: c;
      this.name = this.outputStream.toString();
      this.compressAlgo = compress == null?
        DEFAULT_COMPRESSION_ALGORITHM: compress;
    }

    /*
     * If at a block boundary, opens a new block.
     * @throws IOException
     */
    private void checkBlockBoundary() throws IOException {
      if (this.out != null && this.out.size() < blocksize) return;
      finishBlock();
      newBlock();
    }

    /*
     * Do the cleanup if there is a current block.
     * @throws IOException
     */
    private void finishBlock() throws IOException {
      if (this.out == null) return;
      long now = System.currentTimeMillis();

      int size = releaseCompressingStream(this.out);
      this.out = null;
      blockKeys.add(firstKey);
      blockOffsets.add(Long.valueOf(blockBegin));
      blockDataSizes.add(Integer.valueOf(size));
      this.totalBytes += size;

      writeTime += System.currentTimeMillis() - now;
      writeOps++;
    }

    /*
     * Ready a new block for writing.
     * @throws IOException
     */
    private void newBlock() throws IOException {
      // This is where the next block begins.
      blockBegin = outputStream.getPos();
      this.out = getCompressingStream();
      this.out.write(DATABLOCKMAGIC);
      firstKey = null;
    }

    /*
     * Sets up a compressor and creates a compression stream on top of
     * this.outputStream.  Get one per block written.
     * @return A compressing stream; if 'none' compression, returned stream
     * does not compress.
     * @throws IOException
     * @see {@link #releaseCompressingStream(DataOutputStream)}
     */
    private DataOutputStream getCompressingStream() throws IOException {
      this.compressor = compressAlgo.getCompressor();
      // Get new DOS compression stream.  In tfile, the DOS is not closed,
      // just finished, and that seems to be fine over there.  TODO: Check
      // no memory retention of the DOS.  Should I disable the 'flush' on the
      // DOS as the BCFile over in tfile does?  It wants to make it so flushes
      // don't go through to the underlying compressed stream.  Flush on the
      // compressed downstream should be only when done.  I was going to, but
      // it looks like when we call flush in here, it's a legitimate flush that
      // should go through to the compressor.
      OutputStream os =
        this.compressAlgo.createCompressionStream(this.outputStream,
        this.compressor, 0);
      return new DataOutputStream(os);
    }

    /*
     * Let go of the block compressor and compressing stream obtained in a call
     * to {@link #getCompressingStream}.
     * @param dos
     * @return How much was written on this stream since it was taken out.
     * @see #getCompressingStream()
     * @throws IOException
     */
    private int releaseCompressingStream(final DataOutputStream dos)
    throws IOException {
      dos.flush();
      this.compressAlgo.returnCompressor(this.compressor);
      this.compressor = null;
      return dos.size();
    }

    /**
     * Add a meta block to the end of the file. Call before close().
     * Metadata blocks are expensive.  Fill one with a bunch of serialized data
     * rather than do a metadata block per metadata instance.  If metadata is
     * small, consider adding to file info using
     * {@link #appendFileInfo(byte[], byte[])}.
     * @param metaBlockName name of the block
     * @param content will call readFields to get data later (DO NOT REUSE)
     */
    public void appendMetaBlock(String metaBlockName, Writable content) {
      byte[] key = Bytes.toBytes(metaBlockName);
      int i;
      for (i = 0; i < metaNames.size(); ++i) {
        // stop when the current key is greater than our own
        byte[] cur = metaNames.get(i);
        if (Bytes.BYTES_RAWCOMPARATOR.compare(cur, 0, cur.length,
            key, 0, key.length) > 0) {
          break;
        }
      }
      metaNames.add(i, key);
      metaData.add(i, content);
    }

    /**
     * Add to the file info.  The added key/value can be retrieved from the map
     * returned by {@link Reader#loadFileInfo()}.
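     * <p>
     * For example (the key and value here are illustrative):
     * <pre>
     * writer.appendFileInfo(Bytes.toBytes("MY_KEY"), Bytes.toBytes("my value"));
     * </pre>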
     * @param k Key
     * @param v Value
     * @throws IOException
     */
    public void appendFileInfo(final byte [] k, final byte [] v)
    throws IOException {
      appendFileInfo(this.fileinfo, k, v, true);
    }

    static FileInfo appendFileInfo(FileInfo fi, final byte [] k, final byte [] v,
      final boolean checkPrefix)
    throws IOException {
      if (k == null || v == null) {
        throw new NullPointerException("Neither key nor value may be null");
      }
      if (checkPrefix &&
          Bytes.startsWith(k, FileInfo.RESERVED_PREFIX_BYTES)) {
        throw new IOException("Keys with a " + FileInfo.RESERVED_PREFIX +
          " prefix are reserved");
      }
      fi.put(k, v);
      return fi;
    }

    /**
     * @return Path or null if we were passed a stream rather than a Path.
     */
    public Path getPath() {
      return this.path;
    }

    @Override
    public String toString() {
      return "writer=" + this.name + ", compression=" +
        this.compressAlgo.getName();
    }

    /**
     * Add key/value to file.
     * Keys must be added in an order that agrees with the Comparator passed
     * on construction.
     * @param kv KeyValue to add.  Cannot be null.
     * @throws IOException
     */
    public void append(final KeyValue kv)
    throws IOException {
      append(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength(),
        kv.getBuffer(), kv.getValueOffset(), kv.getValueLength());
    }

    /**
     * Add key/value to file.
     * Keys must be added in an order that agrees with the Comparator passed
     * on construction.
     * @param key Key to add.  Cannot be null or empty.
     * @param value Value to add.  Cannot be null.
     * @throws IOException
     */
    public void append(final byte [] key, final byte [] value)
    throws IOException {
      append(key, 0, key.length, value, 0, value.length);
    }

    /**
     * Add key/value to file.
     * Keys must be added in an order that agrees with the Comparator passed
     * on construction.
     * @param key
     * @param koffset
     * @param klength
     * @param value
     * @param voffset
     * @param vlength
     * @throws IOException
     */
    private void append(final byte [] key, final int koffset, final int klength,
        final byte [] value, final int voffset, final int vlength)
    throws IOException {
      boolean dupKey = checkKey(key, koffset, klength);
      checkValue(value, voffset, vlength);
      if (!dupKey) {
        checkBlockBoundary();
      }
      // Write length of key and value and then actual key and value bytes.
      this.out.writeInt(klength);
      this.keylength += klength;
      this.out.writeInt(vlength);
      this.valuelength += vlength;
      this.out.write(key, koffset, klength);
      this.out.write(value, voffset, vlength);
      // Are we the first key in this block?
      if (this.firstKey == null) {
        // Copy the key.
        this.firstKey = new byte [klength];
        System.arraycopy(key, koffset, this.firstKey, 0, klength);
      }
      this.lastKeyBuffer = key;
      this.lastKeyOffset = koffset;
      this.lastKeyLength = klength;
      this.entryCount ++;
    }

    /*
     * @param key Key to check.
     * @return true if the key is a duplicate of the previously added key
     * @throws IOException
     */
    private boolean checkKey(final byte [] key, final int offset, final int length)
    throws IOException {
      boolean dupKey = false;

      if (key == null || length <= 0) {
        throw new IOException("Key cannot be null or empty");
      }
      if (length > MAXIMUM_KEY_LENGTH) {
        throw new IOException("Key length " + length + " > " +
          MAXIMUM_KEY_LENGTH);
      }
      if (this.lastKeyBuffer != null) {
        int keyComp = this.comparator.compare(this.lastKeyBuffer, this.lastKeyOffset,
            this.lastKeyLength, key, offset, length);
        if (keyComp > 0) {
          throw new IOException("Added a key not lexically larger than" +
            " previous. Current key=" + Bytes.toStringBinary(key, offset, length) +
            ", previous key=" + Bytes.toStringBinary(this.lastKeyBuffer, this.lastKeyOffset,
                this.lastKeyLength));
        } else if (keyComp == 0) {
          dupKey = true;
        }
      }
      return dupKey;
    }

    private void checkValue(final byte [] value, final int offset,
        final int length) throws IOException {
      if (value == null) {
        throw new IOException("Value cannot be null");
      }
    }

    public long getTotalBytes() {
      return this.totalBytes;
    }

    public void close() throws IOException {
      if (this.outputStream == null) {
        return;
      }
      // Write out the end of the data blocks, then write the meta data blocks,
      // followed by fileinfo, the data block index and the meta block index.

      finishBlock();

      FixedFileTrailer trailer = new FixedFileTrailer();

      // Write out the metadata blocks if any.
      ArrayList<Long> metaOffsets = null;
      ArrayList<Integer> metaDataSizes = null;
      if (metaNames.size() > 0) {
        metaOffsets = new ArrayList<Long>(metaNames.size());
        metaDataSizes = new ArrayList<Integer>(metaNames.size());
        for (int i = 0; i < metaNames.size(); ++i) {
          // store the beginning offset
          long curPos = outputStream.getPos();
          metaOffsets.add(curPos);
          // write the metadata content
          DataOutputStream dos = getCompressingStream();
          dos.write(METABLOCKMAGIC);
          metaData.get(i).write(dos);
          int size = releaseCompressingStream(dos);
          // store the metadata size
          metaDataSizes.add(size);
        }
      }

      // Write fileinfo.
      trailer.fileinfoOffset = writeFileInfo(this.outputStream);

      // Write the data block index.
      trailer.dataIndexOffset = BlockIndex.writeIndex(this.outputStream,
        this.blockKeys, this.blockOffsets, this.blockDataSizes);

      // Meta block index.
      if (metaNames.size() > 0) {
        trailer.metaIndexOffset = BlockIndex.writeIndex(this.outputStream,
          this.metaNames, metaOffsets, metaDataSizes);
      }

      // Now finish off the trailer.
      trailer.dataIndexCount = blockKeys.size();
      trailer.metaIndexCount = metaNames.size();

      trailer.totalUncompressedBytes = totalBytes;
      trailer.entryCount = entryCount;

      trailer.compressionCodec = this.compressAlgo.ordinal();

      trailer.serialize(outputStream);

      if (this.closeOutputStream) {
        this.outputStream.close();
        this.outputStream = null;
      }
    }

    /*
     * Add last bits of metadata to fileinfo and then write it out.
     * Reader will be expecting to find all below.
     * @param o Stream to write on.
     * @return Position at which we started writing.
     * @throws IOException
     */
    private long writeFileInfo(FSDataOutputStream o) throws IOException {
      if (this.lastKeyBuffer != null) {
        // Make a copy.  The copy is stuffed into the HbaseMapWritable.  Needs
        // a clean byte buffer.  Won't take a tuple.
        byte [] b = new byte[this.lastKeyLength];
        System.arraycopy(this.lastKeyBuffer, this.lastKeyOffset, b, 0,
          this.lastKeyLength);
        appendFileInfo(this.fileinfo, FileInfo.LASTKEY, b, false);
      }
      int avgKeyLen = this.entryCount == 0? 0:
        (int)(this.keylength/this.entryCount);
      appendFileInfo(this.fileinfo, FileInfo.AVG_KEY_LEN,
        Bytes.toBytes(avgKeyLen), false);
      int avgValueLen = this.entryCount == 0? 0:
        (int)(this.valuelength/this.entryCount);
      appendFileInfo(this.fileinfo, FileInfo.AVG_VALUE_LEN,
        Bytes.toBytes(avgValueLen), false);
      appendFileInfo(this.fileinfo, FileInfo.COMPARATOR,
        Bytes.toBytes(this.comparator.getClass().getName()), false);
      long pos = o.getPos();
      this.fileinfo.write(o);
      return pos;
    }
  }

  /**
   * HFile Reader.
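   * <p>
   * After construction, {@link #loadFileInfo()} must be called before the
   * reader can be used; see the usage sketch in the class comment above.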
   */
  public static class Reader implements Closeable {
    // Stream to read from.
    private FSDataInputStream istream;
    // True if we should close istream when done.  We don't close it if we
    // didn't open it.
    private boolean closeIStream;

    // These are read in when the file info is loaded.
    HFile.BlockIndex blockIndex;
    private BlockIndex metaIndex;
    FixedFileTrailer trailer;
    private volatile boolean fileInfoLoaded = false;

    // Filled when we read in the trailer.
    private Compression.Algorithm compressAlgo;

    // Last key in the file.  Filled in when we read in the file info.
    private byte [] lastkey = null;
    // Stats read in when we load file info.
    private int avgKeyLen = -1;
    private int avgValueLen = -1;

    // Used to ensure we seek correctly.
    RawComparator<byte []> comparator;

    // Size of this file.
    private final long fileSize;

    // Block cache to use.
    private final BlockCache cache;
    public int cacheHits = 0;
    public int blockLoads = 0;
    public int metaLoads = 0;

    // Whether the file is from an in-memory store.
    private boolean inMemory = false;

    // Name for this object used when logging or in toString.  Is either
    // the result of a toString on the stream or else is toString of passed
    // file Path plus metadata key/value pairs.
    protected String name;

    /**
     * Opens an HFile.  You must load the file info before you can
     * use it by calling {@link #loadFileInfo()}.
     *
     * @param fs filesystem to load from
     * @param path path within said filesystem
     * @param cache block cache. Pass null if none.
     * @param inMemory whether the file is from an in-memory store
     * @throws IOException
     */
    public Reader(FileSystem fs, Path path, BlockCache cache, boolean inMemory)
    throws IOException {
      this(fs.open(path), fs.getFileStatus(path).getLen(), cache, inMemory);
      this.closeIStream = true;
      this.name = path.toString();
    }

    /**
     * Opens an HFile.  You must load the index before you can
     * use it by calling {@link #loadFileInfo()}.
     *
     * @param fsdis input stream.  Caller is responsible for closing the passed
     * stream.
     * @param size Length of the stream.
     * @param cache block cache. Pass null if none.
     * @param inMemory whether the file is from an in-memory store
     */
    public Reader(final FSDataInputStream fsdis, final long size,
        final BlockCache cache, final boolean inMemory) {
      this.cache = cache;
      this.fileSize = size;
      this.istream = fsdis;
      this.closeIStream = false;
      this.name = this.istream == null? "": this.istream.toString();
      this.inMemory = inMemory;
    }

    @Override
    public String toString() {
      return "reader=" + this.name +
          (!isFileInfoLoaded()? "":
            ", compression=" + this.compressAlgo.getName() +
            ", inMemory=" + this.inMemory +
            ", firstKey=" + toStringFirstKey() +
            ", lastKey=" + toStringLastKey() +
            ", avgKeyLen=" + this.avgKeyLen +
            ", avgValueLen=" + this.avgValueLen +
            ", entries=" + this.trailer.entryCount) +
          ", length=" + this.fileSize;
    }

    protected String toStringFirstKey() {
      return KeyValue.keyToString(getFirstKey());
    }

    protected String toStringLastKey() {
      return KeyValue.keyToString(getLastKey());
    }

    public long length() {
      return this.fileSize;
    }

    public boolean inMemory() {
      return this.inMemory;
    }

    private byte[] readAllIndex(final FSDataInputStream in, final long indexOffset,
        final int indexSize) throws IOException {
      byte[] allIndex = new byte[indexSize];
      in.seek(indexOffset);
      IOUtils.readFully(in, allIndex, 0, allIndex.length);
      return allIndex;
    }

    /**
     * Read in the index and file info.
     * @return A map of fileinfo data.
     * See {@link Writer#appendFileInfo(byte[], byte[])}.
     * @throws IOException
     */
    public Map<byte [], byte []> loadFileInfo()
    throws IOException {
      this.trailer = readTrailer();

      // Read in the fileinfo and get what we need from it.
      this.istream.seek(this.trailer.fileinfoOffset);
      FileInfo fi = new FileInfo();
      fi.readFields(this.istream);
      this.lastkey = fi.get(FileInfo.LASTKEY);
      this.avgKeyLen = Bytes.toInt(fi.get(FileInfo.AVG_KEY_LEN));
      this.avgValueLen = Bytes.toInt(fi.get(FileInfo.AVG_VALUE_LEN));
      String clazzName = Bytes.toString(fi.get(FileInfo.COMPARATOR));
      this.comparator = getComparator(clazzName);

      int allIndexSize = (int)(this.fileSize - this.trailer.dataIndexOffset -
        FixedFileTrailer.trailerSize());
      byte[] dataAndMetaIndex = readAllIndex(this.istream,
        this.trailer.dataIndexOffset, allIndexSize);

      ByteArrayInputStream bis = new ByteArrayInputStream(dataAndMetaIndex);
      DataInputStream dis = new DataInputStream(bis);

      // Read in the data index.
      this.blockIndex =
          BlockIndex.readIndex(this.comparator, dis, this.trailer.dataIndexCount);

      // Read in the metadata index.
      if (trailer.metaIndexCount > 0) {
        this.metaIndex = BlockIndex.readIndex(Bytes.BYTES_RAWCOMPARATOR, dis,
            this.trailer.metaIndexCount);
      }
      this.fileInfoLoaded = true;

      dis.close();

      return fi;
    }

    boolean isFileInfoLoaded() {
      return this.fileInfoLoaded;
    }

    @SuppressWarnings("unchecked")
    private RawComparator<byte []> getComparator(final String clazzName)
    throws IOException {
      if (clazzName == null || clazzName.length() == 0) {
        return null;
      }
      try {
        return (RawComparator<byte []>)Class.forName(clazzName).newInstance();
      } catch (InstantiationException e) {
        throw new IOException(e);
      } catch (IllegalAccessException e) {
        throw new IOException(e);
      } catch (ClassNotFoundException e) {
        throw new IOException(e);
      }
    }

    /* Read the trailer off the input stream.  As a side effect, sets the
     * compression algorithm.
     * @return Populated FixedFileTrailer.
     * @throws IOException
     */
    private FixedFileTrailer readTrailer() throws IOException {
      FixedFileTrailer fft = new FixedFileTrailer();
      long seekPoint = this.fileSize - FixedFileTrailer.trailerSize();
      this.istream.seek(seekPoint);
      fft.deserialize(this.istream);
      // Set up the codec.
      this.compressAlgo =
        Compression.Algorithm.values()[fft.compressionCodec];

      CompressionTest.testCompression(this.compressAlgo);

      return fft;
    }

    /**
     * Create a Scanner on this file.  No seeks or reads are done on creation.
     * Call {@link HFileScanner#seekTo(byte[])} to position and start the read.
     * There is nothing to clean up in a Scanner. Letting go of your references
     * to the scanner is sufficient.
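     * <p>
     * A sketch of typical use ({@code someKey} stands in for a caller-supplied
     * serialized key):
     * <pre>
     * HFileScanner scanner = reader.getScanner(false, true);
     * int where = scanner.seekTo(someKey); // -1: before file; 0: exact; 1: at nearest lesser key
     * if (where != -1) {
     *   do {
     *     ByteBuffer key = scanner.getKey();
     *     ByteBuffer value = scanner.getValue();
     *   } while (scanner.next());
     * }
     * </pre>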
     * @param cacheBlocks True if we should cache blocks read in by this scanner.
     * @param pread Use positional read rather than seek+read if true (pread is
     * better for random reads, seek+read is better for scanning).
     * @return Scanner on this file.
     */
    public HFileScanner getScanner(boolean cacheBlocks, final boolean pread) {
      return new Scanner(this, cacheBlocks, pread);
    }

    /**
     * @param key Key to search.
     * @return Block number of the block containing the key or -1 if not in this
     * file.
     */
    protected int blockContainingKey(final byte [] key, int offset, int length) {
      if (blockIndex == null) {
        throw new RuntimeException("Block index not loaded");
      }
      return blockIndex.blockContainingKey(key, offset, length);
    }

    /**
     * @param metaBlockName
     * @param cacheBlock Add block to cache, if found
     * @return Block wrapped in a ByteBuffer
     * @throws IOException
     */
    public ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock)
    throws IOException {
      if (trailer.metaIndexCount == 0) {
        return null; // there are no meta blocks
      }
      if (metaIndex == null) {
        throw new IOException("Meta index not loaded");
      }

      byte [] mbname = Bytes.toBytes(metaBlockName);
      int block = metaIndex.blockContainingKey(mbname, 0, mbname.length);
      if (block == -1) {
        return null;
      }
      long blockSize;
      if (block == metaIndex.count - 1) {
        blockSize = trailer.fileinfoOffset - metaIndex.blockOffsets[block];
      } else {
        blockSize = metaIndex.blockOffsets[block+1] - metaIndex.blockOffsets[block];
      }

      long now = System.currentTimeMillis();

      // Per meta key from any given file, synchronize reads for said block
      synchronized (metaIndex.blockKeys[block]) {
        metaLoads++;
        // Check cache for block.  If found return.
        if (cache != null) {
          ByteBuffer cachedBuf = cache.getBlock(name + "meta" + block,
              cacheBlock);
          if (cachedBuf != null) {
            // Return a distinct 'shallow copy' of the block,
            // so the position doesn't get disturbed by the scanner.
            cacheHits++;
            return cachedBuf.duplicate();
          }
          // Cache miss, please load.
        }

        ByteBuffer buf = decompress(metaIndex.blockOffsets[block],
          longToInt(blockSize), metaIndex.blockDataSizes[block], true);
        byte [] magic = new byte[METABLOCKMAGIC.length];
        buf.get(magic, 0, magic.length);

        if (!Arrays.equals(magic, METABLOCKMAGIC)) {
          throw new IOException("Meta magic is bad in block " + block);
        }

        // Create a new ByteBuffer 'shallow copy' to hide the magic header.
        buf = buf.slice();

        readTime += System.currentTimeMillis() - now;
        readOps++;

        // Cache the block
        if (cacheBlock && cache != null) {
          cache.cacheBlock(name + "meta" + block, buf.duplicate(), inMemory);
        }

        return buf;
      }
    }

    /**
     * Read in a file block.
     * @param block Index of block to read.
     * @param pread Use positional read instead of seek+read (positional is
     * better for doing random reads whereas seek+read is better for scanning).
     * @return Block wrapped in a ByteBuffer.
     * @throws IOException
     */
    ByteBuffer readBlock(int block, boolean cacheBlock, final boolean pread)
    throws IOException {
      if (blockIndex == null) {
        throw new IOException("Block index not loaded");
      }
      if (block < 0 || block >= blockIndex.count) {
        throw new IOException("Requested block is out of range: " + block +
          ", max: " + blockIndex.count);
      }
      // For any given block from any given file, synchronize reads for said
      // block.  Without a cache, this synchronizing is needless overhead, but
      // really the other choice is to duplicate work (which the cache would
      // prevent you from doing).
      synchronized (blockIndex.blockKeys[block]) {
        blockLoads++;
        // Check cache for block.  If found return.
        if (cache != null) {
          ByteBuffer cachedBuf = cache.getBlock(name + block, cacheBlock);
          if (cachedBuf != null) {
            // Return a distinct 'shallow copy' of the block,
            // so the position doesn't get disturbed by the scanner.
            cacheHits++;
            return cachedBuf.duplicate();
          }
          // Carry on, please load.
        }

        // Load block from filesystem.
        long now = System.currentTimeMillis();
        long onDiskBlockSize;
        if (block == blockIndex.count - 1) {
          // Last block!  The end of the data blocks is the first meta block
          // if there is one, or else the fileinfo offset.
          long offset = this.metaIndex != null?
            this.metaIndex.blockOffsets[0]: this.trailer.fileinfoOffset;
          onDiskBlockSize = offset - blockIndex.blockOffsets[block];
        } else {
          onDiskBlockSize = blockIndex.blockOffsets[block+1] -
            blockIndex.blockOffsets[block];
        }
        ByteBuffer buf = decompress(blockIndex.blockOffsets[block],
          longToInt(onDiskBlockSize), this.blockIndex.blockDataSizes[block],
          pread);

        byte [] magic = new byte[DATABLOCKMAGIC.length];
        buf.get(magic, 0, magic.length);
        if (!Arrays.equals(magic, DATABLOCKMAGIC)) {
          throw new IOException("Data magic is bad in block " + block);
        }

        // 'shallow copy' to hide the header.
        // NOTE: you WILL GET BIT if you call buf.array() but don't start
        //       reading at buf.arrayOffset()
        buf = buf.slice();

        readTime += System.currentTimeMillis() - now;
        readOps++;

        // Cache the block
        if (cacheBlock && cache != null) {
          cache.cacheBlock(name + block, buf.duplicate(), inMemory);
        }

        return buf;
      }
    }

    /*
     * Decompress <code>compressedSize</code> bytes off the backing
     * FSDataInputStream.
     * @param offset
     * @param compressedSize
     * @param decompressedSize
     *
     * @return The decompressed block, including its magic header.
     * @throws IOException
     */
    private ByteBuffer decompress(final long offset, final int compressedSize,
      final int decompressedSize, final boolean pread)
    throws IOException {
      Decompressor decompressor = null;
      ByteBuffer buf = null;
      try {
        decompressor = this.compressAlgo.getDecompressor();
        // My guess is that the bounded range fis is needed to stop the
        // decompressor reading into the next block -- IIRC, it just grabs a
        // bunch of data w/o regard to whether the decompressor is coming to
        // the end of a decompression.

        // We use a buffer of DEFAULT_BLOCKSIZE size.  This might be extreme.
        // Could maybe do with less.  Study and figure it out: TODO
        InputStream is = this.compressAlgo.createDecompressionStream(
            new BufferedInputStream(
                new BoundedRangeFileInputStream(this.istream, offset, compressedSize,
                                                pread),
                Math.min(DEFAULT_BLOCKSIZE, compressedSize)),
            decompressor, 0);
        buf = ByteBuffer.allocate(decompressedSize);
        IOUtils.readFully(is, buf.array(), 0, buf.capacity());
        is.close();
      } finally {
        if (null != decompressor) {
          this.compressAlgo.returnDecompressor(decompressor);
        }
      }
      return buf;
    }

    /**
     * @return First key in the file.  May be null if file has no entries.
     * Note that this is not the first rowkey, but rather the byte form of
     * the first KeyValue.
     */
    public byte [] getFirstKey() {
      if (blockIndex == null) {
        throw new RuntimeException("Block index not loaded");
      }
      return this.blockIndex.isEmpty()? null: this.blockIndex.blockKeys[0];
    }

    /**
     * @return the first row key, or null if the file is empty.
     * TODO move this to StoreFile after Ryan's patch goes in
     * to eliminate KeyValue here
     */
    public byte[] getFirstRowKey() {
      byte[] firstKey = getFirstKey();
      if (firstKey == null) return null;
      return KeyValue.createKeyValueFromKey(firstKey).getRow();
    }

    /**
     * @return number of KV entries in this HFile
     */
    public int getEntries() {
      if (!this.isFileInfoLoaded()) {
        throw new RuntimeException("File info not loaded");
      }
      return this.trailer.entryCount;
    }

    /**
     * @return Last key in the file.  May be null if file has no entries.
     * Note that this is not the last rowkey, but rather the byte form of
     * the last KeyValue.
     */
    public byte [] getLastKey() {
      if (!isFileInfoLoaded()) {
        throw new RuntimeException("Load file info first");
      }
      return this.blockIndex.isEmpty()? null: this.lastkey;
    }

    /**
     * @return the last row key, or null if the file is empty.
     * TODO move this to StoreFile after Ryan's patch goes in
     * to eliminate KeyValue here
     */
    public byte[] getLastRowKey() {
      byte[] lastKey = getLastKey();
      if (lastKey == null) return null;
      return KeyValue.createKeyValueFromKey(lastKey).getRow();
    }

    /**
     * @return number of keys in this HFile's filter.  Returns the KV count
     * if there is no filter.
     */
    public int getFilterEntries() {
      return getEntries();
    }

    /**
     * @return Comparator.
     */
    public RawComparator<byte []> getComparator() {
      return this.comparator;
    }

    /**
     * @return index size
     */
    public long indexSize() {
      return (this.blockIndex != null? this.blockIndex.heapSize(): 0) +
        ((this.metaIndex != null)? this.metaIndex.heapSize(): 0);
    }

    /**
     * @return Midkey for this file.  We work with block boundaries only so
     * returned midkey is an approximation only.
     * @throws IOException
     */
    public byte [] midkey() throws IOException {
      if (!isFileInfoLoaded() || this.blockIndex.isEmpty()) {
        return null;
      }
      return this.blockIndex.midkey();
    }

    public void close() throws IOException {
      if (this.closeIStream && this.istream != null) {
        this.istream.close();
        this.istream = null;
      }
    }

    public String getName() {
      return name;
    }

    /*
     * Implementation of {@link HFileScanner} interface.
     */
    protected static class Scanner implements HFileScanner {
      private final Reader reader;
      private ByteBuffer block;
      private int currBlock;

      private final boolean cacheBlocks;
      private final boolean pread;

      private int currKeyLen = 0;
      private int currValueLen = 0;

      public int blockFetches = 0;

      public Scanner(Reader r, boolean cacheBlocks, final boolean pread) {
        this.reader = r;
        this.cacheBlocks = cacheBlocks;
        this.pread = pread;
      }

      public KeyValue getKeyValue() {
        if (this.block == null) {
          return null;
        }
        // Go back 8 bytes to include the key and value length ints that
        // precede the current position.
        return new KeyValue(this.block.array(),
            this.block.arrayOffset() + this.block.position() - 8,
            this.currKeyLen + this.currValueLen + 8);
      }

      public ByteBuffer getKey() {
        if (this.block == null || this.currKeyLen == 0) {
          throw new RuntimeException("you need to seekTo() before calling getKey()");
        }
        ByteBuffer keyBuff = this.block.slice();
        keyBuff.limit(this.currKeyLen);
        keyBuff.rewind();
        // Do keyBuff.asReadOnlyBuffer()?
        return keyBuff;
      }

      public ByteBuffer getValue() {
        if (block == null || currKeyLen == 0) {
          throw new RuntimeException("you need to seekTo() before calling getValue()");
        }
        // TODO: Could this be done with one ByteBuffer rather than create two?
        ByteBuffer valueBuff = this.block.slice();
        valueBuff.position(this.currKeyLen);
        valueBuff = valueBuff.slice();
        valueBuff.limit(currValueLen);
        valueBuff.rewind();
        return valueBuff;
      }

      public boolean next() throws IOException {
        // LOG.debug("rem:" + block.remaining() + " p:" + block.position() +
        // " kl: " + currKeyLen + " kv: " + currValueLen);
        if (block == null) {
          throw new IOException("Next called on non-seeked scanner");
        }
        block.position(block.position() + currKeyLen + currValueLen);
        if (block.remaining() <= 0) {
          // LOG.debug("Fetch next block");
          currBlock++;
          if (currBlock >= reader.blockIndex.count) {
            // damn we are at the end
            currBlock = 0;
            block = null;
            return false;
          }
          block = reader.readBlock(this.currBlock, this.cacheBlocks, this.pread);
          currKeyLen = Bytes.toInt(block.array(), block.arrayOffset()+block.position(), 4);
          currValueLen = Bytes.toInt(block.array(), block.arrayOffset()+block.position()+4, 4);
          block.position(block.position()+8);
          blockFetches++;
          return true;
        }
        // LOG.debug("rem:" + block.remaining() + " p:" + block.position() +
        // " kl: " + currKeyLen + " kv: " + currValueLen);
        currKeyLen = Bytes.toInt(block.array(), block.arrayOffset()+block.position(), 4);
        currValueLen = Bytes.toInt(block.array(), block.arrayOffset()+block.position()+4, 4);
        block.position(block.position()+8);
        return true;
      }

      public int seekTo(byte [] key) throws IOException {
        return seekTo(key, 0, key.length);
      }

      public int seekTo(byte[] key, int offset, int length) throws IOException {
        int b = reader.blockContainingKey(key, offset, length);
        if (b < 0) return -1; // falls before the beginning of the file! :-(
        // Avoid re-reading the same block (that'd be dumb).
        loadBlock(b, true);
        return blockSeek(key, offset, length, false);
      }

      public int reseekTo(byte [] key) throws IOException {
        return reseekTo(key, 0, key.length);
      }

      public int reseekTo(byte[] key, int offset, int length)
        throws IOException {

        if (this.block != null && this.currKeyLen != 0) {
          ByteBuffer bb = getKey();
          int compared = this.reader.comparator.compare(key, offset, length,
              bb.array(), bb.arrayOffset(), bb.limit());
          if (compared < 1) {
            // If the required key is less than or equal to the current key,
            // then don't do anything.
            return compared;
          }
        }

        int b = reader.blockContainingKey(key, offset, length);
        if (b < 0) {
          return -1;
        }
        loadBlock(b, false);
        return blockSeek(key, offset, length, false);
      }

      /**
       * Within a loaded block, seek to the given key, positioning at the
       * last key that is smaller than or equal to the key we are
       * interested in.
       *
       * A note on seekBefore: if you have seekBefore = true, AND the
       * first key in the block = key, then you'll get thrown exceptions.
       * @param key to find
       * @param seekBefore find the key immediately before the exact match.
       * @return 0 on an exact match, 1 if positioned at the nearest
       * preceding key (a non-exact match).
       */
      private int blockSeek(byte[] key, int offset, int length, boolean seekBefore) {
        int klen, vlen;
        int lastLen = 0;
        do {
          klen = block.getInt();
          vlen = block.getInt();
          int comp = this.reader.comparator.compare(key, offset, length,
            block.array(), block.arrayOffset() + block.position(), klen);
          if (comp == 0) {
            if (seekBefore) {
              // Go back over the previous entry's data (lastLen) plus two
              // 8-byte length headers -- this entry's and the previous one's.
              block.position(block.position() - lastLen - 16);
              currKeyLen = block.getInt();
              currValueLen = block.getInt();
              return 1; // non exact match.
            }
            currKeyLen = klen;
            currValueLen = vlen;
            return 0; // indicate exact match
          }
          if (comp < 0) {
            // go back one key:
            block.position(block.position() - lastLen - 16);
            currKeyLen = block.getInt();
            currValueLen = block.getInt();
            return 1;
          }
          block.position(block.position() + klen + vlen);
          lastLen = klen + vlen;
        } while (block.remaining() > 0);
        // Ok, we are at the end, so go back a little...
        // The 8 below is intentionally different from the 16s above: here the
        // position sits after the last entry's data, not after a length header,
        // so only one 8-byte header needs to be backed over.
        block.position(block.position() - lastLen - 8);
        currKeyLen = block.getInt();
        currValueLen = block.getInt();
        return 1; // didn't exactly find it.
      }

      public boolean seekBefore(byte [] key) throws IOException {
        return seekBefore(key, 0, key.length);
      }

      public boolean seekBefore(byte[] key, int offset, int length)
      throws IOException {
        int b = reader.blockContainingKey(key, offset, length);
        if (b < 0) {
          return false; // key is before the start of the file.
        }

        // Question: does this block begin with 'key'?
        if (this.reader.comparator.compare(reader.blockIndex.blockKeys[b],
            0, reader.blockIndex.blockKeys[b].length,
            key, offset, length) == 0) {
          // Ok, the key we're interested in is the first of the block, so go
          // back one block.
          if (b == 0) {
            // we have a 'problem', the key we want is the first of the file.
            return false;
          }
          b--;
          // TODO shortcut: seek forward in this block to the last key of the block.
        }
        loadBlock(b, true);
        blockSeek(key, offset, length, true);
        return true;
      }

      public String getKeyString() {
        return Bytes.toStringBinary(block.array(), block.arrayOffset() +
          block.position(), currKeyLen);
      }

      public String getValueString() {
        return Bytes.toString(block.array(), block.arrayOffset() +
          block.position() + currKeyLen, currValueLen);
      }

      public Reader getReader() {
        return this.reader;
      }

      public boolean isSeeked() {
        return this.block != null;
      }

      public boolean seekTo() throws IOException {
        if (this.reader.blockIndex.isEmpty()) {
          return false;
        }
        if (block != null && currBlock == 0) {
          block.rewind();
          currKeyLen = block.getInt();
          currValueLen = block.getInt();
          return true;
        }
        currBlock = 0;
        block = reader.readBlock(this.currBlock, this.cacheBlocks, this.pread);
        currKeyLen = block.getInt();
        currValueLen = block.getInt();
        blockFetches++;
        return true;
      }

1440       private void loadBlock(int bloc, boolean rewind) throws IOException {
1441         if (block == null) {
1442           block = reader.readBlock(bloc, this.cacheBlocks, this.pread);
1443           currBlock = bloc;
1444           blockFetches++;
1445         } else {
1446           if (bloc != currBlock) {
1447             block = reader.readBlock(bloc, this.cacheBlocks, this.pread);
1448             currBlock = bloc;
1449             blockFetches++;
1450           } else {
1451             // we are already in the same block, just rewind to seek again.
1452             if (rewind) {
1453               block.rewind();
1454             }
1455             else {
              // Go back over the 8-byte length header we just read
              // (size of key length + size of value length).
              block.position(block.position() - 8);
1458             }
1459           }
1460         }
1461       }
1462 
1463       @Override
1464       public String toString() {
1465         return "HFileScanner for reader " + String.valueOf(reader);
1466       }
1467     }
1468 
1469     public String getTrailerInfo() {
1470       return trailer.toString();
1471     }
1472   }
1473 
  /*
   * An HFile has a fixed trailer which contains offsets to other variable
   * parts of the file.  It also includes basic metadata on this file.
   */
1478   private static class FixedFileTrailer {
    // Offset to the fileinfo data, a small block of vitals.
    long fileinfoOffset;
    // Offset to the data block index.
    long dataIndexOffset;
    // Number of data block index entries (i.e., the data block count).
    int dataIndexCount;
    // Offset to the meta block index.
    long metaIndexOffset;
    // Number of meta block index entries (i.e., the meta block count).
    int metaIndexCount;
1489     long totalUncompressedBytes;
1490     int entryCount;
1491     int compressionCodec;
1492     int version = 1;
1493 
1494     FixedFileTrailer() {
1495       super();
1496     }
1497 
    static int trailerSize() {
      // Keep this in sync with serialize() and deserialize(): five ints
      // (dataIndexCount, metaIndexCount, entryCount, compressionCodec,
      // version), four longs (fileinfoOffset, dataIndexOffset,
      // metaIndexOffset, totalUncompressedBytes), plus the magic.
      return
      ( Bytes.SIZEOF_INT * 5 ) +
      ( Bytes.SIZEOF_LONG * 4 ) +
      TRAILERBLOCKMAGIC.length;
    }
1505 
1506     void serialize(DataOutputStream outputStream) throws IOException {
1507       outputStream.write(TRAILERBLOCKMAGIC);
1508       outputStream.writeLong(fileinfoOffset);
1509       outputStream.writeLong(dataIndexOffset);
1510       outputStream.writeInt(dataIndexCount);
1511       outputStream.writeLong(metaIndexOffset);
1512       outputStream.writeInt(metaIndexCount);
1513       outputStream.writeLong(totalUncompressedBytes);
1514       outputStream.writeInt(entryCount);
1515       outputStream.writeInt(compressionCodec);
1516       outputStream.writeInt(version);
1517     }
1518 
1519     void deserialize(DataInputStream inputStream) throws IOException {
1520       byte [] header = new byte[TRAILERBLOCKMAGIC.length];
1521       inputStream.readFully(header);
      if (!Arrays.equals(header, TRAILERBLOCKMAGIC)) {
1523         throw new IOException("Trailer 'header' is wrong; does the trailer " +
1524           "size match content?");
1525       }
1526       fileinfoOffset         = inputStream.readLong();
1527       dataIndexOffset        = inputStream.readLong();
1528       dataIndexCount         = inputStream.readInt();
1529 
1530       metaIndexOffset        = inputStream.readLong();
1531       metaIndexCount         = inputStream.readInt();
1532 
1533       totalUncompressedBytes = inputStream.readLong();
1534       entryCount             = inputStream.readInt();
1535       compressionCodec       = inputStream.readInt();
1536       version                = inputStream.readInt();
1537 
1538       if (version != 1) {
1539         throw new IOException("Wrong version: " + version);
1540       }
1541     }
1542 
1543     @Override
1544     public String toString() {
1545       return "fileinfoOffset=" + fileinfoOffset +
1546       ", dataIndexOffset=" + dataIndexOffset +
1547       ", dataIndexCount=" + dataIndexCount +
1548       ", metaIndexOffset=" + metaIndexOffset +
1549       ", metaIndexCount=" + metaIndexCount +
1550       ", totalBytes=" + totalUncompressedBytes +
1551       ", entryCount=" + entryCount +
1552       ", version=" + version;
1553     }
1554   }
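
  /*
   * Editor's sketch (a hypothetical helper, not part of the original class):
   * because the trailer is fixed-size and written last, a reader can recover
   * it by seeking to fileLength - trailerSize(). Assumes an already-open
   * FSDataInputStream and the file length as reported by the FileSystem.
   */
  static FixedFileTrailer readTrailerSketch(final FSDataInputStream istream,
      final long fileLength) throws IOException {
    // The trailer occupies the final trailerSize() bytes of the file.
    istream.seek(fileLength - FixedFileTrailer.trailerSize());
    FixedFileTrailer fft = new FixedFileTrailer();
    fft.deserialize(new DataInputStream(istream));
    return fft;
  }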
1555 
  /*
   * The block index for an HFile.
   * Used when reading.
   */
1560   static class BlockIndex implements HeapSize {
    // Number of entries in the index; also the next insert position.
1562     int count = 0;
1563     byte [][] blockKeys;
1564     long [] blockOffsets;
1565     int [] blockDataSizes;
1566     int size = 0;
1567 
    /* Needed when doing lookups on blocks.
     */
1570     final RawComparator<byte []> comparator;
1571 
    /*
     * Disable the default constructor.
     */
1575     @SuppressWarnings("unused")
1576     private BlockIndex() {
1577       this(null);
1578     }
1579 
1580 
1581     /**
1582      * @param c comparator used to compare keys.
1583      */
1584     BlockIndex(final RawComparator<byte []>c) {
1585       this.comparator = c;
      // Guess that the cost of the three arrays + this object is 4 * 8 bytes.
1587       this.size += (4 * 8);
1588     }
1589 
1590     /**
1591      * @return True if block index is empty.
1592      */
1593     boolean isEmpty() {
1594       return this.blockKeys.length <= 0;
1595     }
1596 
    /**
     * Adds a new entry to the block index.
     *
     * @param key First key in the block
     * @param offset file offset where the block is stored
     * @param dataSize the uncompressed data size
     */
1604     void add(final byte[] key, final long offset, final int dataSize) {
1605       blockOffsets[count] = offset;
1606       blockKeys[count] = key;
1607       blockDataSizes[count] = dataSize;
1608       count++;
1609       this.size += (Bytes.SIZEOF_INT * 2 + key.length);
1610     }
1611 
    /**
     * @param key Key to find
     * @return Index of the block that may contain <code>key</code>, or -1
     * if the key falls before the start of the file.
     */
1617     int blockContainingKey(final byte[] key, int offset, int length) {
1618       int pos = Bytes.binarySearch(blockKeys, key, offset, length, this.comparator);
1619       if (pos < 0) {
1620         pos ++;
1621         pos *= -1;
1622         if (pos == 0) {
1623           // falls before the beginning of the file.
1624           return -1;
1625         }
1626         // When switched to "first key in block" index, binarySearch now returns
1627         // the block with a firstKey < key.  This means the value we want is potentially
1628         // in the next block.
1629         pos --; // in previous block.
1630 
1631         return pos;
1632       }
      // A perfect hit: the key exactly matches a block's first key.
1634       return pos;
1635     }
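
    // Worked example (editor's note), assuming the usual Arrays.binarySearch
    // encoding of a miss as -(insertionPoint) - 1: with first-key entries
    // {"c", "f", "i"}, looking up "d" yields -2; the arithmetic above turns
    // that into pos == 1 and then, after pos--, into block 0, the block
    // whose range ["c", "f") holds "d". Looking up "a" decodes to pos == 0
    // and returns -1: the key falls before the file's first block.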
1636 
1637     /*
1638      * @return File midkey.  Inexact.  Operates on block boundaries.  Does
1639      * not go into blocks.
1640      */
    byte [] midkey() throws IOException {
      if (this.count <= 0) {
        // Note: Java's (count - 1) / 2 truncates toward zero, so it is 0
        // even for count == 0; a pos < 0 check would never fire. Test the
        // count directly instead.
        throw new IOException("HFile empty");
      }
      int pos = ((this.count - 1)/2);              // middle of the index
      return this.blockKeys[pos];
    }
1648 
    /*
     * Write out the index. Whatever we write here must jibe with what
     * BlockIndex#readIndex is expecting.  Make sure the two ends of the
     * index serialization match.
     * @param o
     * @param keys
     * @param offsets
     * @param sizes
     * @return Position at which we entered the index.
     * @throws IOException
     */
1661     static long writeIndex(final FSDataOutputStream o,
1662       final List<byte []> keys, final List<Long> offsets,
1663       final List<Integer> sizes)
1664     throws IOException {
1665       long pos = o.getPos();
1666       // Don't write an index if nothing in the index.
1667       if (keys.size() > 0) {
1668         o.write(INDEXBLOCKMAGIC);
1669         // Write the index.
1670         for (int i = 0; i < keys.size(); ++i) {
1671           o.writeLong(offsets.get(i).longValue());
1672           o.writeInt(sizes.get(i).intValue());
1673           byte [] key = keys.get(i);
1674           Bytes.writeByteArray(o, key);
1675         }
1676       }
1677       return pos;
1678     }
1679 
    /*
     * Read in an index; the passed stream must be positioned at the start
     * of the index. Must match what was written by writeIndex in
     * Writer.close.
     * @param c Comparator to use.
     * @param in Stream positioned at the start of the index.
     * @param indexSize Number of entries in the index.
     * @throws IOException
     */
1688     static BlockIndex readIndex(final RawComparator<byte []> c,
1689         DataInputStream in, final int indexSize)
1690     throws IOException {
1691       BlockIndex bi = new BlockIndex(c);
1692       bi.blockOffsets = new long[indexSize];
1693       bi.blockKeys = new byte[indexSize][];
1694       bi.blockDataSizes = new int[indexSize];
1695       // If index size is zero, no index was written.
1696       if (indexSize > 0) {
1697         byte [] magic = new byte[INDEXBLOCKMAGIC.length];
1698         in.readFully(magic);
1699         if (!Arrays.equals(magic, INDEXBLOCKMAGIC)) {
1700           throw new IOException("Index block magic is wrong: " +
1701             Arrays.toString(magic));
1702         }
1703         for (int i = 0; i < indexSize; ++i ) {
1704           long offset   = in.readLong();
1705           int dataSize  = in.readInt();
1706           byte [] key = Bytes.readByteArray(in);
1707           bi.add(key, offset, dataSize);
1708         }
1709       }
1710       return bi;
1711     }
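
    /*
     * Editor's note on the on-disk layout shared by writeIndex/readIndex:
     * INDEXBLOCKMAGIC, then per entry an offset (long), the uncompressed
     * dataSize (int), and the key as written by Bytes.writeByteArray
     * (a vint length followed by the key bytes). An empty index writes
     * nothing at all, not even the magic, which is why readIndex skips the
     * magic check when indexSize is zero.
     */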
1712 
1713     @Override
1714     public String toString() {
1715       StringBuilder sb = new StringBuilder();
1716       sb.append("size=" + count).append("\n");
1717       for (int i = 0; i < count ; i++) {
1718         sb.append("key=").append(KeyValue.keyToString(blockKeys[i])).
1719           append("\n  offset=").append(blockOffsets[i]).
1720           append(", dataSize=" + blockDataSizes[i]).
1721           append("\n");
1722       }
1723       return sb.toString();
1724     }
1725 
1726     public long heapSize() {
1727       long heapsize = ClassSize.align(ClassSize.OBJECT +
1728           2 * Bytes.SIZEOF_INT + (3 + 1) * ClassSize.REFERENCE);
1729       //Calculating the size of blockKeys
1730       if(blockKeys != null) {
1731         //Adding array + references overhead
1732         heapsize += ClassSize.align(ClassSize.ARRAY +
1733             blockKeys.length * ClassSize.REFERENCE);
1734         //Adding bytes
1735         for(byte [] bs : blockKeys) {
1736           heapsize += ClassSize.align(ClassSize.ARRAY + bs.length);
1737         }
1738       }
1739       if(blockOffsets != null) {
1740         heapsize += ClassSize.align(ClassSize.ARRAY +
1741             blockOffsets.length * Bytes.SIZEOF_LONG);
1742       }
1743       if(blockDataSizes != null) {
1744         heapsize += ClassSize.align(ClassSize.ARRAY +
1745             blockDataSizes.length * Bytes.SIZEOF_INT);
1746       }
1747 
1748       return ClassSize.align(heapsize);
1749     }
1750 
1751   }
1752 
1753   /*
1754    * Metadata for this file.  Conjured by the writer.  Read in by the reader.
1755    */
1756   static class FileInfo extends HbaseMapWritable<byte [], byte []> {
1757     static final String RESERVED_PREFIX = "hfile.";
1758     static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX);
1759     static final byte [] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
1760     static final byte [] AVG_KEY_LEN =
1761       Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
1762     static final byte [] AVG_VALUE_LEN =
1763       Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
1764     static final byte [] COMPARATOR =
1765       Bytes.toBytes(RESERVED_PREFIX + "COMPARATOR");
1766 
1767     /*
1768      * Constructor.
1769      */
1770     FileInfo() {
1771       super();
1772     }
1773   }
1774 
1775   /**
1776    * Return true if the given file info key is reserved for internal
1777    * use by HFile.
1778    */
1779   public static boolean isReservedFileInfoKey(byte[] key) {
1780     return Bytes.startsWith(key, FileInfo.RESERVED_PREFIX_BYTES);
1781   }
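
  // For example (editor's note): isReservedFileInfoKey on
  // Bytes.toBytes("hfile.LASTKEY") returns true, while a hypothetical
  // application key such as Bytes.toBytes("myapp.version") is not reserved
  // because it lacks the "hfile." prefix.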
1782 
1783 
1784   /**
   * Get names of supported compression algorithms. The names are accepted
   * by HFile.Writer.
1787    *
1788    * @return Array of strings, each represents a supported compression
1789    *         algorithm. Currently, the following compression algorithms are
1790    *         supported.
1791    *         <ul>
1792    *         <li>"none" - No compression.
1793    *         <li>"gz" - GZIP compression.
1794    *         </ul>
1795    */
1796   public static String[] getSupportedCompressionAlgorithms() {
1797     return Compression.getSupportedAlgorithms();
1798   }
1799 
1800   // Utility methods.
1801   /*
1802    * @param l Long to convert to an int.
1803    * @return <code>l</code> cast as an int.
1804    */
1805   static int longToInt(final long l) {
    // Expecting the size() of a block not to exceed 4GB. Assuming the
    // size() will wrap to a negative integer if it exceeds 2GB (from TFile).
1808     return (int)(l & 0x00000000ffffffffL);
1809   }
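
  // Editor's example: longToInt(1L) == 1 and longToInt(0x100000001L) == 1;
  // only the low-order 32 bits survive the mask-and-cast.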
1810 
1811   /**
1812    * Returns all files belonging to the given region directory. Could return an
1813    * empty list.
1814    *
1815    * @param fs  The file system reference.
1816    * @param regionDir  The region directory to scan.
1817    * @return The list of files found.
1818    * @throws IOException When scanning the files fails.
1819    */
1820   static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
1821   throws IOException {
1822     List<Path> res = new ArrayList<Path>();
1823     PathFilter dirFilter = new FSUtils.DirFilter(fs);
1824     FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
1825     for(FileStatus dir : familyDirs) {
1826       FileStatus[] files = fs.listStatus(dir.getPath());
1827       for (FileStatus file : files) {
1828         if (!file.isDir()) {
1829           res.add(file.getPath());
1830         }
1831       }
1832     }
1833     return res;
1834   }
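
  // Editor's note: the walk above assumes the layout
  // <regionDir>/<familyDir>/<storefile>, one directory per column family
  // with store files directly beneath it; nested directories are skipped.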
1835 
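  /*
   * Editor's note: sample invocations (path and region name hypothetical):
   *   hbase org.apache.hadoop.hbase.io.hfile.HFile -v -p -f hdfs://a:9000/hbase/.META./12/34
   *   hbase org.apache.hadoop.hbase.io.hfile.HFile -m -b -r '.META.,,1'
   * With no arguments the tool prints the option summary and exits.
   */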
1836   public static void main(String []args) throws IOException {
1837     try {
1838       // create options
1839       Options options = new Options();
1840       options.addOption("v", "verbose", false, "Verbose output; emits file and meta data delimiters");
1841       options.addOption("p", "printkv", false, "Print key/value pairs");
1842       options.addOption("e", "printkey", false, "Print keys");
1843       options.addOption("m", "printmeta", false, "Print meta data of file");
1844       options.addOption("b", "printblocks", false, "Print block index meta data");
1845       options.addOption("k", "checkrow", false,
1846         "Enable row order check; looks for out-of-order keys");
1847       options.addOption("a", "checkfamily", false, "Enable family check");
1848       options.addOption("f", "file", true,
1849         "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/.META./12/34");
1850       options.addOption("r", "region", true,
1851         "Region to scan. Pass region name; e.g. '.META.,,1'");
1852       if (args.length == 0) {
1853         HelpFormatter formatter = new HelpFormatter();
1854         formatter.printHelp("HFile ", options, true);
1855         System.exit(-1);
1856       }
1857       CommandLineParser parser = new PosixParser();
1858       CommandLine cmd = parser.parse(options, args);
1859       boolean verbose = cmd.hasOption("v");
1860       boolean printValue = cmd.hasOption("p");
1861       boolean printKey = cmd.hasOption("e") || printValue;
1862       boolean printMeta = cmd.hasOption("m");
1863       boolean printBlocks = cmd.hasOption("b");
1864       boolean checkRow = cmd.hasOption("k");
1865       boolean checkFamily = cmd.hasOption("a");
1866       // get configuration, file system and get list of files
1867       Configuration conf = HBaseConfiguration.create();
1868       conf.set("fs.defaultFS",
1869         conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
1870       conf.set("fs.default.name",
1871         conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
1872       ArrayList<Path> files = new ArrayList<Path>();
1873       if (cmd.hasOption("f")) {
1874         files.add(new Path(cmd.getOptionValue("f")));
1875       }
1876       if (cmd.hasOption("r")) {
1877         String regionName = cmd.getOptionValue("r");
1878         byte[] rn = Bytes.toBytes(regionName);
1879         byte[][] hri = HRegionInfo.parseRegionName(rn);
1880         Path rootDir = FSUtils.getRootDir(conf);
1881         Path tableDir = new Path(rootDir, Bytes.toString(hri[0]));
1882         String enc = HRegionInfo.encodeRegionName(rn);
1883         Path regionDir = new Path(tableDir, enc);
1884         if (verbose) System.out.println("region dir -> " + regionDir);
1885         List<Path> regionFiles =
1886           getStoreFiles(FileSystem.get(conf), regionDir);
1887         if (verbose) System.out.println("Number of region files found -> " +
1888           regionFiles.size());
        if (verbose) {
          int i = 1;
          for (Path p : regionFiles) {
            System.out.println("Found file[" + i++ + "] -> " + p);
          }
        }
1895         files.addAll(regionFiles);
1896       }
1897       // iterate over all files found
1898       for (Path file : files) {
1899         if (verbose) System.out.println("Scanning -> " + file);
1900         FileSystem fs = file.getFileSystem(conf);
1901         if (!fs.exists(file)) {
          System.err.println("ERROR, file doesn't exist: " + file);
1903           continue;
1904         }
1905         // create reader and load file info
1906         HFile.Reader reader = new HFile.Reader(fs, file, null, false);
1907         Map<byte[],byte[]> fileInfo = reader.loadFileInfo();
1908         int count = 0;
1909         if (verbose || printKey || checkRow || checkFamily) {
          // scan over the file, read key/values, and run the requested checks
1911           HFileScanner scanner = reader.getScanner(false, false);
1912           scanner.seekTo();
1913           KeyValue pkv = null;
1914           do {
1915             KeyValue kv = scanner.getKeyValue();
1916             // dump key value
            if (printKey) {
              System.out.print("K: " + kv);
              if (printValue) {
                System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
              }
              System.out.println();
            }
1924             // check if rows are in order
1925             if (checkRow && pkv != null) {
1926               if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
                System.err.println("WARNING, previous row is greater than" +
1928                     " current row\n\tfilename -> " + file +
1929                     "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey()) +
1930                     "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
1931               }
1932             }
1933             // check if families are consistent
1934             if (checkFamily) {
1935               String fam = Bytes.toString(kv.getFamily());
1936               if (!file.toString().contains(fam)) {
1937                 System.err.println("WARNING, filename does not match kv family," +
1938                     "\n\tfilename -> " + file +
1939                     "\n\tkeyvalue -> " + Bytes.toStringBinary(kv.getKey()));
1940               }
1941               if (pkv != null && Bytes.compareTo(pkv.getFamily(), kv.getFamily()) != 0) {
                System.err.println("WARNING, previous kv has a different" +
                    " family than the current kv\n\tfilename -> " + file +
1944                     "\n\tprevious -> " +  Bytes.toStringBinary(pkv.getKey()) +
1945                     "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
1946               }
1947             }
1948             pkv = kv;
1949             count++;
1950           } while (scanner.next());
1951         }
1952         if (verbose || printKey) {
1953           System.out.println("Scanned kv count -> " + count);
1954         }
1955         // print meta data
1956         if (printMeta) {
1957           System.out.println("Block index size as per heapsize: " + reader.indexSize());
1958           System.out.println(reader.toString());
1959           System.out.println(reader.getTrailerInfo());
1960           System.out.println("Fileinfo:");
1961           for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
1962             System.out.print(Bytes.toString(e.getKey()) + " = " );
1963             if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY"))==0) {
1964               long seqid = Bytes.toLong(e.getValue());
1965               System.out.println(seqid);
1966             } else if (Bytes.compareTo(e.getKey(),
1967                 Bytes.toBytes("TIMERANGE")) == 0) {
1968               TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
1969               Writables.copyWritable(e.getValue(), timeRangeTracker);
1970               System.out.println(timeRangeTracker.getMinimumTimestamp() +
1971                   "...." + timeRangeTracker.getMaximumTimestamp());
1972             } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0 ||
1973                 Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
1974               System.out.println(Bytes.toInt(e.getValue()));
1975             } else {
1976               System.out.println(Bytes.toStringBinary(e.getValue()));
1977             }
1978           }
1979 
1980           //Printing bloom information
1981           ByteBuffer b = reader.getMetaBlock("BLOOM_FILTER_META", false);
1982           if (b!= null) {
1983             BloomFilter bloomFilter = new ByteBloomFilter(b);
1984             System.out.println("BloomSize: " + bloomFilter.getByteSize());
1985             System.out.println("No of Keys in bloom: " +
1986                 bloomFilter.getKeyCount());
1987             System.out.println("Max Keys for bloom: " +
1988                 bloomFilter.getMaxKeys());
1989           } else {
1990             System.out.println("Could not get bloom data from meta block");
1991           }
1992         }
1993         if (printBlocks) {
1994           System.out.println("Block Index:");
1995           System.out.println(reader.blockIndex);
1996         }
1997         reader.close();
1998       }
1999     } catch (Exception e) {
2000       e.printStackTrace();
2001     }
2002   }
2003 }