View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.io.hfile;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.Closeable;
23  import java.io.DataInput;
24  import java.io.DataInputStream;
25  import java.io.DataOutputStream;
26  import java.io.IOException;
27  import java.io.SequenceInputStream;
28  import java.lang.reflect.Method;
29  import java.net.InetSocketAddress;
30  import java.nio.ByteBuffer;
31  import java.util.ArrayList;
32  import java.util.Collection;
33  import java.util.Comparator;
34  import java.util.List;
35  import java.util.Map;
36  import java.util.Set;
37  import java.util.SortedMap;
38  import java.util.TreeMap;
39  import java.util.concurrent.ArrayBlockingQueue;
40  import java.util.concurrent.BlockingQueue;
41  import java.util.concurrent.atomic.AtomicInteger;
42  import java.util.concurrent.atomic.AtomicLong;
43  
44  import org.apache.hadoop.hbase.util.ByteStringer;
45  import org.apache.commons.logging.Log;
46  import org.apache.commons.logging.LogFactory;
47  import org.apache.hadoop.hbase.classification.InterfaceAudience;
48  import org.apache.hadoop.conf.Configuration;
49  import org.apache.hadoop.fs.FSDataInputStream;
50  import org.apache.hadoop.fs.FSDataOutputStream;
51  import org.apache.hadoop.fs.FileStatus;
52  import org.apache.hadoop.fs.FileSystem;
53  import org.apache.hadoop.fs.Path;
54  import org.apache.hadoop.fs.PathFilter;
55  import org.apache.hadoop.hbase.HConstants;
56  import org.apache.hadoop.hbase.KeyValue;
57  import org.apache.hadoop.hbase.KeyValue.KVComparator;
58  import org.apache.hadoop.hbase.fs.HFileSystem;
59  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
60  import org.apache.hadoop.hbase.io.compress.Compression;
61  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
62  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
63  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
64  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
65  import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
66  import org.apache.hadoop.hbase.util.BloomFilterWriter;
67  import org.apache.hadoop.hbase.util.Bytes;
68  import org.apache.hadoop.hbase.util.ChecksumType;
69  import org.apache.hadoop.hbase.util.FSUtils;
70  import org.apache.hadoop.io.Writable;
71  
72  import com.google.common.base.Preconditions;
73  import com.google.common.collect.Lists;
74  
75  /**
76   * File format for hbase.
77   * A file of sorted key/value pairs. Both keys and values are byte arrays.
78   * <p>
79   * The memory footprint of a HFile includes the following (below is taken from the
80   * <a
81   * href=https://issues.apache.org/jira/browse/HADOOP-3315>TFile</a> documentation
82   * but applies also to HFile):
83   * <ul>
84   * <li>Some constant overhead of reading or writing a compressed block.
85   * <ul>
86   * <li>Each compressed block requires one compression/decompression codec for
87   * I/O.
88   * <li>Temporary space to buffer the key.
89   * <li>Temporary space to buffer the value.
90   * </ul>
91   * <li>HFile index, which is proportional to the total number of Data Blocks.
92   * The total amount of memory needed to hold the index can be estimated as
93   * (56+AvgKeySize)*NumBlocks.
94   * </ul>
95   * Suggestions on performance optimization.
96   * <ul>
97   * <li>Minimum block size. We recommend a setting of minimum block size between
98   * 8KB and 1MB for general usage. Larger block size is preferred if files are
99   * primarily for sequential access. However, it would lead to inefficient random
100  * access (because there is more data to decompress). Smaller blocks are good
101  * for random access, but require more memory to hold the block index, and may
102  * be slower to create (because we must flush the compressor stream at the
103  * conclusion of each data block, which leads to an FS I/O flush). Further, due
104  * to the internal caching in Compression codec, the smallest possible block
105  * size would be around 20KB-30KB.
106  * <li>The current implementation does not offer true multi-threading for
107  * reading. The implementation uses FSDataInputStream seek()+read(), which is
108  * shown to be much faster than positioned-read call in single thread mode.
109  * However, it also means that if multiple threads attempt to access the same
110  * HFile (using multiple scanners) simultaneously, the actual I/O is carried out
111  * sequentially even if they access different DFS blocks (Reexamine! pread seems
112  * to be 10% faster than seek+read in my testing -- stack).
113  * <li>Compression codec. Use "none" if the data is not very compressible (by
114  * compressible, I mean a compression ratio of at least 2:1). Generally, use "lzo"
115  * as the starting point for experimenting. "gz" offers a slightly better
116  * compression ratio than "lzo" but requires 4x CPU to compress and 2x CPU to
117  * decompress, compared to "lzo".
118  * </ul>
119  *
120  * For more on the background behind HFile, see <a
121  * href=https://issues.apache.org/jira/browse/HBASE-61>HBASE-61</a>.
122  * <p>
123  * File is made of data blocks followed by meta data blocks (if any), a fileinfo
124  * block, data block index, meta data block index, and a fixed size trailer
125  * which records the offsets at which file changes content type.
126  * <pre>&lt;data blocks>&lt;meta blocks>&lt;fileinfo>&lt;data index>&lt;meta index>&lt;trailer></pre>
127  * Each block has a bit of magic at its start.  Blocks are comprised of
128  * key/values.  In data blocks, they are both byte arrays.  Metadata blocks are
129  * a String key and a byte array value.  An empty file looks like this:
130  * <pre>&lt;fileinfo>&lt;trailer></pre>.  That is, there are neither data nor meta
131  * blocks present.
132  * <p>
133  * TODO: Do scanners need to be able to take a start and end row?
134  * TODO: Should BlockIndex know the name of its file?  Should it have a Path
135  * that points at its file say for the case where an index lives apart from
136  * an HFile instance?
137  */
138 @InterfaceAudience.Private
139 public class HFile {
140   static final Log LOG = LogFactory.getLog(HFile.class);
141 
142   /**
143    * Maximum length of key in HFile.
144    */
145   public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;
146 
147   /**
148    * Default compression: none.
149    */
150   public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
151     Compression.Algorithm.NONE;
152 
153   /** Minimum supported HFile format version */
154   public static final int MIN_FORMAT_VERSION = 2;
155 
156   /** Maximum supported HFile format version
157    */
158   public static final int MAX_FORMAT_VERSION = 3;
159 
160   /**
161    * Minimum HFile format version with support for persisting cell tags
162    */
163   public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;
164 
165   /** Default compression name: none. */
166   public final static String DEFAULT_COMPRESSION =
167     DEFAULT_COMPRESSION_ALGORITHM.getName();
168 
169   /** Meta data block name for bloom filter bits. */
170   public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";
171 
172   /**
173    * We assume that HFile path ends with
174    * ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at least this
175    * many levels of nesting. This is needed for identifying table and CF name
176    * from an HFile path.
177    */
178   public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;
179 
180   /**
181    * The number of bytes per checksum.
182    */
183   public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;
184   public static final ChecksumType DEFAULT_CHECKSUM_TYPE = ChecksumType.CRC32;
185 
186   // For measuring number of checksum failures
187   static final AtomicLong checksumFailures = new AtomicLong();
188 
189   // for test purpose
190   public static final AtomicLong dataBlockReadCnt = new AtomicLong(0);
191 
192   /**
193    * Number of checksum verification failures. It also
194    * clears the counter.
195    */
196   public static final long getChecksumFailuresCount() {
197     return checksumFailures.getAndSet(0);
198   }
199 
200   /** API required to write an {@link HFile} */
201   public interface Writer extends Closeable {
202 
203     /** Add an element to the file info map. */
204     void appendFileInfo(byte[] key, byte[] value) throws IOException;
205 
206     void append(KeyValue kv) throws IOException;
207 
208     void append(byte[] key, byte[] value) throws IOException;
209 
210     void append (byte[] key, byte[] value, byte[] tag) throws IOException;
211 
212     /** @return the path to this {@link HFile} */
213     Path getPath();
214 
215     /**
216      * Adds an inline block writer such as a multi-level block index writer or
217      * a compound Bloom filter writer.
218      */
219     void addInlineBlockWriter(InlineBlockWriter bloomWriter);
220 
221     // The below three methods take Writables.  We'd like to undo Writables but undoing the below would be pretty
222     // painful.  Could take a byte [] or a Message but we want to be backward compatible around hfiles so would need
223     // to map between Message and Writable or byte [] and current Writable serialization.  This would be a bit of work
224     // to little gain.  Thats my thinking at moment.  St.Ack 20121129
225 
226     void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);
227 
228     /**
229      * Store general Bloom filter in the file. This does not deal with Bloom filter
230      * internals but is necessary, since Bloom filters are stored differently
231      * in HFile version 1 and version 2.
232      */
233     void addGeneralBloomFilter(BloomFilterWriter bfw);
234 
235     /**
236      * Store delete family Bloom filter in the file, which is only supported in
237      * HFile V2.
238      */
239     void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;
240 
241     /**
242      * Return the file context for the HFile this writer belongs to
243      */
244     HFileContext getFileContext();
245   }
246 
247   /**
248    * This variety of ways to construct writers is used throughout the code, and
249    * we want to be able to swap writer implementations.
250    */
251   public static abstract class WriterFactory {
252     protected final Configuration conf;
253     protected final CacheConfig cacheConf;
254     protected FileSystem fs;
255     protected Path path;
256     protected FSDataOutputStream ostream;
257     protected KVComparator comparator = KeyValue.COMPARATOR;
258     protected InetSocketAddress[] favoredNodes;
259     private HFileContext fileContext;
260     protected boolean shouldDropBehind = false;
261 
262     WriterFactory(Configuration conf, CacheConfig cacheConf) {
263       this.conf = conf;
264       this.cacheConf = cacheConf;
265     }
266 
267     public WriterFactory withPath(FileSystem fs, Path path) {
268       Preconditions.checkNotNull(fs);
269       Preconditions.checkNotNull(path);
270       this.fs = fs;
271       this.path = path;
272       return this;
273     }
274 
275     public WriterFactory withOutputStream(FSDataOutputStream ostream) {
276       Preconditions.checkNotNull(ostream);
277       this.ostream = ostream;
278       return this;
279     }
280 
281     public WriterFactory withComparator(KVComparator comparator) {
282       Preconditions.checkNotNull(comparator);
283       this.comparator = comparator;
284       return this;
285     }
286 
287     public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {
288       // Deliberately not checking for null here.
289       this.favoredNodes = favoredNodes;
290       return this;
291     }
292 
293     public WriterFactory withFileContext(HFileContext fileContext) {
294       this.fileContext = fileContext;
295       return this;
296     }
297 
298     public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) {
299       this.shouldDropBehind = shouldDropBehind;
300       return this;
301     }
302 
303 
304     public Writer create() throws IOException {
305       if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
306         throw new AssertionError("Please specify exactly one of " +
307             "filesystem/path or path");
308       }
309       if (path != null) {
310         ostream = AbstractHFileWriter.createOutputStream(conf, fs, path, favoredNodes);
311         try {
312           Class<? extends FSDataOutputStream> outStreamClass = ostream.getClass();
313           try {
314             Method m = outStreamClass.getDeclaredMethod("setDropBehind",
315               new Class[]{ boolean.class });
316             m.invoke(ostream, new Object[] {
317               shouldDropBehind && cacheConf.shouldDropBehindCompaction() });
318           } catch (NoSuchMethodException e) {
319             // Not supported, we can just ignore it
320           } catch (Exception e) {
321             if (LOG.isDebugEnabled()) {
322               LOG.debug("Failed to invoke output stream's setDropBehind method, continuing");
323             }
324           }
325         } catch (UnsupportedOperationException uoe) {
326           LOG.debug("Unable to set drop behind on " + path, uoe);
327         }
328       }
329       return createWriter(fs, path, ostream,
330                    comparator, fileContext);
331     }
332 
333     protected abstract Writer createWriter(FileSystem fs, Path path, FSDataOutputStream ostream,
334         KVComparator comparator, HFileContext fileContext) throws IOException;
335   }
336 
337   /** The configuration key for HFile version to use for new files */
338   public static final String FORMAT_VERSION_KEY = "hfile.format.version";
339 
340   public static int getFormatVersion(Configuration conf) {
341     int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
342     checkFormatVersion(version);
343     return version;
344   }
345 
346   /**
347    * Returns the factory to be used to create {@link HFile} writers.
348    * Disables block cache access for all writers created through the
349    * returned factory.
350    */
351   public static final WriterFactory getWriterFactoryNoCache(Configuration
352        conf) {
353     Configuration tempConf = new Configuration(conf);
354     tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
355     return HFile.getWriterFactory(conf, new CacheConfig(tempConf));
356   }
357 
358   /**
359    * Returns the factory to be used to create {@link HFile} writers
360    */
361   public static final WriterFactory getWriterFactory(Configuration conf,
362       CacheConfig cacheConf) {
363     int version = getFormatVersion(conf);
364     switch (version) {
365     case 2:
366       return new HFileWriterV2.WriterFactoryV2(conf, cacheConf);
367     case 3:
368       return new HFileWriterV3.WriterFactoryV3(conf, cacheConf);
369     default:
370       throw new IllegalArgumentException("Cannot create writer for HFile " +
371           "format version " + version);
372     }
373   }
374 
375   /** An abstraction used by the block index */
376   public interface CachingBlockReader {
377     HFileBlock readBlock(long offset, long onDiskBlockSize,
378         boolean cacheBlock, final boolean pread, final boolean isCompaction,
379         final boolean updateCacheMetrics, BlockType expectedBlockType)
380         throws IOException;
381   }
382 
383   /** An interface used by clients to open and iterate an {@link HFile}. */
384   public interface Reader extends Closeable, CachingBlockReader {
385     /**
386      * Returns this reader's "name". Usually the last component of the path.
387      * Needs to be constant as the file is being moved to support caching on
388      * write.
389      */
390     String getName();
391 
392     KVComparator getComparator();
393 
394     HFileScanner getScanner(boolean cacheBlocks,
395        final boolean pread, final boolean isCompaction);
396 
397     ByteBuffer getMetaBlock(String metaBlockName,
398        boolean cacheBlock) throws IOException;
399 
400     Map<byte[], byte[]> loadFileInfo() throws IOException;
401 
402     byte[] getLastKey();
403 
404     byte[] midkey() throws IOException;
405 
406     long length();
407 
408     long getEntries();
409 
410     byte[] getFirstKey();
411 
412     long indexSize();
413 
414     byte[] getFirstRowKey();
415 
416     byte[] getLastRowKey();
417 
418     FixedFileTrailer getTrailer();
419 
420     HFileBlockIndex.BlockIndexReader getDataBlockIndexReader();
421 
422     HFileScanner getScanner(boolean cacheBlocks, boolean pread);
423 
424     Compression.Algorithm getCompressionAlgorithm();
425 
426     /**
427      * Retrieves general Bloom filter metadata as appropriate for each
428      * {@link HFile} version.
429      * Knows nothing about how that metadata is structured.
430      */
431     DataInput getGeneralBloomFilterMetadata() throws IOException;
432 
433     /**
434      * Retrieves delete family Bloom filter metadata as appropriate for each
435      * {@link HFile}  version.
436      * Knows nothing about how that metadata is structured.
437      */
438     DataInput getDeleteBloomFilterMetadata() throws IOException;
439 
440     Path getPath();
441 
442     /** Close method with optional evictOnClose */
443     void close(boolean evictOnClose) throws IOException;
444 
445     DataBlockEncoding getDataBlockEncoding();
446 
447     boolean hasMVCCInfo();
448 
449     /**
450      * Return the file context of the HFile this reader belongs to
451      */
452     HFileContext getFileContext();
453   }
454 
455   /**
456    * Method returns the reader given the specified arguments.
457    * TODO This is a bad abstraction.  See HBASE-6635.
458    *
459    * @param path hfile's path
460    * @param fsdis stream of path's file
461    * @param size max size of the trailer.
462    * @param cacheConf Cache configuation values, cannot be null.
463    * @param hfs
464    * @return an appropriate instance of HFileReader
465    * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
466    */
467   private static Reader pickReaderVersion(Path path, FSDataInputStreamWrapper fsdis,
468       long size, CacheConfig cacheConf, HFileSystem hfs, Configuration conf) throws IOException {
469     FixedFileTrailer trailer = null;
470     try {
471       boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
472       assert !isHBaseChecksum; // Initially we must read with FS checksum.
473       trailer = FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
474       switch (trailer.getMajorVersion()) {
475       case 2:
476         return new HFileReaderV2(path, trailer, fsdis, size, cacheConf, hfs, conf);
477       case 3 :
478         return new HFileReaderV3(path, trailer, fsdis, size, cacheConf, hfs, conf);
479       default:
480         throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
481       }
482     } catch (Throwable t) {
483       try {
484         fsdis.close();
485       } catch (Throwable t2) {
486         LOG.warn("Error closing fsdis FSDataInputStreamWrapper", t2);
487       }
488       throw new CorruptHFileException("Problem reading HFile Trailer from file " + path, t);
489     }
490   }
491 
492   /**
493    * @param fs A file system
494    * @param path Path to HFile
495    * @param fsdis a stream of path's file
496    * @param size max size of the trailer.
497    * @param cacheConf Cache configuration for hfile's contents
498    * @param conf Configuration
499    * @return A version specific Hfile Reader
500    * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
501    */
502   public static Reader createReader(FileSystem fs, Path path,
503       FSDataInputStreamWrapper fsdis, long size, CacheConfig cacheConf, Configuration conf)
504       throws IOException {
505     HFileSystem hfs = null;
506 
507     // If the fs is not an instance of HFileSystem, then create an
508     // instance of HFileSystem that wraps over the specified fs.
509     // In this case, we will not be able to avoid checksumming inside
510     // the filesystem.
511     if (!(fs instanceof HFileSystem)) {
512       hfs = new HFileSystem(fs);
513     } else {
514       hfs = (HFileSystem)fs;
515     }
516     return pickReaderVersion(path, fsdis, size, cacheConf, hfs, conf);
517   }
518 
519   /**
520    *
521    * @param fs filesystem
522    * @param path Path to file to read
523    * @param cacheConf This must not be null.  @see {@link org.apache.hadoop.hbase.io.hfile.CacheConfig#CacheConfig(Configuration)}
524    * @return an active Reader instance
525    * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile is corrupt/invalid.
526    */
527   public static Reader createReader(
528       FileSystem fs, Path path, CacheConfig cacheConf, Configuration conf) throws IOException {
529     Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
530     FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
531     return pickReaderVersion(path, stream, fs.getFileStatus(path).getLen(),
532       cacheConf, stream.getHfs(), conf);
533   }
534 
535   /**
536    * This factory method is used only by unit tests
537    */
538   static Reader createReaderFromStream(Path path,
539       FSDataInputStream fsdis, long size, CacheConfig cacheConf, Configuration conf)
540       throws IOException {
541     FSDataInputStreamWrapper wrapper = new FSDataInputStreamWrapper(fsdis);
542     return pickReaderVersion(path, wrapper, size, cacheConf, null, conf);
543   }
544 
545   /**
546    * Returns true if the specified file has a valid HFile Trailer.
547    * @param fs filesystem
548    * @param path Path to file to verify
549    * @return true if the file has a valid HFile Trailer, otherwise false
550    * @throws IOException if failed to read from the underlying stream
551    */
552   public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException {
553     return isHFileFormat(fs, fs.getFileStatus(path));
554   }
555 
556   /**
557    * Returns true if the specified file has a valid HFile Trailer.
558    * @param fs filesystem
559    * @param fileStatus the file to verify
560    * @return true if the file has a valid HFile Trailer, otherwise false
561    * @throws IOException if failed to read from the underlying stream
562    */
563   public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus)
564       throws IOException {
565     final Path path = fileStatus.getPath();
566     final long size = fileStatus.getLen();
567     FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path);
568     try {
569       boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
570       assert !isHBaseChecksum; // Initially we must read with FS checksum.
571       FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
572       return true;
573     } catch (IllegalArgumentException e) {
574       return false;
575     } catch (IOException e) {
576       throw e;
577     } finally {
578       try {
579         fsdis.close();
580       } catch (Throwable t) {
581         LOG.warn("Error closing fsdis FSDataInputStreamWrapper: " + path, t);
582       }
583     }
584   }
585 
586   /**
587    * Metadata for this file. Conjured by the writer. Read in by the reader.
588    */
589   public static class FileInfo implements SortedMap<byte[], byte[]> {
590     static final String RESERVED_PREFIX = "hfile.";
591     static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX);
592     static final byte [] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
593     static final byte [] AVG_KEY_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
594     static final byte [] AVG_VALUE_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
595     static final byte [] COMPARATOR = Bytes.toBytes(RESERVED_PREFIX + "COMPARATOR");
596     static final byte [] TAGS_COMPRESSED = Bytes.toBytes(RESERVED_PREFIX + "TAGS_COMPRESSED");
597     public static final byte [] MAX_TAGS_LEN = Bytes.toBytes(RESERVED_PREFIX + "MAX_TAGS_LEN");
598     private final SortedMap<byte [], byte []> map = new TreeMap<byte [], byte []>(Bytes.BYTES_COMPARATOR);
599 
600     public FileInfo() {
601       super();
602     }
603 
604     /**
605      * Append the given key/value pair to the file info, optionally checking the
606      * key prefix.
607      *
608      * @param k key to add
609      * @param v value to add
610      * @param checkPrefix whether to check that the provided key does not start
611      *          with the reserved prefix
612      * @return this file info object
613      * @throws IOException if the key or value is invalid
614      */
615     public FileInfo append(final byte[] k, final byte[] v,
616         final boolean checkPrefix) throws IOException {
617       if (k == null || v == null) {
618         throw new NullPointerException("Key nor value may be null");
619       }
620       if (checkPrefix && isReservedFileInfoKey(k)) {
621         throw new IOException("Keys with a " + FileInfo.RESERVED_PREFIX
622             + " are reserved");
623       }
624       put(k, v);
625       return this;
626     }
627 
628     public void clear() {
629       this.map.clear();
630     }
631 
632     public Comparator<? super byte[]> comparator() {
633       return map.comparator();
634     }
635 
636     public boolean containsKey(Object key) {
637       return map.containsKey(key);
638     }
639 
640     public boolean containsValue(Object value) {
641       return map.containsValue(value);
642     }
643 
644     public Set<java.util.Map.Entry<byte[], byte[]>> entrySet() {
645       return map.entrySet();
646     }
647 
648     public boolean equals(Object o) {
649       return map.equals(o);
650     }
651 
652     public byte[] firstKey() {
653       return map.firstKey();
654     }
655 
656     public byte[] get(Object key) {
657       return map.get(key);
658     }
659 
660     public int hashCode() {
661       return map.hashCode();
662     }
663 
664     public SortedMap<byte[], byte[]> headMap(byte[] toKey) {
665       return this.map.headMap(toKey);
666     }
667 
668     public boolean isEmpty() {
669       return map.isEmpty();
670     }
671 
672     public Set<byte[]> keySet() {
673       return map.keySet();
674     }
675 
676     public byte[] lastKey() {
677       return map.lastKey();
678     }
679 
680     public byte[] put(byte[] key, byte[] value) {
681       return this.map.put(key, value);
682     }
683 
684     public void putAll(Map<? extends byte[], ? extends byte[]> m) {
685       this.map.putAll(m);
686     }
687 
688     public byte[] remove(Object key) {
689       return this.map.remove(key);
690     }
691 
692     public int size() {
693       return map.size();
694     }
695 
696     public SortedMap<byte[], byte[]> subMap(byte[] fromKey, byte[] toKey) {
697       return this.map.subMap(fromKey, toKey);
698     }
699 
700     public SortedMap<byte[], byte[]> tailMap(byte[] fromKey) {
701       return this.map.tailMap(fromKey);
702     }
703 
704     public Collection<byte[]> values() {
705       return map.values();
706     }
707 
708     /**
709      * Write out this instance on the passed in <code>out</code> stream.
710      * We write it as a protobuf.
711      * @param out
712      * @throws IOException
713      * @see #read(DataInputStream)
714      */
715     void write(final DataOutputStream out) throws IOException {
716       HFileProtos.FileInfoProto.Builder builder = HFileProtos.FileInfoProto.newBuilder();
717       for (Map.Entry<byte [], byte[]> e: this.map.entrySet()) {
718         HBaseProtos.BytesBytesPair.Builder bbpBuilder = HBaseProtos.BytesBytesPair.newBuilder();
719         bbpBuilder.setFirst(ByteStringer.wrap(e.getKey()));
720         bbpBuilder.setSecond(ByteStringer.wrap(e.getValue()));
721         builder.addMapEntry(bbpBuilder.build());
722       }
723       out.write(ProtobufUtil.PB_MAGIC);
724       builder.build().writeDelimitedTo(out);
725     }
726 
727     /**
728      * Populate this instance with what we find on the passed in <code>in</code> stream.
729      * Can deserialize protobuf of old Writables format.
730      * @param in
731      * @throws IOException
732      * @see #write(DataOutputStream)
733      */
734     void read(final DataInputStream in) throws IOException {
735       // This code is tested over in TestHFileReaderV1 where we read an old hfile w/ this new code.
736       int pblen = ProtobufUtil.lengthOfPBMagic();
737       byte [] pbuf = new byte[pblen];
738       if (in.markSupported()) in.mark(pblen);
739       int read = in.read(pbuf);
740       if (read != pblen) throw new IOException("read=" + read + ", wanted=" + pblen);
741       if (ProtobufUtil.isPBMagicPrefix(pbuf)) {
742         parsePB(HFileProtos.FileInfoProto.parseDelimitedFrom(in));
743       } else {
744         if (in.markSupported()) {
745           in.reset();
746           parseWritable(in);
747         } else {
748           // We cannot use BufferedInputStream, it consumes more than we read from the underlying IS
749           ByteArrayInputStream bais = new ByteArrayInputStream(pbuf);
750           SequenceInputStream sis = new SequenceInputStream(bais, in); // Concatenate input streams
751           // TODO: Am I leaking anything here wrapping the passed in stream?  We are not calling close on the wrapped
752           // streams but they should be let go after we leave this context?  I see that we keep a reference to the
753           // passed in inputstream but since we no longer have a reference to this after we leave, we should be ok.
754           parseWritable(new DataInputStream(sis));
755         }
756       }
757     }
758 
759     /** Now parse the old Writable format.  It was a list of Map entries.  Each map entry was a key and a value of
760      * a byte [].  The old map format had a byte before each entry that held a code which was short for the key or
761      * value type.  We know it was a byte [] so in below we just read and dump it.
762      * @throws IOException
763      */
764     void parseWritable(final DataInputStream in) throws IOException {
765       // First clear the map.  Otherwise we will just accumulate entries every time this method is called.
766       this.map.clear();
767       // Read the number of entries in the map
768       int entries = in.readInt();
769       // Then read each key/value pair
770       for (int i = 0; i < entries; i++) {
771         byte [] key = Bytes.readByteArray(in);
772         // We used to read a byte that encoded the class type.  Read and ignore it because it is always byte [] in hfile
773         in.readByte();
774         byte [] value = Bytes.readByteArray(in);
775         this.map.put(key, value);
776       }
777     }
778 
779     /**
780      * Fill our map with content of the pb we read off disk
781      * @param fip protobuf message to read
782      */
783     void parsePB(final HFileProtos.FileInfoProto fip) {
784       this.map.clear();
785       for (BytesBytesPair pair: fip.getMapEntryList()) {
786         this.map.put(pair.getFirst().toByteArray(), pair.getSecond().toByteArray());
787       }
788     }
789   }
790 
791   /** Return true if the given file info key is reserved for internal use. */
792   public static boolean isReservedFileInfoKey(byte[] key) {
793     return Bytes.startsWith(key, FileInfo.RESERVED_PREFIX_BYTES);
794   }
795 
796   /**
797    * Get names of supported compression algorithms. The names are acceptable by
798    * HFile.Writer.
799    *
800    * @return Array of strings, each represents a supported compression
801    *         algorithm. Currently, the following compression algorithms are
802    *         supported.
803    *         <ul>
804    *         <li>"none" - No compression.
805    *         <li>"gz" - GZIP compression.
806    *         </ul>
807    */
808   public static String[] getSupportedCompressionAlgorithms() {
809     return Compression.getSupportedAlgorithms();
810   }
811 
812   // Utility methods.
813   /*
814    * @param l Long to convert to an int.
815    * @return <code>l</code> cast as an int.
816    */
817   static int longToInt(final long l) {
818     // Expecting the size() of a block not exceeding 4GB. Assuming the
819     // size() will wrap to negative integer if it exceeds 2GB (From tfile).
820     return (int)(l & 0x00000000ffffffffL);
821   }
822 
823   /**
824    * Returns all files belonging to the given region directory. Could return an
825    * empty list.
826    *
827    * @param fs  The file system reference.
828    * @param regionDir  The region directory to scan.
829    * @return The list of files found.
830    * @throws IOException When scanning the files fails.
831    */
832   static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
833       throws IOException {
834     List<Path> res = new ArrayList<Path>();
835     PathFilter dirFilter = new FSUtils.DirFilter(fs);
836     FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
837     for(FileStatus dir : familyDirs) {
838       FileStatus[] files = fs.listStatus(dir.getPath());
839       for (FileStatus file : files) {
840         if (!file.isDir()) {
841           res.add(file.getPath());
842         }
843       }
844     }
845     return res;
846   }
847 
848   /**
849    * Checks the given {@link HFile} format version, and throws an exception if
850    * invalid. Note that if the version number comes from an input file and has
851    * not been verified, the caller needs to re-throw an {@link IOException} to
852    * indicate that this is not a software error, but corrupted input.
853    *
854    * @param version an HFile version
855    * @throws IllegalArgumentException if the version is invalid
856    */
857   public static void checkFormatVersion(int version)
858       throws IllegalArgumentException {
859     if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
860       throw new IllegalArgumentException("Invalid HFile version: " + version
861           + " (expected to be " + "between " + MIN_FORMAT_VERSION + " and "
862           + MAX_FORMAT_VERSION + ")");
863     }
864   }
865 
866   public static void main(String[] args) throws Exception {
867     // delegate to preserve old behavior
868     HFilePrettyPrinter.main(args);
869   }
870 }