View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.io.hfile;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.ByteArrayOutputStream;
23  import java.io.DataInput;
24  import java.io.DataInputStream;
25  import java.io.DataOutputStream;
26  import java.io.IOException;
27  import java.nio.ByteBuffer;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.classification.InterfaceAudience;
32  import org.apache.hadoop.fs.FSDataInputStream;
33  import org.apache.hadoop.hbase.KeyValue;
34  import org.apache.hadoop.hbase.io.compress.Compression;
35  import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
36  import org.apache.hadoop.hbase.util.Bytes;
37  import org.apache.hadoop.io.RawComparator;
38  
39  import com.google.common.io.NullOutputStream;
40  
41  /**
42   * The {@link HFile} has a fixed trailer which contains offsets to other
43   * variable parts of the file. Also includes basic metadata on this file. The
44   * trailer size is fixed within a given {@link HFile} format version only, but
45   * we always store the version number as the last four-byte integer of the file.
46   * The version number itself is split into two portions, a major 
47   * version and a minor version. 
48   * The last three bytes of a file is the major
49   * version and a single preceding byte is the minor number. The major version
50   * determines which readers/writers to use to read/write a hfile while a minor
51   * version determines smaller changes in hfile format that do not need a new
52   * reader/writer type.
53   */
54  @InterfaceAudience.Private
55  public class FixedFileTrailer {
56  
57    private static final Log LOG = LogFactory.getLog(FixedFileTrailer.class);
58  
59    /** HFile minor version that introduced pbuf filetrailer */
60    private static final int PBUF_TRAILER_MINOR_VERSION = 2;
61  
62    /**
63     * We store the comparator class name as a fixed-length field in the trailer.
64     */
65    private static final int MAX_COMPARATOR_NAME_LENGTH = 128;
66  
67    /**
68     * Offset to the fileinfo data, a small block of vitals. Necessary in v1 but
69     * only potentially useful for pretty-printing in v2.
70     */
71    private long fileInfoOffset;
72  
73    /**
74     * In version 1, the offset to the data block index. Starting from version 2,
75     * the meaning of this field is the offset to the section of the file that
76     * should be loaded at the time the file is being opened, and as of the time
77     * of writing, this happens to be the offset of the file info section.
78     */
79    private long loadOnOpenDataOffset;
80  
81    /** The number of entries in the root data index. */
82    private int dataIndexCount;
83  
84    /** Total uncompressed size of all blocks of the data index */
85    private long uncompressedDataIndexSize;
86  
87    /** The number of entries in the meta index */
88    private int metaIndexCount;
89  
90    /** The total uncompressed size of keys/values stored in the file. */
91    private long totalUncompressedBytes;
92  
93    /**
94     * The number of key/value pairs in the file. This field was int in version 1,
95     * but is now long.
96     */
97    private long entryCount;
98  
99    /** The compression codec used for all blocks. */
100   private Compression.Algorithm compressionCodec = Compression.Algorithm.NONE;
101 
102   /**
103    * The number of levels in the potentially multi-level data index. Used from
104    * version 2 onwards.
105    */
106   private int numDataIndexLevels;
107 
108   /** The offset of the first data block. */
109   private long firstDataBlockOffset;
110 
111   /**
112    * It is guaranteed that no key/value data blocks start after this offset in
113    * the file.
114    */
115   private long lastDataBlockOffset;
116 
117   /** Raw key comparator class name in version 2 */
118   private String comparatorClassName = KeyValue.KEY_COMPARATOR.getClass().getName();
119 
120   /** The {@link HFile} format major version. */
121   private final int majorVersion;
122 
123   /** The {@link HFile} format minor version. */
124   private final int minorVersion;
125 
126   FixedFileTrailer(int majorVersion, int minorVersion) {
127     this.majorVersion = majorVersion;
128     this.minorVersion = minorVersion;
129     HFile.checkFormatVersion(majorVersion);
130   }
131 
132   private static int[] computeTrailerSizeByVersion() {
133     int versionToSize[] = new int[HFile.MAX_FORMAT_VERSION + 1];
134     for (int version = HFile.MIN_FORMAT_VERSION;
135          version <= HFile.MAX_FORMAT_VERSION;
136          ++version) {
137       FixedFileTrailer fft = new FixedFileTrailer(version, HFileBlock.MINOR_VERSION_NO_CHECKSUM);
138       DataOutputStream dos = new DataOutputStream(new NullOutputStream());
139       try {
140         fft.serialize(dos);
141       } catch (IOException ex) {
142         // The above has no reason to fail.
143         throw new RuntimeException(ex);
144       }
145       versionToSize[version] = dos.size();
146     }
147     return versionToSize;
148   }
149 
150   private static int getMaxTrailerSize() {
151     int maxSize = 0;
152     for (int version = HFile.MIN_FORMAT_VERSION;
153          version <= HFile.MAX_FORMAT_VERSION;
154          ++version)
155       maxSize = Math.max(getTrailerSize(version), maxSize);
156     return maxSize;
157   }
158 
159   private static final int TRAILER_SIZE[] = computeTrailerSizeByVersion();
160   private static final int MAX_TRAILER_SIZE = getMaxTrailerSize();
161 
162   private static final int NOT_PB_SIZE = BlockType.MAGIC_LENGTH + Bytes.SIZEOF_INT;
163 
164   static int getTrailerSize(int version) {
165     return TRAILER_SIZE[version];
166   }
167 
168   public int getTrailerSize() {
169     return getTrailerSize(majorVersion);
170   }
171 
172   /**
173    * Write the trailer to a data stream. We support writing version 1 for
174    * testing and for determining version 1 trailer size. It is also easy to see
175    * what fields changed in version 2.
176    *
177    * @param outputStream
178    * @throws IOException
179    */
180   void serialize(DataOutputStream outputStream) throws IOException {
181     HFile.checkFormatVersion(majorVersion);
182 
183     ByteArrayOutputStream baos = new ByteArrayOutputStream();
184     DataOutputStream baosDos = new DataOutputStream(baos);
185 
186     BlockType.TRAILER.write(baosDos);
187     if (majorVersion > 2 || (majorVersion == 2 && minorVersion >= PBUF_TRAILER_MINOR_VERSION)) {
188       serializeAsPB(baosDos);
189     } else {
190       serializeAsWritable(baosDos);
191     }
192 
193     // The last 4 bytes of the file encode the major and minor version universally
194     baosDos.writeInt(materializeVersion(majorVersion, minorVersion));
195 
196     outputStream.write(baos.toByteArray());
197   }
198 
199   /**
200    * Write trailer data as protobuf
201    * @param outputStream
202    * @throws IOException
203    */
204   void serializeAsPB(DataOutputStream output) throws IOException {
205     ByteArrayOutputStream baos = new ByteArrayOutputStream();
206     HFileProtos.FileTrailerProto.newBuilder()
207       .setFileInfoOffset(fileInfoOffset)
208       .setLoadOnOpenDataOffset(loadOnOpenDataOffset)
209       .setUncompressedDataIndexSize(uncompressedDataIndexSize)
210       .setTotalUncompressedBytes(totalUncompressedBytes)
211       .setDataIndexCount(dataIndexCount)
212       .setMetaIndexCount(metaIndexCount)
213       .setEntryCount(entryCount)
214       .setNumDataIndexLevels(numDataIndexLevels)
215       .setFirstDataBlockOffset(firstDataBlockOffset)
216       .setLastDataBlockOffset(lastDataBlockOffset)
217       .setComparatorClassName(comparatorClassName)
218       .setCompressionCodec(compressionCodec.ordinal())
219       .build().writeDelimitedTo(baos);
220     output.write(baos.toByteArray());
221     // Pad to make up the difference between variable PB encoding length and the
222     // length when encoded as writable under earlier V2 formats. Failure to pad
223     // properly or if the PB encoding is too big would mean the trailer wont be read
224     // in properly by HFile.
225     int padding = getTrailerSize() - NOT_PB_SIZE - baos.size();
226     if (padding < 0) {
227       throw new IOException("Pbuf encoding size exceeded fixed trailer size limit");
228     }
229     for (int i = 0; i < padding; i++) {
230       output.write(0);
231     }
232   }
233 
234   /**
235    * Write trailer data as writable
236    * @param outputStream
237    * @throws IOException
238    */
239   void serializeAsWritable(DataOutputStream output) throws IOException {
240     output.writeLong(fileInfoOffset);
241     output.writeLong(loadOnOpenDataOffset);
242     output.writeInt(dataIndexCount);
243 
244     output.writeLong(uncompressedDataIndexSize);
245 
246     output.writeInt(metaIndexCount);
247     output.writeLong(totalUncompressedBytes);
248     output.writeLong(entryCount);
249     output.writeInt(compressionCodec.ordinal());
250 
251     output.writeInt(numDataIndexLevels);
252     output.writeLong(firstDataBlockOffset);
253     output.writeLong(lastDataBlockOffset);
254     Bytes.writeStringFixedSize(output, comparatorClassName, MAX_COMPARATOR_NAME_LENGTH);
255   }
256 
257   /**
258    * Deserialize the fixed file trailer from the given stream. The version needs
259    * to already be specified. Make sure this is consistent with
260    * {@link #serialize(DataOutputStream)}.
261    *
262    * @param inputStream
263    * @throws IOException
264    */
265   void deserialize(DataInputStream inputStream) throws IOException {
266     HFile.checkFormatVersion(majorVersion);
267 
268     BlockType.TRAILER.readAndCheck(inputStream);
269 
270     if (majorVersion > 2 || (majorVersion == 2 && minorVersion >= PBUF_TRAILER_MINOR_VERSION)) {
271       deserializeFromPB(inputStream);
272     } else {
273       deserializeFromWritable(inputStream);
274     }
275 
276     // The last 4 bytes of the file encode the major and minor version universally
277     int version = inputStream.readInt();
278     expectMajorVersion(extractMajorVersion(version));
279     expectMinorVersion(extractMinorVersion(version));
280   }
281 
282   /**
283    * Deserialize the file trailer as protobuf
284    * @param inputStream
285    * @throws IOException
286    */
287   void deserializeFromPB(DataInputStream inputStream) throws IOException {
288     // read PB and skip padding
289     int start = inputStream.available();
290     HFileProtos.FileTrailerProto.Builder builder = HFileProtos.FileTrailerProto.newBuilder();
291     builder.mergeDelimitedFrom(inputStream);
292     int size = start - inputStream.available();
293     inputStream.skip(getTrailerSize() - NOT_PB_SIZE - size);
294 
295     // process the PB
296     if (builder.hasFileInfoOffset()) {
297       fileInfoOffset = builder.getFileInfoOffset();
298     }
299     if (builder.hasLoadOnOpenDataOffset()) {
300       loadOnOpenDataOffset = builder.getLoadOnOpenDataOffset();
301     }
302     if (builder.hasUncompressedDataIndexSize()) {
303       uncompressedDataIndexSize = builder.getUncompressedDataIndexSize();
304     }
305     if (builder.hasTotalUncompressedBytes()) {
306       totalUncompressedBytes = builder.getTotalUncompressedBytes();
307     }
308     if (builder.hasDataIndexCount()) {
309       dataIndexCount = builder.getDataIndexCount();
310     }
311     if (builder.hasMetaIndexCount()) {
312       metaIndexCount = builder.getMetaIndexCount();
313     }
314     if (builder.hasEntryCount()) {
315       entryCount = builder.getEntryCount();
316     }
317     if (builder.hasNumDataIndexLevels()) {
318       numDataIndexLevels = builder.getNumDataIndexLevels();
319     }
320     if (builder.hasFirstDataBlockOffset()) {
321       firstDataBlockOffset = builder.getFirstDataBlockOffset();
322     }
323     if (builder.hasLastDataBlockOffset()) {
324       lastDataBlockOffset = builder.getLastDataBlockOffset();
325     }
326     if (builder.hasComparatorClassName()) {
327       setComparatorClass(getComparatorClass(builder.getComparatorClassName()));
328     }
329     if (builder.hasCompressionCodec()) {
330       compressionCodec = Compression.Algorithm.values()[builder.getCompressionCodec()];
331     } else {
332       compressionCodec = Compression.Algorithm.NONE;
333     }
334   }
335 
336   /**
337    * Deserialize the file trailer as writable data
338    * @param input
339    * @throws IOException
340    */
341   void deserializeFromWritable(DataInput input) throws IOException {
342     fileInfoOffset = input.readLong();
343     loadOnOpenDataOffset = input.readLong();
344     dataIndexCount = input.readInt();
345     uncompressedDataIndexSize = input.readLong();
346     metaIndexCount = input.readInt();
347 
348     totalUncompressedBytes = input.readLong();
349     entryCount = input.readLong();
350     compressionCodec = Compression.Algorithm.values()[input.readInt()];
351     numDataIndexLevels = input.readInt();
352     firstDataBlockOffset = input.readLong();
353     lastDataBlockOffset = input.readLong();
354     setComparatorClass(getComparatorClass(Bytes.readStringFixedSize(input,
355         MAX_COMPARATOR_NAME_LENGTH)));
356   }
357   
358   private void append(StringBuilder sb, String s) {
359     if (sb.length() > 0)
360       sb.append(", ");
361     sb.append(s);
362   }
363 
364   @Override
365   public String toString() {
366     StringBuilder sb = new StringBuilder();
367     append(sb, "fileinfoOffset=" + fileInfoOffset);
368     append(sb, "loadOnOpenDataOffset=" + loadOnOpenDataOffset);
369     append(sb, "dataIndexCount=" + dataIndexCount);
370     append(sb, "metaIndexCount=" + metaIndexCount);
371     append(sb, "totalUncomressedBytes=" + totalUncompressedBytes);
372     append(sb, "entryCount=" + entryCount);
373     append(sb, "compressionCodec=" + compressionCodec);
374     append(sb, "uncompressedDataIndexSize=" + uncompressedDataIndexSize);
375     append(sb, "numDataIndexLevels=" + numDataIndexLevels);
376     append(sb, "firstDataBlockOffset=" + firstDataBlockOffset);
377     append(sb, "lastDataBlockOffset=" + lastDataBlockOffset);
378     append(sb, "comparatorClassName=" + comparatorClassName);
379     append(sb, "majorVersion=" + majorVersion);
380     append(sb, "minorVersion=" + minorVersion);
381 
382     return sb.toString();
383   }
384 
385   /**
386    * Reads a file trailer from the given file.
387    *
388    * @param istream the input stream with the ability to seek. Does not have to
389    *          be buffered, as only one read operation is made.
390    * @param fileSize the file size. Can be obtained using
391    *          {@link org.apache.hadoop.fs.FileSystem#getFileStatus(
392    *          org.apache.hadoop.fs.Path)}.
393    * @return the fixed file trailer read
394    * @throws IOException if failed to read from the underlying stream, or the
395    *           trailer is corrupted, or the version of the trailer is
396    *           unsupported
397    */
398   public static FixedFileTrailer readFromStream(FSDataInputStream istream,
399       long fileSize) throws IOException {
400     int bufferSize = MAX_TRAILER_SIZE;
401     long seekPoint = fileSize - bufferSize;
402     if (seekPoint < 0) {
403       // It is hard to imagine such a small HFile.
404       seekPoint = 0;
405       bufferSize = (int) fileSize;
406     }
407 
408     istream.seek(seekPoint);
409     ByteBuffer buf = ByteBuffer.allocate(bufferSize);
410     istream.readFully(buf.array(), buf.arrayOffset(),
411         buf.arrayOffset() + buf.limit());
412 
413     // Read the version from the last int of the file.
414     buf.position(buf.limit() - Bytes.SIZEOF_INT);
415     int version = buf.getInt();
416 
417     // Extract the major and minor versions.
418     int majorVersion = extractMajorVersion(version);
419     int minorVersion = extractMinorVersion(version);
420 
421     HFile.checkFormatVersion(majorVersion); // throws IAE if invalid
422 
423     int trailerSize = getTrailerSize(majorVersion);
424 
425     FixedFileTrailer fft = new FixedFileTrailer(majorVersion, minorVersion);
426     fft.deserialize(new DataInputStream(new ByteArrayInputStream(buf.array(),
427         buf.arrayOffset() + bufferSize - trailerSize, trailerSize)));
428     return fft;
429   }
430 
431   public void expectMajorVersion(int expected) {
432     if (majorVersion != expected) {
433       throw new IllegalArgumentException("Invalid HFile major version: "
434           + majorVersion 
435           + " (expected: " + expected + ")");
436     }
437   }
438 
439   public void expectMinorVersion(int expected) {
440     if (minorVersion != expected) {
441       throw new IllegalArgumentException("Invalid HFile minor version: "
442           + minorVersion + " (expected: " + expected + ")");
443     }
444   }
445 
446   public void expectAtLeastMajorVersion(int lowerBound) {
447     if (majorVersion < lowerBound) {
448       throw new IllegalArgumentException("Invalid HFile major version: "
449           + majorVersion
450           + " (expected: " + lowerBound + " or higher).");
451     }
452   }
453 
454   public long getFileInfoOffset() {
455     return fileInfoOffset;
456   }
457 
458   public void setFileInfoOffset(long fileInfoOffset) {
459     this.fileInfoOffset = fileInfoOffset;
460   }
461 
462   public long getLoadOnOpenDataOffset() {
463     return loadOnOpenDataOffset;
464   }
465 
466   public void setLoadOnOpenOffset(long loadOnOpenDataOffset) {
467     this.loadOnOpenDataOffset = loadOnOpenDataOffset;
468   }
469 
470   public int getDataIndexCount() {
471     return dataIndexCount;
472   }
473 
474   public void setDataIndexCount(int dataIndexCount) {
475     this.dataIndexCount = dataIndexCount;
476   }
477 
478   public int getMetaIndexCount() {
479     return metaIndexCount;
480   }
481 
482   public void setMetaIndexCount(int metaIndexCount) {
483     this.metaIndexCount = metaIndexCount;
484   }
485 
486   public long getTotalUncompressedBytes() {
487     return totalUncompressedBytes;
488   }
489 
490   public void setTotalUncompressedBytes(long totalUncompressedBytes) {
491     this.totalUncompressedBytes = totalUncompressedBytes;
492   }
493 
494   public long getEntryCount() {
495     return entryCount;
496   }
497 
498   public void setEntryCount(long newEntryCount) {
499     entryCount = newEntryCount;
500   }
501 
502   public Compression.Algorithm getCompressionCodec() {
503     return compressionCodec;
504   }
505 
506   public void setCompressionCodec(Compression.Algorithm compressionCodec) {
507     this.compressionCodec = compressionCodec;
508   }
509 
510   public int getNumDataIndexLevels() {
511     expectAtLeastMajorVersion(2);
512     return numDataIndexLevels;
513   }
514 
515   public void setNumDataIndexLevels(int numDataIndexLevels) {
516     expectAtLeastMajorVersion(2);
517     this.numDataIndexLevels = numDataIndexLevels;
518   }
519 
520   public long getLastDataBlockOffset() {
521     expectAtLeastMajorVersion(2);
522     return lastDataBlockOffset;
523   }
524 
525   public void setLastDataBlockOffset(long lastDataBlockOffset) {
526     expectAtLeastMajorVersion(2);
527     this.lastDataBlockOffset = lastDataBlockOffset;
528   }
529 
530   public long getFirstDataBlockOffset() {
531     expectAtLeastMajorVersion(2);
532     return firstDataBlockOffset;
533   }
534 
535   public void setFirstDataBlockOffset(long firstDataBlockOffset) {
536     expectAtLeastMajorVersion(2);
537     this.firstDataBlockOffset = firstDataBlockOffset;
538   }
539 
540   public String getComparatorClassName() {
541     return comparatorClassName;
542   }
543 
544   /**
545    * Returns the major version of this HFile format
546    */
547   public int getMajorVersion() {
548     return majorVersion;
549   }
550 
551   /**
552    * Returns the minor version of this HFile format
553    */
554   public int getMinorVersion() {
555     return minorVersion;
556   }
557 
558   @SuppressWarnings("rawtypes")
559   public void setComparatorClass(Class<? extends RawComparator> klass) {
560     // Is the comparator instantiable
561     try {
562       klass.newInstance();
563     } catch (Exception e) {
564       throw new RuntimeException("Comparator class " + klass.getName() +
565         " is not instantiable", e);
566     }
567     comparatorClassName = klass.getName();
568   }
569 
570   @SuppressWarnings("unchecked")
571   private static Class<? extends RawComparator<byte[]>> getComparatorClass(
572       String comparatorClassName) throws IOException {
573     try {
574       return (Class<? extends RawComparator<byte[]>>)
575           Class.forName(comparatorClassName);
576     } catch (ClassNotFoundException ex) {
577       throw new IOException(ex);
578     }
579   }
580 
581   public static RawComparator<byte[]> createComparator(
582       String comparatorClassName) throws IOException {
583     try {
584       return getComparatorClass(comparatorClassName).newInstance();
585     } catch (InstantiationException e) {
586       throw new IOException("Comparator class " + comparatorClassName +
587         " is not instantiable", e);
588     } catch (IllegalAccessException e) {
589       throw new IOException("Comparator class " + comparatorClassName +
590         " is not instantiable", e);
591     }
592   }
593 
594   RawComparator<byte[]> createComparator() throws IOException {
595     expectAtLeastMajorVersion(2);
596     return createComparator(comparatorClassName);
597   }
598 
599   public long getUncompressedDataIndexSize() {
600     return uncompressedDataIndexSize;
601   }
602 
603   public void setUncompressedDataIndexSize(
604       long uncompressedDataIndexSize) {
605     expectAtLeastMajorVersion(2);
606     this.uncompressedDataIndexSize = uncompressedDataIndexSize;
607   }
608 
609   /**
610    * Extracts the major version for a 4-byte serialized version data.
611    * The major version is the 3 least significant bytes
612    */
613   private static int extractMajorVersion(int serializedVersion) {
614     return (serializedVersion & 0x00ffffff);
615   }
616 
617   /**
618    * Extracts the minor version for a 4-byte serialized version data.
619    * The major version are the 3 the most significant bytes
620    */
621   private static int extractMinorVersion(int serializedVersion) {
622     return (serializedVersion >>> 24);
623   }
624 
625   /**
626    * Create a 4 byte serialized version number by combining the
627    * minor and major version numbers.
628    */
629   private static int materializeVersion(int majorVersion, int minorVersion) {
630     return ((majorVersion & 0x00ffffff) | (minorVersion << 24));
631   }
632 }