/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.GZ;
import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.NONE;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.io.compress.Compressor;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import com.google.common.base.Preconditions;

/**
 * This class has unit tests to prove that older versions of
 * HFiles (without checksums) are compatible with current readers.
 */
@Category(SmallTests.class)
@RunWith(Parameterized.class)
public class TestHFileBlockCompatibility {
  private static final boolean[] BOOLEAN_VALUES = new boolean[] { false, true };

  private static final Log LOG = LogFactory.getLog(TestHFileBlockCompatibility.class);

  private static final Compression.Algorithm[] COMPRESSION_ALGORITHMS = {
      NONE, GZ };

  // The minor version for pre-checksum files.
  private static final int MINOR_VERSION = 0;

  private static final HBaseTestingUtility TEST_UTIL =
      new HBaseTestingUtility();
  private HFileSystem fs;
  private int uncompressedSizeV1;

  private final boolean includesMemstoreTS;

  public TestHFileBlockCompatibility(boolean includesMemstoreTS) {
    this.includesMemstoreTS = includesMemstoreTS;
  }

  @Parameters
  public static Collection<Object[]> parameters() {
    return HBaseTestingUtility.BOOLEAN_PARAMETERIZED;
  }

  @Before
  public void setUp() throws IOException {
    fs = (HFileSystem)HFileSystem.get(TEST_UTIL.getConfiguration());
  }

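  /**
   * Writes a "version 1" style block: the block magic followed by the test
   * contents, run through the given compression algorithm, with no per-block
   * header or checksums. Also records the uncompressed size in
   * {@link #uncompressedSizeV1}.
   */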
  public byte[] createTestV1Block(Compression.Algorithm algo)
      throws IOException {
    Compressor compressor = algo.getCompressor();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    OutputStream os = algo.createCompressionStream(baos, compressor, 0);
    DataOutputStream dos = new DataOutputStream(os);
    BlockType.META.write(dos); // Let's make this a meta block.
    TestHFileBlock.writeTestBlockContents(dos);
    uncompressedSizeV1 = dos.size();
    dos.flush();
    algo.returnCompressor(compressor);
    return baos.toByteArray();
  }

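  /**
   * Writes a single test data block through the minor-version-0 {@link Writer}
   * defined below and returns the writer, so callers can inspect the
   * serialized block bytes.
   */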
  private Writer createTestV2Block(Compression.Algorithm algo)
      throws IOException {
    final BlockType blockType = BlockType.DATA;
    Writer hbw = new Writer(algo, null,
        includesMemstoreTS);
    DataOutputStream dos = hbw.startWriting(blockType);
    TestHFileBlock.writeTestBlockContents(dos);
    // make sure the block is ready by calling hbw.getHeaderAndData()
    hbw.getHeaderAndData();
    assertEquals(1000 * 4, hbw.getUncompressedSizeWithoutHeader());
    hbw.releaseCompressor();
    return hbw;
  }

  private String createTestBlockStr(Compression.Algorithm algo,
      int correctLength) throws IOException {
    Writer hbw = createTestV2Block(algo);
    byte[] testV2Block = hbw.getHeaderAndData();
    int osOffset = HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM + 9;
    if (testV2Block.length == correctLength) {
      // Force-set the "OS" field of the gzip header to 3 (Unix) to avoid
      // variations across operating systems.
      // See http://www.gzip.org/zlib/rfc-gzip.html for gzip format.
      testV2Block[osOffset] = 3;
    }
    return Bytes.toStringBinary(testV2Block);
  }

  @Test
  public void testNoCompression() throws IOException {
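    // The test block payload is 4000 uncompressed bytes
    // (see the 1000 * 4 assertion in createTestV2Block above).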
    assertEquals(4000, createTestV2Block(NONE).getBlockForCaching().
                       getUncompressedSizeWithoutHeader());
  }

  @Test
  public void testGzipCompression() throws IOException {
    final String correctTestBlockStr =
        "DATABLK*\\x00\\x00\\x00:\\x00\\x00\\x0F\\xA0\\xFF\\xFF\\xFF\\xFF"
            + "\\xFF\\xFF\\xFF\\xFF"
            // gzip-compressed block: http://www.gzip.org/zlib/rfc-gzip.html
            + "\\x1F\\x8B"  // gzip magic signature
            + "\\x08"  // Compression method: 8 = "deflate"
            + "\\x00"  // Flags
            + "\\x00\\x00\\x00\\x00"  // mtime
            + "\\x00"  // XFL (extra flags)
            // OS (0 = FAT filesystems, 3 = Unix). However, this field
            // sometimes gets set to 0 on Linux and Mac, so we reset it to 3.
            + "\\x03"
            + "\\xED\\xC3\\xC1\\x11\\x00 \\x08\\xC00DD\\xDD\\x7Fa"
            + "\\xD6\\xE8\\xA3\\xB9K\\x84`\\x96Q\\xD3\\xA8\\xDB\\xA8e\\xD4c"
            + "\\xD46\\xEA5\\xEA3\\xEA7\\xE7\\x00LI\\x5Cs\\xA0\\x0F\\x00\\x00";
    final int correctGzipBlockLength = 82;

    String returnedStr = createTestBlockStr(GZ, correctGzipBlockLength);
    assertEquals(correctTestBlockStr, returnedStr);
  }

  @Test
  public void testReaderV2() throws IOException {
    for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
      for (boolean pread : new boolean[] { false, true }) {
        LOG.info("testReaderV2: Compression algorithm: " + algo +
                 ", pread=" + pread);
        Path path = new Path(TEST_UTIL.getDataTestDir(), "blocks_v2_"
            + algo);
        FSDataOutputStream os = fs.create(path);
        Writer hbw = new Writer(algo, null,
            includesMemstoreTS);
        long totalSize = 0;
        for (int blockId = 0; blockId < 2; ++blockId) {
          DataOutputStream dos = hbw.startWriting(BlockType.DATA);
          for (int i = 0; i < 1234; ++i)
            dos.writeInt(i);
          hbw.writeHeaderAndData(os);
          totalSize += hbw.getOnDiskSizeWithHeader();
        }
        os.close();

        FSDataInputStream is = fs.open(path);
        HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(new FSDataInputStreamWrapper(is),
            algo, totalSize, MINOR_VERSION, fs, path);
        HFileBlock b = hbr.readBlockData(0, -1, -1, pread);
        is.close();

        b.sanityCheck();
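        // Each block holds 1234 ints, i.e. 4936 uncompressed bytes; the
        // expected GZ on-disk size of that payload is 2173 bytes.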
        assertEquals(4936, b.getUncompressedSizeWithoutHeader());
        assertEquals(algo == GZ ? 2173 : 4936,
                     b.getOnDiskSizeWithoutHeader() - b.totalChecksumBytes());
        String blockStr = b.toString();

        if (algo == GZ) {
          is = fs.open(path);
          hbr = new HFileBlock.FSReaderV2(new FSDataInputStreamWrapper(is),
              algo, totalSize, MINOR_VERSION, fs, path);
          b = hbr.readBlockData(0, 2173 + HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM +
                                b.totalChecksumBytes(), -1, pread);
          assertEquals(blockStr, b.toString());
          int wrongCompressedSize = 2172;
          try {
            b = hbr.readBlockData(0, wrongCompressedSize
                + HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM, -1, pread);
            fail("Exception expected");
          } catch (IOException ex) {
            String expectedPrefix = "On-disk size without header provided is "
                + wrongCompressedSize + ", but block header contains "
                + b.getOnDiskSizeWithoutHeader() + ".";
            assertTrue("Invalid exception message: '" + ex.getMessage()
                + "'.\nMessage is expected to start with: '" + expectedPrefix
                + "'", ex.getMessage().startsWith(expectedPrefix));
          }
          is.close();
        }
      }
    }
  }

  /**
   * Tests encoding and decoding data blocks.
   * @throws IOException in case of a bug or a problem with temporary files
   */
  @Test
  public void testDataBlockEncoding() throws IOException {
    final int numBlocks = 5;
    for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
      for (boolean pread : new boolean[] { false, true }) {
        for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
          LOG.info("testDataBlockEncoding algo " + algo +
                   " pread = " + pread +
                   " encoding " + encoding);
          Path path = new Path(TEST_UTIL.getDataTestDir(), "blocks_v2_"
              + algo + "_" + encoding.toString());
          FSDataOutputStream os = fs.create(path);
          HFileDataBlockEncoder dataBlockEncoder =
              new HFileDataBlockEncoderImpl(encoding, encoding,
                  TestHFileBlockCompatibility.Writer.DUMMY_HEADER);
          TestHFileBlockCompatibility.Writer hbw =
              new TestHFileBlockCompatibility.Writer(algo,
                  dataBlockEncoder, includesMemstoreTS);
          long totalSize = 0;
          final List<Integer> encodedSizes = new ArrayList<Integer>();
          final List<ByteBuffer> encodedBlocks = new ArrayList<ByteBuffer>();
          for (int blockId = 0; blockId < numBlocks; ++blockId) {
            DataOutputStream dos = hbw.startWriting(BlockType.DATA);
            TestHFileBlock.writeEncodedBlock(algo, encoding, dos, encodedSizes,
                encodedBlocks, blockId, includesMemstoreTS,
                TestHFileBlockCompatibility.Writer.DUMMY_HEADER);

            hbw.writeHeaderAndData(os);
            totalSize += hbw.getOnDiskSizeWithHeader();
          }
          os.close();

          FSDataInputStream is = fs.open(path);
          HFileBlock.FSReaderV2 hbr = new HFileBlock.FSReaderV2(new FSDataInputStreamWrapper(is),
              algo, totalSize, MINOR_VERSION, fs, path);
          hbr.setDataBlockEncoder(dataBlockEncoder);
          hbr.setIncludesMemstoreTS(includesMemstoreTS);

          HFileBlock b;
          int pos = 0;
          for (int blockId = 0; blockId < numBlocks; ++blockId) {
            b = hbr.readBlockData(pos, -1, -1, pread);
            b.sanityCheck();
            pos += b.getOnDiskSizeWithHeader();

            assertEquals((int) encodedSizes.get(blockId),
                b.getUncompressedSizeWithoutHeader());
            ByteBuffer actualBuffer = b.getBufferWithoutHeader();
            if (encoding != DataBlockEncoding.NONE) {
              // We expect a two-byte big-endian encoding id.
              assertEquals(0, actualBuffer.get(0));
              assertEquals(encoding.getId(), actualBuffer.get(1));
              actualBuffer.position(2);
              actualBuffer = actualBuffer.slice();
            }

            ByteBuffer expectedBuffer = encodedBlocks.get(blockId);
            expectedBuffer.rewind();

            // test if content matches, produce nice message
            TestHFileBlock.assertBuffersEqual(expectedBuffer, actualBuffer,
              algo, encoding, pread);
          }
          is.close();
        }
      }
    }
  }

  /**
   * This is the version of HFileBlock.Writer that is used to create
   * V2 blocks with minor version 0. These blocks do not have
   * HBase-level checksums. The code is here to test backward
   * compatibility. We do not inherit from HFileBlock.Writer because we
   * never want to change the code in this class, whereas the code in
   * HFileBlock.Writer will continue to evolve.
   */
  public static final class Writer {

    // These constants are as they were in minorVersion 0.
    private static final int HEADER_SIZE = HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
    private static final boolean DONT_FILL_HEADER = HFileBlock.DONT_FILL_HEADER;
    private static final byte[] DUMMY_HEADER =
      HFileBlock.DUMMY_HEADER_NO_CHECKSUM;

    private enum State {
      INIT,
      WRITING,
      BLOCK_READY
    }

    /** Writer state. Used to ensure the correct usage protocol. */
    private State state = State.INIT;

    /** Compression algorithm for all blocks this instance writes. */
    private final Compression.Algorithm compressAlgo;

    /** Data block encoder used for data blocks */
    private final HFileDataBlockEncoder dataBlockEncoder;

    private HFileBlockEncodingContext dataBlockEncodingCtx;
    /** block encoding context for non-data blocks */
    private HFileBlockDefaultEncodingContext defaultBlockEncodingCtx;

    /**
     * The stream we use to accumulate data in uncompressed format for each
     * block. We reset this stream at the end of each block and reuse it. The
     * header is written as the first {@link #HEADER_SIZE} bytes into this
     * stream.
     */
    private ByteArrayOutputStream baosInMemory;

    /** Compressor, which is also reused between consecutive blocks. */
    private Compressor compressor;

    /**
     * Current block type. Set in {@link #startWriting(BlockType)}. Could be
     * changed in {@link #encodeDataBlockForDisk()} from {@link BlockType#DATA}
     * to {@link BlockType#ENCODED_DATA}.
     */
    private BlockType blockType;

    /**
     * A stream that we write uncompressed bytes to, which compresses them and
     * writes them to {@link #baosInMemory}.
     */
    private DataOutputStream userDataStream;

    /**
     * Bytes to be written to the file system, including the header. Compressed
     * if compression is turned on.
     */
    private byte[] onDiskBytesWithHeader;

    /**
     * Valid in the BLOCK_READY state. Contains the header and the uncompressed
     * (but potentially encoded, if this is a data block) bytes, so the length
     * is the uncompressed size without the header plus {@link #HEADER_SIZE}.
     */
    private byte[] uncompressedBytesWithHeader;

    /**
     * Current block's start offset in the {@link HFile}. Set in
     * {@link #writeHeaderAndData(FSDataOutputStream)}.
     */
    private long startOffset;

    /**
     * Offset of previous block by block type. Updated when the next block is
     * started.
     */
    private long[] prevOffsetByType;

    /** The offset of the previous block of the same type */
    private long prevOffset;

    /** Whether we are including memstore timestamp after every key/value */
    private boolean includesMemstoreTS;

    /**
     * @param compressionAlgorithm compression algorithm to use
     * @param dataBlockEncoder data block encoder to use
     * @param includesMemstoreTS whether to include the memstore timestamp
     *          after every key/value
     */
    public Writer(Compression.Algorithm compressionAlgorithm,
          HFileDataBlockEncoder dataBlockEncoder, boolean includesMemstoreTS) {
      compressAlgo = compressionAlgorithm == null ? NONE : compressionAlgorithm;
      this.dataBlockEncoder = dataBlockEncoder != null
          ? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE;

      defaultBlockEncodingCtx =
          new HFileBlockDefaultEncodingContext(compressionAlgorithm,
              null, DUMMY_HEADER);
      dataBlockEncodingCtx =
        this.dataBlockEncoder.newOnDiskDataBlockEncodingContext(
            compressionAlgorithm, DUMMY_HEADER);

      baosInMemory = new ByteArrayOutputStream();

      prevOffsetByType = new long[BlockType.values().length];
      for (int i = 0; i < prevOffsetByType.length; ++i)
        prevOffsetByType[i] = -1;

      this.includesMemstoreTS = includesMemstoreTS;
    }


    /**
     * Starts writing into the block. The previous block's data is discarded.
     *
     * @return the stream the user can write their data into
     * @throws IOException
     */
    public DataOutputStream startWriting(BlockType newBlockType)
        throws IOException {
      if (state == State.BLOCK_READY && startOffset != -1) {
        // We had a previous block that was written to a stream at a specific
        // offset. Save that offset as the last offset of a block of that type.
        prevOffsetByType[blockType.getId()] = startOffset;
      }

      startOffset = -1;
      blockType = newBlockType;

      baosInMemory.reset();
      baosInMemory.write(DUMMY_HEADER);

      state = State.WRITING;

      // We will compress it later in finishBlock()
      userDataStream = new DataOutputStream(baosInMemory);
      return userDataStream;
    }

    /**
     * Returns the stream for the user to write to. The block writer takes care
     * of handling compression and buffering for caching on write. Can only be
     * called in the "writing" state.
     *
     * @return the data output stream for the user to write to
     */
    DataOutputStream getUserDataStream() {
      expectState(State.WRITING);
      return userDataStream;
    }

    /**
     * Transitions the block writer from the "writing" state to the "block
     * ready" state. Does nothing if a block is already finished.
     */
    private void ensureBlockReady() throws IOException {
      Preconditions.checkState(state != State.INIT,
          "Unexpected state: " + state);

      if (state == State.BLOCK_READY)
        return;

      // This will set state to BLOCK_READY.
      finishBlock();
    }

    /**
     * An internal method that flushes the compressing stream (if using
     * compression), serializes the header, and takes care of the separate
     * uncompressed stream for caching on write, if applicable. Sets block
     * write state to "block ready".
     */
    private void finishBlock() throws IOException {
      userDataStream.flush();
      // This does an array copy, so it is safe to cache this byte array.
      uncompressedBytesWithHeader = baosInMemory.toByteArray();
      prevOffset = prevOffsetByType[blockType.getId()];

      // We need to set state before we can package the block up for
      // cache-on-write. In a way, the block is ready, but not yet encoded or
      // compressed.
      state = State.BLOCK_READY;
      if (blockType == BlockType.DATA) {
        encodeDataBlockForDisk();
      } else {
        defaultBlockEncodingCtx.compressAfterEncodingWithBlockType(
            uncompressedBytesWithHeader, blockType);
        onDiskBytesWithHeader =
          defaultBlockEncodingCtx.getOnDiskBytesWithHeader();
      }

      // put the header for on disk bytes
      putHeader(onDiskBytesWithHeader, 0,
          onDiskBytesWithHeader.length,
          uncompressedBytesWithHeader.length);
      // set the header for the uncompressed bytes (for cache-on-write)
      putHeader(uncompressedBytesWithHeader, 0,
          onDiskBytesWithHeader.length,
          uncompressedBytesWithHeader.length);
    }

    /**
     * Encodes this block if it is a data block and encoding is turned on in
     * {@link #dataBlockEncoder}.
     */
    private void encodeDataBlockForDisk() throws IOException {
      // do data block encoding, if data block encoder is set
      ByteBuffer rawKeyValues =
          ByteBuffer.wrap(uncompressedBytesWithHeader, HEADER_SIZE,
              uncompressedBytesWithHeader.length - HEADER_SIZE).slice();

      // do the encoding
      dataBlockEncoder.beforeWriteToDisk(rawKeyValues,
              includesMemstoreTS, dataBlockEncodingCtx, blockType);

      uncompressedBytesWithHeader =
          dataBlockEncodingCtx.getUncompressedBytesWithHeader();
      onDiskBytesWithHeader =
          dataBlockEncodingCtx.getOnDiskBytesWithHeader();
      blockType = dataBlockEncodingCtx.getBlockType();
    }

    /**
     * Puts the header into the given byte array at the given offset.
     * @param dest the byte array to write the header into
     * @param offset the offset in the array at which to start writing
     * @param onDiskSize size of the block on disk
     * @param uncompressedSize size of the block after decompression (but
     *          before optional data block decoding)
     */
    private void putHeader(byte[] dest, int offset, int onDiskSize,
        int uncompressedSize) {
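      // Minor-version-0 header layout: 8-byte block magic, 4-byte on-disk
      // size without header, 4-byte uncompressed size without header, and an
      // 8-byte offset of the previous block of the same type (24 bytes total).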
      offset = blockType.put(dest, offset);
      offset = Bytes.putInt(dest, offset, onDiskSize - HEADER_SIZE);
      offset = Bytes.putInt(dest, offset, uncompressedSize - HEADER_SIZE);
      Bytes.putLong(dest, offset, prevOffset);
    }

    /**
     * Similar to {@link #writeHeaderAndData(DataOutputStream)}, but records
     * the offset of this block so that it can be referenced in the next block
     * of the same type.
     *
     * @param out the output stream to write the block to
     * @throws IOException
     */
    public void writeHeaderAndData(FSDataOutputStream out) throws IOException {
      long offset = out.getPos();
      if (startOffset != -1 && offset != startOffset) {
        throw new IOException("A " + blockType + " block written to a "
            + "stream twice, first at offset " + startOffset + ", then at "
            + offset);
      }
      startOffset = offset;

      writeHeaderAndData((DataOutputStream) out);
    }

    /**
     * Writes the header and the compressed data of this block (or uncompressed
     * data when not using compression) into the given stream. Can be called in
     * the "writing" state or in the "block ready" state. If called in the
     * "writing" state, transitions the writer to the "block ready" state.
     *
     * @param out the output stream to write the block to
     * @throws IOException
     */
    private void writeHeaderAndData(DataOutputStream out) throws IOException {
      ensureBlockReady();
      out.write(onDiskBytesWithHeader);
    }

    /**
     * Returns the header and the compressed data (or uncompressed data when
     * not using compression) as a byte array. Can be called in the "writing"
     * state or in the "block ready" state. If called in the "writing" state,
     * transitions the writer to the "block ready" state.
     *
     * @return header and data as they would be stored on disk in a byte array
     * @throws IOException
     */
    public byte[] getHeaderAndData() throws IOException {
      ensureBlockReady();
      return onDiskBytesWithHeader;
    }

    /**
     * Releases the compressor this writer uses to compress blocks into the
     * compressor pool. Needs to be called before the writer is discarded.
     */
    public void releaseCompressor() {
      if (compressor != null) {
        compressAlgo.returnCompressor(compressor);
        compressor = null;
      }
    }

    /**
     * Returns the on-disk size of the data portion of the block. This is the
     * compressed size if compression is enabled. Can only be called in the
     * "block ready" state. Header is not compressed, and its size is not
     * included in the return value.
     *
     * @return the on-disk size of the block, not including the header.
     */
    public int getOnDiskSizeWithoutHeader() {
      expectState(State.BLOCK_READY);
      return onDiskBytesWithHeader.length - HEADER_SIZE;
    }

    /**
     * Returns the on-disk size of the block. Can only be called in the
     * "block ready" state.
     *
     * @return the on-disk size of the block ready to be written, including the
     *         header size
     */
    public int getOnDiskSizeWithHeader() {
      expectState(State.BLOCK_READY);
      return onDiskBytesWithHeader.length;
    }

    /**
     * The uncompressed size of the block data. Does not include header size.
     */
    public int getUncompressedSizeWithoutHeader() {
      expectState(State.BLOCK_READY);
      return uncompressedBytesWithHeader.length - HEADER_SIZE;
    }

    /**
     * The uncompressed size of the block data, including header size.
     */
    public int getUncompressedSizeWithHeader() {
      expectState(State.BLOCK_READY);
      return uncompressedBytesWithHeader.length;
    }

    /** @return true if a block is being written */
    public boolean isWriting() {
      return state == State.WRITING;
    }

    /**
     * Returns the number of bytes written into the current block so far, or
     * zero if not writing the block at the moment. Note that this will return
     * zero in the "block ready" state as well.
     *
     * @return the number of bytes written
     */
    public int blockSizeWritten() {
      if (state != State.WRITING)
        return 0;
      return userDataStream.size();
    }

    /**
     * Returns the header followed by the uncompressed data, even if using
     * compression. This is needed for storing uncompressed blocks in the block
     * cache. Can only be called in the "block ready" state.
     *
     * @return uncompressed block bytes for caching on write
     */
    private byte[] getUncompressedDataWithHeader() {
      expectState(State.BLOCK_READY);

      return uncompressedBytesWithHeader;
    }

    private void expectState(State expectedState) {
      if (state != expectedState) {
        throw new IllegalStateException("Expected state: " + expectedState +
            ", actual state: " + state);
      }
    }

    /**
     * Similar to {@link #getUncompressedDataWithHeader()} but returns a byte
     * buffer.
     *
     * @return uncompressed block for caching on write in the form of a buffer
     */
    public ByteBuffer getUncompressedBufferWithHeader() {
      byte[] b = getUncompressedDataWithHeader();
      return ByteBuffer.wrap(b, 0, b.length);
    }

    /**
     * Takes the given {@link BlockWritable} instance, creates a new block of
     * its appropriate type, writes the writable into this block, and flushes
     * the block into the output stream. The writer is instructed not to buffer
     * uncompressed bytes for cache-on-write.
     *
     * @param bw the block-writable object to write as a block
     * @param out the file system output stream
     * @throws IOException
     */
    public void writeBlock(BlockWritable bw, FSDataOutputStream out)
        throws IOException {
      bw.writeToBlock(startWriting(bw.getBlockType()));
      writeHeaderAndData(out);
    }

    /**
     * Creates a new HFileBlock for caching on write. The block carries no
     * HBase checksums, matching minor version 0.
     */
    public HFileBlock getBlockForCaching() {
      return new HFileBlock(blockType, getOnDiskSizeWithoutHeader(),
          getUncompressedSizeWithoutHeader(), prevOffset,
          getUncompressedBufferWithHeader(), DONT_FILL_HEADER, startOffset,
          includesMemstoreTS, MINOR_VERSION, 0, ChecksumType.NULL.getCode(),
          getOnDiskSizeWithoutHeader());
    }
  }

}