/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io.hfile;

import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
/**
 * Writes HFile format version 2.
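 * <p>
 * In outline, a version 2 file is written as: a sequence of data blocks
 * interleaved with "inline" blocks (leaf-level index chunks and compound
 * Bloom filter chunks), then optional meta blocks, then the load-on-open
 * section (root data block index, meta block index, file info, and Bloom
 * filter metadata), followed by a fixed-size trailer. See {@link #close()}.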
 */
@InterfaceAudience.Private
public class HFileWriterV2 extends AbstractHFileWriter {
  static final Log LOG = LogFactory.getLog(HFileWriterV2.class);

  /** Max memstore (mvcc) timestamp in FileInfo */
  public static final byte [] MAX_MEMSTORE_TS_KEY =
      Bytes.toBytes("MAX_MEMSTORE_TS_KEY");

  /** KeyValue version in FileInfo */
  public static final byte [] KEY_VALUE_VERSION =
      Bytes.toBytes("KEY_VALUE_VERSION");

  /** Version for KeyValue which includes memstore timestamp */
  public static final int KEY_VALUE_VER_WITH_MEMSTORE = 1;

  /** Inline block writers for multi-level block index and compound Blooms. */
  private List<InlineBlockWriter> inlineBlockWriters =
      new ArrayList<InlineBlockWriter>();

  /** Unified version 2 block writer */
  private HFileBlock.Writer fsBlockWriter;

  private HFileBlockIndex.BlockIndexWriter dataBlockIndexWriter;
  private HFileBlockIndex.BlockIndexWriter metaBlockIndexWriter;

  /** The offset of the first data block or -1 if the file is empty. */
  private long firstDataBlockOffset = -1;

  /** The offset of the last data block or 0 if the file is empty. */
  private long lastDataBlockOffset;

  /** The last (stop) key of the previous data block. */
  private byte[] lastKeyOfPreviousBlock = null;

  /** Additional data items to be written to the "load-on-open" section. */
  private List<BlockWritable> additionalLoadOnOpenData =
    new ArrayList<BlockWritable>();

  /** Checksum related settings */
  private ChecksumType checksumType = HFile.DEFAULT_CHECKSUM_TYPE;
  private int bytesPerChecksum = HFile.DEFAULT_BYTES_PER_CHECKSUM;

  private final boolean includeMemstoreTS;
  private long maxMemstoreTS = 0;

  static class WriterFactoryV2 extends HFile.WriterFactory {
    WriterFactoryV2(Configuration conf, CacheConfig cacheConf) {
      super(conf, cacheConf);
    }

    @Override
    public Writer createWriter(FileSystem fs, Path path,
        FSDataOutputStream ostream, int blockSize,
        Compression.Algorithm compress, HFileDataBlockEncoder blockEncoder,
        final KVComparator comparator, final ChecksumType checksumType,
        final int bytesPerChecksum, boolean includeMVCCReadpoint) throws IOException {
      return new HFileWriterV2(conf, cacheConf, fs, path, ostream, blockSize, compress,
          blockEncoder, comparator, checksumType, bytesPerChecksum, includeMVCCReadpoint);
    }
  }

  /** Constructor that takes a path, creates and closes the output stream. */
  public HFileWriterV2(Configuration conf, CacheConfig cacheConf,
      FileSystem fs, Path path, FSDataOutputStream ostream, int blockSize,
      Compression.Algorithm compressAlgo, HFileDataBlockEncoder blockEncoder,
      final KVComparator comparator, final ChecksumType checksumType,
      final int bytesPerChecksum, final boolean includeMVCCReadpoint) throws IOException {
    super(cacheConf,
        ostream == null ? createOutputStream(conf, fs, path, null) : ostream,
        path, blockSize, compressAlgo, blockEncoder, comparator);
    this.checksumType = checksumType;
    this.bytesPerChecksum = bytesPerChecksum;
    this.includeMemstoreTS = includeMVCCReadpoint;
    finishInit(conf);
  }

  /** Additional initialization steps */
  private void finishInit(final Configuration conf) {
    if (fsBlockWriter != null)
      throw new IllegalStateException("finishInit called twice");

    // HFile filesystem-level (non-caching) block writer
    fsBlockWriter = new HFileBlock.Writer(compressAlgo, blockEncoder,
        includeMemstoreTS, checksumType, bytesPerChecksum);

    // Data block index writer
    boolean cacheIndexesOnWrite = cacheConf.shouldCacheIndexesOnWrite();
    dataBlockIndexWriter = new HFileBlockIndex.BlockIndexWriter(fsBlockWriter,
        cacheIndexesOnWrite ? cacheConf.getBlockCache() : null,
        cacheIndexesOnWrite ? name : null);
    dataBlockIndexWriter.setMaxChunkSize(
        HFileBlockIndex.getMaxChunkSize(conf));
    inlineBlockWriters.add(dataBlockIndexWriter);

    // Meta data block index writer
    metaBlockIndexWriter = new HFileBlockIndex.BlockIndexWriter();
    if (LOG.isTraceEnabled()) LOG.trace("Initialized with " + cacheConf);
  }

  /**
   * At a block boundary, writes all the inline blocks and opens a new block.
   *
   * @throws IOException
   */
  private void checkBlockBoundary() throws IOException {
    if (fsBlockWriter.blockSizeWritten() < blockSize)
      return;

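    // The target block size has been reached. Note the boundary is only
    // checked between entries, so a finished block can exceed blockSize by
    // roughly the size of the last key/value written into it.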
    finishBlock();
    writeInlineBlocks(false);
    newBlock();
  }

  /** Clean up the current block */
  private void finishBlock() throws IOException {
    if (!fsBlockWriter.isWriting() || fsBlockWriter.blockSizeWritten() == 0)
      return;

    long startTimeNs = System.nanoTime();
    // Update the first data block offset for scanning.
    if (firstDataBlockOffset == -1) {
      firstDataBlockOffset = outputStream.getPos();
    }
    // Update the last data block offset
    lastDataBlockOffset = outputStream.getPos();
    fsBlockWriter.writeHeaderAndData(outputStream);
    int onDiskSize = fsBlockWriter.getOnDiskSizeWithHeader();

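    // Add an index entry for the block just written. calcIndexKey chooses a
    // key that sorts after the last key of the previous block and no later
    // than the first key of this block; it is often much shorter than the
    // first key itself, which keeps the index compact.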
    byte[] indexKey = comparator.calcIndexKey(lastKeyOfPreviousBlock, firstKeyInBlock);
    dataBlockIndexWriter.addEntry(indexKey, lastDataBlockOffset, onDiskSize);
    totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();
    HFile.offerWriteLatency(System.nanoTime() - startTimeNs);
    if (cacheConf.shouldCacheDataOnWrite()) {
      doCacheOnWrite(lastDataBlockOffset);
    }
  }

  /** Gives inline block writers an opportunity to contribute blocks. */
  private void writeInlineBlocks(boolean closing) throws IOException {
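    // Each registered writer (the leaf-level data block index writer and any
    // compound Bloom filter writers) may emit zero or more blocks here; when
    // closing, each must flush everything it has buffered.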
    for (InlineBlockWriter ibw : inlineBlockWriters) {
      while (ibw.shouldWriteBlock(closing)) {
        long offset = outputStream.getPos();
        boolean cacheThisBlock = ibw.getCacheOnWrite();
        ibw.writeInlineBlock(fsBlockWriter.startWriting(
            ibw.getInlineBlockType()));
        fsBlockWriter.writeHeaderAndData(outputStream);
        ibw.blockWritten(offset, fsBlockWriter.getOnDiskSizeWithHeader(),
            fsBlockWriter.getUncompressedSizeWithoutHeader());
        totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();

        if (cacheThisBlock) {
          doCacheOnWrite(offset);
        }
      }
    }
  }

  /**
   * Caches the last written HFile block.
   * @param offset the offset of the block we want to cache. Used to determine
   *          the cache key.
   */
  private void doCacheOnWrite(long offset) {
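    // The block is cached in its "for caching" form, which may differ from
    // the exact bytes just written to disk (for instance, it is kept
    // uncompressed).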
    HFileBlock cacheFormatBlock = fsBlockWriter.getBlockForCaching();
    cacheConf.getBlockCache().cacheBlock(
        new BlockCacheKey(name, offset, blockEncoder.getDataBlockEncoding(),
            cacheFormatBlock.getBlockType()), cacheFormatBlock);
  }

  /**
   * Ready a new block for writing.
   *
   * @throws IOException
   */
  private void newBlock() throws IOException {
    // This is where the next block begins.
    fsBlockWriter.startWriting(BlockType.DATA);
    firstKeyInBlock = null;
    if (lastKeyLength > 0) {
      lastKeyOfPreviousBlock = new byte[lastKeyLength];
      System.arraycopy(lastKeyBuffer, lastKeyOffset, lastKeyOfPreviousBlock, 0, lastKeyLength);
    }
  }

  /**
   * Add a meta block to the end of the file. Call before close(). Meta
   * blocks are expensive: fill one with many serialized values rather than
   * writing a meta block per value. If the metadata is small, consider
   * adding it to the file info using {@link #appendFileInfo(byte[], byte[])}.
   *
   * @param metaBlockName
   *          name of the block
   * @param content
   *          its write() method is called at close time to serialize the
   *          block's data (DO NOT REUSE)
   */
  @Override
  public void appendMetaBlock(String metaBlockName, Writable content) {
    byte[] key = Bytes.toBytes(metaBlockName);
    int i;
    for (i = 0; i < metaNames.size(); ++i) {
      // stop when the current key is greater than our own
      byte[] cur = metaNames.get(i);
      if (Bytes.BYTES_RAWCOMPARATOR.compare(cur, 0, cur.length, key, 0,
          key.length) > 0) {
        break;
      }
    }
    metaNames.add(i, key);
    metaData.add(i, content);
  }

  /**
   * Add key/value to file. Keys must be added in an order that agrees with the
   * Comparator passed on construction.
   *
   * @param kv
   *          KeyValue to add. Cannot be empty nor null.
   * @throws IOException
   */
  @Override
  public void append(final KeyValue kv) throws IOException {
    append(kv.getMvccVersion(), kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength(),
        kv.getBuffer(), kv.getValueOffset(), kv.getValueLength());
    this.maxMemstoreTS = Math.max(this.maxMemstoreTS, kv.getMvccVersion());
  }

  /**
   * Add key/value to file. Keys must be added in an order that agrees with the
   * Comparator passed on construction.
   *
   * @param key
   *          Key to add. Cannot be empty nor null.
   * @param value
   *          Value to add. Cannot be empty nor null.
   * @throws IOException
   */
  @Override
  public void append(final byte[] key, final byte[] value) throws IOException {
    append(0, key, 0, key.length, value, 0, value.length);
  }

  /**
   * Add key/value to file. Keys must be added in an order that agrees with the
   * Comparator passed on construction.
   *
   * @param memstoreTS the memstore (MVCC) timestamp to record with the entry
   * @param key byte array holding the key
   * @param koffset offset of the key in its array
   * @param klength length of the key
   * @param value byte array holding the value
   * @param voffset offset of the value in its array
   * @param vlength length of the value
   * @throws IOException
   */
  private void append(final long memstoreTS, final byte[] key, final int koffset, final int klength,
      final byte[] value, final int voffset, final int vlength)
      throws IOException {
    boolean dupKey = checkKey(key, koffset, klength);
    checkValue(value, voffset, vlength);
    if (!dupKey) {
      checkBlockBoundary();
    }

    if (!fsBlockWriter.isWriting())
      newBlock();

    // Write length of key and value and then actual key and value bytes.
    // Additionally, we may also write down the memstoreTS.
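    // On-disk layout of each entry, matching the writes below:
    //   <4-byte key length> <4-byte value length> <key> <value> [<memstoreTS vlong>]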
    {
      DataOutputStream out = fsBlockWriter.getUserDataStream();
      out.writeInt(klength);
      totalKeyLength += klength;
      out.writeInt(vlength);
      totalValueLength += vlength;
      out.write(key, koffset, klength);
      out.write(value, voffset, vlength);
      if (this.includeMemstoreTS) {
        WritableUtils.writeVLong(out, memstoreTS);
      }
    }

    // Are we the first key in this block?
    if (firstKeyInBlock == null) {
      // Copy the key.
      firstKeyInBlock = new byte[klength];
      System.arraycopy(key, koffset, firstKeyInBlock, 0, klength);
    }

    lastKeyBuffer = key;
    lastKeyOffset = koffset;
    lastKeyLength = klength;
    entryCount++;
  }

  @Override
  public void close() throws IOException {
    if (outputStream == null) {
      return;
    }
    // Save data block encoder metadata in the file info.
    blockEncoder.saveMetadata(this);
    // Write out the end of the data blocks and the remaining inline blocks,
    // then the meta blocks, followed by the load-on-open section: data block
    // index, meta block index, file info, and any additional load-on-open
    // data, with the trailer last.

    finishBlock();
    writeInlineBlocks(true);

    FixedFileTrailer trailer = new FixedFileTrailer(2,
        HFileReaderV2.MAX_MINOR_VERSION);

357 
358     // Write out the metadata blocks if any.
359     if (!metaNames.isEmpty()) {
360       for (int i = 0; i < metaNames.size(); ++i) {
361         // store the beginning offset
362         long offset = outputStream.getPos();
363         // write the metadata content
364         DataOutputStream dos = fsBlockWriter.startWriting(BlockType.META);
365         metaData.get(i).write(dos);
366 
367         fsBlockWriter.writeHeaderAndData(outputStream);
368         totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();
369 
370         // Add the new meta block to the meta index.
371         metaBlockIndexWriter.addEntry(metaNames.get(i), offset,
372             fsBlockWriter.getOnDiskSizeWithHeader());
373       }
374     }
375 
376     // Load-on-open section.
377 
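    // Everything from here to the trailer is read eagerly when the file is
    // opened, so it must be written in the order the reader expects: root
    // data block index, meta block index, file info, then additional
    // load-on-open data such as Bloom filter metadata.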
    // Data block index.
    //
    // In version 2, this section of the file starts with the root level data
    // block index. We call a function that writes intermediate-level blocks
    // first, then root level, and returns the offset of the root level block
    // index.

    long rootIndexOffset = dataBlockIndexWriter.writeIndexBlocks(outputStream);
    trailer.setLoadOnOpenOffset(rootIndexOffset);

    // Meta block index.
    metaBlockIndexWriter.writeSingleLevelIndex(fsBlockWriter.startWriting(
        BlockType.ROOT_INDEX), "meta");
    fsBlockWriter.writeHeaderAndData(outputStream);
    totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();

    if (this.includeMemstoreTS) {
      appendFileInfo(MAX_MEMSTORE_TS_KEY, Bytes.toBytes(maxMemstoreTS));
      appendFileInfo(KEY_VALUE_VERSION, Bytes.toBytes(KEY_VALUE_VER_WITH_MEMSTORE));
    }

    // File info
    writeFileInfo(trailer, fsBlockWriter.startWriting(BlockType.FILE_INFO));
    fsBlockWriter.writeHeaderAndData(outputStream);
    totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();

    // Load-on-open data supplied by higher levels, e.g. Bloom filters.
    for (BlockWritable w : additionalLoadOnOpenData) {
      fsBlockWriter.writeBlock(w, outputStream);
      totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();
    }

    // Now finish off the trailer.
    trailer.setNumDataIndexLevels(dataBlockIndexWriter.getNumLevels());
    trailer.setUncompressedDataIndexSize(
        dataBlockIndexWriter.getTotalUncompressedSize());
    trailer.setFirstDataBlockOffset(firstDataBlockOffset);
    trailer.setLastDataBlockOffset(lastDataBlockOffset);
    trailer.setComparatorClass(comparator.getClass());
    trailer.setDataIndexCount(dataBlockIndexWriter.getNumRootEntries());

    finishClose(trailer);

    fsBlockWriter.release();
  }

424 
425   @Override
426   public void addInlineBlockWriter(InlineBlockWriter ibw) {
427     inlineBlockWriters.add(ibw);
428   }
429 
430   @Override
431   public void addGeneralBloomFilter(final BloomFilterWriter bfw) {
432     this.addBloomFilter(bfw, BlockType.GENERAL_BLOOM_META);
433   }
434 
435   @Override
436   public void addDeleteFamilyBloomFilter(final BloomFilterWriter bfw) {
437     this.addBloomFilter(bfw, BlockType.DELETE_FAMILY_BLOOM_META);
438   }
439 
  private void addBloomFilter(final BloomFilterWriter bfw,
      final BlockType blockType) {
    if (bfw.getKeyCount() <= 0)
      return;

    if (blockType != BlockType.GENERAL_BLOOM_META &&
        blockType != BlockType.DELETE_FAMILY_BLOOM_META) {
      throw new RuntimeException("Block Type: " + blockType.toString() +
          " is not supported");
    }
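    // Defer the Bloom filter to the load-on-open section: its meta writer
    // (and data writer, when one is present) is serialized during close().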
    additionalLoadOnOpenData.add(new BlockWritable() {
      @Override
      public BlockType getBlockType() {
        return blockType;
      }

      @Override
      public void writeToBlock(DataOutput out) throws IOException {
        bfw.getMetaWriter().write(out);
        Writable dataWriter = bfw.getDataWriter();
        if (dataWriter != null)
          dataWriter.write(out);
      }
    });
  }
}