View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.hfile;
18  
19  import java.io.IOException;
20  import java.nio.ByteBuffer;
21  
22  import org.apache.hadoop.classification.InterfaceAudience;
23  import org.apache.hadoop.hbase.HConstants;
24  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
25  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
26  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
27  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
28  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
29  import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
30  import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
31  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
32  import org.apache.hadoop.hbase.util.Bytes;
33  
34  import com.google.common.base.Preconditions;
35  
36  /**
37   * Do different kinds of data block encoding according to column family
38   * options.
39   */
40  @InterfaceAudience.Private
41  public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
42    private final DataBlockEncoding onDisk;
43    private final DataBlockEncoding inCache;
44    private final byte[] dummyHeader;
45  
46    public HFileDataBlockEncoderImpl(DataBlockEncoding encoding) {
47      this(encoding, encoding);
48    }
49  
50    /**
51     * Do data block encoding with specified options.
52     * @param onDisk What kind of data block encoding will be used before writing
53     *          HFileBlock to disk. This must be either the same as inCache or
54     *          {@link DataBlockEncoding#NONE}.
55     * @param inCache What kind of data block encoding will be used in block
56     *          cache.
57     */
58    public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk,
59        DataBlockEncoding inCache) {
60      this(onDisk, inCache, HConstants.HFILEBLOCK_DUMMY_HEADER);
61    }
62  
63    /**
64     * Do data block encoding with specified options.
65     * @param onDisk What kind of data block encoding will be used before writing
66     *          HFileBlock to disk. This must be either the same as inCache or
67     *          {@link DataBlockEncoding#NONE}.
68     * @param inCache What kind of data block encoding will be used in block
69     *          cache.
70     * @param dummyHeader dummy header bytes
71     */
72    public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk,
73        DataBlockEncoding inCache, byte[] dummyHeader) {
74      this.onDisk = onDisk != null ?
75          onDisk : DataBlockEncoding.NONE;
76      this.inCache = inCache != null ?
77          inCache : DataBlockEncoding.NONE;
78      this.dummyHeader = dummyHeader;
79  
80      Preconditions.checkArgument(onDisk == DataBlockEncoding.NONE ||
81          onDisk == inCache, "on-disk encoding (" + onDisk + ") must be " +
82          "either the same as in-cache encoding (" + inCache + ") or " +
83          DataBlockEncoding.NONE);
84    }
85  
86    public static HFileDataBlockEncoder createFromFileInfo(
87        FileInfo fileInfo, DataBlockEncoding preferredEncodingInCache)
88        throws IOException {
89      boolean hasPreferredCacheEncoding = preferredEncodingInCache != null
90          && preferredEncodingInCache != DataBlockEncoding.NONE;
91  
92      byte[] dataBlockEncodingType = fileInfo.get(DATA_BLOCK_ENCODING);
93      if (dataBlockEncodingType == null && !hasPreferredCacheEncoding) {
94        return NoOpDataBlockEncoder.INSTANCE;
95      }
96  
97      DataBlockEncoding onDisk;
98      if (dataBlockEncodingType == null) {
99        onDisk = DataBlockEncoding.NONE;
100     } else {
101       String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType);
102       try {
103         onDisk = DataBlockEncoding.valueOf(dataBlockEncodingStr);
104       } catch (IllegalArgumentException ex) {
105         throw new IOException("Invalid data block encoding type in file info: "
106             + dataBlockEncodingStr, ex);
107       }
108     }
109 
110     DataBlockEncoding inCache;
111     if (onDisk == DataBlockEncoding.NONE) {
112       // This is an "in-cache-only" encoding or fully-unencoded scenario.
113       // Either way, we use the given encoding (possibly NONE) specified by
114       // the column family in cache.
115       inCache = preferredEncodingInCache;
116     } else {
117       // Leave blocks in cache encoded the same way as they are on disk.
118       // If we switch encoding type for the CF or the in-cache-only encoding
119       // flag, old files will keep their encoding both on disk and in cache,
120       // but new files will be generated with the new encoding.
121       inCache = onDisk;
122     }
123     // TODO: we are not passing proper header size here based on minor version, presumably
124     //       because this encoder will never actually be used for encoding.
125     return new HFileDataBlockEncoderImpl(onDisk, inCache);
126   }
127 
128   @Override
129   public void saveMetadata(HFile.Writer writer) throws IOException {
130     writer.appendFileInfo(DATA_BLOCK_ENCODING, onDisk.getNameInBytes());
131   }
132 
133   @Override
134   public DataBlockEncoding getEncodingOnDisk() {
135     return onDisk;
136   }
137 
138   @Override
139   public DataBlockEncoding getEncodingInCache() {
140     return inCache;
141   }
142 
143   @Override
144   public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
145     if (!useEncodedScanner(isCompaction)) {
146       return DataBlockEncoding.NONE;
147     }
148     return inCache;
149   }
150 
151   @Override
152   public HFileBlock diskToCacheFormat(HFileBlock block, boolean isCompaction) {
153     if (block.getBlockType() == BlockType.DATA) {
154       if (!useEncodedScanner(isCompaction)) {
155         // Unencoded block, and we don't want to encode in cache.
156         return block;
157       }
158       // Encode the unencoded block with the in-cache encoding.
159       return encodeDataBlock(block, inCache, block.doesIncludeMemstoreTS(),
160           createInCacheEncodingContext());
161     }
162 
163     if (block.getBlockType() == BlockType.ENCODED_DATA) {
164       if (block.getDataBlockEncodingId() == onDisk.getId()) {
165         // The block is already in the desired in-cache encoding.
166         return block;
167       }
168       // We don't want to re-encode a block in a different encoding. The HFile
169       // reader should have been instantiated in such a way that we would not
170       // have to do this.
171       throw new AssertionError("Expected on-disk data block encoding " +
172           onDisk + ", got " + block.getDataBlockEncoding());
173     }
174     return block;
175   }
176 
177   /**
178    * Precondition: a non-encoded buffer. Postcondition: on-disk encoding.
179    *
180    * The encoded results can be stored in {@link HFileBlockEncodingContext}.
181    *
182    * @throws IOException
183    */
184   @Override
185   public void beforeWriteToDisk(ByteBuffer in,
186       boolean includesMemstoreTS,
187       HFileBlockEncodingContext encodeCtx,
188       BlockType blockType) throws IOException {
189     if (onDisk == DataBlockEncoding.NONE) {
190       // there is no need to encode the block before writing it to disk
191       ((HFileBlockDefaultEncodingContext) encodeCtx).compressAfterEncodingWithBlockType(
192           in.array(), blockType);
193       return;
194     }
195     encodeBufferToHFileBlockBuffer(in, onDisk,
196         includesMemstoreTS, encodeCtx);
197   }
198 
199   @Override
200   public boolean useEncodedScanner(boolean isCompaction) {
201     if (isCompaction && onDisk == DataBlockEncoding.NONE) {
202       return false;
203     }
204     return inCache != DataBlockEncoding.NONE;
205   }
206 
207   /**
208    * Encode a block of key value pairs.
209    *
210    * @param in input data to encode
211    * @param algo encoding algorithm
212    * @param includesMemstoreTS includes memstore timestamp or not
213    * @param encodeCtx where will the output data be stored
214    */
215   private void encodeBufferToHFileBlockBuffer(ByteBuffer in,
216       DataBlockEncoding algo, boolean includesMemstoreTS,
217       HFileBlockEncodingContext encodeCtx) {
218     DataBlockEncoder encoder = algo.getEncoder();
219     try {
220       encoder.encodeKeyValues(in, includesMemstoreTS, encodeCtx);
221     } catch (IOException e) {
222       throw new RuntimeException(String.format(
223           "Bug in data block encoder "
224               + "'%s', it probably requested too much data, " +
225               "exception message: %s.",
226               algo.toString(), e.getMessage()), e);
227     }
228   }
229 
230   private HFileBlock encodeDataBlock(HFileBlock block,
231       DataBlockEncoding algo, boolean includesMemstoreTS,
232       HFileBlockEncodingContext encodingCtx) {
233     encodingCtx.setDummyHeader(block.getDummyHeaderForVersion());
234     encodeBufferToHFileBlockBuffer(
235       block.getBufferWithoutHeader(), algo, includesMemstoreTS, encodingCtx);
236     byte[] encodedUncompressedBytes =
237       encodingCtx.getUncompressedBytesWithHeader();
238     ByteBuffer bufferWrapper = ByteBuffer.wrap(encodedUncompressedBytes);
239     int sizeWithoutHeader = bufferWrapper.limit() - block.headerSize();
240     HFileBlock encodedBlock = new HFileBlock(BlockType.ENCODED_DATA,
241         block.getOnDiskSizeWithoutHeader(),
242         sizeWithoutHeader, block.getPrevBlockOffset(),
243         bufferWrapper, HFileBlock.FILL_HEADER, block.getOffset(),
244         includesMemstoreTS, block.getMinorVersion(),
245         block.getBytesPerChecksum(), block.getChecksumType(),
246         block.getOnDiskDataSizeWithHeader());
247     return encodedBlock;
248   }
249 
250   /**
251    * Returns a new encoding context given the inCache encoding scheme provided in the constructor.
252    * This used to be kept around but HFileBlockDefaultEncodingContext isn't thread-safe.
253    * See HBASE-8732
254    * @return a new in cache encoding context
255    */
256   private HFileBlockEncodingContext createInCacheEncodingContext() {
257     return (inCache != DataBlockEncoding.NONE) ?
258         this.inCache.getEncoder().newDataBlockEncodingContext(
259             Algorithm.NONE, this.inCache, dummyHeader)
260         :
261         // create a default encoding context
262         new HFileBlockDefaultEncodingContext(Algorithm.NONE,
263             this.inCache, dummyHeader);
264   }
265 
266   @Override
267   public String toString() {
268     return getClass().getSimpleName() + "(onDisk=" + onDisk + ", inCache=" +
269         inCache + ")";
270   }
271 
272   @Override
273   public HFileBlockEncodingContext newOnDiskDataBlockEncodingContext(
274       Algorithm compressionAlgorithm,  byte[] dummyHeader) {
275     if (onDisk != null) {
276       DataBlockEncoder encoder = onDisk.getEncoder();
277       if (encoder != null) {
278         return encoder.newDataBlockEncodingContext(
279             compressionAlgorithm, onDisk, dummyHeader);
280       }
281     }
282     return new HFileBlockDefaultEncodingContext(compressionAlgorithm,
283         null, dummyHeader);
284   }
285 
286   @Override
287   public HFileBlockDecodingContext newOnDiskDataBlockDecodingContext(
288       Algorithm compressionAlgorithm) {
289     if (onDisk != null) {
290       DataBlockEncoder encoder = onDisk.getEncoder();
291       if (encoder != null) {
292         return encoder.newDataBlockDecodingContext(
293             compressionAlgorithm);
294       }
295     }
296     return new HFileBlockDefaultDecodingContext(compressionAlgorithm);
297   }
298 
299 }