/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
17  package org.apache.hadoop.hbase.io.hfile;
18  
19  import java.io.IOException;
20  import java.nio.ByteBuffer;
21  
22  import org.apache.hadoop.classification.InterfaceAudience;
23  import org.apache.hadoop.hbase.HConstants;
24  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
25  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
26  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
27  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
28  import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
29  import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
30  import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
31  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
32  import org.apache.hadoop.hbase.util.Bytes;
33  
34  import com.google.common.base.Preconditions;
35  
36  /**
37   * Do different kinds of data block encoding according to column family
38   * options.
39   */
40  @InterfaceAudience.Private
41  public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
42    private final DataBlockEncoding onDisk;
43    private final DataBlockEncoding inCache;
44    private final HFileBlockEncodingContext inCacheEncodeCtx;
45  
46    public HFileDataBlockEncoderImpl(DataBlockEncoding encoding) {
47      this(encoding, encoding);
48    }
49  
50    /**
51     * Do data block encoding with specified options.
52     * @param onDisk What kind of data block encoding will be used before writing
53     *          HFileBlock to disk. This must be either the same as inCache or
54     *          {@link DataBlockEncoding#NONE}.
55     * @param inCache What kind of data block encoding will be used in block
56     *          cache.
57     */
58    public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk,
59        DataBlockEncoding inCache) {
60      this(onDisk, inCache, null);
61    }
62  
63    /**
64     * Do data block encoding with specified options.
65     * @param onDisk What kind of data block encoding will be used before writing
66     *          HFileBlock to disk. This must be either the same as inCache or
67     *          {@link DataBlockEncoding#NONE}.
68     * @param inCache What kind of data block encoding will be used in block
69     *          cache.
70     * @param dummyHeader dummy header bytes
71     */
72    public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk,
73        DataBlockEncoding inCache, byte[] dummyHeader) {
74      dummyHeader = dummyHeader == null ? HConstants.HFILEBLOCK_DUMMY_HEADER : dummyHeader;
75      this.onDisk = onDisk != null ?
76          onDisk : DataBlockEncoding.NONE;
77      this.inCache = inCache != null ?
78          inCache : DataBlockEncoding.NONE;
79      if (inCache != DataBlockEncoding.NONE) {
80        inCacheEncodeCtx =
81            this.inCache.getEncoder().newDataBlockEncodingContext(
82                Algorithm.NONE, this.inCache, dummyHeader);
83      } else {
84        // create a default encoding context
85        inCacheEncodeCtx =
86            new HFileBlockDefaultEncodingContext(Algorithm.NONE,
87                this.inCache, dummyHeader);
88      }
89  
90      Preconditions.checkArgument(onDisk == DataBlockEncoding.NONE ||
91          onDisk == inCache, "on-disk encoding (" + onDisk + ") must be " +
92          "either the same as in-cache encoding (" + inCache + ") or " +
93          DataBlockEncoding.NONE);
94    }
95  
96    public static HFileDataBlockEncoder createFromFileInfo(
97        FileInfo fileInfo, DataBlockEncoding preferredEncodingInCache)
98        throws IOException {
99      byte[] dataBlockEncodingType = fileInfo.get(DATA_BLOCK_ENCODING);
100     if (dataBlockEncodingType == null) {
101       return NoOpDataBlockEncoder.INSTANCE;
102     }
103 
104     String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType);
105     DataBlockEncoding onDisk;
106     try {
107       onDisk = DataBlockEncoding.valueOf(dataBlockEncodingStr);
108     } catch (IllegalArgumentException ex) {
109       throw new IOException("Invalid data block encoding type in file info: " +
110           dataBlockEncodingStr, ex);
111     }
112 
113     DataBlockEncoding inCache;
114     if (onDisk == DataBlockEncoding.NONE) {
115       // This is an "in-cache-only" encoding or fully-unencoded scenario.
116       // Either way, we use the given encoding (possibly NONE) specified by
117       // the column family in cache.
118       inCache = preferredEncodingInCache;
119     } else {
120       // Leave blocks in cache encoded the same way as they are on disk.
121       // If we switch encoding type for the CF or the in-cache-only encoding
122       // flag, old files will keep their encoding both on disk and in cache,
123       // but new files will be generated with the new encoding.
124       inCache = onDisk;
125     }
126     return new HFileDataBlockEncoderImpl(onDisk, inCache);
127   }
128 
129   @Override
130   public void saveMetadata(HFile.Writer writer) throws IOException {
131     writer.appendFileInfo(DATA_BLOCK_ENCODING, onDisk.getNameInBytes());
132   }
133 
134   @Override
135   public DataBlockEncoding getEncodingOnDisk() {
136     return onDisk;
137   }
138 
139   @Override
140   public DataBlockEncoding getEncodingInCache() {
141     return inCache;
142   }
143 
144   @Override
145   public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
146     if (!useEncodedScanner(isCompaction)) {
147       return DataBlockEncoding.NONE;
148     }
149     return inCache;
150   }
151 
152   @Override
153   public HFileBlock diskToCacheFormat(HFileBlock block, boolean isCompaction) {
154     if (block.getBlockType() == BlockType.DATA) {
155       if (!useEncodedScanner(isCompaction)) {
156         // Unencoded block, and we don't want to encode in cache.
157         return block;
158       }
159       // Encode the unencoded block with the in-cache encoding.
160       return encodeDataBlock(block, inCache, block.doesIncludeMemstoreTS(),
161           inCacheEncodeCtx);
162     }
163 
164     if (block.getBlockType() == BlockType.ENCODED_DATA) {
165       if (block.getDataBlockEncodingId() == onDisk.getId()) {
166         // The block is already in the desired in-cache encoding.
167         return block;
168       }
169       // We don't want to re-encode a block in a different encoding. The HFile
170       // reader should have been instantiated in such a way that we would not
171       // have to do this.
172       throw new AssertionError("Expected on-disk data block encoding " +
173           onDisk + ", got " + block.getDataBlockEncoding());
174     }
175     return block;
176   }
177 
178   /**
179    * Precondition: a non-encoded buffer. Postcondition: on-disk encoding.
180    *
181    * The encoded results can be stored in {@link HFileBlockEncodingContext}.
182    *
183    * @throws IOException
184    */
185   @Override
186   public void beforeWriteToDisk(ByteBuffer in,
187       boolean includesMemstoreTS,
188       HFileBlockEncodingContext encodeCtx,
189       BlockType blockType) throws IOException {
190     if (onDisk == DataBlockEncoding.NONE) {
191       // there is no need to encode the block before writing it to disk
192       ((HFileBlockDefaultEncodingContext) encodeCtx).compressAfterEncoding(
193           in.array(), blockType);
194       return;
195     }
196     encodeBufferToHFileBlockBuffer(in, onDisk,
197         includesMemstoreTS, encodeCtx);
198   }
199 
200   @Override
201   public boolean useEncodedScanner(boolean isCompaction) {
202     if (isCompaction && onDisk == DataBlockEncoding.NONE) {
203       return false;
204     }
205     return inCache != DataBlockEncoding.NONE;
206   }
207 
208   /**
209    * Encode a block of key value pairs.
210    *
211    * @param in input data to encode
212    * @param algo encoding algorithm
213    * @param includesMemstoreTS includes memstore timestamp or not
214    * @param encodeCtx where will the output data be stored
215    */
216   private void encodeBufferToHFileBlockBuffer(ByteBuffer in,
217       DataBlockEncoding algo, boolean includesMemstoreTS,
218       HFileBlockEncodingContext encodeCtx) {
219     DataBlockEncoder encoder = algo.getEncoder();
220     try {
221       encoder.encodeKeyValues(in, includesMemstoreTS, encodeCtx);
222     } catch (IOException e) {
223       throw new RuntimeException(String.format(
224           "Bug in data block encoder "
225               + "'%s', it probably requested too much data, " +
226               "exception message: %s.",
227               algo.toString(), e.getMessage()), e);
228     }
229   }
230 
231   private HFileBlock encodeDataBlock(HFileBlock block,
232       DataBlockEncoding algo, boolean includesMemstoreTS,
233       HFileBlockEncodingContext encodingCtx) {
234     encodeBufferToHFileBlockBuffer(
235       block.getBufferWithoutHeader(), algo, includesMemstoreTS, encodingCtx);
236     byte[] encodedUncompressedBytes =
237       encodingCtx.getUncompressedBytesWithHeader();
238     ByteBuffer bufferWrapper = ByteBuffer.wrap(encodedUncompressedBytes);
239     int sizeWithoutHeader = bufferWrapper.limit() - encodingCtx.getHeaderSize();
240     HFileBlock encodedBlock = new HFileBlock(BlockType.ENCODED_DATA,
241         block.getOnDiskSizeWithoutHeader(),
242         sizeWithoutHeader, block.getPrevBlockOffset(),
243         bufferWrapper, HFileBlock.FILL_HEADER, block.getOffset(),
244         includesMemstoreTS, block.getMinorVersion(),
245         block.getBytesPerChecksum(), block.getChecksumType(),
246         block.getOnDiskDataSizeWithHeader());
247     return encodedBlock;
248   }
249 
250   @Override
251   public String toString() {
252     return getClass().getSimpleName() + "(onDisk=" + onDisk + ", inCache=" +
253         inCache + ")";
254   }
255 
256   @Override
257   public HFileBlockEncodingContext newOnDiskDataBlockEncodingContext(
258       Algorithm compressionAlgorithm,  byte[] dummyHeader) {
259     if (onDisk != null) {
260       DataBlockEncoder encoder = onDisk.getEncoder();
261       if (encoder != null) {
262         return encoder.newDataBlockEncodingContext(
263             compressionAlgorithm, onDisk, dummyHeader);
264       }
265     }
266     return new HFileBlockDefaultEncodingContext(compressionAlgorithm,
267         null, dummyHeader);
268   }
269 
270   @Override
271   public HFileBlockDecodingContext newOnDiskDataBlockDecodingContext(
272       Algorithm compressionAlgorithm) {
273     if (onDisk != null) {
274       DataBlockEncoder encoder = onDisk.getEncoder();
275       if (encoder != null) {
276         return encoder.newDataBlockDecodingContext(
277             compressionAlgorithm);
278       }
279     }
280     return new HFileBlockDefaultDecodingContext(compressionAlgorithm);
281   }
282 
283 }