
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.io.encoding;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.Iterator;

import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.Compressor;

import com.google.common.base.Preconditions;

/**
 * Encapsulates a data block compressed using a particular encoding algorithm.
 * Useful for testing and benchmarking.
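 *
 * <p>A minimal usage sketch ({@code encoder} and {@code rawKeyValueBytes} are
 * assumptions here: a {@code DataBlockEncoder} instance and a byte array of
 * serialized KeyValues obtained elsewhere):
 * <pre>
 *   EncodedDataBlock block = new EncodedDataBlock(encoder, false,
 *       DataBlockEncoding.PREFIX, rawKeyValueBytes);
 *   int encodedSize = block.getSize();
 *   Iterator&lt;Cell&gt; it =
 *       block.getIterator(HConstants.HFILEBLOCK_DUMMY_HEADER.length);
 *   while (it.hasNext()) {
 *     Cell cell = it.next(); // cells decoded back from the encoded block
 *   }
 * </pre>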
 */
@InterfaceAudience.Private
public class EncodedDataBlock {
  private byte[] rawKVs;
  private ByteBuffer rawBuffer;
  private DataBlockEncoder dataBlockEncoder;

  private byte[] cachedEncodedData;
  private boolean includesMemstoreTS;

  private final HFileBlockEncodingContext encodingCtx;

  /**
   * Create a buffer which will be encoded using dataBlockEncoder.
   * @param dataBlockEncoder Algorithm used for encoding.
   * @param includesMemstoreTS whether each KeyValue is followed by a memstore
   *          timestamp
   * @param encoding encoding type used
   * @param rawKVs raw KeyValue bytes to be encoded
   */
  public EncodedDataBlock(DataBlockEncoder dataBlockEncoder,
      boolean includesMemstoreTS, DataBlockEncoding encoding, byte[] rawKVs) {
    Preconditions.checkNotNull(encoding,
        "Cannot create encoded data block with null encoder");
    this.dataBlockEncoder = dataBlockEncoder;
    this.includesMemstoreTS = includesMemstoreTS;
    encodingCtx =
        dataBlockEncoder.newDataBlockEncodingContext(Compression.Algorithm.NONE,
            encoding, HConstants.HFILEBLOCK_DUMMY_HEADER);
    this.rawKVs = rawKVs;
  }

  /**
   * Provides access to the cells stored in this block, decoded from its
   * encoded representation.
   * @param headerSize header size of the block.
   * @return Forward-only sequential iterator.
   */
  public Iterator<Cell> getIterator(int headerSize) {
    final int rawSize = rawKVs.length;
    byte[] encodedDataWithHeader = getEncodedData();
    // Skip the block header and the 2-byte encoding id that precede the encoded payload.
    int bytesToSkip = headerSize + Bytes.SIZEOF_SHORT;
    ByteArrayInputStream bais = new ByteArrayInputStream(encodedDataWithHeader,
        bytesToSkip, encodedDataWithHeader.length - bytesToSkip);
    final DataInputStream dis = new DataInputStream(bais);

    return new Iterator<Cell>() {
      private ByteBuffer decompressedData = null;

      @Override
      public boolean hasNext() {
        if (decompressedData == null) {
          return rawSize > 0;
        }
        return decompressedData.hasRemaining();
      }

      @Override
      public Cell next() {
        if (decompressedData == null) {
          // Decode the whole block lazily on the first call.
          try {
            decompressedData = dataBlockEncoder.decodeKeyValues(
                dis, includesMemstoreTS);
          } catch (IOException e) {
            throw new RuntimeException("Problem with data block encoder, " +
                "most likely it requested more bytes than are available.", e);
          }
          decompressedData.rewind();
        }

        int offset = decompressedData.position();
        KeyValue kv = new KeyValue(decompressedData.array(), offset);
        decompressedData.position(offset + kv.getLength());

        return kv;
      }

      @Override
      public void remove() {
        throw new NotImplementedException("remove() is not supported!");
      }

      @Override
      public String toString() {
        return "Iterator of: " + dataBlockEncoder.getClass().getName();
      }

    };
  }

  /**
   * Find the size of the minimal buffer that could store the encoded data.
   * @return Size in bytes of the encoded data.
   */
  public int getSize() {
    return getEncodedData().length;
  }

  /**
   * Find the size of compressed data assuming that the buffer will be
   * compressed using the given algorithm.
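   *
   * <p>A small usage sketch ({@code data} is an assumption: any byte array to
   * measure; the compressor is taken from and returned to the algorithm's pool):
   * <pre>
   *   Algorithm algo = Compression.Algorithm.GZ;
   *   Compressor compressor = algo.getCompressor();
   *   int size = EncodedDataBlock.getCompressedSize(algo, compressor,
   *       data, 0, data.length);
   *   algo.returnCompressor(compressor);
   * </pre>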
   * @param algo compression algorithm
   * @param compressor compressor already requested from codec
   * @param inputBuffer Array to be compressed.
   * @param offset Offset to beginning of the data.
   * @param length Length to be compressed.
   * @return Size of compressed data in bytes.
   * @throws IOException
   */
  public static int getCompressedSize(Algorithm algo, Compressor compressor,
      byte[] inputBuffer, int offset, int length) throws IOException {
    // Write through the compression stream into a byte-counting null sink;
    // only the number of compressed bytes is of interest here.
    DataOutputStream compressedStream = new DataOutputStream(
        new IOUtils.NullOutputStream());
    if (compressor != null) {
      compressor.reset();
    }
    OutputStream compressingStream = null;

    try {
      compressingStream = algo.createCompressionStream(
          compressedStream, compressor, 0);

      compressingStream.write(inputBuffer, offset, length);
      compressingStream.flush();

      return compressedStream.size();
    } finally {
      if (compressingStream != null) compressingStream.close();
    }
  }

  /**
   * Estimate size after the second stage of compression (e.g. LZO).
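   *
   * <p>Sketch of intent ({@code block}, {@code algo} and {@code compressor} are
   * assumptions, set up as in {@link #getCompressedSize}): encoding is the first
   * stage, compressing the encoded bytes is the second.
   * <pre>
   *   int encodedOnly = block.getSize();
   *   int encodedThenCompressed = block.getEncodedCompressedSize(algo, compressor);
   * </pre>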
   * @param comprAlgo compression algorithm to be used for compression
   * @param compressor compressor corresponding to the given compression
   *          algorithm
   * @return Size after second stage of compression.
   */
  public int getEncodedCompressedSize(Algorithm comprAlgo,
      Compressor compressor) throws IOException {
    byte[] compressedBytes = getEncodedData();
    return getCompressedSize(comprAlgo, compressor, compressedBytes, 0,
        compressedBytes.length);
  }

  /** @return encoded data with header */
  private byte[] getEncodedData() {
    if (cachedEncodedData != null) {
      return cachedEncodedData;
    }
    cachedEncodedData = encodeData();
    return cachedEncodedData;
  }

  private ByteBuffer getUncompressedBuffer() {
    if (rawBuffer == null || rawBuffer.limit() < rawKVs.length) {
      rawBuffer = ByteBuffer.wrap(rawKVs);
    }
    return rawBuffer;
  }

  /**
   * Do the encoding, but do not cache the encoded data.
   * @return encoded data block with header and checksum
   */
  public byte[] encodeData() {
    try {
      this.dataBlockEncoder.encodeKeyValues(
          getUncompressedBuffer(), includesMemstoreTS, encodingCtx);
    } catch (IOException e) {
      throw new RuntimeException(String.format(
          "Bug in encoding part of algorithm %s. " +
          "Probably it requested more bytes than are available.",
          toString()), e);
    }
    return encodingCtx.getUncompressedBytesWithHeader();
  }

  @Override
  public String toString() {
    return dataBlockEncoder.toString();
  }
}