View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.encoding;
18  
19  import java.io.ByteArrayInputStream;
20  import java.io.DataInputStream;
21  import java.io.DataOutputStream;
22  import java.io.IOException;
23  import java.io.OutputStream;
24  import java.nio.ByteBuffer;
25  import java.util.Iterator;
26  
27  import org.apache.commons.lang.NotImplementedException;
28  import org.apache.hadoop.classification.InterfaceAudience;
29  import org.apache.hadoop.hbase.HConstants;
30  import org.apache.hadoop.hbase.KeyValue;
31  import org.apache.hadoop.hbase.io.compress.Compression;
32  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
33  import org.apache.hadoop.hbase.util.Bytes;
34  import org.apache.hadoop.io.compress.Compressor;
35  
36  import com.google.common.base.Preconditions;
37  import com.google.common.io.NullOutputStream;
38  
39  /**
40   * Encapsulates a data block compressed using a particular encoding algorithm.
41   * Useful for testing and benchmarking.
42   */
43  @InterfaceAudience.Private
44  public class EncodedDataBlock {
45    private byte[] rawKVs;
46    private ByteBuffer rawBuffer;
47    private DataBlockEncoder dataBlockEncoder;
48  
49    private byte[] cachedEncodedData;
50    private boolean includesMemstoreTS;
51  
52    private final HFileBlockEncodingContext encodingCtx;
53  
54    /**
55     * Create a buffer which will be encoded using dataBlockEncoder.
56     * @param dataBlockEncoder Algorithm used for compression.
57     * @param encoding encoding type used
58     * @param rawKVs
59     */
60    public EncodedDataBlock(DataBlockEncoder dataBlockEncoder,
61        boolean includesMemstoreTS, DataBlockEncoding encoding, byte[] rawKVs) {
62      Preconditions.checkNotNull(encoding,
63          "Cannot create encoded data block with null encoder");
64      this.dataBlockEncoder = dataBlockEncoder;
65      encodingCtx =
66          dataBlockEncoder.newDataBlockEncodingContext(Compression.Algorithm.NONE,
67              encoding, HConstants.HFILEBLOCK_DUMMY_HEADER);
68      this.rawKVs = rawKVs;
69    }
70  
71    /**
72     * Provides access to compressed value.
73     * @param headerSize header size of the block.
74     * @return Forwards sequential iterator.
75     */
76    public Iterator<KeyValue> getIterator(int headerSize) {
77      final int rawSize = rawKVs.length;
78      byte[] encodedDataWithHeader = getEncodedData();
79      int bytesToSkip = headerSize + Bytes.SIZEOF_SHORT;
80      ByteArrayInputStream bais = new ByteArrayInputStream(encodedDataWithHeader,
81          bytesToSkip, encodedDataWithHeader.length - bytesToSkip);
82      final DataInputStream dis = new DataInputStream(bais);
83  
84      return new Iterator<KeyValue>() {
85        private ByteBuffer decompressedData = null;
86  
87        @Override
88        public boolean hasNext() {
89          if (decompressedData == null) {
90            return rawSize > 0;
91          }
92          return decompressedData.hasRemaining();
93        }
94  
95        @Override
96        public KeyValue next() {
97          if (decompressedData == null) {
98            try {
99              decompressedData = dataBlockEncoder.decodeKeyValues(
100                 dis, includesMemstoreTS);
101           } catch (IOException e) {
102             throw new RuntimeException("Problem with data block encoder, " +
103                 "most likely it requested more bytes than are available.", e);
104           }
105           decompressedData.rewind();
106         }
107 
108         int offset = decompressedData.position();
109         KeyValue kv = new KeyValue(decompressedData.array(), offset);
110         decompressedData.position(offset + kv.getLength());
111 
112         return kv;
113       }
114 
115       @Override
116       public void remove() {
117         throw new NotImplementedException("remove() is not supported!");
118       }
119 
120       @Override
121       public String toString() {
122         return "Iterator of: " + dataBlockEncoder.getClass().getName();
123       }
124 
125     };
126   }
127 
128   /**
129    * Find the size of minimal buffer that could store compressed data.
130    * @return Size in bytes of compressed data.
131    */
132   public int getSize() {
133     return getEncodedData().length;
134   }
135 
136   /**
137    * Find the size of compressed data assuming that buffer will be compressed
138    * using given algorithm.
139    * @param algo compression algorithm
140    * @param compressor compressor already requested from codec
141    * @param inputBuffer Array to be compressed.
142    * @param offset Offset to beginning of the data.
143    * @param length Length to be compressed.
144    * @return Size of compressed data in bytes.
145    * @throws IOException
146    */
147   public static int getCompressedSize(Algorithm algo, Compressor compressor,
148       byte[] inputBuffer, int offset, int length) throws IOException {
149     DataOutputStream compressedStream = new DataOutputStream(
150         new NullOutputStream());
151     if (compressor != null) {
152       compressor.reset();
153     }
154     OutputStream compressingStream = algo.createCompressionStream(
155         compressedStream, compressor, 0);
156 
157     compressingStream.write(inputBuffer, offset, length);
158     compressingStream.flush();
159     compressingStream.close();
160 
161     return compressedStream.size();
162   }
163 
164   /**
165    * Estimate size after second stage of compression (e.g. LZO).
166    * @param comprAlgo compression algorithm to be used for compression
167    * @param compressor compressor corresponding to the given compression
168    *          algorithm
169    * @return Size after second stage of compression.
170    */
171   public int getEncodedCompressedSize(Algorithm comprAlgo,
172       Compressor compressor) throws IOException {
173     byte[] compressedBytes = getEncodedData();
174     return getCompressedSize(comprAlgo, compressor, compressedBytes, 0,
175         compressedBytes.length);
176   }
177 
178   /** @return encoded data with header */
179   private byte[] getEncodedData() {
180     if (cachedEncodedData != null) {
181       return cachedEncodedData;
182     }
183     cachedEncodedData = encodeData();
184     return cachedEncodedData;
185   }
186 
187   private ByteBuffer getUncompressedBuffer() {
188     if (rawBuffer == null || rawBuffer.limit() < rawKVs.length) {
189       rawBuffer = ByteBuffer.wrap(rawKVs);
190     }
191     return rawBuffer;
192   }
193 
194   /**
195    * Do the encoding, but do not cache the encoded data.
196    * @return encoded data block with header and checksum
197    */
198   public byte[] encodeData() {
199     try {
200       this.dataBlockEncoder.encodeKeyValues(
201           getUncompressedBuffer(), includesMemstoreTS, encodingCtx);
202     } catch (IOException e) {
203       throw new RuntimeException(String.format(
204           "Bug in encoding part of algorithm %s. " +
205           "Probably it requested more bytes than are available.",
206           toString()), e);
207     }
208     return encodingCtx.getUncompressedBytesWithHeader();
209   }
210 
211   @Override
212   public String toString() {
213     return dataBlockEncoder.toString();
214   }
215 }