/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.io.encoding;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.Iterator;

import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.compress.Compressor;

import com.google.common.base.Preconditions;
import com.google.common.io.NullOutputStream;

/**
 * Encapsulates a data block encoded with a particular encoding algorithm.
 * Useful for testing and benchmarking.
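 * <p>
 * A minimal usage sketch (the encoder choice and the input bytes are
 * illustrative, not mandated by this class):
 * <pre>{@code
 * byte[] kvBytes = ...; // serialized KeyValues
 * EncodedDataBlock block = new EncodedDataBlock(new PrefixKeyDeltaEncoder(),
 *     false, DataBlockEncoding.PREFIX, kvBytes);
 * int encodedSize = block.getSize();
 * Iterator<KeyValue> it = block.getIterator();
 * }</pre>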
 */
@InterfaceAudience.Private
public class EncodedDataBlock {
  private byte[] rawKVs;
  private ByteBuffer rawBuffer;
  private DataBlockEncoder dataBlockEncoder;

  private byte[] cachedEncodedData;
  private boolean includesMemstoreTS;

  private final HFileBlockEncodingContext encodingCtx;

  /**
   * Create a buffer which will be encoded using dataBlockEncoder.
   * @param dataBlockEncoder Algorithm used for encoding.
   * @param includesMemstoreTS whether each KeyValue is followed by a memstore
   *          timestamp
   * @param encoding encoding type used
   * @param rawKVs raw KeyValue bytes to be encoded
   */
  public EncodedDataBlock(DataBlockEncoder dataBlockEncoder,
      boolean includesMemstoreTS, DataBlockEncoding encoding, byte[] rawKVs) {
    Preconditions.checkNotNull(encoding,
        "Cannot create encoded data block with null encoder");
    this.dataBlockEncoder = dataBlockEncoder;
    this.includesMemstoreTS = includesMemstoreTS;
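    // The encoding context intentionally uses no compression; the effect of
    // a second compression stage is measured separately in
    // getCompressedSize() and getEncodedCompressedSize().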
    encodingCtx =
        dataBlockEncoder.newDataBlockEncodingContext(Compression.Algorithm.NONE,
            encoding, HConstants.HFILEBLOCK_DUMMY_HEADER);
    this.rawKVs = rawKVs;
  }

  /**
   * Provides access to the KeyValues in this block by decoding the encoded
   * bytes.
   * @return a forward-only sequential iterator over the KeyValues
   */
  public Iterator<KeyValue> getIterator() {
    final int rawSize = rawKVs.length;
    byte[] encodedDataWithHeader = getEncodedData();
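    // Skip the dummy block header and the 2-byte encoding id that precede
    // the encoded payload.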
    int bytesToSkip = encodingCtx.getHeaderSize() + Bytes.SIZEOF_SHORT;
    ByteArrayInputStream bais = new ByteArrayInputStream(encodedDataWithHeader,
        bytesToSkip, encodedDataWithHeader.length - bytesToSkip);
    final DataInputStream dis = new DataInputStream(bais);

    return new Iterator<KeyValue>() {
      private ByteBuffer decompressedData = null;

      @Override
      public boolean hasNext() {
        if (decompressedData == null) {
          return rawSize > 0;
        }
        return decompressedData.hasRemaining();
      }

      @Override
      public KeyValue next() {
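        // Decode lazily on the first call; later calls just walk the
        // already-decoded buffer.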
        if (decompressedData == null) {
          try {
            decompressedData = dataBlockEncoder.decodeKeyValues(
                dis, includesMemstoreTS);
          } catch (IOException e) {
            throw new RuntimeException("Problem with data block encoder, " +
                "most likely it requested more bytes than are available.", e);
          }
          decompressedData.rewind();
        }

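        // KeyValues are self-delimiting: parse one at the current position,
        // then advance past its serialized length.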
        int offset = decompressedData.position();
        KeyValue kv = new KeyValue(decompressedData.array(), offset);
        decompressedData.position(offset + kv.getLength());

        return kv;
      }

      @Override
      public void remove() {
        throw new NotImplementedException("remove() is not supported!");
      }

      @Override
      public String toString() {
        return "Iterator of: " + dataBlockEncoder.getClass().getName();
      }

    };
  }

  /**
   * Finds the size of the minimal buffer that could store the encoded data.
   * @return Size of the encoded data in bytes.
   */
  public int getSize() {
    return getEncodedData().length;
  }

  /**
   * Finds the size of compressed data assuming that the buffer will be
   * compressed using the given algorithm.
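   * <p>
   * A typical call might look as follows ({@code data} stands in for any
   * byte array; GZ is chosen only for illustration):
   * <pre>{@code
   * Algorithm algo = Compression.Algorithm.GZ;
   * Compressor compressor = algo.getCompressor();
   * int size = EncodedDataBlock.getCompressedSize(algo, compressor, data, 0,
   *     data.length);
   * algo.returnCompressor(compressor);
   * }</pre>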
   * @param algo compression algorithm
   * @param compressor compressor already requested from codec; may be
   *          {@code null}
   * @param inputBuffer Array to be compressed.
   * @param offset Offset to beginning of the data.
   * @param length Length to be compressed.
   * @return Size of compressed data in bytes.
   * @throws IOException if compressing the data fails
   */
  public static int getCompressedSize(Algorithm algo, Compressor compressor,
      byte[] inputBuffer, int offset, int length) throws IOException {
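    // Count the compressed bytes without buffering them: NullOutputStream
    // discards the data while the wrapping DataOutputStream tracks how many
    // bytes pass through it.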
    DataOutputStream compressedStream = new DataOutputStream(
        new NullOutputStream());
    if (compressor != null) {
      compressor.reset();
    }
    OutputStream compressingStream = algo.createCompressionStream(
        compressedStream, compressor, 0);

    compressingStream.write(inputBuffer, offset, length);
    compressingStream.flush();
    compressingStream.close();

    return compressedStream.size();
  }

  /**
   * Estimates the size after a second stage of compression (e.g. LZO).
   * @param comprAlgo compression algorithm to be used for compression
   * @param compressor compressor corresponding to the given compression
   *          algorithm
   * @return Size after second stage of compression.
   * @throws IOException if compressing the encoded data fails
   */
  public int getEncodedCompressedSize(Algorithm comprAlgo,
      Compressor compressor) throws IOException {
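    // First apply the block encoding, then measure how well the encoded
    // bytes compress under the given algorithm.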
    byte[] encodedBytes = getEncodedData();
    return getCompressedSize(comprAlgo, compressor, encodedBytes, 0,
        encodedBytes.length);
  }

  /** @return encoded data with header */
  private byte[] getEncodedData() {
    if (cachedEncodedData != null) {
      return cachedEncodedData;
    }
    cachedEncodedData = encodeData();
    return cachedEncodedData;
  }

  private ByteBuffer getUncompressedBuffer() {
    if (rawBuffer == null || rawBuffer.limit() < rawKVs.length) {
      rawBuffer = ByteBuffer.wrap(rawKVs);
    }
    return rawBuffer;
  }

  /**
   * Do the encoding, but do not cache the encoded data.
   * @return encoded data block with header and checksum
   */
  public byte[] encodeData() {
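    // Run the encoder over the raw KeyValue bytes; the encoded result is
    // captured by the encoding context.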
    try {
      this.dataBlockEncoder.encodeKeyValues(
          getUncompressedBuffer(), includesMemstoreTS, encodingCtx);
    } catch (IOException e) {
      throw new RuntimeException(String.format(
          "Bug in encoding part of algorithm %s. " +
          "Probably it requested more bytes than are available.",
          toString()), e);
    }
    return encodingCtx.getUncompressedBytesWithHeader();
  }

  @Override
  public String toString() {
    return dataBlockEncoder.toString();
  }
}