View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.encoding;
18  
19  import java.io.DataInputStream;
20  import java.io.IOException;
21  import java.nio.ByteBuffer;
22  
23  import org.apache.hadoop.classification.InterfaceAudience;
24  import org.apache.hadoop.hbase.KeyValue;
25  import org.apache.hadoop.hbase.KeyValue.KVComparator;
26  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
27  import org.apache.hadoop.io.RawComparator;
28  
29  /**
30   * Encoding of KeyValue. It aims to be fast and efficient using the following assumptions:
31   * <ul>
32   * <li>the KeyValues are stored sorted by key</li>
33   * <li>we know the structure of KeyValue</li>
34   * <li>the values are always iterated forward from the beginning of the block</li>
35   * <li>knowledge of Key Value format</li>
36   * </ul>
37   * It is designed to work fast enough to be feasible as in-memory compression.
38   *
39   * After encoding, it also optionally compresses the encoded data if a
40   * compression algorithm is specified in the HFileBlockEncodingContext argument of
41   * {@link #encodeKeyValues(ByteBuffer, boolean, HFileBlockEncodingContext)}.
42   */
43  @InterfaceAudience.Private
44  public interface DataBlockEncoder {
45  
46    /**
47     * Encodes KeyValues. It will first encode key value pairs, and then
48     * optionally do the compression for the encoded data.
49     *
50     * @param in
51     *          Source of KeyValues to be encoded (and optionally compressed).
52     * @param includesMemstoreTS
53     *          true if including memstore timestamp after every key-value pair
54     * @param encodingContext
55     *          the encoding context which will contain encoded uncompressed bytes
56     *          as well as compressed encoded bytes if compression is enabled, and
57     *          also it will reuse resources across multiple calls.
58     * @throws IOException
59     *           If there is an error writing to output stream.
60     */
61    void encodeKeyValues(
62      ByteBuffer in, boolean includesMemstoreTS, HFileBlockEncodingContext encodingContext
63    ) throws IOException;
64  
65    /**
66     * Decodes KeyValues from the given stream.
67     * @param source Encoded stream of KeyValues.
68     * @param includesMemstoreTS true if including memstore timestamp after every
69     *          key-value pair
70     * @return Decoded (uncompressed) block of KeyValues.
71     * @throws IOException If there is an error in source.
72     */
73    ByteBuffer decodeKeyValues(
74      DataInputStream source, boolean includesMemstoreTS
75    ) throws IOException;
76  
77    /**
78     * Decodes KeyValues, with control over header allocation and trailing bytes.
79     * @param source encoded stream of KeyValues.
80     * @param allocateHeaderLength allocate this many bytes for the header.
81     * @param skipLastBytes Do not copy this many bytes at the end.
82     * @param includesMemstoreTS true if including memstore timestamp after every
83     *          key-value pair
84     * @return Decoded (uncompressed) block of KeyValues.
85     * @throws IOException If there is an error in source.
86     */
87    ByteBuffer decodeKeyValues(
88      DataInputStream source, int allocateHeaderLength, int skipLastBytes, boolean includesMemstoreTS
89    )
90        throws IOException;
91  
92    /**
93     * Returns the first key in the block. Useful for indexing. Typically does not make
94     * a deep copy but returns a buffer wrapping a segment of the actual block's
95     * byte array. This is because the first key in block is usually stored
96     * unencoded.
97     * @param block encoded block we want to index; the position will not change
98     * @return First key in block.
99     */
100   ByteBuffer getFirstKeyInBlock(ByteBuffer block);
101 
102   /**
103    * Creates an HFileBlock seeker which finds KeyValues within a block.
104    * @param comparator what kind of comparison should be used
105    * @param includesMemstoreTS true if including memstore timestamp after every
106    *          key-value pair
107    * @return A newly created seeker.
108    */
109   EncodedSeeker createSeeker(
110     KVComparator comparator, boolean includesMemstoreTS
111   );
112 
113   /**
114    * Creates an encoder-specific encoding context.
115    *
116    * @param compressionAlgorithm
117    *          compression algorithm used if the final data needs to be
118    *          compressed
119    * @param encoding
120    *          encoding strategy used
121    * @param headerBytes
122    *          header bytes to be written, put a dummy header here if the header
123    *          is unknown
124    * @return a newly created encoding context
125    */
126   HFileBlockEncodingContext newDataBlockEncodingContext(
127     Algorithm compressionAlgorithm, DataBlockEncoding encoding, byte[] headerBytes
128   );
129 
130   /**
131    * Creates an encoder-specific decoding context, which will prepare the data
132    * before actual decoding.
133    *
134    * @param compressionAlgorithm
135    *          compression algorithm used if the data needs to be decompressed
136    * @return a newly created decoding context
137    */
138   HFileBlockDecodingContext newDataBlockDecodingContext(
139     Algorithm compressionAlgorithm
140   );
141 
142   /**
143    * An interface which enables seeking while the underlying data is encoded.
144    *
145    * It works on one HFileBlock, but it is reusable. See
146    * {@link #setCurrentBuffer(ByteBuffer)}.
147    */
148   interface EncodedSeeker {
149     /**
150      * Sets the buffer on which seeking will be done.
151      * @param buffer Used for seeking.
152      */
153     void setCurrentBuffer(ByteBuffer buffer);
154 
155     /**
156      * Does a deep copy of the key at the current position. A deep copy is
157      * necessary because buffers are reused in the decoder.
158      * @return key at current position
159      */
160     ByteBuffer getKeyDeepCopy();
161 
162     /**
163      * Does a shallow copy of the value at the current position. A shallow
164      * copy is possible because the returned buffer refers to the backing array
165      * of the original encoded buffer.
166      * @return value at current position
167      */
168     ByteBuffer getValueShallowCopy();
169 
170     /** @return key value at current position with position set to limit */
171     ByteBuffer getKeyValueBuffer();
172 
173     /**
174      * @return the KeyValue object at the current position. Includes memstore
175      *         timestamp.
176      */
177     KeyValue getKeyValue();
178 
179     /** Sets position to the beginning of the given block. */
180     void rewind();
181 
182     /**
183      * Moves to the next position.
184      * @return true on success, false if there are no more positions.
185      */
186     boolean next();
187 
188     /**
189      * Moves the seeker position within the current block to:
190      * <ul>
191      * <li>the last key that is less than or equal to the given key if
192      * <code>seekBefore</code> is false</li>
193      * <li>the last key that is strictly less than the given key if <code>
194      * seekBefore</code> is true. The caller is responsible for loading the
195      * previous block if the requested key turns out to be the first key of the
196      * current block.</li>
197      * </ul>
198      * @param key byte array containing the key
199      * @param offset key position in the array
200      * @param length key length in bytes
201      * @param seekBefore find the key strictly less than the given key in case
202      *          of an exact match. Does not matter in case of an inexact match.
203      * @return 0 on exact match, 1 on inexact match.
204      */
205     int seekToKeyInBlock(
206       byte[] key, int offset, int length, boolean seekBefore
207     );
208 
209     /**
210      * Compares the given key against the current key.
211      * @param comparator comparator used to compare the two keys
212      * @param key byte array containing the key to compare
213      * @param offset key position in the array
214      * @param length key length in bytes
215      * @return -1 if the passed key is smaller than the current key, 0 if equal and 1 if greater
216      */
217     public int compareKey(KVComparator comparator, byte[] key, int offset, int length);
218   }
219 }