View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.encoding;
18  
19  import java.io.DataInputStream;
20  import java.io.IOException;
21  import java.nio.ByteBuffer;
22  
23  import org.apache.hadoop.classification.InterfaceAudience;
24  import org.apache.hadoop.hbase.KeyValue;
25  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
26  import org.apache.hadoop.io.RawComparator;
27  
28  /**
29   * Encoding of KeyValue. It aims to be fast and efficient using assumptions:
30   * <ul>
31   * <li>the KeyValues are stored sorted by key</li>
32   * <li>we know the structure of KeyValue</li>
33   * <li>the values are always iterated forward from beginning of block</li>
34   * <li>knowledge of Key Value format</li>
35   * </ul>
36   * It is designed to work fast enough to be feasible as in-memory compression.
37   *
38   * After encoding, it also optionally compresses the encoded data if a
39   * compression algorithm is specified in the HFileBlockEncodingContext argument of
40   * {@link #encodeKeyValues(ByteBuffer, boolean, HFileBlockEncodingContext)}.
41   */
42  @InterfaceAudience.Private
43  public interface DataBlockEncoder {
44  
45    /**
46     * Encodes KeyValues. It will first encode key value pairs, and then
47     * optionally do the compression for the encoded data.
48     *
49     * @param in
50     *          Source of KeyValues to be encoded.
51     * @param includesMemstoreTS
52     *          true if including memstore timestamp after every key-value pair
53     * @param encodingContext
54     *          the encoding context which will contain encoded uncompressed bytes
55     *          as well as compressed encoded bytes if compression is enabled, and
56     *          also it will reuse resources across multiple calls.
57     * @throws IOException
58     *           If there is an error writing to output stream.
59     */
60    public void encodeKeyValues(
61        ByteBuffer in, boolean includesMemstoreTS,
62        HFileBlockEncodingContext encodingContext) throws IOException;
63  
64    /**
65     * Decodes KeyValues.
66     * @param source Compressed stream of KeyValues.
67     * @param includesMemstoreTS true if including memstore timestamp after every
68     *          key-value pair
69     * @return Uncompressed block of KeyValues.
70     * @throws IOException If there is an error in source.
71     */
72    public ByteBuffer decodeKeyValues(DataInputStream source,
73        boolean includesMemstoreTS) throws IOException;
74  
75    /**
76     * Decodes KeyValues.
77     * @param source encoded stream of KeyValues.
78     * @param allocateHeaderLength allocate this many bytes for the header.
79     * @param skipLastBytes Do not copy n last bytes.
80     * @param includesMemstoreTS true if including memstore timestamp after every
81     *          key-value pair
82     * @return Uncompressed block of KeyValues.
83     * @throws IOException If there is an error in source.
84     */
85    public ByteBuffer decodeKeyValues(DataInputStream source,
86        int allocateHeaderLength, int skipLastBytes, boolean includesMemstoreTS)
87        throws IOException;
88  
89    /**
90     * Return first key in block. Useful for indexing. Typically does not make
91     * a deep copy but returns a buffer wrapping a segment of the actual block's
92     * byte array. This is because the first key in block is usually stored
93     * unencoded.
94     * @param block encoded block we want index, the position will not change
95     * @return First key in block.
96     */
97    public ByteBuffer getFirstKeyInBlock(ByteBuffer block);
98  
99    /**
100    * Creates an HFileBlock seeker which finds KeyValues within a block.
101    * @param comparator what kind of comparison should be used
102    * @param includesMemstoreTS true if including memstore timestamp after every
103    *          key-value pair
104    * @return A newly created seeker.
105    */
106   public EncodedSeeker createSeeker(RawComparator<byte[]> comparator,
107       boolean includesMemstoreTS);
108 
109   /**
110    * Creates an encoder-specific encoding context.
111    *
112    * @param compressionAlgorithm
113    *          compression algorithm used if the final data needs to be
114    *          compressed
115    * @param encoding
116    *          encoding strategy used
117    * @param headerBytes
118    *          header bytes to be written, put a dummy header here if the header
119    *          is unknown
120    * @return a newly created encoding context
121    */
122   public HFileBlockEncodingContext newDataBlockEncodingContext(
123       Algorithm compressionAlgorithm, DataBlockEncoding encoding,
124       byte[] headerBytes);
125 
126   /**
127    * Creates an encoder-specific decoding context, which will prepare the data
128    * before actual decoding.
129    *
130    * @param compressionAlgorithm
131    *          compression algorithm used if the data needs to be decompressed
132    * @return a newly created decoding context
133    */
134   public HFileBlockDecodingContext newDataBlockDecodingContext(
135       Algorithm compressionAlgorithm);
136 
137   /**
138    * An interface which enables seeking while the underlying data is encoded.
139    *
140    * It works on one HFileBlock, but it is reusable. See
141    * {@link #setCurrentBuffer(ByteBuffer)}.
142    */
143   public static interface EncodedSeeker {
144     /**
145      * Sets the buffer on which seeking will be done.
146      * @param buffer Used for seeking.
147      */
148     public void setCurrentBuffer(ByteBuffer buffer);
149 
150     /**
151      * Does a deep copy of the key at the current position. A deep copy is
152      * necessary because buffers are reused in the decoder.
153      * @return key at current position
154      */
155     public ByteBuffer getKeyDeepCopy();
156 
157     /**
158      * Does a shallow copy of the value at the current position. A shallow
159      * copy is possible because the returned buffer refers to the backing array
160      * of the original encoded buffer.
161      * @return value at current position
162      */
163     public ByteBuffer getValueShallowCopy();
164 
165     /** @return key value at current position with position set to limit */
166     public ByteBuffer getKeyValueBuffer();
167 
168     /**
169      * @return the KeyValue object at the current position. Includes memstore
170      *         timestamp.
171      */
172     public KeyValue getKeyValue();
173 
174     /** Set position to beginning of given block */
175     public void rewind();
176 
177     /**
178      * Move to next position
179      * @return true on success, false if there are no more positions.
180      */
181     public boolean next();
182 
183     /**
184      * Moves the seeker position within the current block to:
185      * <ul>
186      * <li>the last key that is less than or equal to the given key if
187      * <code>seekBefore</code> is false</li>
188      * <li>the last key that is strictly less than the given key if <code>
189      * seekBefore</code> is true. The caller is responsible for loading the
190      * previous block if the requested key turns out to be the first key of the
191      * current block.</li>
192      * </ul>
193      * @param key byte array containing the key
194      * @param offset key position in the array
195      * @param length key length in bytes
196      * @param seekBefore find the key strictly less than the given key in case
197      *          of an exact match. Does not matter in case of an inexact match.
198      * @return 0 on exact match, 1 on inexact match.
199      */
200     public int seekToKeyInBlock(byte[] key, int offset, int length,
201         boolean seekBefore);
202   }
203 }