1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with this 4 * work for additional information regarding copyright ownership. The ASF 5 * licenses this file to you under the Apache License, Version 2.0 (the 6 * "License"); you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14 * License for the specific language governing permissions and limitations 15 * under the License. 16 */ 17 package org.apache.hadoop.hbase.io.encoding; 18 19 import java.io.DataInputStream; 20 import java.io.IOException; 21 import java.nio.ByteBuffer; 22 23 import org.apache.hadoop.classification.InterfaceAudience; 24 import org.apache.hadoop.hbase.KeyValue; 25 import org.apache.hadoop.hbase.KeyValue.KVComparator; 26 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; 27 import org.apache.hadoop.io.RawComparator; 28 29 /** 30 * Encoding of KeyValue. It aims to be fast and efficient using assumptions: 31 * <ul> 32 * <li>the KeyValues are stored sorted by key</li> 33 * <li>we know the structure of KeyValue</li> 34 * <li>the values are always iterated forward from beginning of block</li> 35 * <li>knowledge of Key Value format</li> 36 * </ul> 37 * It is designed to work fast enough to be feasible as in memory compression. 38 * 39 * After encoding, it also optionally compresses the encoded data if a 40 * compression algorithm is specified in HFileBlockEncodingContext argument of 41 * {@link #encodeKeyValues(ByteBuffer, boolean, HFileBlockEncodingContext)}. 42 */ 43 @InterfaceAudience.Private 44 public interface DataBlockEncoder { 45 46 /** 47 * Encodes KeyValues. It will first encode key value pairs, and then 48 * optionally do the compression for the encoded data. 49 * 50 * @param in 51 * Source of KeyValue for compression. 52 * @param includesMemstoreTS 53 * true if including memstore timestamp after every key-value pair 54 * @param encodingContext 55 * the encoding context which will contain encoded uncompressed bytes 56 * as well as compressed encoded bytes if compression is enabled, and 57 * also it will reuse resources across multiple calls. 58 * @throws IOException 59 * If there is an error writing to output stream. 60 */ 61 void encodeKeyValues( 62 ByteBuffer in, boolean includesMemstoreTS, HFileBlockEncodingContext encodingContext 63 ) throws IOException; 64 65 /** 66 * Decode. 67 * @param source Compressed stream of KeyValues. 68 * @param includesMemstoreTS true if including memstore timestamp after every 69 * key-value pair 70 * @return Uncompressed block of KeyValues. 71 * @throws IOException If there is an error in source. 72 */ 73 ByteBuffer decodeKeyValues( 74 DataInputStream source, boolean includesMemstoreTS 75 ) throws IOException; 76 77 /** 78 * Uncompress. 79 * @param source encoded stream of KeyValues. 80 * @param allocateHeaderLength allocate this many bytes for the header. 81 * @param skipLastBytes Do not copy n last bytes. 82 * @param includesMemstoreTS true if including memstore timestamp after every 83 * key-value pair 84 * @return Uncompressed block of KeyValues. 85 * @throws IOException If there is an error in source. 86 */ 87 ByteBuffer decodeKeyValues( 88 DataInputStream source, int allocateHeaderLength, int skipLastBytes, boolean includesMemstoreTS 89 ) 90 throws IOException; 91 92 /** 93 * Return first key in block. Useful for indexing. Typically does not make 94 * a deep copy but returns a buffer wrapping a segment of the actual block's 95 * byte array. This is because the first key in block is usually stored 96 * unencoded. 97 * @param block encoded block we want index, the position will not change 98 * @return First key in block. 99 */ 100 ByteBuffer getFirstKeyInBlock(ByteBuffer block); 101 102 /** 103 * Create a HFileBlock seeker which find KeyValues within a block. 104 * @param comparator what kind of comparison should be used 105 * @param includesMemstoreTS true if including memstore timestamp after every 106 * key-value pair 107 * @return A newly created seeker. 108 */ 109 EncodedSeeker createSeeker( 110 KVComparator comparator, boolean includesMemstoreTS 111 ); 112 113 /** 114 * Creates a encoder specific encoding context 115 * 116 * @param compressionAlgorithm 117 * compression algorithm used if the final data needs to be 118 * compressed 119 * @param encoding 120 * encoding strategy used 121 * @param headerBytes 122 * header bytes to be written, put a dummy header here if the header 123 * is unknown 124 * @return a newly created encoding context 125 */ 126 HFileBlockEncodingContext newDataBlockEncodingContext( 127 Algorithm compressionAlgorithm, DataBlockEncoding encoding, byte[] headerBytes 128 ); 129 130 /** 131 * Creates an encoder specific decoding context, which will prepare the data 132 * before actual decoding 133 * 134 * @param compressionAlgorithm 135 * compression algorithm used if the data needs to be decompressed 136 * @return a newly created decoding context 137 */ 138 HFileBlockDecodingContext newDataBlockDecodingContext( 139 Algorithm compressionAlgorithm 140 ); 141 142 /** 143 * An interface which enable to seek while underlying data is encoded. 144 * 145 * It works on one HFileBlock, but it is reusable. See 146 * {@link #setCurrentBuffer(ByteBuffer)}. 147 */ 148 interface EncodedSeeker { 149 /** 150 * Set on which buffer there will be done seeking. 151 * @param buffer Used for seeking. 152 */ 153 void setCurrentBuffer(ByteBuffer buffer); 154 155 /** 156 * Does a deep copy of the key at the current position. A deep copy is 157 * necessary because buffers are reused in the decoder. 158 * @return key at current position 159 */ 160 ByteBuffer getKeyDeepCopy(); 161 162 /** 163 * Does a shallow copy of the value at the current position. A shallow 164 * copy is possible because the returned buffer refers to the backing array 165 * of the original encoded buffer. 166 * @return value at current position 167 */ 168 ByteBuffer getValueShallowCopy(); 169 170 /** @return key value at current position with position set to limit */ 171 ByteBuffer getKeyValueBuffer(); 172 173 /** 174 * @return the KeyValue object at the current position. Includes memstore 175 * timestamp. 176 */ 177 KeyValue getKeyValue(); 178 179 /** Set position to beginning of given block */ 180 void rewind(); 181 182 /** 183 * Move to next position 184 * @return true on success, false if there is no more positions. 185 */ 186 boolean next(); 187 188 /** 189 * Moves the seeker position within the current block to: 190 * <ul> 191 * <li>the last key that that is less than or equal to the given key if 192 * <code>seekBefore</code> is false</li> 193 * <li>the last key that is strictly less than the given key if <code> 194 * seekBefore</code> is true. The caller is responsible for loading the 195 * previous block if the requested key turns out to be the first key of the 196 * current block.</li> 197 * </ul> 198 * @param key byte array containing the key 199 * @param offset key position the array 200 * @param length key length in bytes 201 * @param seekBefore find the key strictly less than the given key in case 202 * of an exact match. Does not matter in case of an inexact match. 203 * @return 0 on exact match, 1 on inexact match. 204 */ 205 int seekToKeyInBlock( 206 byte[] key, int offset, int length, boolean seekBefore 207 ); 208 209 /** 210 * Compare the given key against the current key 211 * @param comparator 212 * @param key 213 * @param offset 214 * @param length 215 * @return -1 is the passed key is smaller than the current key, 0 if equal and 1 if greater 216 */ 217 public int compareKey(KVComparator comparator, byte[] key, int offset, int length); 218 } 219 }