/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.io.encoding;

import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.io.RawComparator;

/**
 * Encoding of KeyValue. It aims to be fast and efficient using assumptions:
 * <ul>
 * <li>the KeyValues are stored sorted by key</li>
 * <li>we know the structure of KeyValue</li>
 * <li>the values are always iterated forward from beginning of block</li>
 * <li>knowledge of Key Value format</li>
 * </ul>
 * It is designed to work fast enough to be feasible as in-memory compression.
 *
 * After encoding, it also optionally compresses the encoded data if a
 * compression algorithm is specified in HFileBlockEncodingContext argument of
 * {@link #encodeKeyValues(ByteBuffer, boolean, HFileBlockEncodingContext)}.
 */
@InterfaceAudience.Private
public interface DataBlockEncoder {

  /**
   * Encodes KeyValues. It will first encode key value pairs, and then
   * optionally do the compression for the encoded data.
   *
   * @param in
   *          Source of KeyValues to encode.
   * @param includesMemstoreTS
   *          true if including memstore timestamp after every key-value pair
   * @param encodingContext
   *          the encoding context which will contain encoded uncompressed bytes
   *          as well as compressed encoded bytes if compression is enabled, and
   *          also it will reuse resources across multiple calls.
   * @throws IOException
   *           If there is an error writing to output stream.
   */
  void encodeKeyValues(
      ByteBuffer in, boolean includesMemstoreTS, HFileBlockEncodingContext encodingContext
  ) throws IOException;

  /**
   * Decodes KeyValues.
   * @param source Encoded stream of KeyValues.
   * @param includesMemstoreTS true if including memstore timestamp after every
   *          key-value pair
   * @return Decoded block of KeyValues.
   * @throws IOException If there is an error in source.
   */
  ByteBuffer decodeKeyValues(
      DataInputStream source, boolean includesMemstoreTS
  ) throws IOException;

  /**
   * Decodes KeyValues, with room allocated for a header and with the option
   * of not copying the last bytes.
   * @param source Encoded stream of KeyValues.
   * @param allocateHeaderLength allocate this many bytes for the header.
   * @param skipLastBytes Do not copy this many last bytes.
   * @param includesMemstoreTS true if including memstore timestamp after every
   *          key-value pair
   * @return Decoded block of KeyValues.
   * @throws IOException If there is an error in source.
   */
  ByteBuffer decodeKeyValues(
      DataInputStream source, int allocateHeaderLength, int skipLastBytes, boolean includesMemstoreTS
  )
      throws IOException;

  /**
   * Returns the first key in the block. Useful for indexing. Typically does
   * not make a deep copy but returns a buffer wrapping a segment of the actual
   * block's byte array. This is because the first key in block is usually
   * stored unencoded.
   * @param block encoded block whose first key is wanted; the position will
   *          not change
   * @return First key in block.
   */
  ByteBuffer getFirstKeyInBlock(ByteBuffer block);

  /**
   * Creates an HFileBlock seeker which finds KeyValues within a block.
   * @param comparator what kind of comparison should be used
   * @param includesMemstoreTS true if including memstore timestamp after every
   *          key-value pair
   * @return A newly created seeker.
   */
  EncodedSeeker createSeeker(
      RawComparator<byte[]> comparator, boolean includesMemstoreTS
  );

  /**
   * Creates an encoder-specific encoding context.
   *
   * @param compressionAlgorithm
   *          compression algorithm used if the final data needs to be
   *          compressed
   * @param encoding
   *          encoding strategy used
   * @param headerBytes
   *          header bytes to be written, put a dummy header here if the header
   *          is unknown
   * @return a newly created encoding context
   */
  HFileBlockEncodingContext newDataBlockEncodingContext(
      Algorithm compressionAlgorithm, DataBlockEncoding encoding, byte[] headerBytes
  );

  /**
   * Creates an encoder-specific decoding context, which will prepare the data
   * before actual decoding.
   *
   * @param compressionAlgorithm
   *          compression algorithm used if the data needs to be decompressed
   * @return a newly created decoding context
   */
  HFileBlockDecodingContext newDataBlockDecodingContext(
      Algorithm compressionAlgorithm
  );

  /**
   * An interface which enables seeking while the underlying data is encoded.
   *
   * It works on one HFileBlock, but it is reusable. See
   * {@link #setCurrentBuffer(ByteBuffer)}.
   */
  interface EncodedSeeker {
    /**
     * Sets the buffer on which seeking will be done.
     * @param buffer Used for seeking.
     */
    void setCurrentBuffer(ByteBuffer buffer);

    /**
     * Does a deep copy of the key at the current position. A deep copy is
     * necessary because buffers are reused in the decoder.
     * @return key at current position
     */
    ByteBuffer getKeyDeepCopy();

    /**
     * Does a shallow copy of the value at the current position. A shallow
     * copy is possible because the returned buffer refers to the backing array
     * of the original encoded buffer.
     * @return value at current position
     */
    ByteBuffer getValueShallowCopy();

    /** @return key value at current position with position set to limit */
    ByteBuffer getKeyValueBuffer();

    /**
     * @return the KeyValue object at the current position. Includes memstore
     *         timestamp.
     */
    KeyValue getKeyValue();

    /** Sets the position to the beginning of the given block. */
    void rewind();

    /**
     * Moves to the next position.
     * @return true on success, false if there are no more positions.
     */
    boolean next();

    /**
     * Moves the seeker position within the current block to:
     * <ul>
     * <li>the last key that is less than or equal to the given key if
     * <code>seekBefore</code> is false</li>
     * <li>the last key that is strictly less than the given key if <code>
     * seekBefore</code> is true. The caller is responsible for loading the
     * previous block if the requested key turns out to be the first key of the
     * current block.</li>
     * </ul>
     * @param key byte array containing the key
     * @param offset key position in the array
     * @param length key length in bytes
     * @param seekBefore find the key strictly less than the given key in case
     *          of an exact match. Does not matter in case of an inexact match.
     * @return 0 on exact match, 1 on inexact match.
     */
    int seekToKeyInBlock(
        byte[] key, int offset, int length, boolean seekBefore
    );
  }
}