1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with this 4 * work for additional information regarding copyright ownership. The ASF 5 * licenses this file to you under the Apache License, Version 2.0 (the 6 * "License"); you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14 * License for the specific language governing permissions and limitations 15 * under the License. 16 */ 17 package org.apache.hadoop.hbase.io.encoding; 18 19 import java.io.DataInputStream; 20 import java.io.IOException; 21 import java.nio.ByteBuffer; 22 23 import org.apache.hadoop.classification.InterfaceAudience; 24 import org.apache.hadoop.hbase.KeyValue; 25 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; 26 import org.apache.hadoop.io.RawComparator; 27 28 /** 29 * Encoding of KeyValue. It aims to be fast and efficient using assumptions: 30 * <ul> 31 * <li>the KeyValues are stored sorted by key</li> 32 * <li>we know the structure of KeyValue</li> 33 * <li>the values are always iterated forward from beginning of block</li> 34 * <li>knowledge of Key Value format</li> 35 * </ul> 36 * It is designed to work fast enough to be feasible as in memory compression. 37 * 38 * After encoding, it also optionally compresses the encoded data if a 39 * compression algorithm is specified in HFileBlockEncodingContext argument of 40 * {@link #encodeKeyValues(ByteBuffer, boolean, HFileBlockEncodingContext)}. 41 */ 42 @InterfaceAudience.Private 43 public interface DataBlockEncoder { 44 45 /** 46 * Encodes KeyValues. It will first encode key value pairs, and then 47 * optionally do the compression for the encoded data. 48 * 49 * @param in 50 * Source of KeyValue for compression. 51 * @param includesMemstoreTS 52 * true if including memstore timestamp after every key-value pair 53 * @param encodingContext 54 * the encoding context which will contain encoded uncompressed bytes 55 * as well as compressed encoded bytes if compression is enabled, and 56 * also it will reuse resources across multiple calls. 57 * @throws IOException 58 * If there is an error writing to output stream. 59 */ 60 public void encodeKeyValues( 61 ByteBuffer in, boolean includesMemstoreTS, 62 HFileBlockEncodingContext encodingContext) throws IOException; 63 64 /** 65 * Decode. 66 * @param source Compressed stream of KeyValues. 67 * @param includesMemstoreTS true if including memstore timestamp after every 68 * key-value pair 69 * @return Uncompressed block of KeyValues. 70 * @throws IOException If there is an error in source. 71 */ 72 public ByteBuffer decodeKeyValues(DataInputStream source, 73 boolean includesMemstoreTS) throws IOException; 74 75 /** 76 * Uncompress. 77 * @param source encoded stream of KeyValues. 78 * @param allocateHeaderLength allocate this many bytes for the header. 79 * @param skipLastBytes Do not copy n last bytes. 80 * @param includesMemstoreTS true if including memstore timestamp after every 81 * key-value pair 82 * @return Uncompressed block of KeyValues. 83 * @throws IOException If there is an error in source. 84 */ 85 public ByteBuffer decodeKeyValues(DataInputStream source, 86 int allocateHeaderLength, int skipLastBytes, boolean includesMemstoreTS) 87 throws IOException; 88 89 /** 90 * Return first key in block. Useful for indexing. Typically does not make 91 * a deep copy but returns a buffer wrapping a segment of the actual block's 92 * byte array. This is because the first key in block is usually stored 93 * unencoded. 94 * @param block encoded block we want index, the position will not change 95 * @return First key in block. 96 */ 97 public ByteBuffer getFirstKeyInBlock(ByteBuffer block); 98 99 /** 100 * Create a HFileBlock seeker which find KeyValues within a block. 101 * @param comparator what kind of comparison should be used 102 * @param includesMemstoreTS true if including memstore timestamp after every 103 * key-value pair 104 * @return A newly created seeker. 105 */ 106 public EncodedSeeker createSeeker(RawComparator<byte[]> comparator, 107 boolean includesMemstoreTS); 108 109 /** 110 * Creates a encoder specific encoding context 111 * 112 * @param compressionAlgorithm 113 * compression algorithm used if the final data needs to be 114 * compressed 115 * @param encoding 116 * encoding strategy used 117 * @param headerBytes 118 * header bytes to be written, put a dummy header here if the header 119 * is unknown 120 * @return a newly created encoding context 121 */ 122 public HFileBlockEncodingContext newDataBlockEncodingContext( 123 Algorithm compressionAlgorithm, DataBlockEncoding encoding, 124 byte[] headerBytes); 125 126 /** 127 * Creates an encoder specific decoding context, which will prepare the data 128 * before actual decoding 129 * 130 * @param compressionAlgorithm 131 * compression algorithm used if the data needs to be decompressed 132 * @return a newly created decoding context 133 */ 134 public HFileBlockDecodingContext newDataBlockDecodingContext( 135 Algorithm compressionAlgorithm); 136 137 /** 138 * An interface which enable to seek while underlying data is encoded. 139 * 140 * It works on one HFileBlock, but it is reusable. See 141 * {@link #setCurrentBuffer(ByteBuffer)}. 142 */ 143 public static interface EncodedSeeker { 144 /** 145 * Set on which buffer there will be done seeking. 146 * @param buffer Used for seeking. 147 */ 148 public void setCurrentBuffer(ByteBuffer buffer); 149 150 /** 151 * Does a deep copy of the key at the current position. A deep copy is 152 * necessary because buffers are reused in the decoder. 153 * @return key at current position 154 */ 155 public ByteBuffer getKeyDeepCopy(); 156 157 /** 158 * Does a shallow copy of the value at the current position. A shallow 159 * copy is possible because the returned buffer refers to the backing array 160 * of the original encoded buffer. 161 * @return value at current position 162 */ 163 public ByteBuffer getValueShallowCopy(); 164 165 /** @return key value at current position with position set to limit */ 166 public ByteBuffer getKeyValueBuffer(); 167 168 /** 169 * @return the KeyValue object at the current position. Includes memstore 170 * timestamp. 171 */ 172 public KeyValue getKeyValue(); 173 174 /** Set position to beginning of given block */ 175 public void rewind(); 176 177 /** 178 * Move to next position 179 * @return true on success, false if there is no more positions. 180 */ 181 public boolean next(); 182 183 /** 184 * Moves the seeker position within the current block to: 185 * <ul> 186 * <li>the last key that that is less than or equal to the given key if 187 * <code>seekBefore</code> is false</li> 188 * <li>the last key that is strictly less than the given key if <code> 189 * seekBefore</code> is true. The caller is responsible for loading the 190 * previous block if the requested key turns out to be the first key of the 191 * current block.</li> 192 * </ul> 193 * @param key byte array containing the key 194 * @param offset key position the array 195 * @param length key length in bytes 196 * @param seekBefore find the key strictly less than the given key in case 197 * of an exact match. Does not matter in case of an inexact match. 198 * @return 0 on exact match, 1 on inexact match. 199 */ 200 public int seekToKeyInBlock(byte[] key, int offset, int length, 201 boolean seekBefore); 202 } 203 }