1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with this 4 * work for additional information regarding copyright ownership. The ASF 5 * licenses this file to you under the Apache License, Version 2.0 (the 6 * "License"); you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14 * License for the specific language governing permissions and limitations 15 * under the License. 16 */ 17 package org.apache.hadoop.hbase.io.encoding; 18 19 import java.io.DataInputStream; 20 import java.io.DataOutputStream; 21 import java.io.IOException; 22 import java.nio.ByteBuffer; 23 24 import org.apache.hadoop.hbase.KeyValue; 25 import org.apache.hadoop.io.RawComparator; 26 27 /** 28 * Encoding of KeyValue. It aims to be fast and efficient using assumptions: 29 * <ul> 30 * <li>the KeyValues are stored sorted by key</li> 31 * <li>we know the structure of KeyValue</li> 32 * <li>the values are always iterated forward from beginning of block</li> 33 * <li>knowledge of Key Value format</li> 34 * </ul> 35 * It is designed to work fast enough to be feasible as in memory compression. 36 */ 37 public interface DataBlockEncoder { 38 /** 39 * Compress KeyValues and write them to output buffer. 40 * @param out Where to write compressed data. 41 * @param in Source of KeyValue for compression. 42 * @param includesMemstoreTS true if including memstore timestamp after every 43 * key-value pair 44 * @throws IOException If there is an error writing to output stream. 45 */ 46 public void compressKeyValues(DataOutputStream out, 47 ByteBuffer in, boolean includesMemstoreTS) throws IOException; 48 49 /** 50 * Uncompress. 51 * @param source Compressed stream of KeyValues. 52 * @param includesMemstoreTS true if including memstore timestamp after every 53 * key-value pair 54 * @return Uncompressed block of KeyValues. 55 * @throws IOException If there is an error in source. 56 */ 57 public ByteBuffer uncompressKeyValues(DataInputStream source, 58 boolean includesMemstoreTS) throws IOException; 59 60 /** 61 * Uncompress. 62 * @param source Compressed stream of KeyValues. 63 * @param allocateHeaderLength allocate this many bytes for the header. 64 * @param skipLastBytes Do not copy n last bytes. 65 * @param includesMemstoreTS true if including memstore timestamp after every 66 * key-value pair 67 * @return Uncompressed block of KeyValues. 68 * @throws IOException If there is an error in source. 69 */ 70 public ByteBuffer uncompressKeyValues(DataInputStream source, 71 int allocateHeaderLength, int skipLastBytes, boolean includesMemstoreTS) 72 throws IOException; 73 74 /** 75 * Return first key in block. Useful for indexing. Typically does not make 76 * a deep copy but returns a buffer wrapping a segment of the actual block's 77 * byte array. This is because the first key in block is usually stored 78 * unencoded. 79 * @param block encoded block we want index, the position will not change 80 * @return First key in block. 81 */ 82 public ByteBuffer getFirstKeyInBlock(ByteBuffer block); 83 84 /** 85 * Create a HFileBlock seeker which find KeyValues within a block. 86 * @param comparator what kind of comparison should be used 87 * @param includesMemstoreTS true if including memstore timestamp after every 88 * key-value pair 89 * @return A newly created seeker. 90 */ 91 public EncodedSeeker createSeeker(RawComparator<byte[]> comparator, 92 boolean includesMemstoreTS); 93 94 /** 95 * An interface which enable to seek while underlying data is encoded. 96 * 97 * It works on one HFileBlock, but it is reusable. See 98 * {@link #setCurrentBuffer(ByteBuffer)}. 99 */ 100 public static interface EncodedSeeker { 101 /** 102 * Set on which buffer there will be done seeking. 103 * @param buffer Used for seeking. 104 */ 105 public void setCurrentBuffer(ByteBuffer buffer); 106 107 /** 108 * Does a deep copy of the key at the current position. A deep copy is 109 * necessary because buffers are reused in the decoder. 110 * @return key at current position 111 */ 112 public ByteBuffer getKeyDeepCopy(); 113 114 /** 115 * Does a shallow copy of the value at the current position. A shallow 116 * copy is possible because the returned buffer refers to the backing array 117 * of the original encoded buffer. 118 * @return value at current position 119 */ 120 public ByteBuffer getValueShallowCopy(); 121 122 /** @return key value at current position. */ 123 public ByteBuffer getKeyValueBuffer(); 124 125 /** 126 * @return the KeyValue object at the current position. Includes memstore 127 * timestamp. 128 */ 129 public KeyValue getKeyValue(); 130 131 /** Set position to beginning of given block */ 132 public void rewind(); 133 134 /** 135 * Move to next position 136 * @return true on success, false if there is no more positions. 137 */ 138 public boolean next(); 139 140 /** 141 * Moves the seeker position within the current block to: 142 * <ul> 143 * <li>the last key that that is less than or equal to the given key if 144 * <code>seekBefore</code> is false</li> 145 * <li>the last key that is strictly less than the given key if <code> 146 * seekBefore</code> is true. The caller is responsible for loading the 147 * previous block if the requested key turns out to be the first key of the 148 * current block.</li> 149 * </ul> 150 * @param key byte array containing the key 151 * @param offset key position the array 152 * @param length key length in bytes 153 * @param seekBefore find the key strictly less than the given key in case 154 * of an exact match. Does not matter in case of an inexact match. 155 * @return 0 on exact match, 1 on inexact match. 156 */ 157 public int seekToKeyInBlock(byte[] key, int offset, int length, 158 boolean seekBefore); 159 } 160 }