View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.encoding;
18  
19  import java.io.DataInputStream;
20  import java.io.DataOutputStream;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  
24  import org.apache.hadoop.hbase.KeyValue;
25  import org.apache.hadoop.hbase.util.ByteBufferUtils;
26  import org.apache.hadoop.hbase.util.Bytes;
27  import org.apache.hadoop.io.RawComparator;
28  
29  /**
30   * Compress key by storing size of common prefix with previous KeyValue
31   * and storing raw size of rest.
32   *
33   * Format:
34   * 1-5 bytes: compressed key length minus prefix (7-bit encoding)
35   * 1-5 bytes: compressed value length (7-bit encoding)
36   * 1-3 bytes: compressed length of common key prefix
37   * ... bytes: rest of key (including timestamp)
38   * ... bytes: value
39   *
40   * In a worst case compressed KeyValue will be three bytes longer than original.
41   *
42   */
43  public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder {
44  
45    private int addKV(int prevKeyOffset, DataOutputStream out,
46        ByteBuffer in, int prevKeyLength) throws IOException {
47      int keyLength = in.getInt();
48      int valueLength = in.getInt();
49  
50      if (prevKeyOffset == -1) {
51        // copy the key, there is no common prefix with none
52        ByteBufferUtils.putCompressedInt(out, keyLength);
53        ByteBufferUtils.putCompressedInt(out, valueLength);
54        ByteBufferUtils.putCompressedInt(out, 0);
55        ByteBufferUtils.moveBufferToStream(out, in, keyLength + valueLength);
56      } else {
57        // find a common prefix and skip it
58        int common = ByteBufferUtils.findCommonPrefix(
59            in, prevKeyOffset + KeyValue.ROW_OFFSET,
60            in.position(),
61            Math.min(prevKeyLength, keyLength));
62  
63        ByteBufferUtils.putCompressedInt(out, keyLength - common);
64        ByteBufferUtils.putCompressedInt(out, valueLength);
65        ByteBufferUtils.putCompressedInt(out, common);
66  
67        ByteBufferUtils.skip(in, common);
68        ByteBufferUtils.moveBufferToStream(out, in, keyLength - common
69            + valueLength);
70      }
71  
72      return keyLength;
73    }
74  
75    @Override
76    public void compressKeyValues(DataOutputStream writeHere,
77        ByteBuffer in, boolean includesMemstoreTS) throws IOException {
78      in.rewind();
79      ByteBufferUtils.putInt(writeHere, in.limit());
80      int prevOffset = -1;
81      int offset = 0;
82      int keyLength = 0;
83      while (in.hasRemaining()) {
84        offset = in.position();
85        keyLength = addKV(prevOffset, writeHere, in, keyLength);
86        afterEncodingKeyValue(in, writeHere, includesMemstoreTS);
87        prevOffset = offset;
88      }
89    }
90  
91    @Override
92    public ByteBuffer uncompressKeyValues(DataInputStream source,
93        int allocHeaderLength, int skipLastBytes, boolean includesMemstoreTS)
94            throws IOException {
95      int decompressedSize = source.readInt();
96      ByteBuffer buffer = ByteBuffer.allocate(decompressedSize +
97          allocHeaderLength);
98      buffer.position(allocHeaderLength);
99      int prevKeyOffset = 0;
100 
101     while (source.available() > skipLastBytes) {
102       prevKeyOffset = uncompressKeyValue(source, buffer, prevKeyOffset);
103       afterDecodingKeyValue(source, buffer, includesMemstoreTS);
104     }
105 
106     if (source.available() != skipLastBytes) {
107       throw new IllegalStateException("Read too many bytes.");
108     }
109 
110     buffer.limit(buffer.position());
111     return buffer;
112   }
113 
114   private int uncompressKeyValue(DataInputStream source, ByteBuffer buffer,
115       int prevKeyOffset)
116           throws IOException, EncoderBufferTooSmallException {
117     int keyLength = ByteBufferUtils.readCompressedInt(source);
118     int valueLength = ByteBufferUtils.readCompressedInt(source);
119     int commonLength = ByteBufferUtils.readCompressedInt(source);
120     int keyOffset;
121     keyLength += commonLength;
122 
123     ByteBufferUtils.ensureSpace(buffer, keyLength + valueLength
124         + KeyValue.ROW_OFFSET);
125 
126     buffer.putInt(keyLength);
127     buffer.putInt(valueLength);
128 
129     // copy the prefix
130     if (commonLength > 0) {
131       keyOffset = buffer.position();
132       ByteBufferUtils.copyFromBufferToBuffer(buffer, buffer, prevKeyOffset,
133           commonLength);
134     } else {
135       keyOffset = buffer.position();
136     }
137 
138     // copy rest of the key and value
139     int len = keyLength - commonLength + valueLength;
140     ByteBufferUtils.copyFromStreamToBuffer(buffer, source, len);
141     return keyOffset;
142   }
143 
144   @Override
145   public ByteBuffer getFirstKeyInBlock(ByteBuffer block) {
146     block.mark();
147     block.position(Bytes.SIZEOF_INT);
148     int keyLength = ByteBufferUtils.readCompressedInt(block);
149     ByteBufferUtils.readCompressedInt(block);
150     int commonLength = ByteBufferUtils.readCompressedInt(block);
151     if (commonLength != 0) {
152       throw new AssertionError("Nonzero common length in the first key in "
153           + "block: " + commonLength);
154     }
155     int pos = block.position();
156     block.reset();
157     return ByteBuffer.wrap(block.array(), block.arrayOffset() + pos, keyLength).slice();
158   }
159 
160   @Override
161   public String toString() {
162     return PrefixKeyDeltaEncoder.class.getSimpleName();
163   }
164 
165   @Override
166   public EncodedSeeker createSeeker(RawComparator<byte[]> comparator,
167       final boolean includesMemstoreTS) {
168     return new BufferedEncodedSeeker<SeekerState>(comparator) {
169       @Override
170       protected void decodeNext() {
171         current.keyLength = ByteBufferUtils.readCompressedInt(currentBuffer);
172         current.valueLength = ByteBufferUtils.readCompressedInt(currentBuffer);
173         current.lastCommonPrefix =
174             ByteBufferUtils.readCompressedInt(currentBuffer);
175         current.keyLength += current.lastCommonPrefix;
176         current.ensureSpaceForKey();
177         currentBuffer.get(current.keyBuffer, current.lastCommonPrefix,
178             current.keyLength - current.lastCommonPrefix);
179         current.valueOffset = currentBuffer.position();
180         ByteBufferUtils.skip(currentBuffer, current.valueLength);
181         if (includesMemstoreTS) {
182           current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer);
183         } else {
184           current.memstoreTS = 0;
185         }
186         current.nextKvOffset = currentBuffer.position();
187       }
188 
189       @Override
190       protected void decodeFirst() {
191         ByteBufferUtils.skip(currentBuffer, Bytes.SIZEOF_INT);
192         decodeNext();
193       }
194     };
195   }
196 }