View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.encoding;
18  
19  import java.io.DataInputStream;
20  import java.io.DataOutputStream;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  
24  import org.apache.hadoop.classification.InterfaceAudience;
25  import org.apache.hadoop.hbase.KeyValue;
26  import org.apache.hadoop.hbase.util.ByteBufferUtils;
27  import org.apache.hadoop.hbase.util.Bytes;
28  import org.apache.hadoop.io.RawComparator;
29  
30  /**
31   * Compress key by storing size of common prefix with previous KeyValue
32   * and storing raw size of rest.
33   *
34   * Format:
35   * 1-5 bytes: compressed key length minus prefix (7-bit encoding)
36   * 1-5 bytes: compressed value length (7-bit encoding)
37   * 1-3 bytes: compressed length of common key prefix
38   * ... bytes: rest of key (including timestamp)
39   * ... bytes: value
40   *
41   * In a worst case compressed KeyValue will be three bytes longer than original.
42   *
43   */
44  @InterfaceAudience.Private
45  public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder {
46  
47    private int addKV(int prevKeyOffset, DataOutputStream out,
48        ByteBuffer in, int prevKeyLength) throws IOException {
49      int keyLength = in.getInt();
50      int valueLength = in.getInt();
51  
52      if (prevKeyOffset == -1) {
53        // copy the key, there is no common prefix with none
54        ByteBufferUtils.putCompressedInt(out, keyLength);
55        ByteBufferUtils.putCompressedInt(out, valueLength);
56        ByteBufferUtils.putCompressedInt(out, 0);
57        ByteBufferUtils.moveBufferToStream(out, in, keyLength + valueLength);
58      } else {
59        // find a common prefix and skip it
60        int common = ByteBufferUtils.findCommonPrefix(
61            in, prevKeyOffset + KeyValue.ROW_OFFSET,
62            in.position(),
63            Math.min(prevKeyLength, keyLength));
64  
65        ByteBufferUtils.putCompressedInt(out, keyLength - common);
66        ByteBufferUtils.putCompressedInt(out, valueLength);
67        ByteBufferUtils.putCompressedInt(out, common);
68  
69        ByteBufferUtils.skip(in, common);
70        ByteBufferUtils.moveBufferToStream(out, in, keyLength - common
71            + valueLength);
72      }
73  
74      return keyLength;
75    }
76  
77    @Override
78    public void internalEncodeKeyValues(DataOutputStream writeHere,
79        ByteBuffer in, boolean includesMemstoreTS) throws IOException {
80      in.rewind();
81      ByteBufferUtils.putInt(writeHere, in.limit());
82      int prevOffset = -1;
83      int offset = 0;
84      int keyLength = 0;
85      while (in.hasRemaining()) {
86        offset = in.position();
87        keyLength = addKV(prevOffset, writeHere, in, keyLength);
88        afterEncodingKeyValue(in, writeHere, includesMemstoreTS);
89        prevOffset = offset;
90      }
91    }
92  
93    @Override
94    public ByteBuffer decodeKeyValues(DataInputStream source,
95        int allocHeaderLength, int skipLastBytes, boolean includesMemstoreTS)
96            throws IOException {
97      int decompressedSize = source.readInt();
98      ByteBuffer buffer = ByteBuffer.allocate(decompressedSize +
99          allocHeaderLength);
100     buffer.position(allocHeaderLength);
101     int prevKeyOffset = 0;
102 
103     while (source.available() > skipLastBytes) {
104       prevKeyOffset = decodeKeyValue(source, buffer, prevKeyOffset);
105       afterDecodingKeyValue(source, buffer, includesMemstoreTS);
106     }
107 
108     if (source.available() != skipLastBytes) {
109       throw new IllegalStateException("Read too many bytes.");
110     }
111 
112     buffer.limit(buffer.position());
113     return buffer;
114   }
115 
116   private int decodeKeyValue(DataInputStream source, ByteBuffer buffer,
117       int prevKeyOffset)
118           throws IOException, EncoderBufferTooSmallException {
119     int keyLength = ByteBufferUtils.readCompressedInt(source);
120     int valueLength = ByteBufferUtils.readCompressedInt(source);
121     int commonLength = ByteBufferUtils.readCompressedInt(source);
122     int keyOffset;
123     keyLength += commonLength;
124 
125     ensureSpace(buffer, keyLength + valueLength + KeyValue.ROW_OFFSET);
126 
127     buffer.putInt(keyLength);
128     buffer.putInt(valueLength);
129 
130     // copy the prefix
131     if (commonLength > 0) {
132       keyOffset = buffer.position();
133       ByteBufferUtils.copyFromBufferToBuffer(buffer, buffer, prevKeyOffset,
134           commonLength);
135     } else {
136       keyOffset = buffer.position();
137     }
138 
139     // copy rest of the key and value
140     int len = keyLength - commonLength + valueLength;
141     ByteBufferUtils.copyFromStreamToBuffer(buffer, source, len);
142     return keyOffset;
143   }
144 
145   @Override
146   public ByteBuffer getFirstKeyInBlock(ByteBuffer block) {
147     block.mark();
148     block.position(Bytes.SIZEOF_INT);
149     int keyLength = ByteBufferUtils.readCompressedInt(block);
150     ByteBufferUtils.readCompressedInt(block);
151     int commonLength = ByteBufferUtils.readCompressedInt(block);
152     if (commonLength != 0) {
153       throw new AssertionError("Nonzero common length in the first key in "
154           + "block: " + commonLength);
155     }
156     int pos = block.position();
157     block.reset();
158     return ByteBuffer.wrap(block.array(), pos, keyLength).slice();
159   }
160 
161   @Override
162   public String toString() {
163     return PrefixKeyDeltaEncoder.class.getSimpleName();
164   }
165 
166   @Override
167   public EncodedSeeker createSeeker(RawComparator<byte[]> comparator,
168       final boolean includesMemstoreTS) {
169     return new BufferedEncodedSeeker<SeekerState>(comparator) {
170       @Override
171       protected void decodeNext() {
172         current.keyLength = ByteBufferUtils.readCompressedInt(currentBuffer);
173         current.valueLength = ByteBufferUtils.readCompressedInt(currentBuffer);
174         current.lastCommonPrefix =
175             ByteBufferUtils.readCompressedInt(currentBuffer);
176         current.keyLength += current.lastCommonPrefix;
177         current.ensureSpaceForKey();
178         currentBuffer.get(current.keyBuffer, current.lastCommonPrefix,
179             current.keyLength - current.lastCommonPrefix);
180         current.valueOffset = currentBuffer.position();
181         ByteBufferUtils.skip(currentBuffer, current.valueLength);
182         if (includesMemstoreTS) {
183           current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer);
184         } else {
185           current.memstoreTS = 0;
186         }
187         current.nextKvOffset = currentBuffer.position();
188       }
189 
190       @Override
191       protected void decodeFirst() {
192         ByteBufferUtils.skip(currentBuffer, Bytes.SIZEOF_INT);
193         decodeNext();
194       }
195     };
196   }
197 }