1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.encoding;
18  
19  import static org.junit.Assert.assertEquals;
20  import static org.junit.Assert.fail;
21  
22  import java.io.ByteArrayInputStream;
23  import java.io.ByteArrayOutputStream;
24  import java.io.DataInputStream;
25  import java.io.DataOutputStream;
26  import java.io.IOException;
27  import java.nio.ByteBuffer;
28  import java.util.ArrayList;
29  import java.util.Collection;
30  import java.util.List;
31  import java.util.Random;
32  
33  import org.apache.hadoop.hbase.HBaseTestingUtility;
34  import org.apache.hadoop.hbase.KeyValue;
35  import org.apache.hadoop.hbase.KeyValue.Type;
36  import org.apache.hadoop.hbase.LargeTests;
37  import org.apache.hadoop.hbase.util.Bytes;
38  import org.junit.Test;
39  import org.junit.experimental.categories.Category;
40  import org.junit.runner.RunWith;
41  import org.junit.runners.Parameterized;
42  import org.junit.runners.Parameterized.Parameters;
43  
44  /**
45   * Test all of the data block encoding algorithms for correctness.
46   * Most of the class generate data which will test different branches in code.
47   */
48  @Category(LargeTests.class)
49  @RunWith(Parameterized.class)
50  public class TestDataBlockEncoders {
51    static int NUMBER_OF_KV = 10000;
52    static int NUM_RANDOM_SEEKS = 10000;
53  
54    private RedundantKVGenerator generator = new RedundantKVGenerator();
55    private Random randomizer = new Random(42l);
56  
57    private final boolean includesMemstoreTS;
58  
59    @Parameters
60    public static Collection<Object[]> parameters() {
61      return HBaseTestingUtility.BOOLEAN_PARAMETERIZED;
62    }
63  
64    public TestDataBlockEncoders(boolean includesMemstoreTS) {
65      this.includesMemstoreTS = includesMemstoreTS;
66    }
67  
68    private void testAlgorithm(ByteBuffer dataset, DataBlockEncoder encoder)
69        throws IOException {
70      // encode
71      ByteArrayOutputStream baos = new ByteArrayOutputStream();
72      DataOutputStream dataOut = new DataOutputStream(baos);
73      encoder.compressKeyValues(dataOut, dataset, includesMemstoreTS);
74  
75      // decode
76      ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
77      DataInputStream dis = new DataInputStream(bais);
78      ByteBuffer actualDataset;
79      actualDataset = encoder.uncompressKeyValues(dis, includesMemstoreTS);
80  
81      dataset.rewind();
82      actualDataset.rewind();
83  
84      assertEquals("Encoding -> decoding gives different results for " + encoder,
85          Bytes.toStringBinary(dataset), Bytes.toStringBinary(actualDataset));
86    }
87  
88    /**
89     * Test data block encoding of empty KeyValue.
90     * @throws IOException On test failure.
91     */
92    @Test
93    public void testEmptyKeyValues() throws IOException {
94      List<KeyValue> kvList = new ArrayList<KeyValue>();
95      byte[] row = new byte[0];
96      byte[] family = new byte[0];
97      byte[] qualifier = new byte[0];
98      byte[] value = new byte[0];
99      kvList.add(new KeyValue(row, family, qualifier, 0l, Type.Put, value));
100     kvList.add(new KeyValue(row, family, qualifier, 0l, Type.Put, value));
101     testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList,
102         includesMemstoreTS));
103   }
104 
105   /**
106    * Test KeyValues with negative timestamp.
107    * @throws IOException On test failure.
108    */
109   @Test
110   public void testNegativeTimestamps() throws IOException {
111     List<KeyValue> kvList = new ArrayList<KeyValue>();
112     byte[] row = new byte[0];
113     byte[] family = new byte[0];
114     byte[] qualifier = new byte[0];
115     byte[] value = new byte[0];
116     kvList.add(new KeyValue(row, family, qualifier, -1l, Type.Put, value));
117     kvList.add(new KeyValue(row, family, qualifier, -2l, Type.Put, value));
118     testEncodersOnDataset(
119         RedundantKVGenerator.convertKvToByteBuffer(kvList,
120             includesMemstoreTS));
121   }
122 
123   /**
124    * Test KeyValues with negative timestamp.
125    * @throws IOException On test failure.
126    */
127   @Test
128   public void testZeroByte() throws IOException {
129     List<KeyValue> kvList = new ArrayList<KeyValue>();
130     byte[] row = Bytes.toBytes("abcd");
131     byte[] family = new byte[] { 'f' };
132     byte[] qualifier0 = new byte[] { 'b' };
133     byte[] qualifier1 = new byte[] { 'c' };
134     byte[] value0 = new byte[] { 'd' };
135     byte[] value1 = new byte[] { 0x00 };
136     kvList.add(new KeyValue(row, family, qualifier0, 0, Type.Put, value0));
137     kvList.add(new KeyValue(row, family, qualifier1, 0, Type.Put, value1));
138     testEncodersOnDataset(
139         RedundantKVGenerator.convertKvToByteBuffer(kvList,
140             includesMemstoreTS));
141   }
142 
143   /**
144    * Test whether compression -> decompression gives the consistent results on
145    * pseudorandom sample.
146    * @throws IOException On test failure.
147    */
148   @Test
149   public void testExecutionOnSample() throws IOException {
150     testEncodersOnDataset(
151         RedundantKVGenerator.convertKvToByteBuffer(
152             generator.generateTestKeyValues(NUMBER_OF_KV),
153             includesMemstoreTS));
154   }
155 
156   /**
157    * Test seeking while file is encoded.
158    */
159   @Test
160   public void testSeekingOnSample() throws IOException{
161     List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV);
162     ByteBuffer originalBuffer =
163         RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
164             includesMemstoreTS);
165     List<DataBlockEncoder> dataBlockEncoders =
166         DataBlockEncoding.getAllEncoders();
167 
168     // create all seekers
169     List<DataBlockEncoder.EncodedSeeker> encodedSeekers =
170         new ArrayList<DataBlockEncoder.EncodedSeeker>();
171     for (DataBlockEncoder encoder : dataBlockEncoders) {
172       ByteArrayOutputStream baos = new ByteArrayOutputStream();
173       DataOutputStream dataOut = new DataOutputStream(baos);
174       encoder.compressKeyValues(dataOut, originalBuffer, includesMemstoreTS);
175       ByteBuffer encodedBuffer = ByteBuffer.wrap(baos.toByteArray());
176       DataBlockEncoder.EncodedSeeker seeker =
177           encoder.createSeeker(KeyValue.KEY_COMPARATOR, includesMemstoreTS);
178       seeker.setCurrentBuffer(encodedBuffer);
179       encodedSeekers.add(seeker);
180     }
181 
182     // test it!
183     // try a few random seeks
184     for (boolean seekBefore : new boolean[] {false, true}) {
185       for (int i = 0; i < NUM_RANDOM_SEEKS; ++i) {
186         int keyValueId;
187         if (!seekBefore) {
188           keyValueId = randomizer.nextInt(sampleKv.size());
189         } else {
190           keyValueId = randomizer.nextInt(sampleKv.size() - 1) + 1;
191         }
192 
193         KeyValue keyValue = sampleKv.get(keyValueId);
194         checkSeekingConsistency(encodedSeekers, seekBefore, keyValue);
195       }
196     }
197 
198     // check edge cases
199     checkSeekingConsistency(encodedSeekers, false, sampleKv.get(0));
200     for (boolean seekBefore : new boolean[] {false, true}) {
201       checkSeekingConsistency(encodedSeekers, seekBefore,
202           sampleKv.get(sampleKv.size() - 1));
203       KeyValue midKv = sampleKv.get(sampleKv.size() / 2);
204       KeyValue lastMidKv = midKv.createLastOnRowCol();
205       checkSeekingConsistency(encodedSeekers, seekBefore, lastMidKv);
206     }
207   }
208 
209   /**
210    * Test iterating on encoded buffers.
211    */
212   @Test
213   public void testNextOnSample() {
214     List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV);
215     ByteBuffer originalBuffer =
216         RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
217             includesMemstoreTS);
218     List<DataBlockEncoder> dataBlockEncoders =
219         DataBlockEncoding.getAllEncoders();
220 
221     for (DataBlockEncoder encoder : dataBlockEncoders) {
222       ByteArrayOutputStream baos = new ByteArrayOutputStream();
223       DataOutputStream dataOut = new DataOutputStream(baos);
224       try {
225         encoder.compressKeyValues(dataOut, originalBuffer, includesMemstoreTS);
226       } catch (IOException e) {
227         throw new RuntimeException(String.format(
228             "Bug while encoding using '%s'", encoder.toString()), e);
229       }
230 
231       ByteBuffer encodedBuffer = ByteBuffer.wrap(baos.toByteArray());
232       DataBlockEncoder.EncodedSeeker seeker =
233           encoder.createSeeker(KeyValue.KEY_COMPARATOR, includesMemstoreTS);
234       seeker.setCurrentBuffer(encodedBuffer);
235       int i = 0;
236       do {
237         KeyValue expectedKeyValue = sampleKv.get(i);
238         ByteBuffer keyValue = seeker.getKeyValueBuffer();
239         if (0 != Bytes.compareTo(
240             keyValue.array(), keyValue.arrayOffset(), keyValue.limit(),
241             expectedKeyValue.getBuffer(), expectedKeyValue.getOffset(),
242             expectedKeyValue.getLength())) {
243 
244           int commonPrefix = 0;
245           byte[] left = keyValue.array();
246           byte[] right = expectedKeyValue.getBuffer();
247           int leftOff = keyValue.arrayOffset();
248           int rightOff = expectedKeyValue.getOffset();
249           int length = Math.min(keyValue.limit(), expectedKeyValue.getLength());
250           while (commonPrefix < length &&
251               left[commonPrefix + leftOff] == right[commonPrefix + rightOff]) {
252             commonPrefix++;
253           }
254 
255           fail(String.format(
256               "next() produces wrong results " +
257               "encoder: %s i: %d commonPrefix: %d" +
258               "\n expected %s\n actual      %s",
259               encoder.toString(), i, commonPrefix,
260               Bytes.toStringBinary(expectedKeyValue.getBuffer(),
261                   expectedKeyValue.getOffset(), expectedKeyValue.getLength()),
262               Bytes.toStringBinary(keyValue)));
263         }
264         i++;
265       } while (seeker.next());
266     }
267   }
268 
269   /**
270    * Test whether the decompression of first key is implemented correctly.
271    */
272   @Test
273   public void testFirstKeyInBlockOnSample() {
274     List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV);
275     ByteBuffer originalBuffer =
276         RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
277             includesMemstoreTS);
278     List<DataBlockEncoder> dataBlockEncoders =
279         DataBlockEncoding.getAllEncoders();
280 
281     for (DataBlockEncoder encoder : dataBlockEncoders) {
282       ByteArrayOutputStream baos = new ByteArrayOutputStream();
283       DataOutputStream dataOut = new DataOutputStream(baos);
284       try {
285         encoder.compressKeyValues(dataOut, originalBuffer, includesMemstoreTS);
286       } catch (IOException e) {
287         throw new RuntimeException(String.format(
288             "Bug while encoding using '%s'", encoder.toString()), e);
289       }
290 
291       ByteBuffer encodedBuffer = ByteBuffer.wrap(baos.toByteArray());
292       ByteBuffer keyBuffer = encoder.getFirstKeyInBlock(encodedBuffer);
293       KeyValue firstKv = sampleKv.get(0);
294       if (0 != Bytes.compareTo(
295           keyBuffer.array(), keyBuffer.arrayOffset(), keyBuffer.limit(),
296           firstKv.getBuffer(), firstKv.getKeyOffset(),
297           firstKv.getKeyLength())) {
298 
299         int commonPrefix = 0;
300         int length = Math.min(keyBuffer.limit(), firstKv.getKeyLength());
301         while (commonPrefix < length &&
302             keyBuffer.array()[keyBuffer.arrayOffset() + commonPrefix] ==
303             firstKv.getBuffer()[firstKv.getKeyOffset() + commonPrefix]) {
304           commonPrefix++;
305         }
306         fail(String.format("Bug in '%s' commonPrefix %d",
307             encoder.toString(), commonPrefix));
308       }
309     }
310   }
311 
312   private void checkSeekingConsistency(
313       List<DataBlockEncoder.EncodedSeeker> encodedSeekers, boolean seekBefore,
314       KeyValue keyValue) {
315     ByteBuffer expectedKeyValue = null;
316     ByteBuffer expectedKey = null;
317     ByteBuffer expectedValue = null;
318 
319     for (DataBlockEncoder.EncodedSeeker seeker : encodedSeekers) {
320       seeker.seekToKeyInBlock(keyValue.getBuffer(),
321           keyValue.getKeyOffset(), keyValue.getKeyLength(), seekBefore);
322       seeker.rewind();
323 
324       ByteBuffer actualKeyValue = seeker.getKeyValueBuffer();
325       ByteBuffer actualKey = seeker.getKeyDeepCopy();
326       ByteBuffer actualValue = seeker.getValueShallowCopy();
327 
328       if (expectedKeyValue != null) {
329         assertEquals(expectedKeyValue, actualKeyValue);
330       } else {
331         expectedKeyValue = actualKeyValue;
332       }
333 
334       if (expectedKey != null) {
335         assertEquals(expectedKey, actualKey);
336       } else {
337         expectedKey = actualKey;
338       }
339 
340       if (expectedValue != null) {
341         assertEquals(expectedValue, actualValue);
342       } else {
343         expectedValue = actualValue;
344       }
345     }
346   }
347 
348   private void testEncodersOnDataset(ByteBuffer onDataset)
349       throws IOException{
350     List<DataBlockEncoder> dataBlockEncoders =
351         DataBlockEncoding.getAllEncoders();
352     ByteBuffer dataset = ByteBuffer.allocate(onDataset.capacity());
353     onDataset.rewind();
354     dataset.put(onDataset);
355     onDataset.rewind();
356     dataset.flip();
357 
358     for (DataBlockEncoder encoder : dataBlockEncoders) {
359       testAlgorithm(dataset, encoder);
360 
361       // ensure that dataset is unchanged
362       dataset.rewind();
363       assertEquals("Input of two methods is changed", onDataset, dataset);
364     }
365   }
366 }