1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.encoding;
18  
19  import static org.junit.Assert.assertEquals;
20  import static org.junit.Assert.fail;
21  
22  import java.io.ByteArrayInputStream;
23  import java.io.ByteArrayOutputStream;
24  import java.io.DataInputStream;
25  import java.io.DataOutputStream;
26  import java.io.IOException;
27  import java.nio.ByteBuffer;
28  import java.util.ArrayList;
29  import java.util.Collection;
30  import java.util.List;
31  import java.util.Random;
32  
33  import org.apache.hadoop.hbase.HBaseTestingUtility;
34  import org.apache.hadoop.hbase.KeyValue;
35  import org.apache.hadoop.hbase.KeyValue.Type;
36  import org.apache.hadoop.hbase.LargeTests;
37  import org.apache.hadoop.hbase.util.Bytes;
38  import org.junit.Test;
39  import org.junit.experimental.categories.Category;
40  import org.junit.runner.RunWith;
41  import org.junit.runners.Parameterized;
42  import org.junit.runners.Parameterized.Parameters;
43  
44  /**
45   * Test all of the data block encoding algorithms for correctness.
46   * Most of the class generate data which will test different branches in code.
47   */
48  @Category(LargeTests.class)
49  @RunWith(Parameterized.class)
50  public class TestDataBlockEncoders {
51    static int NUMBER_OF_KV = 10000;
52    static int NUM_RANDOM_SEEKS = 10000;
53  
54    private RedundantKVGenerator generator = new RedundantKVGenerator();
55    private Random randomizer = new Random(42l);
56  
57    private final boolean includesMemstoreTS;
58  
59    @Parameters
60    public static Collection<Object[]> parameters() {
61      return HBaseTestingUtility.BOOLEAN_PARAMETERIZED;
62    }
63  
64    public TestDataBlockEncoders(boolean includesMemstoreTS) {
65      this.includesMemstoreTS = includesMemstoreTS;
66    }
67  
68    private void testAlgorithm(ByteBuffer dataset, DataBlockEncoder encoder)
69        throws IOException {
70      // encode
71      ByteArrayOutputStream baos = new ByteArrayOutputStream();
72      DataOutputStream dataOut = new DataOutputStream(baos);
73      encoder.compressKeyValues(dataOut, dataset, includesMemstoreTS);
74  
75      // decode
76      ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
77      DataInputStream dis = new DataInputStream(bais);
78      ByteBuffer actualDataset;
79      actualDataset = encoder.uncompressKeyValues(dis, includesMemstoreTS);
80  
81      dataset.rewind();
82      actualDataset.rewind();
83  
84      assertEquals("Encoding -> decoding gives different results for " + encoder,
85          Bytes.toStringBinary(dataset), Bytes.toStringBinary(actualDataset));
86    }
87  
88    /**
89     * Test data block encoding of empty KeyValue.
90     * @throws IOException On test failure.
91     */
92    @Test
93    public void testEmptyKeyValues() throws IOException {
94      List<KeyValue> kvList = new ArrayList<KeyValue>();
95      byte[] row = new byte[0];
96      byte[] family = new byte[0];
97      byte[] qualifier = new byte[0];
98      byte[] value = new byte[0];
99      kvList.add(new KeyValue(row, family, qualifier, 0l, Type.Put, value));
100     kvList.add(new KeyValue(row, family, qualifier, 0l, Type.Put, value));
101     testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList,
102         includesMemstoreTS));
103   }
104 
105   /**
106    * Test KeyValues with negative timestamp.
107    * @throws IOException On test failure.
108    */
109   @Test
110   public void testNegativeTimestamps() throws IOException {
111     List<KeyValue> kvList = new ArrayList<KeyValue>();
112     byte[] row = new byte[0];
113     byte[] family = new byte[0];
114     byte[] qualifier = new byte[0];
115     byte[] value = new byte[0];
116     kvList.add(new KeyValue(row, family, qualifier, -1l, Type.Put, value));
117     kvList.add(new KeyValue(row, family, qualifier, -2l, Type.Put, value));
118     testEncodersOnDataset(
119         RedundantKVGenerator.convertKvToByteBuffer(kvList,
120             includesMemstoreTS));
121   }
122 
123   /**
124    * Test whether compression -> decompression gives the consistent results on
125    * pseudorandom sample.
126    * @throws IOException On test failure.
127    */
128   @Test
129   public void testExecutionOnSample() throws IOException {
130     testEncodersOnDataset(
131         RedundantKVGenerator.convertKvToByteBuffer(
132             generator.generateTestKeyValues(NUMBER_OF_KV),
133             includesMemstoreTS));
134   }
135 
136   /**
137    * Test seeking while file is encoded.
138    */
139   @Test
140   public void testSeekingOnSample() throws IOException{
141     List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV);
142     ByteBuffer originalBuffer =
143         RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
144             includesMemstoreTS);
145     List<DataBlockEncoder> dataBlockEncoders =
146         DataBlockEncoding.getAllEncoders();
147 
148     // create all seekers
149     List<DataBlockEncoder.EncodedSeeker> encodedSeekers =
150         new ArrayList<DataBlockEncoder.EncodedSeeker>();
151     for (DataBlockEncoder encoder : dataBlockEncoders) {
152       ByteArrayOutputStream baos = new ByteArrayOutputStream();
153       DataOutputStream dataOut = new DataOutputStream(baos);
154       encoder.compressKeyValues(dataOut, originalBuffer, includesMemstoreTS);
155       ByteBuffer encodedBuffer = ByteBuffer.wrap(baos.toByteArray());
156       DataBlockEncoder.EncodedSeeker seeker =
157           encoder.createSeeker(KeyValue.KEY_COMPARATOR, includesMemstoreTS);
158       seeker.setCurrentBuffer(encodedBuffer);
159       encodedSeekers.add(seeker);
160     }
161 
162     // test it!
163     // try a few random seeks
164     for (boolean seekBefore : new boolean[] {false, true}) {
165       for (int i = 0; i < NUM_RANDOM_SEEKS; ++i) {
166         int keyValueId;
167         if (!seekBefore) {
168           keyValueId = randomizer.nextInt(sampleKv.size());
169         } else {
170           keyValueId = randomizer.nextInt(sampleKv.size() - 1) + 1;
171         }
172 
173         KeyValue keyValue = sampleKv.get(keyValueId);
174         checkSeekingConsistency(encodedSeekers, seekBefore, keyValue);
175       }
176     }
177 
178     // check edge cases
179     checkSeekingConsistency(encodedSeekers, false, sampleKv.get(0));
180     for (boolean seekBefore : new boolean[] {false, true}) {
181       checkSeekingConsistency(encodedSeekers, seekBefore,
182           sampleKv.get(sampleKv.size() - 1));
183       KeyValue midKv = sampleKv.get(sampleKv.size() / 2);
184       KeyValue lastMidKv = midKv.createLastOnRowCol();
185       checkSeekingConsistency(encodedSeekers, seekBefore, lastMidKv);
186     }
187   }
188 
189   /**
190    * Test iterating on encoded buffers.
191    */
192   @Test
193   public void testNextOnSample() {
194     List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV);
195     ByteBuffer originalBuffer =
196         RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
197             includesMemstoreTS);
198     List<DataBlockEncoder> dataBlockEncoders =
199         DataBlockEncoding.getAllEncoders();
200 
201     for (DataBlockEncoder encoder : dataBlockEncoders) {
202       ByteArrayOutputStream baos = new ByteArrayOutputStream();
203       DataOutputStream dataOut = new DataOutputStream(baos);
204       try {
205         encoder.compressKeyValues(dataOut, originalBuffer, includesMemstoreTS);
206       } catch (IOException e) {
207         throw new RuntimeException(String.format(
208             "Bug while encoding using '%s'", encoder.toString()), e);
209       }
210 
211       ByteBuffer encodedBuffer = ByteBuffer.wrap(baos.toByteArray());
212       DataBlockEncoder.EncodedSeeker seeker =
213           encoder.createSeeker(KeyValue.KEY_COMPARATOR, includesMemstoreTS);
214       seeker.setCurrentBuffer(encodedBuffer);
215       int i = 0;
216       do {
217         KeyValue expectedKeyValue = sampleKv.get(i);
218         ByteBuffer keyValue = seeker.getKeyValueBuffer();
219         if (0 != Bytes.compareTo(
220             keyValue.array(), keyValue.arrayOffset(), keyValue.limit(),
221             expectedKeyValue.getBuffer(), expectedKeyValue.getOffset(),
222             expectedKeyValue.getLength())) {
223 
224           int commonPrefix = 0;
225           byte[] left = keyValue.array();
226           byte[] right = expectedKeyValue.getBuffer();
227           int leftOff = keyValue.arrayOffset();
228           int rightOff = expectedKeyValue.getOffset();
229           int length = Math.min(keyValue.limit(), expectedKeyValue.getLength());
230           while (commonPrefix < length &&
231               left[commonPrefix + leftOff] == right[commonPrefix + rightOff]) {
232             commonPrefix++;
233           }
234 
235           fail(String.format(
236               "next() produces wrong results " +
237               "encoder: %s i: %d commonPrefix: %d" +
238               "\n expected %s\n actual      %s",
239               encoder.toString(), i, commonPrefix,
240               Bytes.toStringBinary(expectedKeyValue.getBuffer(),
241                   expectedKeyValue.getOffset(), expectedKeyValue.getLength()),
242               Bytes.toStringBinary(keyValue)));
243         }
244         i++;
245       } while (seeker.next());
246     }
247   }
248 
249   /**
250    * Test whether the decompression of first key is implemented correctly.
251    */
252   @Test
253   public void testFirstKeyInBlockOnSample() {
254     List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV);
255     ByteBuffer originalBuffer =
256         RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
257             includesMemstoreTS);
258     List<DataBlockEncoder> dataBlockEncoders =
259         DataBlockEncoding.getAllEncoders();
260 
261     for (DataBlockEncoder encoder : dataBlockEncoders) {
262       ByteArrayOutputStream baos = new ByteArrayOutputStream();
263       DataOutputStream dataOut = new DataOutputStream(baos);
264       try {
265         encoder.compressKeyValues(dataOut, originalBuffer, includesMemstoreTS);
266       } catch (IOException e) {
267         throw new RuntimeException(String.format(
268             "Bug while encoding using '%s'", encoder.toString()), e);
269       }
270 
271       ByteBuffer encodedBuffer = ByteBuffer.wrap(baos.toByteArray());
272       ByteBuffer keyBuffer = encoder.getFirstKeyInBlock(encodedBuffer);
273       KeyValue firstKv = sampleKv.get(0);
274       if (0 != Bytes.compareTo(
275           keyBuffer.array(), keyBuffer.arrayOffset(), keyBuffer.limit(),
276           firstKv.getBuffer(), firstKv.getKeyOffset(),
277           firstKv.getKeyLength())) {
278 
279         int commonPrefix = 0;
280         int length = Math.min(keyBuffer.limit(), firstKv.getKeyLength());
281         while (commonPrefix < length &&
282             keyBuffer.array()[keyBuffer.arrayOffset() + commonPrefix] ==
283             firstKv.getBuffer()[firstKv.getKeyOffset() + commonPrefix]) {
284           commonPrefix++;
285         }
286         fail(String.format("Bug in '%s' commonPrefix %d",
287             encoder.toString(), commonPrefix));
288       }
289     }
290   }
291 
292   private void checkSeekingConsistency(
293       List<DataBlockEncoder.EncodedSeeker> encodedSeekers, boolean seekBefore,
294       KeyValue keyValue) {
295     ByteBuffer expectedKeyValue = null;
296     ByteBuffer expectedKey = null;
297     ByteBuffer expectedValue = null;
298 
299     for (DataBlockEncoder.EncodedSeeker seeker : encodedSeekers) {
300       seeker.seekToKeyInBlock(keyValue.getBuffer(),
301           keyValue.getKeyOffset(), keyValue.getKeyLength(), seekBefore);
302       seeker.rewind();
303 
304       ByteBuffer actualKeyValue = seeker.getKeyValueBuffer();
305       ByteBuffer actualKey = seeker.getKeyDeepCopy();
306       ByteBuffer actualValue = seeker.getValueShallowCopy();
307 
308       if (expectedKeyValue != null) {
309         assertEquals(expectedKeyValue, actualKeyValue);
310       } else {
311         expectedKeyValue = actualKeyValue;
312       }
313 
314       if (expectedKey != null) {
315         assertEquals(expectedKey, actualKey);
316       } else {
317         expectedKey = actualKey;
318       }
319 
320       if (expectedValue != null) {
321         assertEquals(expectedValue, actualValue);
322       } else {
323         expectedValue = actualValue;
324       }
325     }
326   }
327 
328   private void testEncodersOnDataset(ByteBuffer onDataset)
329       throws IOException{
330     List<DataBlockEncoder> dataBlockEncoders =
331         DataBlockEncoding.getAllEncoders();
332     ByteBuffer dataset = ByteBuffer.allocate(onDataset.capacity());
333     onDataset.rewind();
334     dataset.put(onDataset);
335     onDataset.rewind();
336     dataset.flip();
337 
338     for (DataBlockEncoder encoder : dataBlockEncoders) {
339       testAlgorithm(dataset, encoder);
340 
341       // ensure that dataset is unchanged
342       dataset.rewind();
343       assertEquals("Input of two methods is changed", onDataset, dataset);
344     }
345   }
346 }