View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.encoding;
18  
19  import static org.junit.Assert.assertEquals;
20  import static org.junit.Assert.fail;
21  
22  import java.io.ByteArrayInputStream;
23  import java.io.DataInputStream;
24  import java.io.IOException;
25  import java.nio.ByteBuffer;
26  import java.util.ArrayList;
27  import java.util.Collection;
28  import java.util.List;
29  import java.util.Random;
30  
31  import org.apache.hadoop.hbase.HBaseTestingUtility;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.KeyValue;
34  import org.apache.hadoop.hbase.KeyValue.Type;
35  import org.apache.hadoop.hbase.LargeTests;
36  import org.apache.hadoop.hbase.io.compress.Compression;
37  import org.apache.hadoop.hbase.util.Bytes;
38  import org.apache.hadoop.hbase.util.test.RedundantKVGenerator;
39  import org.junit.Test;
40  import org.junit.experimental.categories.Category;
41  import org.junit.runner.RunWith;
42  import org.junit.runners.Parameterized;
43  import org.junit.runners.Parameterized.Parameters;
44  
45  /**
46   * Test all of the data block encoding algorithms for correctness.
47   * Most of the class generate data which will test different branches in code.
48   */
49  @Category(LargeTests.class)
50  @RunWith(Parameterized.class)
51  public class TestDataBlockEncoders {
52    static int NUMBER_OF_KV = 10000;
53    static int NUM_RANDOM_SEEKS = 10000;
54  
55    private static int ENCODED_DATA_OFFSET =
56        HConstants.HFILEBLOCK_HEADER_SIZE + DataBlockEncoding.ID_SIZE;
57  
58    private RedundantKVGenerator generator = new RedundantKVGenerator();
59    private Random randomizer = new Random(42l);
60  
61    private final boolean includesMemstoreTS;
62  
63    @Parameters
64    public static Collection<Object[]> parameters() {
65      return HBaseTestingUtility.BOOLEAN_PARAMETERIZED;
66    }
67  
68    public TestDataBlockEncoders(boolean includesMemstoreTS) {
69      this.includesMemstoreTS = includesMemstoreTS;
70    }
71  
72    private HFileBlockEncodingContext getEncodingContext(
73        Compression.Algorithm algo, DataBlockEncoding encoding) {
74      DataBlockEncoder encoder = encoding.getEncoder();
75      if (encoder != null) {
76        return encoder.newDataBlockEncodingContext(algo, encoding,
77            HConstants.HFILEBLOCK_DUMMY_HEADER);
78      } else {
79        return new HFileBlockDefaultEncodingContext(algo, encoding, HConstants.HFILEBLOCK_DUMMY_HEADER);
80      }
81    }
82  
83    private byte[] encodeBytes(DataBlockEncoding encoding,
84        ByteBuffer dataset) throws IOException {
85      DataBlockEncoder encoder = encoding.getEncoder();
86      HFileBlockEncodingContext encodingCtx =
87          getEncodingContext(Compression.Algorithm.NONE, encoding);
88  
89      encoder.encodeKeyValues(dataset, includesMemstoreTS,
90          encodingCtx);
91  
92      byte[] encodedBytesWithHeader =
93          encodingCtx.getUncompressedBytesWithHeader();
94      byte[] encodedData =
95          new byte[encodedBytesWithHeader.length - ENCODED_DATA_OFFSET];
96      System.arraycopy(encodedBytesWithHeader, ENCODED_DATA_OFFSET, encodedData,
97          0, encodedData.length);
98      return encodedData;
99    }
100 
101   private void testAlgorithm(ByteBuffer dataset, DataBlockEncoding encoding)
102       throws IOException {
103     // encode
104     byte[] encodedBytes = encodeBytes(encoding, dataset);
105     //decode
106     ByteArrayInputStream bais = new ByteArrayInputStream(encodedBytes);
107     DataInputStream dis = new DataInputStream(bais);
108     ByteBuffer actualDataset;
109     DataBlockEncoder encoder = encoding.getEncoder();
110     actualDataset = encoder.decodeKeyValues(dis, includesMemstoreTS);
111 
112     dataset.rewind();
113     actualDataset.rewind();
114 
115     assertEquals("Encoding -> decoding gives different results for " + encoder,
116         Bytes.toStringBinary(dataset), Bytes.toStringBinary(actualDataset));
117   }
118 
119   /**
120    * Test data block encoding of empty KeyValue.
121    * @throws IOException On test failure.
122    */
123   @Test
124   public void testEmptyKeyValues() throws IOException {
125     List<KeyValue> kvList = new ArrayList<KeyValue>();
126     byte[] row = new byte[0];
127     byte[] family = new byte[0];
128     byte[] qualifier = new byte[0];
129     byte[] value = new byte[0];
130     kvList.add(new KeyValue(row, family, qualifier, 0l, Type.Put, value));
131     kvList.add(new KeyValue(row, family, qualifier, 0l, Type.Put, value));
132     testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList,
133         includesMemstoreTS));
134   }
135 
136   /**
137    * Test KeyValues with negative timestamp.
138    * @throws IOException On test failure.
139    */
140   @Test
141   public void testNegativeTimestamps() throws IOException {
142     List<KeyValue> kvList = new ArrayList<KeyValue>();
143     byte[] row = new byte[0];
144     byte[] family = new byte[0];
145     byte[] qualifier = new byte[0];
146     byte[] value = new byte[0];
147     kvList.add(new KeyValue(row, family, qualifier, -1l, Type.Put, value));
148     kvList.add(new KeyValue(row, family, qualifier, -2l, Type.Put, value));
149     testEncodersOnDataset(
150         RedundantKVGenerator.convertKvToByteBuffer(kvList,
151             includesMemstoreTS));
152   }
153 
154   /**
155    * Test whether compression -> decompression gives the consistent results on
156    * pseudorandom sample.
157    * @throws IOException On test failure.
158    */
159   @Test
160   public void testExecutionOnSample() throws IOException {
161     testEncodersOnDataset(
162         RedundantKVGenerator.convertKvToByteBuffer(
163             generator.generateTestKeyValues(NUMBER_OF_KV),
164             includesMemstoreTS));
165   }
166 
167   /**
168    * Test seeking while file is encoded.
169    */
170   @Test
171   public void testSeekingOnSample() throws IOException{
172     List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV);
173     ByteBuffer originalBuffer =
174         RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
175             includesMemstoreTS);
176 
177     // create all seekers
178     List<DataBlockEncoder.EncodedSeeker> encodedSeekers =
179         new ArrayList<DataBlockEncoder.EncodedSeeker>();
180     for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
181       if (encoding.getEncoder() == null) {
182         continue;
183       }
184       ByteBuffer encodedBuffer =
185           ByteBuffer.wrap(encodeBytes(encoding, originalBuffer));
186       DataBlockEncoder encoder = encoding.getEncoder();
187       DataBlockEncoder.EncodedSeeker seeker =
188           encoder.createSeeker(KeyValue.KEY_COMPARATOR, includesMemstoreTS);
189       seeker.setCurrentBuffer(encodedBuffer);
190       encodedSeekers.add(seeker);
191     }
192 
193     // test it!
194     // try a few random seeks
195     for (boolean seekBefore : new boolean[] {false, true}) {
196       for (int i = 0; i < NUM_RANDOM_SEEKS; ++i) {
197         int keyValueId;
198         if (!seekBefore) {
199           keyValueId = randomizer.nextInt(sampleKv.size());
200         } else {
201           keyValueId = randomizer.nextInt(sampleKv.size() - 1) + 1;
202         }
203 
204         KeyValue keyValue = sampleKv.get(keyValueId);
205         checkSeekingConsistency(encodedSeekers, seekBefore, keyValue);
206       }
207     }
208 
209     // check edge cases
210     checkSeekingConsistency(encodedSeekers, false, sampleKv.get(0));
211     for (boolean seekBefore : new boolean[] {false, true}) {
212       checkSeekingConsistency(encodedSeekers, seekBefore,
213           sampleKv.get(sampleKv.size() - 1));
214       KeyValue midKv = sampleKv.get(sampleKv.size() / 2);
215       KeyValue lastMidKv = midKv.createLastOnRowCol();
216       checkSeekingConsistency(encodedSeekers, seekBefore, lastMidKv);
217     }
218   }
219 
220   /**
221    * Test iterating on encoded buffers.
222    */
223   @Test
224   public void testNextOnSample() {
225     List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV);
226     ByteBuffer originalBuffer =
227         RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
228             includesMemstoreTS);
229 
230     for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
231       if (encoding.getEncoder() == null) {
232         continue;
233       }
234       DataBlockEncoder encoder = encoding.getEncoder();
235       ByteBuffer encodedBuffer = null;
236       try {
237         encodedBuffer = ByteBuffer.wrap(encodeBytes(encoding, originalBuffer));
238       } catch (IOException e) {
239         throw new RuntimeException(String.format(
240             "Bug while encoding using '%s'", encoder.toString()), e);
241       }
242       DataBlockEncoder.EncodedSeeker seeker =
243           encoder.createSeeker(KeyValue.KEY_COMPARATOR, includesMemstoreTS);
244       seeker.setCurrentBuffer(encodedBuffer);
245       int i = 0;
246       do {
247         KeyValue expectedKeyValue = sampleKv.get(i);
248         ByteBuffer keyValue = seeker.getKeyValueBuffer();
249         if (0 != Bytes.compareTo(
250             keyValue.array(), keyValue.arrayOffset(), keyValue.limit(),
251             expectedKeyValue.getBuffer(), expectedKeyValue.getOffset(),
252             expectedKeyValue.getLength())) {
253 
254           int commonPrefix = 0;
255           byte[] left = keyValue.array();
256           byte[] right = expectedKeyValue.getBuffer();
257           int leftOff = keyValue.arrayOffset();
258           int rightOff = expectedKeyValue.getOffset();
259           int length = Math.min(keyValue.limit(), expectedKeyValue.getLength());
260           while (commonPrefix < length &&
261               left[commonPrefix + leftOff] == right[commonPrefix + rightOff]) {
262             commonPrefix++;
263           }
264 
265           fail(String.format(
266               "next() produces wrong results " +
267               "encoder: %s i: %d commonPrefix: %d" +
268               "\n expected %s\n actual      %s",
269               encoder.toString(), i, commonPrefix,
270               Bytes.toStringBinary(expectedKeyValue.getBuffer(),
271                   expectedKeyValue.getOffset(), expectedKeyValue.getLength()),
272               Bytes.toStringBinary(keyValue)));
273         }
274         i++;
275       } while (seeker.next());
276     }
277   }
278 
279   /**
280    * Test whether the decompression of first key is implemented correctly.
281    */
282   @Test
283   public void testFirstKeyInBlockOnSample() {
284     List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV);
285     ByteBuffer originalBuffer =
286         RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
287             includesMemstoreTS);
288 
289     for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
290       if (encoding.getEncoder() == null) {
291         continue;
292       }
293       DataBlockEncoder encoder = encoding.getEncoder();
294       ByteBuffer encodedBuffer = null;
295       try {
296         encodedBuffer = ByteBuffer.wrap(encodeBytes(encoding, originalBuffer));
297       } catch (IOException e) {
298         throw new RuntimeException(String.format(
299             "Bug while encoding using '%s'", encoder.toString()), e);
300       }
301       ByteBuffer keyBuffer = encoder.getFirstKeyInBlock(encodedBuffer);
302       KeyValue firstKv = sampleKv.get(0);
303       if (0 != Bytes.compareTo(
304           keyBuffer.array(), keyBuffer.arrayOffset(), keyBuffer.limit(),
305           firstKv.getBuffer(), firstKv.getKeyOffset(),
306           firstKv.getKeyLength())) {
307 
308         int commonPrefix = 0;
309         int length = Math.min(keyBuffer.limit(), firstKv.getKeyLength());
310         while (commonPrefix < length &&
311             keyBuffer.array()[keyBuffer.arrayOffset() + commonPrefix] ==
312             firstKv.getBuffer()[firstKv.getKeyOffset() + commonPrefix]) {
313           commonPrefix++;
314         }
315         fail(String.format("Bug in '%s' commonPrefix %d",
316             encoder.toString(), commonPrefix));
317       }
318     }
319   }
320 
321   private void checkSeekingConsistency(
322       List<DataBlockEncoder.EncodedSeeker> encodedSeekers, boolean seekBefore,
323       KeyValue keyValue) {
324     ByteBuffer expectedKeyValue = null;
325     ByteBuffer expectedKey = null;
326     ByteBuffer expectedValue = null;
327 
328     for (DataBlockEncoder.EncodedSeeker seeker : encodedSeekers) {
329       seeker.seekToKeyInBlock(keyValue.getBuffer(),
330           keyValue.getKeyOffset(), keyValue.getKeyLength(), seekBefore);
331       seeker.rewind();
332 
333       ByteBuffer actualKeyValue = seeker.getKeyValueBuffer();
334       ByteBuffer actualKey = seeker.getKeyDeepCopy();
335       ByteBuffer actualValue = seeker.getValueShallowCopy();
336 
337       if (expectedKeyValue != null) {
338         assertEquals(expectedKeyValue, actualKeyValue);
339       } else {
340         expectedKeyValue = actualKeyValue;
341       }
342 
343       if (expectedKey != null) {
344         assertEquals(expectedKey, actualKey);
345       } else {
346         expectedKey = actualKey;
347       }
348 
349       if (expectedValue != null) {
350         assertEquals(expectedValue, actualValue);
351       } else {
352         expectedValue = actualValue;
353       }
354     }
355   }
356 
357   private void testEncodersOnDataset(ByteBuffer onDataset)
358       throws IOException{
359     ByteBuffer dataset = ByteBuffer.allocate(onDataset.capacity());
360     onDataset.rewind();
361     dataset.put(onDataset);
362     onDataset.rewind();
363     dataset.flip();
364 
365     for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
366       if (encoding.getEncoder() == null) {
367         continue;
368       }
369       testAlgorithm(dataset, encoding);
370 
371       // ensure that dataset is unchanged
372       dataset.rewind();
373       assertEquals("Input of two methods is changed", onDataset, dataset);
374     }
375   }
376 }