View Javadoc

1   /**
2    * Copyright The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements. See the NOTICE file distributed with this
6    * work for additional information regarding copyright ownership. The ASF
7    * licenses this file to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance with the License.
9    * You may obtain a copy of the License at
10   *
11   * http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
16   * License for the specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.hadoop.hbase.io.encoding;
20  
21  import static org.junit.Assert.assertArrayEquals;
22  import static org.junit.Assert.assertEquals;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.fail;
25  
26  import java.io.ByteArrayOutputStream;
27  import java.io.DataOutputStream;
28  import java.nio.ByteBuffer;
29  import java.util.ArrayList;
30  import java.util.List;
31  import java.util.Random;
32  import java.util.concurrent.ConcurrentSkipListSet;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.hbase.KeyValue;
37  import org.apache.hadoop.hbase.SmallTests;
38  import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec;
39  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
40  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker;
41  import org.apache.hadoop.hbase.util.Bytes;
42  import org.apache.hadoop.hbase.util.CollectionBackedScanner;
43  import org.junit.Before;
44  import org.junit.Test;
45  import org.junit.experimental.categories.Category;
46  
47  /**
48   * Tests scanning/seeking data with PrefixTree Encoding.
49   */
50  @Category(SmallTests.class)
51  public class TestPrefixTreeEncoding {
52    private static final Log LOG = LogFactory
53        .getLog(TestPrefixTreeEncoding.class);
54    static final String CF = "EncodingTestCF";
55    static final byte[] CF_BYTES = Bytes.toBytes(CF);
56    private static final int NUM_ROWS_PER_BATCH = 50;
57    private static final int NUM_COLS_PER_ROW = 20;
58  
59    private int numBatchesWritten = 0;
60    private ConcurrentSkipListSet<KeyValue> kvset = new ConcurrentSkipListSet<KeyValue>(
61        KeyValue.COMPARATOR);
62  
63    private static boolean formatRowNum = false;
64  
65    @Before
66    public void setUp() throws Exception {
67      kvset.clear();
68      formatRowNum = false;
69    }
70  
71    @Test
72    public void testSeekBeforeWithFixedData() throws Exception {
73      formatRowNum = true;
74      PrefixTreeCodec encoder = new PrefixTreeCodec();
75      int batchId = numBatchesWritten++;
76      ByteBuffer dataBuffer = generateFixedTestData(kvset, batchId, false);
77      HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
78          Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]);
79      encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx);
80      EncodedSeeker seeker = encoder.createSeeker(KeyValue.KEY_COMPARATOR, false);
81      byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader();
82      ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes,
83          DataBlockEncoding.ID_SIZE, onDiskBytes.length
84              - DataBlockEncoding.ID_SIZE);
85      seeker.setCurrentBuffer(readBuffer);
86  
87      // Seek before the first keyvalue;
88      KeyValue seekKey = KeyValue.createFirstDeleteFamilyOnRow(
89          getRowKey(batchId, 0), CF_BYTES);
90      seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(),
91          seekKey.getKeyLength(), true);
92      assertEquals(null, seeker.getKeyValue());
93  
94      // Seek before the middle keyvalue;
95      seekKey = KeyValue.createFirstDeleteFamilyOnRow(
96          getRowKey(batchId, NUM_ROWS_PER_BATCH / 3), CF_BYTES);
97      seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(),
98          seekKey.getKeyLength(), true);
99      assertNotNull(seeker.getKeyValue());
100     assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH / 3 - 1), seeker
101         .getKeyValue().getRow());
102 
103     // Seek before the last keyvalue;
104     seekKey = KeyValue.createFirstDeleteFamilyOnRow(Bytes.toBytes("zzzz"),
105         CF_BYTES);
106     seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(),
107         seekKey.getKeyLength(), true);
108     assertNotNull(seeker.getKeyValue());
109     assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH - 1), seeker
110         .getKeyValue().getRow());
111   }
112 
113   @Test
114   public void testScanWithRandomData() throws Exception {
115     PrefixTreeCodec encoder = new PrefixTreeCodec();
116     ByteBuffer dataBuffer = generateRandomTestData(kvset, numBatchesWritten++);
117     HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
118         Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]);
119     encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx);
120     EncodedSeeker seeker = encoder.createSeeker(KeyValue.KEY_COMPARATOR, false);
121     byte[] onDiskBytes=blkEncodingCtx.getOnDiskBytesWithHeader();
122     ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes,
123         DataBlockEncoding.ID_SIZE, onDiskBytes.length
124             - DataBlockEncoding.ID_SIZE);
125     seeker.setCurrentBuffer(readBuffer);
126     KeyValue previousKV = null;
127     do{
128       KeyValue currentKV = seeker.getKeyValue();
129       if (previousKV != null && KeyValue.COMPARATOR.compare(currentKV, previousKV) < 0) {
130         dumpInputKVSet();
131         fail("Current kv " + currentKV + " is smaller than previous keyvalue "
132             + previousKV);
133       }
134       previousKV = currentKV;
135     } while (seeker.next());
136   }
137 
138   @Test
139   public void testSeekWithRandomData() throws Exception {
140     PrefixTreeCodec encoder = new PrefixTreeCodec();
141     int batchId = numBatchesWritten++;
142     ByteBuffer dataBuffer = generateRandomTestData(kvset, batchId);
143     HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
144         Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]);
145     encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx);
146     EncodedSeeker seeker = encoder.createSeeker(KeyValue.KEY_COMPARATOR, false);
147     byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader();
148     ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes,
149         DataBlockEncoding.ID_SIZE, onDiskBytes.length
150             - DataBlockEncoding.ID_SIZE);
151     verifySeeking(seeker, readBuffer, batchId);
152   }
153 
154   @Test
155   public void testSeekWithFixedData() throws Exception {
156     PrefixTreeCodec encoder = new PrefixTreeCodec();
157     int batchId = numBatchesWritten++;
158     ByteBuffer dataBuffer = generateFixedTestData(kvset, batchId);
159     HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
160         Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]);
161     encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx);
162     EncodedSeeker seeker = encoder.createSeeker(KeyValue.KEY_COMPARATOR,
163         false);
164     byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader();
165     ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes,
166         DataBlockEncoding.ID_SIZE, onDiskBytes.length
167             - DataBlockEncoding.ID_SIZE);
168     verifySeeking(seeker, readBuffer, batchId);
169   }
170 
171   private void verifySeeking(EncodedSeeker encodeSeeker,
172       ByteBuffer encodedData, int batchId) {
173     List<KeyValue> kvList = new ArrayList<KeyValue>();
174     for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) {
175       kvList.clear();
176       encodeSeeker.setCurrentBuffer(encodedData);
177       KeyValue firstOnRow = KeyValue.createFirstOnRow(getRowKey(batchId, i));
178       encodeSeeker.seekToKeyInBlock(firstOnRow.getBuffer(),
179           firstOnRow.getKeyOffset(), firstOnRow.getKeyLength(), false);
180       boolean hasMoreOfEncodeScanner = encodeSeeker.next();
181       CollectionBackedScanner collectionScanner = new CollectionBackedScanner(
182           this.kvset);
183       boolean hasMoreOfCollectionScanner = collectionScanner.seek(firstOnRow);
184       if (hasMoreOfEncodeScanner != hasMoreOfCollectionScanner) {
185         dumpInputKVSet();
186         fail("Get error result after seeking " + firstOnRow);
187       }
188       if (hasMoreOfEncodeScanner) {
189         if (KeyValue.COMPARATOR.compare(encodeSeeker.getKeyValue(),
190             collectionScanner.peek()) != 0) {
191           dumpInputKVSet();
192           fail("Expected " + collectionScanner.peek() + " actual "
193               + encodeSeeker.getKeyValue() + ", after seeking " + firstOnRow);
194         }
195       }
196     }
197   }
198 
199   private void dumpInputKVSet() {
200     LOG.info("Dumping input keyvalue set in error case:");
201     for (KeyValue kv : kvset) {
202       System.out.println(kv);
203     }
204   }
205   
206   private static ByteBuffer generateFixedTestData(
207       ConcurrentSkipListSet<KeyValue> kvset, int batchId) throws Exception {
208     return generateFixedTestData(kvset, batchId, true);
209   }
210 
211   private static ByteBuffer generateFixedTestData(
212       ConcurrentSkipListSet<KeyValue> kvset, int batchId, boolean partial)
213       throws Exception {
214     ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream();
215     DataOutputStream userDataStream = new DataOutputStream(baosInMemory);
216     for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) {
217       if (partial && i / 10 % 2 == 1) continue;
218       for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
219         KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES,
220             getQualifier(j), getValue(batchId, i, j));
221         kvset.add(kv);
222       }
223     }
224     for (KeyValue kv : kvset) {
225       userDataStream.writeInt(kv.getKeyLength());
226       userDataStream.writeInt(kv.getValueLength());
227       userDataStream
228           .write(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength());
229       userDataStream.write(kv.getBuffer(), kv.getValueOffset(),
230           kv.getValueLength());
231     }
232     return ByteBuffer.wrap(baosInMemory.toByteArray());
233   }
234 
235   private static ByteBuffer generateRandomTestData(
236       ConcurrentSkipListSet<KeyValue> kvset, int batchId) throws Exception {
237     ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream();
238     DataOutputStream userDataStream = new DataOutputStream(baosInMemory);
239     Random random = new Random();
240     for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) {
241       if (random.nextInt(100) < 50) continue;
242       for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
243         if (random.nextInt(100) < 50) continue;
244         KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES,
245             getQualifier(j), getValue(batchId, i, j));
246         kvset.add(kv);
247       }
248     }
249     for (KeyValue kv : kvset) {
250       userDataStream.writeInt(kv.getKeyLength());
251       userDataStream.writeInt(kv.getValueLength());
252       userDataStream
253           .write(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength());
254       userDataStream.write(kv.getBuffer(), kv.getValueOffset(),
255           kv.getValueLength());
256     }
257     return ByteBuffer.wrap(baosInMemory.toByteArray());
258   }
259 
260   private static byte[] getRowKey(int batchId, int i) {
261     return Bytes.toBytes("batch" + batchId + "_row"
262         + (formatRowNum ? String.format("%04d", i) : i));
263   }
264 
265   private static byte[] getQualifier(int j) {
266     return Bytes.toBytes("col" + j);
267   }
268 
269   private static byte[] getValue(int batchId, int i, int j) {
270     return Bytes.toBytes("value_for_" + Bytes.toString(getRowKey(batchId, i))
271         + "_col" + j);
272   }
273 
274 }