1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.io.encoding;
20
21 import static org.junit.Assert.assertArrayEquals;
22 import static org.junit.Assert.assertEquals;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.fail;
25
26 import java.io.ByteArrayOutputStream;
27 import java.io.DataOutputStream;
28 import java.nio.ByteBuffer;
29 import java.util.ArrayList;
30 import java.util.List;
31 import java.util.Random;
32 import java.util.concurrent.ConcurrentSkipListSet;
33
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.hadoop.hbase.KeyValue;
37 import org.apache.hadoop.hbase.SmallTests;
38 import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec;
39 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
40 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker;
41 import org.apache.hadoop.hbase.util.Bytes;
42 import org.apache.hadoop.hbase.util.CollectionBackedScanner;
43 import org.junit.Before;
44 import org.junit.Test;
45 import org.junit.experimental.categories.Category;
46
47
48
49
50 @Category(SmallTests.class)
51 public class TestPrefixTreeEncoding {
52 private static final Log LOG = LogFactory
53 .getLog(TestPrefixTreeEncoding.class);
54 static final String CF = "EncodingTestCF";
55 static final byte[] CF_BYTES = Bytes.toBytes(CF);
56 private static final int NUM_ROWS_PER_BATCH = 50;
57 private static final int NUM_COLS_PER_ROW = 20;
58
59 private int numBatchesWritten = 0;
60 private ConcurrentSkipListSet<KeyValue> kvset = new ConcurrentSkipListSet<KeyValue>(
61 KeyValue.COMPARATOR);
62
63 private static boolean formatRowNum = false;
64
65 @Before
66 public void setUp() throws Exception {
67 kvset.clear();
68 formatRowNum = false;
69 }
70
71 @Test
72 public void testSeekBeforeWithFixedData() throws Exception {
73 formatRowNum = true;
74 PrefixTreeCodec encoder = new PrefixTreeCodec();
75 int batchId = numBatchesWritten++;
76 ByteBuffer dataBuffer = generateFixedTestData(kvset, batchId, false);
77 HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
78 Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]);
79 encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx);
80 EncodedSeeker seeker = encoder.createSeeker(KeyValue.KEY_COMPARATOR, false);
81 byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader();
82 ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes,
83 DataBlockEncoding.ID_SIZE, onDiskBytes.length
84 - DataBlockEncoding.ID_SIZE);
85 seeker.setCurrentBuffer(readBuffer);
86
87
88 KeyValue seekKey = KeyValue.createFirstDeleteFamilyOnRow(
89 getRowKey(batchId, 0), CF_BYTES);
90 seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(),
91 seekKey.getKeyLength(), true);
92 assertEquals(null, seeker.getKeyValue());
93
94
95 seekKey = KeyValue.createFirstDeleteFamilyOnRow(
96 getRowKey(batchId, NUM_ROWS_PER_BATCH / 3), CF_BYTES);
97 seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(),
98 seekKey.getKeyLength(), true);
99 assertNotNull(seeker.getKeyValue());
100 assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH / 3 - 1), seeker
101 .getKeyValue().getRow());
102
103
104 seekKey = KeyValue.createFirstDeleteFamilyOnRow(Bytes.toBytes("zzzz"),
105 CF_BYTES);
106 seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(),
107 seekKey.getKeyLength(), true);
108 assertNotNull(seeker.getKeyValue());
109 assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH - 1), seeker
110 .getKeyValue().getRow());
111 }
112
113 @Test
114 public void testScanWithRandomData() throws Exception {
115 PrefixTreeCodec encoder = new PrefixTreeCodec();
116 ByteBuffer dataBuffer = generateRandomTestData(kvset, numBatchesWritten++);
117 HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
118 Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]);
119 encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx);
120 EncodedSeeker seeker = encoder.createSeeker(KeyValue.KEY_COMPARATOR, false);
121 byte[] onDiskBytes=blkEncodingCtx.getOnDiskBytesWithHeader();
122 ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes,
123 DataBlockEncoding.ID_SIZE, onDiskBytes.length
124 - DataBlockEncoding.ID_SIZE);
125 seeker.setCurrentBuffer(readBuffer);
126 KeyValue previousKV = null;
127 do{
128 KeyValue currentKV = seeker.getKeyValue();
129 if (previousKV != null && KeyValue.COMPARATOR.compare(currentKV, previousKV) < 0) {
130 dumpInputKVSet();
131 fail("Current kv " + currentKV + " is smaller than previous keyvalue "
132 + previousKV);
133 }
134 previousKV = currentKV;
135 } while (seeker.next());
136 }
137
138 @Test
139 public void testSeekWithRandomData() throws Exception {
140 PrefixTreeCodec encoder = new PrefixTreeCodec();
141 int batchId = numBatchesWritten++;
142 ByteBuffer dataBuffer = generateRandomTestData(kvset, batchId);
143 HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
144 Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]);
145 encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx);
146 EncodedSeeker seeker = encoder.createSeeker(KeyValue.KEY_COMPARATOR, false);
147 byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader();
148 ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes,
149 DataBlockEncoding.ID_SIZE, onDiskBytes.length
150 - DataBlockEncoding.ID_SIZE);
151 verifySeeking(seeker, readBuffer, batchId);
152 }
153
154 @Test
155 public void testSeekWithFixedData() throws Exception {
156 PrefixTreeCodec encoder = new PrefixTreeCodec();
157 int batchId = numBatchesWritten++;
158 ByteBuffer dataBuffer = generateFixedTestData(kvset, batchId);
159 HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
160 Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]);
161 encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx);
162 EncodedSeeker seeker = encoder.createSeeker(KeyValue.KEY_COMPARATOR,
163 false);
164 byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader();
165 ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes,
166 DataBlockEncoding.ID_SIZE, onDiskBytes.length
167 - DataBlockEncoding.ID_SIZE);
168 verifySeeking(seeker, readBuffer, batchId);
169 }
170
171 private void verifySeeking(EncodedSeeker encodeSeeker,
172 ByteBuffer encodedData, int batchId) {
173 List<KeyValue> kvList = new ArrayList<KeyValue>();
174 for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) {
175 kvList.clear();
176 encodeSeeker.setCurrentBuffer(encodedData);
177 KeyValue firstOnRow = KeyValue.createFirstOnRow(getRowKey(batchId, i));
178 encodeSeeker.seekToKeyInBlock(firstOnRow.getBuffer(),
179 firstOnRow.getKeyOffset(), firstOnRow.getKeyLength(), false);
180 boolean hasMoreOfEncodeScanner = encodeSeeker.next();
181 CollectionBackedScanner collectionScanner = new CollectionBackedScanner(
182 this.kvset);
183 boolean hasMoreOfCollectionScanner = collectionScanner.seek(firstOnRow);
184 if (hasMoreOfEncodeScanner != hasMoreOfCollectionScanner) {
185 dumpInputKVSet();
186 fail("Get error result after seeking " + firstOnRow);
187 }
188 if (hasMoreOfEncodeScanner) {
189 if (KeyValue.COMPARATOR.compare(encodeSeeker.getKeyValue(),
190 collectionScanner.peek()) != 0) {
191 dumpInputKVSet();
192 fail("Expected " + collectionScanner.peek() + " actual "
193 + encodeSeeker.getKeyValue() + ", after seeking " + firstOnRow);
194 }
195 }
196 }
197 }
198
199 private void dumpInputKVSet() {
200 LOG.info("Dumping input keyvalue set in error case:");
201 for (KeyValue kv : kvset) {
202 System.out.println(kv);
203 }
204 }
205
206 private static ByteBuffer generateFixedTestData(
207 ConcurrentSkipListSet<KeyValue> kvset, int batchId) throws Exception {
208 return generateFixedTestData(kvset, batchId, true);
209 }
210
211 private static ByteBuffer generateFixedTestData(
212 ConcurrentSkipListSet<KeyValue> kvset, int batchId, boolean partial)
213 throws Exception {
214 ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream();
215 DataOutputStream userDataStream = new DataOutputStream(baosInMemory);
216 for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) {
217 if (partial && i / 10 % 2 == 1) continue;
218 for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
219 KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES,
220 getQualifier(j), getValue(batchId, i, j));
221 kvset.add(kv);
222 }
223 }
224 for (KeyValue kv : kvset) {
225 userDataStream.writeInt(kv.getKeyLength());
226 userDataStream.writeInt(kv.getValueLength());
227 userDataStream
228 .write(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength());
229 userDataStream.write(kv.getBuffer(), kv.getValueOffset(),
230 kv.getValueLength());
231 }
232 return ByteBuffer.wrap(baosInMemory.toByteArray());
233 }
234
235 private static ByteBuffer generateRandomTestData(
236 ConcurrentSkipListSet<KeyValue> kvset, int batchId) throws Exception {
237 ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream();
238 DataOutputStream userDataStream = new DataOutputStream(baosInMemory);
239 Random random = new Random();
240 for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) {
241 if (random.nextInt(100) < 50) continue;
242 for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
243 if (random.nextInt(100) < 50) continue;
244 KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES,
245 getQualifier(j), getValue(batchId, i, j));
246 kvset.add(kv);
247 }
248 }
249 for (KeyValue kv : kvset) {
250 userDataStream.writeInt(kv.getKeyLength());
251 userDataStream.writeInt(kv.getValueLength());
252 userDataStream
253 .write(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength());
254 userDataStream.write(kv.getBuffer(), kv.getValueOffset(),
255 kv.getValueLength());
256 }
257 return ByteBuffer.wrap(baosInMemory.toByteArray());
258 }
259
260 private static byte[] getRowKey(int batchId, int i) {
261 return Bytes.toBytes("batch" + batchId + "_row"
262 + (formatRowNum ? String.format("%04d", i) : i));
263 }
264
265 private static byte[] getQualifier(int j) {
266 return Bytes.toBytes("col" + j);
267 }
268
269 private static byte[] getValue(int batchId, int i, int j) {
270 return Bytes.toBytes("value_for_" + Bytes.toString(getRowKey(batchId, i))
271 + "_col" + j);
272 }
273
274 }