/**
 * Copyright 2011 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.*;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

/**
 * Testing writing a version 2 {@link HFile}. This is a low-level test written
 * during the development of {@link HFileWriterV2}.
 */
@Category(SmallTests.class)
@RunWith(Parameterized.class)
public class TestHFileWriterV2 {

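  /** Whether HBase checksum verification is enabled; the parameterized runner exercises both settings. */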
  private final boolean useChecksums;

  @Parameterized.Parameters
  public static Collection<Object[]> parameters() {
    return HBaseTestingUtility.BOOLEAN_PARAMETERIZED;
  }

  private static final Log LOG = LogFactory.getLog(TestHFileWriterV2.class);

  private static final HBaseTestingUtility TEST_UTIL =
      new HBaseTestingUtility();

  private Configuration conf;
  private FileSystem fs;

  public TestHFileWriterV2(boolean useChecksums) {
    this.useChecksums = useChecksums;
  }

  @Before
  public void setUp() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    conf.setBoolean(HConstants.HBASE_CHECKSUM_VERIFICATION, useChecksums);
    fs = FileSystem.get(conf);
  }

  @Test
  public void testHFileFormatV2() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
        "testHFileFormatV2");
    final Compression.Algorithm compressAlgo = Compression.Algorithm.GZ;
    final int entryCount = 10000;
    writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, false);
  }

  @Test
  public void testMidKeyInHFile() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
        "testMidKeyInHFile");
    Compression.Algorithm compressAlgo = Compression.Algorithm.NONE;
    int entryCount = 50000;
    writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, true);
  }

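  /**
   * Writes {@code entryCount} random key/value pairs and three meta blocks to an
   * HFile, then re-reads the file block by block, verifying the trailer, the
   * block indexes, the file info, and every key/value pair that was written.
   */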
  private void writeDataAndReadFromHFile(Path hfilePath,
      Algorithm compressAlgo, int entryCount, boolean findMidKey) throws IOException {

    HFileWriterV2 writer = (HFileWriterV2)
        new HFileWriterV2.WriterFactoryV2(conf, new CacheConfig(conf))
            .withPath(fs, hfilePath)
            .withBlockSize(4096)
            .withCompression(compressAlgo)
            .withComparator(KeyValue.KEY_COMPARATOR)
            .create();

    long totalKeyLength = 0;
    long totalValueLength = 0;

    Random rand = new Random(9713312); // Just a fixed seed.

    List<byte[]> keys = new ArrayList<byte[]>();
    List<byte[]> values = new ArrayList<byte[]>();

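    // Write the keys in increasing order, remembering them so they can be
    // compared against what the low-level reader returns later.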
    for (int i = 0; i < entryCount; ++i) {
      byte[] keyBytes = randomOrderedKey(rand, i);

      // A random-length random value.
      byte[] valueBytes = randomValue(rand);
      writer.append(keyBytes, valueBytes);

      totalKeyLength += keyBytes.length;
      totalValueLength += valueBytes.length;

      keys.add(keyBytes);
      values.add(valueBytes);
    }

    // Add in an arbitrary order. They will be sorted lexicographically by
    // the key.
    writer.appendMetaBlock("CAPITAL_OF_USA", new Text("Washington, D.C."));
    writer.appendMetaBlock("CAPITAL_OF_RUSSIA", new Text("Moscow"));
    writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));

    writer.close();

    FSDataInputStream fsdis = fs.open(hfilePath);

    // A "manual" version of a new-format HFile reader. This unit test was
    // written before the V2 reader was fully implemented.

    long fileSize = fs.getFileStatus(hfilePath).getLen();
    FixedFileTrailer trailer =
        FixedFileTrailer.readFromStream(fsdis, fileSize);

    assertEquals(2, trailer.getMajorVersion());
    assertEquals(useChecksums ? 1 : 0, trailer.getMinorVersion());
    assertEquals(entryCount, trailer.getEntryCount());

    HFileBlock.FSReader blockReader =
        new HFileBlock.FSReaderV2(fsdis, fsdis, compressAlgo, fileSize,
            this.useChecksums ? HFileReaderV2.MAX_MINOR_VERSION
                : HFileReaderV2.MIN_MINOR_VERSION,
            null, null);
    // Comparator class name is stored in the trailer in version 2.
    RawComparator<byte[]> comparator = trailer.createComparator();
    HFileBlockIndex.BlockIndexReader dataBlockIndexReader =
        new HFileBlockIndex.BlockIndexReader(comparator,
            trailer.getNumDataIndexLevels());
    HFileBlockIndex.BlockIndexReader metaBlockIndexReader =
        new HFileBlockIndex.BlockIndexReader(
            Bytes.BYTES_RAWCOMPARATOR, 1);

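    // The load-on-open section, located just before the trailer, holds the
    // root data index, the meta index, and the file info block.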
    HFileBlock.BlockIterator blockIter = blockReader.blockRange(
        trailer.getLoadOnOpenDataOffset(),
        fileSize - trailer.getTrailerSize());
    // Data index. We also read statistics about the block index written after
    // the root level.
    dataBlockIndexReader.readMultiLevelIndexRoot(
        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
        trailer.getDataIndexCount());

    if (findMidKey) {
      byte[] midkey = dataBlockIndexReader.midkey();
      assertNotNull("Midkey should not be null", midkey);
    }

    // Meta index.
    metaBlockIndexReader.readRootIndex(
        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX).getByteStream(),
        trailer.getMetaIndexCount());
    // File info
    FileInfo fileInfo = new FileInfo();
    fileInfo.readFields(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
    byte[] keyValueFormatVersion = fileInfo.get(
        HFileWriterV2.KEY_VALUE_VERSION);
    boolean includeMemstoreTS = keyValueFormatVersion != null &&
        Bytes.toInt(keyValueFormatVersion) > 0;

    // Counters for the number of key/value pairs and the number of blocks
    int entriesRead = 0;
    int blocksRead = 0;
    long memstoreTS = 0;

    // Scan blocks the way the reader would scan them
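    // The -1 arguments to readBlockData below mean that the on-disk and
    // uncompressed block sizes are not known in advance.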
    fsdis.seek(0);
    long curBlockPos = 0;
    while (curBlockPos <= trailer.getLastDataBlockOffset()) {
      HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false);
      assertEquals(BlockType.DATA, block.getBlockType());
      ByteBuffer buf = block.getBufferWithoutHeader();
      while (buf.hasRemaining()) {
        int keyLen = buf.getInt();
        int valueLen = buf.getInt();

        byte[] key = new byte[keyLen];
        buf.get(key);

        byte[] value = new byte[valueLen];
        buf.get(value);

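        // When the writer recorded memstore timestamps, each key/value pair is
        // followed by a vlong-encoded timestamp that has to be skipped over.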
        if (includeMemstoreTS) {
          ByteArrayInputStream byteInput = new ByteArrayInputStream(buf.array(),
              buf.arrayOffset() + buf.position(), buf.remaining());
          DataInputStream dataInput = new DataInputStream(byteInput);

          memstoreTS = WritableUtils.readVLong(dataInput);
          buf.position(buf.position() + WritableUtils.getVIntSize(memstoreTS));
        }

        // A brute-force check to see that all keys and values are correct.
        assertTrue(Bytes.compareTo(key, keys.get(entriesRead)) == 0);
        assertTrue(Bytes.compareTo(value, values.get(entriesRead)) == 0);

        ++entriesRead;
      }
      ++blocksRead;
      curBlockPos += block.getOnDiskSizeWithHeader();
    }
    LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead="
        + blocksRead);
    assertEquals(entryCount, entriesRead);

    // Meta blocks. We can scan until the load-on-open data offset (which is
    // the root block index offset in version 2) because we are not testing
    // intermediate-level index blocks here.

    int metaCounter = 0;
    while (fsdis.getPos() < trailer.getLoadOnOpenDataOffset()) {
      LOG.info("Current offset: " + fsdis.getPos() + ", scanning until " +
          trailer.getLoadOnOpenDataOffset());
      HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false);
      assertEquals(BlockType.META, block.getBlockType());
      Text t = new Text();
      block.readInto(t);
      Text expectedText =
          (metaCounter == 0 ? new Text("Paris") : metaCounter == 1 ? new Text(
              "Moscow") : new Text("Washington, D.C."));
      assertEquals(expectedText, t);
      LOG.info("Read meta block data: " + t);
      ++metaCounter;
      curBlockPos += block.getOnDiskSizeWithHeader();
    }

    fsdis.close();
  }

  // Static stuff used by various HFile v2 unit tests

  private static final String COLUMN_FAMILY_NAME = "_-myColumnFamily-_";
  private static final int MIN_ROW_OR_QUALIFIER_LENGTH = 64;
  private static final int MAX_ROW_OR_QUALIFIER_LENGTH = 128;

  /**
   * Generates a random key that is guaranteed to increase as the given index i
   * increases. The result consists of a prefix, which is a deterministic
   * increasing function of i, and a random suffix.
   *
   * @param rand random number generator to use
   * @param i index that the generated key must increase with
   * @return the generated key as a byte array
   */
  public static byte[] randomOrderedKey(Random rand, int i) {
    StringBuilder k = new StringBuilder();

    // The fixed-length lexicographically increasing part of the key.
    for (int bitIndex = 31; bitIndex >= 0; --bitIndex) {
      if ((i & (1 << bitIndex)) == 0)
        k.append("a");
      else
        k.append("b");
    }

    // A random-length random suffix of the key. Pick the length once so the
    // bound is not re-evaluated on every iteration.
    int suffixLength = rand.nextInt(50);
    for (int j = 0; j < suffixLength; ++j)
      k.append(randomReadableChar(rand));

    byte[] keyBytes = k.toString().getBytes();
    return keyBytes;
  }

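  /** Generates a random printable-ASCII value between 1 and 2000 characters long. */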
  public static byte[] randomValue(Random rand) {
    StringBuilder v = new StringBuilder();
    int valueLength = 1 + rand.nextInt(2000);
    for (int j = 0; j < valueLength; ++j) {
      v.append((char) (32 + rand.nextInt(95)));
    }

    byte[] valueBytes = v.toString().getBytes();
    return valueBytes;
  }

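  /** Returns a uniformly random character from the 63-character set [A-Za-z0-9_]. */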
  public static final char randomReadableChar(Random rand) {
    int i = rand.nextInt(26 * 2 + 10 + 1);
    if (i < 26)
      return (char) ('A' + i);
    i -= 26;

    if (i < 26)
      return (char) ('a' + i);
    i -= 26;

    if (i < 10)
      return (char) ('0' + i);
    i -= 10;

    assert i == 0;
    return '_';
  }

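  /**
   * Generates a random row or qualifier of {@link #MIN_ROW_OR_QUALIFIER_LENGTH} to
   * {@link #MAX_ROW_OR_QUALIFIER_LENGTH} readable characters.
   */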
  public static byte[] randomRowOrQualifier(Random rand) {
    StringBuilder field = new StringBuilder();
    int fieldLen = MIN_ROW_OR_QUALIFIER_LENGTH
        + rand.nextInt(MAX_ROW_OR_QUALIFIER_LENGTH
            - MIN_ROW_OR_QUALIFIER_LENGTH + 1);
    for (int i = 0; i < fieldLen; ++i)
      field.append(randomReadableChar(rand));
    return field.toString().getBytes();
  }

  public static KeyValue randomKeyValue(Random rand) {
    return new KeyValue(randomRowOrQualifier(rand),
        COLUMN_FAMILY_NAME.getBytes(), randomRowOrQualifier(rand),
        randomValue(rand));
  }

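  // Verifies that the test does not leak resources (such as threads).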
  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}