/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

/**
 * Tests {@link HFile} cache-on-write functionality for the following block
 * types: data blocks, non-root index blocks, and Bloom filter blocks.
 */
@RunWith(Parameterized.class)
@Category(MediumTests.class)
public class TestCacheOnWrite {

  private static final Log LOG = LogFactory.getLog(TestCacheOnWrite.class);

  private static final HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU();
  private Configuration conf;
  private CacheConfig cacheConf;
  private FileSystem fs;
  private Random rand = new Random(12983177L);
  private Path storeFilePath;
  private BlockCache blockCache;
  private String testDescription;

  private final CacheOnWriteType cowType;
  private final Compression.Algorithm compress;
  private final BlockEncoderTestType encoderType;
  private final HFileDataBlockEncoder encoder;
  private final boolean cacheCompressedData;

  private static final int DATA_BLOCK_SIZE = 2048;
  private static final int NUM_KV = 25000;
  private static final int INDEX_BLOCK_SIZE = 512;
  private static final int BLOOM_BLOCK_SIZE = 4096;
  private static final BloomType BLOOM_TYPE = BloomType.ROWCOL;
  private static final int CKBYTES = 512;

  /** The number of valid key types possible in a store file */
  private static final int NUM_VALID_KEY_TYPES =
      KeyValue.Type.values().length - 2;

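  /**
   * The cache-on-write setting under test. Each value enables cache-on-write
   * for exactly one category of blocks through its configuration key, and
   * knows which {@link BlockType}s should then show up in the cache.
   */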
  private enum CacheOnWriteType {
    DATA_BLOCKS(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY,
        BlockType.DATA, BlockType.ENCODED_DATA),
    BLOOM_BLOCKS(CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY,
        BlockType.BLOOM_CHUNK),
    INDEX_BLOCKS(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY,
        BlockType.LEAF_INDEX, BlockType.INTERMEDIATE_INDEX);

    private final String confKey;
    private final BlockType blockType1;
    private final BlockType blockType2;

    private CacheOnWriteType(String confKey, BlockType blockType) {
      this(confKey, blockType, blockType);
    }

    private CacheOnWriteType(String confKey, BlockType blockType1,
        BlockType blockType2) {
      this.blockType1 = blockType1;
      this.blockType2 = blockType2;
      this.confKey = confKey;
    }

    public boolean shouldBeCached(BlockType blockType) {
      return blockType == blockType1 || blockType == blockType2;
    }

    public void modifyConf(Configuration conf) {
      for (CacheOnWriteType cowType : CacheOnWriteType.values()) {
        conf.setBoolean(cowType.confKey, cowType == this);
      }
    }
  }

  private static final DataBlockEncoding ENCODING_ALGO =
      DataBlockEncoding.PREFIX;

  /** Provides readable names for the three tested combinations of the no-op and encode flags */
  private enum BlockEncoderTestType {
    NO_BLOCK_ENCODING_NOOP(true, false),
    NO_BLOCK_ENCODING(false, false),
    BLOCK_ENCODING_EVERYWHERE(false, true);

    private final boolean noop;
    private final boolean encode;

    BlockEncoderTestType(boolean noop, boolean encode) {
      this.encode = encode;
      this.noop = noop;
    }

    public HFileDataBlockEncoder getEncoder() {
      return noop ? NoOpDataBlockEncoder.INSTANCE : new HFileDataBlockEncoderImpl(
          encode ? ENCODING_ALGO : DataBlockEncoding.NONE);
    }
  }

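  /**
   * Invoked by the Parameterized runner with one combination of cache-on-write
   * type, compression algorithm, encoder type, compressed-data caching flag,
   * and block cache implementation per run.
   */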
  public TestCacheOnWrite(CacheOnWriteType cowType, Compression.Algorithm compress,
      BlockEncoderTestType encoderType, boolean cacheCompressedData, BlockCache blockCache) {
    this.cowType = cowType;
    this.compress = compress;
    this.encoderType = encoderType;
    this.encoder = encoderType.getEncoder();
    this.cacheCompressedData = cacheCompressedData;
    this.blockCache = blockCache;
    testDescription = "[cacheOnWrite=" + cowType + ", compress=" + compress +
        ", encoderType=" + encoderType + ", cacheCompressedData=" + cacheCompressedData + "]";
    LOG.info(testDescription);
  }

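  /**
   * Returns the block cache implementations to run against: the default cache
   * from {@link CacheConfig}, an on-heap LRU cache, and a file-backed bucket
   * cache under the test data directory.
   */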
  private static List<BlockCache> getBlockCaches() throws IOException {
    Configuration conf = TEST_UTIL.getConfiguration();
    List<BlockCache> blockCaches = new ArrayList<BlockCache>();
    // default
    blockCaches.add(new CacheConfig(conf).getBlockCache());

    // memory
    BlockCache lru = new LruBlockCache(128 * 1024 * 1024, 64 * 1024, TEST_UTIL.getConfiguration());
    blockCaches.add(lru);

    // bucket cache
    FileSystem.get(conf).mkdirs(TEST_UTIL.getDataTestDir());
    int[] bucketSizes = { INDEX_BLOCK_SIZE, DATA_BLOCK_SIZE, BLOOM_BLOCK_SIZE, 64 * 1024 };
    BlockCache bucketCache =
        new BucketCache("file:" + TEST_UTIL.getDataTestDir() + "/bucket.data",
            128 * 1024 * 1024, 64 * 1024, bucketSizes, 5, 64 * 100, null);
    blockCaches.add(bucketCache);
    return blockCaches;
  }

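  /** Builds the full cross product of parameter values for the Parameterized runner. */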
  @Parameters
  public static Collection<Object[]> getParameters() throws IOException {
    List<Object[]> params = new ArrayList<Object[]>();
    for (BlockCache blockCache : getBlockCaches()) {
      for (CacheOnWriteType cowType : CacheOnWriteType.values()) {
        for (Compression.Algorithm compress : HBaseTestingUtility.COMPRESSION_ALGORITHMS) {
          for (BlockEncoderTestType encoderType : BlockEncoderTestType.values()) {
            for (boolean cacheCompressedData : new boolean[] { false, true }) {
              params.add(new Object[] { cowType, compress, encoderType, cacheCompressedData,
                  blockCache });
            }
          }
        }
      }
    }
    return params;
  }

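  /**
   * Applies this run's parameters to the shared configuration and builds a
   * {@link CacheConfig} that caches on write only the block types selected by
   * the current {@link CacheOnWriteType}.
   */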
  @Before
  public void setUp() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    conf.set("dfs.datanode.data.dir.perm", "700");
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, INDEX_BLOCK_SIZE);
    conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE,
        BLOOM_BLOCK_SIZE);
    conf.setBoolean(CacheConfig.CACHE_DATA_BLOCKS_COMPRESSED_KEY, cacheCompressedData);
    cowType.modifyConf(conf);
    fs = HFileSystem.get(conf);
    cacheConf =
        new CacheConfig(blockCache, true, true, cowType.shouldBeCached(BlockType.DATA),
        cowType.shouldBeCached(BlockType.LEAF_INDEX),
        cowType.shouldBeCached(BlockType.BLOOM_CHUNK), false, cacheCompressedData, true, false);
  }

  @After
  public void tearDown() {
    cacheConf = new CacheConfig(conf);
    blockCache = cacheConf.getBlockCache();
  }

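  /** Writes and then re-reads a store file, both without and with tags. */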
  @Test
  public void testStoreFileCacheOnWrite() throws IOException {
    testStoreFileCacheOnWriteInternals(false);
    testStoreFileCacheOnWriteInternals(true);
  }

  protected void testStoreFileCacheOnWriteInternals(boolean useTags) throws IOException {
    writeStoreFile(useTags);
    readStoreFile(useTags);
  }

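  /**
   * Reads the store file written by {@link #writeStoreFile(boolean)} block by
   * block and asserts that exactly the block types selected by the current
   * {@link CacheOnWriteType} were cached at write time.
   */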
  private void readStoreFile(boolean useTags) throws IOException {
    AbstractHFileReader reader;
    if (useTags) {
      reader = (HFileReaderV3) HFile.createReader(fs, storeFilePath, cacheConf, conf);
    } else {
      reader = (HFileReaderV2) HFile.createReader(fs, storeFilePath, cacheConf, conf);
    }
    LOG.info("HFile information: " + reader);
    HFileContext meta = new HFileContextBuilder().withCompression(compress)
        .withBytesPerCheckSum(CKBYTES).withChecksumType(ChecksumType.NULL)
        .withBlockSize(DATA_BLOCK_SIZE).withDataBlockEncoding(encoder.getDataBlockEncoding())
        .withIncludesTags(useTags).build();
    final boolean cacheBlocks = false;
    final boolean pread = false;
    HFileScanner scanner = reader.getScanner(cacheBlocks, pread);
    assertTrue(testDescription, scanner.seekTo());

    long offset = 0;
    HFileBlock prevBlock = null;
    EnumMap<BlockType, Integer> blockCountByType =
        new EnumMap<BlockType, Integer>(BlockType.class);

    DataBlockEncoding encodingInCache =
        encoderType.getEncoder().getDataBlockEncoding();
    while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
      long onDiskSize = -1;
      if (prevBlock != null) {
        onDiskSize = prevBlock.getNextBlockOnDiskSizeWithHeader();
      }
      // Flags: don't cache the block, use pread, this is not a compaction.
      // Also, pass null for expected block type to avoid checking it.
      HFileBlock block = reader.readBlock(offset, onDiskSize, false, true,
        false, true, null, encodingInCache);
      BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(),
          offset);
      HFileBlock fromCache = (HFileBlock) blockCache.getBlock(blockCacheKey, true, false, true);
      boolean isCached = fromCache != null;
      boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType());
      assertTrue("shouldBeCached: " + shouldBeCached + "\n" +
          "isCached: " + isCached + "\n" +
          "Test description: " + testDescription + "\n" +
          "block: " + block + "\n" +
          "encodingInCache: " + encodingInCache + "\n" +
          "blockCacheKey: " + blockCacheKey,
          shouldBeCached == isCached);
      if (isCached) {
        if (cacheConf.shouldCacheCompressed(fromCache.getBlockType().getCategory())) {
          if (compress != Compression.Algorithm.NONE) {
            assertFalse(fromCache.isUnpacked());
          }
          fromCache = fromCache.unpack(meta, reader.getUncachedBlockReader());
        } else {
          assertTrue(fromCache.isUnpacked());
        }
        // The block we cached at write-time and the block read from the file
        // should be identical.
        assertEquals(block.getChecksumType(), fromCache.getChecksumType());
        assertEquals(block.getBlockType(), fromCache.getBlockType());
        if (block.getBlockType() == BlockType.ENCODED_DATA) {
          assertEquals(block.getDataBlockEncodingId(), fromCache.getDataBlockEncodingId());
          assertEquals(block.getDataBlockEncoding(), fromCache.getDataBlockEncoding());
        }
        assertEquals(block.getOnDiskSizeWithHeader(), fromCache.getOnDiskSizeWithHeader());
        assertEquals(block.getOnDiskSizeWithoutHeader(), fromCache.getOnDiskSizeWithoutHeader());
        assertEquals(
          block.getUncompressedSizeWithoutHeader(), fromCache.getUncompressedSizeWithoutHeader());
      }
      prevBlock = block;
      offset += block.getOnDiskSizeWithHeader();
      BlockType bt = block.getBlockType();
      Integer count = blockCountByType.get(bt);
      blockCountByType.put(bt, (count == null ? 0 : count) + 1);
    }

    LOG.info("Block count by type: " + blockCountByType);
    String countByType = blockCountByType.toString();
    BlockType cachedDataBlockType =
        encoderType.encode ? BlockType.ENCODED_DATA : BlockType.DATA;
    // The expected counts below are deterministic: the random seed is fixed,
    // so the written data and the resulting block boundaries never change.
    if (useTags) {
      assertEquals("{" + cachedDataBlockType
          + "=1550, LEAF_INDEX=173, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=20}", countByType);
    } else {
      assertEquals("{" + cachedDataBlockType
          + "=1379, LEAF_INDEX=154, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=18}", countByType);
    }

    // Iterate over all the key-values in the HFile.
    while (scanner.next()) {
      Cell cell = scanner.getKeyValue();
    }
    reader.close();
  }

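  /**
   * Picks a random key type. Half of the generated keys are Puts; the rest are
   * drawn uniformly from the remaining valid types, skipping the Minimum and
   * Maximum marker types, which never appear in a store file.
   */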
  public static KeyValue.Type generateKeyType(Random rand) {
    if (rand.nextBoolean()) {
      // Let's make half of KVs puts.
      return KeyValue.Type.Put;
    } else {
      KeyValue.Type keyType =
          KeyValue.Type.values()[1 + rand.nextInt(NUM_VALID_KEY_TYPES)];
      if (keyType == KeyValue.Type.Minimum || keyType == KeyValue.Type.Maximum) {
        throw new RuntimeException("Generated an invalid key type: " + keyType
            + ". Probably the layout of KeyValue.Type has changed.");
      }
      return keyType;
    }
  }

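  /**
   * Writes a store file with {@link #NUM_KV} random key-values using this
   * run's compression, encoding, and Bloom filter settings, and records its
   * path for {@link #readStoreFile(boolean)}.
   */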
  public void writeStoreFile(boolean useTags) throws IOException {
    if (useTags) {
      TEST_UTIL.getConfiguration().setInt(HFile.FORMAT_VERSION_KEY, 3);
    } else {
      TEST_UTIL.getConfiguration().setInt(HFile.FORMAT_VERSION_KEY, 2);
    }
    Path storeFileParentDir = new Path(TEST_UTIL.getDataTestDir(),
        "test_cache_on_write");
    HFileContext meta = new HFileContextBuilder().withCompression(compress)
        .withBytesPerCheckSum(CKBYTES).withChecksumType(ChecksumType.NULL)
        .withBlockSize(DATA_BLOCK_SIZE).withDataBlockEncoding(encoder.getDataBlockEncoding())
        .withIncludesTags(useTags).build();
    StoreFile.Writer sfw = new StoreFile.WriterBuilder(conf, cacheConf, fs)
        .withOutputDir(storeFileParentDir).withComparator(KeyValue.COMPARATOR)
        .withFileContext(meta)
        .withBloomType(BLOOM_TYPE).withMaxKeyCount(NUM_KV).build();

    final int rowLen = 32;
    for (int i = 0; i < NUM_KV; ++i) {
      byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i);
      byte[] v = TestHFileWriterV2.randomValue(rand);
      int cfLen = rand.nextInt(k.length - rowLen + 1);
      KeyValue kv;
      if (useTags) {
        Tag t = new Tag((byte) 1, "visibility");
        List<Tag> tagList = new ArrayList<Tag>();
        tagList.add(t);
        kv = new KeyValue(
            k, 0, rowLen,
            k, rowLen, cfLen,
            k, rowLen + cfLen, k.length - rowLen - cfLen,
            rand.nextLong(),
            generateKeyType(rand),
            v, 0, v.length, tagList);
      } else {
        kv = new KeyValue(
            k, 0, rowLen,
            k, rowLen, cfLen,
            k, rowLen + cfLen, k.length - rowLen - cfLen,
            rand.nextLong(),
            generateKeyType(rand),
            v, 0, v.length);
      }
      sfw.append(kv);
    }

    sfw.close();
    storeFilePath = sfw.getPath();
  }

  @Test
  public void testNotCachingDataBlocksDuringCompaction() throws IOException {
    testNotCachingDataBlocksDuringCompactionInternals(false);
    testNotCachingDataBlocksDuringCompactionInternals(true);
  }

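  /**
   * Writes several store files through a test region, then compacts and
   * verifies that no data blocks, plain or encoded, were added to the block
   * cache during the compaction.
   */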
  protected void testNotCachingDataBlocksDuringCompactionInternals(boolean useTags)
      throws IOException {
    if (useTags) {
      TEST_UTIL.getConfiguration().setInt(HFile.FORMAT_VERSION_KEY, 3);
    } else {
      TEST_UTIL.getConfiguration().setInt(HFile.FORMAT_VERSION_KEY, 2);
    }
    // TODO: need to change this test if we add a cache size threshold for
    // compactions, or if we implement some other kind of intelligent logic for
    // deciding what blocks to cache-on-write on compaction.
    final String table = "CompactionCacheOnWrite";
    final String cf = "myCF";
    final byte[] cfBytes = Bytes.toBytes(cf);
    final int maxVersions = 3;
    HRegion region = TEST_UTIL.createTestRegion(table,
        new HColumnDescriptor(cf)
            .setCompressionType(compress)
            .setBloomFilterType(BLOOM_TYPE)
            .setMaxVersions(maxVersions)
            .setDataBlockEncoding(encoder.getDataBlockEncoding())
    );
    int rowIdx = 0;
    long ts = EnvironmentEdgeManager.currentTime();
    for (int iFile = 0; iFile < 5; ++iFile) {
      for (int iRow = 0; iRow < 500; ++iRow) {
        String rowStr = "" + (rowIdx * rowIdx * rowIdx) + "row" + iFile + "_" +
            iRow;
        Put p = new Put(Bytes.toBytes(rowStr));
        ++rowIdx;
        for (int iCol = 0; iCol < 10; ++iCol) {
          String qualStr = "col" + iCol;
          String valueStr = "value_" + rowStr + "_" + qualStr;
          for (int iTS = 0; iTS < 5; ++iTS) {
            if (useTags) {
              Tag t = new Tag((byte) 1, "visibility");
              Tag[] tags = new Tag[] { t };
              // Use an increasing timestamp, as in the no-tag branch, so each
              // iteration writes a distinct version rather than overwriting
              // the previous one.
              KeyValue kv = new KeyValue(Bytes.toBytes(rowStr), cfBytes, Bytes.toBytes(qualStr),
                  ts++, Bytes.toBytes(valueStr), tags);
              p.add(kv);
            } else {
              p.add(cfBytes, Bytes.toBytes(qualStr), ts++, Bytes.toBytes(valueStr));
            }
          }
        }
        p.setDurability(Durability.ASYNC_WAL);
        region.put(p);
      }
      region.flushcache();
    }
    LruBlockCache blockCache =
        (LruBlockCache) new CacheConfig(conf).getBlockCache();
    blockCache.clearCache();
    assertEquals(0, blockCache.getBlockTypeCountsForTest().size());
    region.compactStores();
    LOG.debug("compactStores() returned");

    Map<BlockType, Integer> blockTypesInCache =
        blockCache.getBlockTypeCountsForTest();
    LOG.debug("Block types in cache: " + blockTypesInCache);
    assertNull(blockTypesInCache.get(BlockType.ENCODED_DATA));
    assertNull(blockTypesInCache.get(BlockType.DATA));
    region.close();
    blockCache.shutdown();
  }
}