/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumMap;
import java.util.List;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import com.google.common.collect.Lists;

/**
 * Tests {@link HFile} cache-on-write functionality for the following block
 * types: data blocks, non-root index blocks, and Bloom filter blocks.
 */
@RunWith(Parameterized.class)
@Category(MediumTests.class)
public class TestCacheOnWrite {

  private static final Log LOG = LogFactory.getLog(TestCacheOnWrite.class);

  private static final HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU();
  private Configuration conf;
  private CacheConfig cacheConf;
  private FileSystem fs;
  private Random rand = new Random(12983177L);
  private Path storeFilePath;
  private BlockCache blockCache;
  private String testDescription;

  private final CacheOnWriteType cowType;
  private final Compression.Algorithm compress;
  private final boolean cacheCompressedData;

  private static final int DATA_BLOCK_SIZE = 2048;
  private static final int NUM_KV = 25000;
  private static final int INDEX_BLOCK_SIZE = 512;
  private static final int BLOOM_BLOCK_SIZE = 4096;
  private static final BloomType BLOOM_TYPE = BloomType.ROWCOL;
  private static final int CKBYTES = 512;

  /** The number of valid key types possible in a store file */
  private static final int NUM_VALID_KEY_TYPES =
      KeyValue.Type.values().length - 2;

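  /**
   * Each cache-on-write mode pairs the configuration key that enables it with the block type(s)
   * it is expected to add to the block cache while a file is being written.
   */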
  private static enum CacheOnWriteType {
    DATA_BLOCKS(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY,
        BlockType.DATA, BlockType.ENCODED_DATA),
    BLOOM_BLOCKS(CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY,
        BlockType.BLOOM_CHUNK),
    INDEX_BLOCKS(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY,
        BlockType.LEAF_INDEX, BlockType.INTERMEDIATE_INDEX);

    private final String confKey;
    private final BlockType blockType1;
    private final BlockType blockType2;

    private CacheOnWriteType(String confKey, BlockType blockType) {
      this(confKey, blockType, blockType);
    }

    private CacheOnWriteType(String confKey, BlockType blockType1,
        BlockType blockType2) {
      this.blockType1 = blockType1;
      this.blockType2 = blockType2;
      this.confKey = confKey;
    }

    public boolean shouldBeCached(BlockType blockType) {
      return blockType == blockType1 || blockType == blockType2;
    }

    public void modifyConf(Configuration conf) {
      for (CacheOnWriteType cowType : CacheOnWriteType.values()) {
        conf.setBoolean(cowType.confKey, cowType == this);
      }
    }
  }

  public TestCacheOnWrite(CacheOnWriteType cowType, Compression.Algorithm compress,
      boolean cacheCompressedData, BlockCache blockCache) {
    this.cowType = cowType;
    this.compress = compress;
    this.cacheCompressedData = cacheCompressedData;
    this.blockCache = blockCache;
    testDescription = "[cacheOnWrite=" + cowType + ", compress=" + compress +
        ", cacheCompressedData=" + cacheCompressedData +
        ", blockCache=" + blockCache.getClass().getSimpleName() + "]";
    LOG.info(testDescription);
  }

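  /**
   * Block cache implementations this test is parameterized over: the default cache obtained from
   * CacheConfig, a standalone LruBlockCache, and an off-heap BucketCache.
   */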
  private static List<BlockCache> getBlockCaches() throws IOException {
    Configuration conf = TEST_UTIL.getConfiguration();
    List<BlockCache> blockcaches = new ArrayList<BlockCache>();
    // default
    blockcaches.add(new CacheConfig(conf).getBlockCache());

    // memory
    BlockCache lru = new LruBlockCache(128 * 1024 * 1024, 64 * 1024, TEST_UTIL.getConfiguration());
    blockcaches.add(lru);

    // bucket cache
    FileSystem.get(conf).mkdirs(TEST_UTIL.getDataTestDir());
    int[] bucketSizes =
        { INDEX_BLOCK_SIZE, DATA_BLOCK_SIZE, BLOOM_BLOCK_SIZE, 64 * 1024, 128 * 1024 };
    BlockCache bucketcache =
        new BucketCache("offheap", 128 * 1024 * 1024, 64 * 1024, bucketSizes, 5, 64 * 100, null);
    blockcaches.add(bucketcache);
    return blockcaches;
  }

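  /**
   * Runs every combination of cache-on-write mode, compression algorithm, compressed-data caching
   * flag, and block cache implementation.
   */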
  @Parameters
  public static Collection<Object[]> getParameters() throws IOException {
    List<Object[]> params = new ArrayList<Object[]>();
    for (BlockCache blockCache : getBlockCaches()) {
      for (CacheOnWriteType cowType : CacheOnWriteType.values()) {
        for (Compression.Algorithm compress : HBaseTestingUtility.COMPRESSION_ALGORITHMS) {
          for (boolean cacheCompressedData : new boolean[] { false, true }) {
            params.add(new Object[] { cowType, compress, cacheCompressedData, blockCache });
          }
        }
      }
    }
    return params;
  }

  private void clearBlockCache(BlockCache blockCache) throws InterruptedException {
    if (blockCache instanceof LruBlockCache) {
      ((LruBlockCache) blockCache).clearCache();
    } else {
      // BucketCache may not return all cached blocks (some may still be in its write queue),
      // so keep evicting until the block count drops to zero.
      for (int clearCount = 0; blockCache.getBlockCount() > 0; clearCount++) {
        if (clearCount > 0) {
          LOG.warn("clear block cache " + blockCache + " " + clearCount + " times, "
              + blockCache.getBlockCount() + " blocks remaining");
          Thread.sleep(10);
        }
        for (CachedBlock block : Lists.newArrayList(blockCache)) {
          blockCache.evictBlocksByHfileName(block.getFilename());
        }
      }
    }
  }

  @Before
  public void setUp() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    conf.set("dfs.datanode.data.dir.perm", "700");
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, INDEX_BLOCK_SIZE);
    conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE,
        BLOOM_BLOCK_SIZE);
    conf.setBoolean(CacheConfig.CACHE_DATA_BLOCKS_COMPRESSED_KEY, cacheCompressedData);
    cowType.modifyConf(conf);
    fs = HFileSystem.get(conf);
    // Point the global block cache at the parameterized implementation so that any CacheConfig
    // built elsewhere in the test (e.g. by the region in the compaction test) shares this cache.
    CacheConfig.GLOBAL_BLOCK_CACHE_INSTANCE = blockCache;
    cacheConf =
        new CacheConfig(blockCache, true, true, cowType.shouldBeCached(BlockType.DATA),
        cowType.shouldBeCached(BlockType.LEAF_INDEX),
        cowType.shouldBeCached(BlockType.BLOOM_CHUNK), false, cacheCompressedData, false, false);
  }

  @After
  public void tearDown() throws IOException, InterruptedException {
    clearBlockCache(blockCache);
  }

  @AfterClass
  public static void afterClass() throws IOException {
    TEST_UTIL.cleanupTestDir();
  }

  private void testStoreFileCacheOnWriteInternals(boolean useTags) throws IOException {
    writeStoreFile(useTags);
    readStoreFile(useTags);
  }

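  /**
   * Reads the store file written by {@link #writeStoreFile(boolean)} block by block and verifies
   * that exactly the block types selected by the current cache-on-write mode are in the cache.
   */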
  private void readStoreFile(boolean useTags) throws IOException {
    AbstractHFileReader reader;
    if (useTags) {
      reader = (HFileReaderV3) HFile.createReader(fs, storeFilePath, cacheConf, conf);
    } else {
      reader = (HFileReaderV2) HFile.createReader(fs, storeFilePath, cacheConf, conf);
    }
    LOG.info("HFile information: " + reader);
    HFileContext meta = new HFileContextBuilder().withCompression(compress)
        .withBytesPerCheckSum(CKBYTES).withChecksumType(ChecksumType.NULL)
        .withBlockSize(DATA_BLOCK_SIZE)
        .withDataBlockEncoding(NoOpDataBlockEncoder.INSTANCE.getDataBlockEncoding())
        .withIncludesTags(useTags).build();
    final boolean cacheBlocks = false;
    final boolean pread = false;
    HFileScanner scanner = reader.getScanner(cacheBlocks, pread);
    assertTrue(testDescription, scanner.seekTo());

    long offset = 0;
    HFileBlock prevBlock = null;
    EnumMap<BlockType, Integer> blockCountByType =
        new EnumMap<BlockType, Integer>(BlockType.class);

    DataBlockEncoding encodingInCache = NoOpDataBlockEncoder.INSTANCE.getDataBlockEncoding();
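    // Walk every block of the file, from offset 0 up to the start of the load-on-open section
    // recorded in the trailer, and compare what is on disk with what ended up in the cache.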
    while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
      long onDiskSize = -1;
      if (prevBlock != null) {
        onDiskSize = prevBlock.getNextBlockOnDiskSizeWithHeader();
      }
      // Flags: don't cache the block, use pread, this is not a compaction.
      // Also, pass null for expected block type to avoid checking it.
      HFileBlock block = reader.readBlock(offset, onDiskSize, false, true,
          false, true, null);
      BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(),
          offset, encodingInCache, block.getBlockType());
      HFileBlock fromCache = (HFileBlock) blockCache.getBlock(blockCacheKey, true, false, true);
      boolean isCached = fromCache != null;
      boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType());
      assertTrue("shouldBeCached: " + shouldBeCached + "\n" +
          "isCached: " + isCached + "\n" +
          "Test description: " + testDescription + "\n" +
          "block: " + block + "\n" +
          "encodingInCache: " + encodingInCache + "\n" +
          "blockCacheKey: " + blockCacheKey,
          shouldBeCached == isCached);
      if (isCached) {
        if (cacheConf.shouldCacheCompressed(fromCache.getBlockType().getCategory())) {
          if (compress != Compression.Algorithm.NONE) {
            assertFalse(fromCache.isUnpacked());
          }
          fromCache = fromCache.unpack(meta, reader.getUncachedBlockReader());
        } else {
          assertTrue(fromCache.isUnpacked());
        }
        // The block we cached at write time and the block read back from the file should be
        // identical.
        assertEquals(block.getChecksumType(), fromCache.getChecksumType());
        assertEquals(block.getBlockType(), fromCache.getBlockType());
        assertNotEquals(block.getBlockType(), BlockType.ENCODED_DATA);
        assertEquals(block.getOnDiskSizeWithHeader(), fromCache.getOnDiskSizeWithHeader());
        assertEquals(block.getOnDiskSizeWithoutHeader(), fromCache.getOnDiskSizeWithoutHeader());
        assertEquals(
          block.getUncompressedSizeWithoutHeader(), fromCache.getUncompressedSizeWithoutHeader());
      }
      prevBlock = block;
      offset += block.getOnDiskSizeWithHeader();
      BlockType bt = block.getBlockType();
      Integer count = blockCountByType.get(bt);
      blockCountByType.put(bt, (count == null ? 0 : count) + 1);
    }

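    // The exact block counts asserted below are deterministic: the KeyValues come from a Random
    // seeded with a fixed value, NUM_KV is fixed, and the block sizes are pinned above.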
    LOG.info("Block count by type: " + blockCountByType);
    String countByType = blockCountByType.toString();
    if (useTags) {
      assertEquals("{" + BlockType.DATA
          + "=2663, LEAF_INDEX=297, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=34}", countByType);
    } else {
      assertEquals("{" + BlockType.DATA
          + "=2498, LEAF_INDEX=278, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=31}", countByType);
    }

    // Iterate over all the remaining key/values in the HFile.
    while (scanner.next()) {
      scanner.getKeyValue();
    }
    reader.close();
  }

  public static KeyValue.Type generateKeyType(Random rand) {
    if (rand.nextBoolean()) {
      // Let's make half of KVs puts.
      return KeyValue.Type.Put;
    } else {
      KeyValue.Type keyType = KeyValue.Type.values()[1 + rand.nextInt(NUM_VALID_KEY_TYPES)];
      if (keyType == KeyValue.Type.Minimum || keyType == KeyValue.Type.Maximum) {
        throw new RuntimeException("Generated an invalid key type: " + keyType + ". "
            + "Probably the layout of KeyValue.Type has changed.");
      }
      return keyType;
    }
  }

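  /**
   * Writes a store file of NUM_KV randomly generated KeyValues (optionally carrying a tag)
   * through a StoreFile.Writer built with the cache-on-write CacheConfig, so that caching
   * happens while the file is being written.
   */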
  private void writeStoreFile(boolean useTags) throws IOException {
    if (useTags) {
      TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3);
    } else {
      TEST_UTIL.getConfiguration().setInt("hfile.format.version", 2);
    }
    Path storeFileParentDir = new Path(TEST_UTIL.getDataTestDir(),
        "test_cache_on_write");
    HFileContext meta = new HFileContextBuilder().withCompression(compress)
        .withBytesPerCheckSum(CKBYTES).withChecksumType(ChecksumType.NULL)
        .withBlockSize(DATA_BLOCK_SIZE)
        .withDataBlockEncoding(NoOpDataBlockEncoder.INSTANCE.getDataBlockEncoding())
        .withIncludesTags(useTags).build();
    StoreFile.Writer sfw = new StoreFile.WriterBuilder(conf, cacheConf, fs)
        .withOutputDir(storeFileParentDir).withComparator(KeyValue.COMPARATOR)
        .withFileContext(meta)
        .withBloomType(BLOOM_TYPE).withMaxKeyCount(NUM_KV).build();
    byte[] cf = Bytes.toBytes("fam");
    for (int i = 0; i < NUM_KV; ++i) {
      byte[] row = TestHFileWriterV2.randomOrderedKey(rand, i);
      byte[] qualifier = TestHFileWriterV2.randomRowOrQualifier(rand);
      byte[] value = TestHFileWriterV2.randomValue(rand);
      KeyValue kv;
      if (useTags) {
        Tag t = new Tag((byte) 1, "visibility");
        List<Tag> tagList = new ArrayList<Tag>();
        tagList.add(t);
        Tag[] tags = new Tag[1];
        tags[0] = t;
        kv =
            new KeyValue(row, 0, row.length, cf, 0, cf.length, qualifier, 0, qualifier.length,
                rand.nextLong(), generateKeyType(rand), value, 0, value.length, tagList);
      } else {
        kv =
            new KeyValue(row, 0, row.length, cf, 0, cf.length, qualifier, 0, qualifier.length,
                rand.nextLong(), generateKeyType(rand), value, 0, value.length);
      }
      sfw.append(kv);
    }

    sfw.close();
    storeFilePath = sfw.getPath();
  }

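  /**
   * Fills a test region over several memstore flushes, clears the block cache, and then checks
   * that a compaction does not cache any DATA or ENCODED_DATA blocks, regardless of the
   * cache-on-write settings.
   */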
  private void testNotCachingDataBlocksDuringCompactionInternals(boolean useTags)
      throws IOException, InterruptedException {
    if (useTags) {
      TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3);
    } else {
      TEST_UTIL.getConfiguration().setInt("hfile.format.version", 2);
    }
    // TODO: need to change this test if we add a cache size threshold for
    // compactions, or if we implement some other kind of intelligent logic for
    // deciding what blocks to cache-on-write on compaction.
    final String table = "CompactionCacheOnWrite";
    final String cf = "myCF";
    final byte[] cfBytes = Bytes.toBytes(cf);
    final int maxVersions = 3;
    HRegion region = TEST_UTIL.createTestRegion(table,
        new HColumnDescriptor(cf)
            .setCompressionType(compress)
            .setBloomFilterType(BLOOM_TYPE)
            .setMaxVersions(maxVersions)
            .setDataBlockEncoding(NoOpDataBlockEncoder.INSTANCE.getDataBlockEncoding())
    );
    int rowIdx = 0;
    long ts = EnvironmentEdgeManager.currentTimeMillis();
    for (int iFile = 0; iFile < 5; ++iFile) {
      for (int iRow = 0; iRow < 500; ++iRow) {
        String rowStr = "" + (rowIdx * rowIdx * rowIdx) + "row" + iFile + "_" + iRow;
        Put p = new Put(Bytes.toBytes(rowStr));
        ++rowIdx;
        for (int iCol = 0; iCol < 10; ++iCol) {
          String qualStr = "col" + iCol;
          String valueStr = "value_" + rowStr + "_" + qualStr;
          for (int iTS = 0; iTS < 5; ++iTS) {
            if (useTags) {
              Tag t = new Tag((byte) 1, "visibility");
              Tag[] tags = new Tag[1];
              tags[0] = t;
              KeyValue kv = new KeyValue(Bytes.toBytes(rowStr), cfBytes, Bytes.toBytes(qualStr),
                  HConstants.LATEST_TIMESTAMP, Bytes.toBytes(valueStr), tags);
              p.add(kv);
            } else {
              p.add(cfBytes, Bytes.toBytes(qualStr), ts++, Bytes.toBytes(valueStr));
            }
          }
        }
        p.setDurability(Durability.ASYNC_WAL);
        region.put(p);
      }
      // Each iteration of the outer loop flushes a new store file, so the compaction below has
      // several files to work on.
      region.flushcache();
    }
    clearBlockCache(blockCache);
    assertEquals(0, blockCache.getBlockCount());
    region.compactStores();
    LOG.debug("compactStores() returned");

    // After the compaction the cache must not contain any data blocks; other block types are
    // allowed.
    for (CachedBlock block : blockCache) {
      assertNotEquals(BlockType.ENCODED_DATA, block.getBlockType());
      assertNotEquals(BlockType.DATA, block.getBlockType());
    }
    region.close();
  }

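  // Each public test method runs its internal variant twice: first without tags (HFile v2) and
  // then with tags (HFile v3).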
  @Test
  public void testStoreFileCacheOnWrite() throws IOException {
    testStoreFileCacheOnWriteInternals(false);
    testStoreFileCacheOnWriteInternals(true);
  }

  @Test
  public void testNotCachingDataBlocksDuringCompaction() throws IOException, InterruptedException {
    testNotCachingDataBlocksDuringCompactionInternals(false);
    testNotCachingDataBlocksDuringCompactionInternals(true);
  }
}