/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

/**
 * Tests {@link HFile} cache-on-write functionality for the following block
 * types: data blocks, non-root index blocks, and Bloom filter blocks.
 */
@RunWith(Parameterized.class)
@Category(MediumTests.class)
public class TestCacheOnWrite {

  private static final Log LOG = LogFactory.getLog(TestCacheOnWrite.class);

  private static final HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU();
  private Configuration conf;
  private CacheConfig cacheConf;
  private FileSystem fs;
  private Random rand = new Random(12983177L);
  private Path storeFilePath;
  private BlockCache blockCache;
  private String testDescription;

  private final CacheOnWriteType cowType;
  private final Compression.Algorithm compress;
  private final BlockEncoderTestType encoderType;
  private final HFileDataBlockEncoder encoder;
  private final boolean cacheCompressedData;

  private static final int DATA_BLOCK_SIZE = 2048;
  private static final int NUM_KV = 25000;
  private static final int INDEX_BLOCK_SIZE = 512;
  private static final int BLOOM_BLOCK_SIZE = 4096;
  private static final BloomType BLOOM_TYPE = BloomType.ROWCOL;
  private static final int CKBYTES = 512;

  /** The number of valid key types possible in a store file */
  private static final int NUM_VALID_KEY_TYPES =
      KeyValue.Type.values().length - 2;

  private static enum CacheOnWriteType {
    DATA_BLOCKS(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY,
        BlockType.DATA, BlockType.ENCODED_DATA),
    BLOOM_BLOCKS(CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY,
        BlockType.BLOOM_CHUNK),
    INDEX_BLOCKS(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY,
        BlockType.LEAF_INDEX, BlockType.INTERMEDIATE_INDEX);

    private final String confKey;
    private final BlockType blockType1;
    private final BlockType blockType2;

    private CacheOnWriteType(String confKey, BlockType blockType) {
      this(confKey, blockType, blockType);
    }

    private CacheOnWriteType(String confKey, BlockType blockType1,
        BlockType blockType2) {
      this.blockType1 = blockType1;
      this.blockType2 = blockType2;
      this.confKey = confKey;
    }

    public boolean shouldBeCached(BlockType blockType) {
      return blockType == blockType1 || blockType == blockType2;
    }

    public void modifyConf(Configuration conf) {
      for (CacheOnWriteType cowType : CacheOnWriteType.values()) {
        conf.setBoolean(cowType.confKey, cowType == this);
      }
    }

  }

  private static final DataBlockEncoding ENCODING_ALGO =
      DataBlockEncoding.PREFIX;

  /** Provides fancy names for three combinations of two booleans */
  private static enum BlockEncoderTestType {
    NO_BLOCK_ENCODING_NOOP(true, false),
    NO_BLOCK_ENCODING(false, false),
    BLOCK_ENCODING_EVERYWHERE(false, true);

    private final boolean noop;
    private final boolean encode;

    BlockEncoderTestType(boolean noop, boolean encode) {
      this.encode = encode;
      this.noop = noop;
    }

    public HFileDataBlockEncoder getEncoder() {
      return noop ? NoOpDataBlockEncoder.INSTANCE : new HFileDataBlockEncoderImpl(
        encode ? ENCODING_ALGO : DataBlockEncoding.NONE);
    }
  }

  public TestCacheOnWrite(CacheOnWriteType cowType, Compression.Algorithm compress,
      BlockEncoderTestType encoderType, boolean cacheCompressedData) {
    this.cowType = cowType;
    this.compress = compress;
    this.encoderType = encoderType;
    this.encoder = encoderType.getEncoder();
    this.cacheCompressedData = cacheCompressedData;
    testDescription = "[cacheOnWrite=" + cowType + ", compress=" + compress +
        ", encoderType=" + encoderType + ", cacheCompressedData=" + cacheCompressedData + "]";
    System.out.println(testDescription);
  }

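  /**
   * Builds the parameter matrix: each cache-on-write block type is crossed with each
   * compression algorithm in HBaseTestingUtility.COMPRESSION_ALGORITHMS, each block-encoder
   * variant, and both settings of cacheCompressedData.
   */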
  @Parameters
  public static Collection<Object[]> getParameters() {
    List<Object[]> cowTypes = new ArrayList<Object[]>();
    for (CacheOnWriteType cowType : CacheOnWriteType.values()) {
      for (Compression.Algorithm compress :
           HBaseTestingUtility.COMPRESSION_ALGORITHMS) {
        for (BlockEncoderTestType encoderType :
             BlockEncoderTestType.values()) {
          for (boolean cacheCompressedData : new boolean[] { false, true }) {
            cowTypes.add(new Object[] { cowType, compress, encoderType, cacheCompressedData });
          }
        }
      }
    }
    return cowTypes;
  }

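  /**
   * Sets the HFile format version, index and Bloom block sizes, and the cache-on-write flags
   * for the parameterized block type, then creates the cache config and obtains the block
   * cache used by the assertions.
   */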
  @Before
  public void setUp() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    this.conf.set("dfs.datanode.data.dir.perm", "700");
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, INDEX_BLOCK_SIZE);
    conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE,
        BLOOM_BLOCK_SIZE);
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY,
        cowType.shouldBeCached(BlockType.DATA));
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY,
        cowType.shouldBeCached(BlockType.LEAF_INDEX));
    conf.setBoolean(CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY,
        cowType.shouldBeCached(BlockType.BLOOM_CHUNK));
    conf.setBoolean(CacheConfig.CACHE_DATA_BLOCKS_COMPRESSED_KEY, cacheCompressedData);
    cowType.modifyConf(conf);
    fs = HFileSystem.get(conf);
    cacheConf = new CacheConfig(conf);
    blockCache = cacheConf.getBlockCache();
  }

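  /**
   * Re-creates the cache config and block cache reference after each parameterized run.
   */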
  @After
  public void tearDown() {
    cacheConf = new CacheConfig(conf);
    blockCache = cacheConf.getBlockCache();
  }

  @Test
  public void testStoreFileCacheOnWrite() throws IOException {
    testStoreFileCacheOnWriteInternals(false);
    testStoreFileCacheOnWriteInternals(true);
  }

  protected void testStoreFileCacheOnWriteInternals(boolean useTags) throws IOException {
    writeStoreFile(useTags);
    readStoreFile(useTags);
  }

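  /**
   * Re-reads the store file written by {@link #writeStoreFile(boolean)} and, for every block
   * before the load-on-open section, asserts that the block is in the cache exactly when the
   * current cache-on-write setting says it should be, and that the cached copy matches the
   * block read back from disk.
   */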
  private void readStoreFile(boolean useTags) throws IOException {
    AbstractHFileReader reader;
    if (useTags) {
      reader = (HFileReaderV3) HFile.createReader(fs, storeFilePath, cacheConf, conf);
    } else {
      reader = (HFileReaderV2) HFile.createReader(fs, storeFilePath, cacheConf, conf);
    }
    LOG.info("HFile information: " + reader);
    HFileContext meta = new HFileContextBuilder().withCompression(compress)
      .withBytesPerCheckSum(CKBYTES).withChecksumType(ChecksumType.NULL)
      .withBlockSize(DATA_BLOCK_SIZE).withDataBlockEncoding(encoder.getDataBlockEncoding())
      .withIncludesTags(useTags).build();
    final boolean cacheBlocks = false;
    final boolean pread = false;
    HFileScanner scanner = reader.getScanner(cacheBlocks, pread);
    assertTrue(testDescription, scanner.seekTo());

    long offset = 0;
    HFileBlock prevBlock = null;
    EnumMap<BlockType, Integer> blockCountByType =
        new EnumMap<BlockType, Integer>(BlockType.class);

    DataBlockEncoding encodingInCache =
        encoderType.getEncoder().getDataBlockEncoding();
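    // Walk every block that precedes the load-on-open section, reading each one directly
    // from the file and comparing it with whatever the block cache holds for that offset.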
    while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
      long onDiskSize = -1;
      if (prevBlock != null) {
        onDiskSize = prevBlock.getNextBlockOnDiskSizeWithHeader();
      }
      // Flags: don't cache the block, use pread, this is not a compaction.
      // Also, pass null for expected block type to avoid checking it.
      HFileBlock block = reader.readBlock(offset, onDiskSize, false, true,
          false, true, null);
      BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(),
          offset, encodingInCache, block.getBlockType());
      HFileBlock fromCache = (HFileBlock) blockCache.getBlock(blockCacheKey, true, false, true);
      boolean isCached = fromCache != null;
      boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType());
      assertTrue("shouldBeCached: " + shouldBeCached + "\n" +
          "isCached: " + isCached + "\n" +
          "Test description: " + testDescription + "\n" +
          "block: " + block + "\n" +
          "encodingInCache: " + encodingInCache + "\n" +
          "blockCacheKey: " + blockCacheKey,
          shouldBeCached == isCached);
      if (isCached) {
        if (cacheConf.shouldCacheCompressed(fromCache.getBlockType().getCategory())) {
          if (compress != Compression.Algorithm.NONE) {
            assertFalse(fromCache.isUnpacked());
          }
          fromCache = fromCache.unpack(meta, reader.getUncachedBlockReader());
        } else {
          assertTrue(fromCache.isUnpacked());
        }
        // block we cached at write-time and block read from file should be identical
        assertEquals(block.getChecksumType(), fromCache.getChecksumType());
        assertEquals(block.getBlockType(), fromCache.getBlockType());
        if (block.getBlockType() == BlockType.ENCODED_DATA) {
          assertEquals(block.getDataBlockEncodingId(), fromCache.getDataBlockEncodingId());
          assertEquals(block.getDataBlockEncoding(), fromCache.getDataBlockEncoding());
        }
        assertEquals(block.getOnDiskSizeWithHeader(), fromCache.getOnDiskSizeWithHeader());
        assertEquals(block.getOnDiskSizeWithoutHeader(), fromCache.getOnDiskSizeWithoutHeader());
        assertEquals(
            block.getUncompressedSizeWithoutHeader(), fromCache.getUncompressedSizeWithoutHeader());
      }
      prevBlock = block;
      offset += block.getOnDiskSizeWithHeader();
      BlockType bt = block.getBlockType();
      Integer count = blockCountByType.get(bt);
      blockCountByType.put(bt, (count == null ? 0 : count) + 1);
    }

    LOG.info("Block count by type: " + blockCountByType);
    String countByType = blockCountByType.toString();
    BlockType cachedDataBlockType =
        encoderType.encode ? BlockType.ENCODED_DATA : BlockType.DATA;
    if (useTags) {
      assertEquals("{" + cachedDataBlockType
          + "=1550, LEAF_INDEX=173, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=20}", countByType);
    } else {
      assertEquals("{" + cachedDataBlockType
          + "=1379, LEAF_INDEX=154, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=18}", countByType);
    }
    reader.close();
  }

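  /**
   * Picks a random KeyValue type: half of the generated cells are Puts, the rest are drawn
   * uniformly from the remaining valid key types (excluding Minimum and Maximum).
   */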
  public static KeyValue.Type generateKeyType(Random rand) {
    if (rand.nextBoolean()) {
      // Let's make half of KVs puts.
      return KeyValue.Type.Put;
    } else {
      KeyValue.Type keyType =
          KeyValue.Type.values()[1 + rand.nextInt(NUM_VALID_KEY_TYPES)];
      if (keyType == KeyValue.Type.Minimum || keyType == KeyValue.Type.Maximum) {
        throw new RuntimeException("Generated an invalid key type: " + keyType
            + ". Probably the layout of KeyValue.Type has changed.");
      }
      return keyType;
    }
  }

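  /**
   * Writes a store file of NUM_KV random KeyValues (optionally carrying a visibility tag),
   * using the parameterized compression, encoding and Bloom filter settings; cache-on-write
   * populates the block cache as a side effect of the write.
   */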
  public void writeStoreFile(boolean useTags) throws IOException {
    if (useTags) {
      TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3);
    } else {
      TEST_UTIL.getConfiguration().setInt("hfile.format.version", 2);
    }
    Path storeFileParentDir = new Path(TEST_UTIL.getDataTestDir(),
        "test_cache_on_write");
    HFileContext meta = new HFileContextBuilder().withCompression(compress)
        .withBytesPerCheckSum(CKBYTES).withChecksumType(ChecksumType.NULL)
        .withBlockSize(DATA_BLOCK_SIZE).withDataBlockEncoding(encoder.getDataBlockEncoding())
        .withIncludesTags(useTags).build();
    StoreFile.Writer sfw = new StoreFile.WriterBuilder(conf, cacheConf, fs)
        .withOutputDir(storeFileParentDir).withComparator(KeyValue.COMPARATOR)
        .withFileContext(meta)
        .withBloomType(BLOOM_TYPE).withMaxKeyCount(NUM_KV).build();

    final int rowLen = 32;
    for (int i = 0; i < NUM_KV; ++i) {
      byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i);
      byte[] v = TestHFileWriterV2.randomValue(rand);
      int cfLen = rand.nextInt(k.length - rowLen + 1);
      KeyValue kv;
      if (useTags) {
        Tag t = new Tag((byte) 1, "visibility");
        List<Tag> tagList = new ArrayList<Tag>();
        tagList.add(t);
        Tag[] tags = new Tag[1];
        tags[0] = t;
        kv = new KeyValue(
            k, 0, rowLen,
            k, rowLen, cfLen,
            k, rowLen + cfLen, k.length - rowLen - cfLen,
            rand.nextLong(),
            generateKeyType(rand),
            v, 0, v.length, tagList);
      } else {
        kv = new KeyValue(
            k, 0, rowLen,
            k, rowLen, cfLen,
            k, rowLen + cfLen, k.length - rowLen - cfLen,
            rand.nextLong(),
            generateKeyType(rand),
            v, 0, v.length);
      }
      sfw.append(kv);
    }

    sfw.close();
    storeFilePath = sfw.getPath();
  }

  @Test
  public void testNotCachingDataBlocksDuringCompaction() throws IOException {
    testNotCachingDataBlocksDuringCompactionInternals(false);
    testNotCachingDataBlocksDuringCompactionInternals(true);
  }

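  /**
   * Loads a region with several store files, then compacts it and verifies that neither DATA
   * nor ENCODED_DATA blocks end up in the block cache as a result of the compaction.
   */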
  protected void testNotCachingDataBlocksDuringCompactionInternals(boolean useTags)
      throws IOException {
    if (useTags) {
      TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3);
    } else {
      TEST_UTIL.getConfiguration().setInt("hfile.format.version", 2);
    }
    // TODO: need to change this test if we add a cache size threshold for
    // compactions, or if we implement some other kind of intelligent logic for
    // deciding what blocks to cache-on-write on compaction.
    final String table = "CompactionCacheOnWrite";
    final String cf = "myCF";
    final byte[] cfBytes = Bytes.toBytes(cf);
    final int maxVersions = 3;
    HRegion region = TEST_UTIL.createTestRegion(table,
        new HColumnDescriptor(cf)
            .setCompressionType(compress)
            .setBloomFilterType(BLOOM_TYPE)
            .setMaxVersions(maxVersions)
            .setDataBlockEncoding(encoder.getDataBlockEncoding())
    );
    int rowIdx = 0;
    long ts = EnvironmentEdgeManager.currentTimeMillis();
    for (int iFile = 0; iFile < 5; ++iFile) {
      for (int iRow = 0; iRow < 500; ++iRow) {
        String rowStr = "" + (rowIdx * rowIdx * rowIdx) + "row" + iFile + "_" + iRow;
        Put p = new Put(Bytes.toBytes(rowStr));
        ++rowIdx;
        for (int iCol = 0; iCol < 10; ++iCol) {
          String qualStr = "col" + iCol;
          String valueStr = "value_" + rowStr + "_" + qualStr;
          for (int iTS = 0; iTS < 5; ++iTS) {
            if (useTags) {
              Tag t = new Tag((byte) 1, "visibility");
              Tag[] tags = new Tag[1];
              tags[0] = t;
              KeyValue kv = new KeyValue(Bytes.toBytes(rowStr), cfBytes, Bytes.toBytes(qualStr),
                  HConstants.LATEST_TIMESTAMP, Bytes.toBytes(valueStr), tags);
              p.add(kv);
            } else {
              p.add(cfBytes, Bytes.toBytes(qualStr), ts++, Bytes.toBytes(valueStr));
            }
          }
        }
        p.setDurability(Durability.ASYNC_WAL);
        region.put(p);
      }
      region.flushcache();
    }
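    // Clear the cache, trigger a compaction, and make sure no data blocks were cached while
    // the compacted file was being written.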
    LruBlockCache blockCache =
        (LruBlockCache) new CacheConfig(conf).getBlockCache();
    blockCache.clearCache();
    assertEquals(0, blockCache.getBlockTypeCountsForTest().size());
    region.compactStores();
    LOG.debug("compactStores() returned");

    Map<BlockType, Integer> blockTypesInCache =
        blockCache.getBlockTypeCountsForTest();
    LOG.debug("Block types in cache: " + blockTypesInCache);
    assertNull(blockTypesInCache.get(BlockType.ENCODED_DATA));
    assertNull(blockTypesInCache.get(BlockType.DATA));
    region.close();
    blockCache.shutdown();
  }
}