/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

/**
 * Tests {@link HFile} cache-on-write functionality for the following block
 * types: data blocks, non-root index blocks, and Bloom filter blocks.
 */
@RunWith(Parameterized.class)
@Category(MediumTests.class)
public class TestCacheOnWrite {

  private static final Log LOG = LogFactory.getLog(TestCacheOnWrite.class);

  private static final HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU();
  private Configuration conf;
  private CacheConfig cacheConf;
  private FileSystem fs;
  private Random rand = new Random(12983177L);
  private Path storeFilePath;
  private BlockCache blockCache;
  private String testDescription;

  private final CacheOnWriteType cowType;
  private final Compression.Algorithm compress;
  private final BlockEncoderTestType encoderType;
  private final HFileDataBlockEncoder encoder;

  private static final int DATA_BLOCK_SIZE = 2048;
  private static final int NUM_KV = 25000;
  private static final int INDEX_BLOCK_SIZE = 512;
  private static final int BLOOM_BLOCK_SIZE = 4096;
  private static final BloomType BLOOM_TYPE = BloomType.ROWCOL;
  private static final ChecksumType CKTYPE = ChecksumType.CRC32;
  private static final int CKBYTES = 512;

  /** The number of valid key types possible in a store file */
  private static final int NUM_VALID_KEY_TYPES =
      KeyValue.Type.values().length - 2;

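  /**
   * The three cache-on-write settings under test. Each value names the
   * configuration key it toggles and the block type(s) that setting is
   * expected to place in the block cache during a write.
   */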
  private static enum CacheOnWriteType {
    DATA_BLOCKS(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY,
        BlockType.DATA, BlockType.ENCODED_DATA),
    BLOOM_BLOCKS(CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY,
        BlockType.BLOOM_CHUNK),
    INDEX_BLOCKS(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY,
        BlockType.LEAF_INDEX, BlockType.INTERMEDIATE_INDEX);

    private final String confKey;
    private final BlockType blockType1;
    private final BlockType blockType2;

    private CacheOnWriteType(String confKey, BlockType blockType) {
      this(confKey, blockType, blockType);
    }

    private CacheOnWriteType(String confKey, BlockType blockType1,
        BlockType blockType2) {
      this.blockType1 = blockType1;
      this.blockType2 = blockType2;
      this.confKey = confKey;
    }

    public boolean shouldBeCached(BlockType blockType) {
      return blockType == blockType1 || blockType == blockType2;
    }

    /** Sets all three cache-on-write keys, enabling only this type's key. */
    public void modifyConf(Configuration conf) {
      for (CacheOnWriteType cowType : CacheOnWriteType.values()) {
        conf.setBoolean(cowType.confKey, cowType == this);
      }
    }

  }

  private static final DataBlockEncoding ENCODING_ALGO =
      DataBlockEncoding.PREFIX;

  /** Names the three valid combinations of the two booleans below */
  private static enum BlockEncoderTestType {
    NO_BLOCK_ENCODING_NOOP(true, false),
    NO_BLOCK_ENCODING(false, false),
    BLOCK_ENCODING_EVERYWHERE(false, true);

    private final boolean noop;
    private final boolean encode;

    BlockEncoderTestType(boolean noop, boolean encode) {
      this.encode = encode;
      this.noop = noop;
    }

    public HFileDataBlockEncoder getEncoder() {
      return noop ? NoOpDataBlockEncoder.INSTANCE
          : new HFileDataBlockEncoderImpl(
              encode ? ENCODING_ALGO : DataBlockEncoding.NONE);
    }
  }

  public TestCacheOnWrite(CacheOnWriteType cowType,
      Compression.Algorithm compress, BlockEncoderTestType encoderType) {
    this.cowType = cowType;
    this.compress = compress;
    this.encoderType = encoderType;
    this.encoder = encoderType.getEncoder();
    testDescription = "[cacheOnWrite=" + cowType + ", compress=" + compress +
        ", encoderType=" + encoderType + "]";
    LOG.info(testDescription);
  }

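  /**
   * Generates the parameter matrix: the cross product of all cache-on-write
   * types, all supported compression algorithms, and all encoder test types.
   */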
  @Parameters
  public static Collection<Object[]> getParameters() {
    List<Object[]> cowTypes = new ArrayList<Object[]>();
    for (CacheOnWriteType cowType : CacheOnWriteType.values()) {
      for (Compression.Algorithm compress :
           HBaseTestingUtility.COMPRESSION_ALGORITHMS) {
        for (BlockEncoderTestType encoderType :
             BlockEncoderTestType.values()) {
          cowTypes.add(new Object[] { cowType, compress, encoderType });
        }
      }
    }
    return cowTypes;
  }

  @Before
  public void setUp() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, INDEX_BLOCK_SIZE);
    conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE,
        BLOOM_BLOCK_SIZE);
    // modifyConf() sets all three cache-on-write keys, enabling only the one
    // that belongs to the block type under test, so no explicit setBoolean
    // calls are needed here.
    cowType.modifyConf(conf);
    fs = HFileSystem.get(conf);
    cacheConf = new CacheConfig(conf);
    blockCache = cacheConf.getBlockCache();
  }


  @After
  public void tearDown() {
    cacheConf = new CacheConfig(conf);
    blockCache = cacheConf.getBlockCache();
  }

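  /** Writes a store file, then verifies which of its blocks were cached on write. */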
  @Test
  public void testStoreFileCacheOnWrite() throws IOException {
    writeStoreFile();
    readStoreFile();
  }

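  /**
   * Scans every block of the store file written by {@link #writeStoreFile()},
   * asserting for each block that its presence in the block cache matches
   * what the current cache-on-write setting predicts, and that the per-type
   * block counts match the values expected for the fixed random seed.
   */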
  private void readStoreFile() throws IOException {
    HFileReaderV2 reader = (HFileReaderV2) HFile.createReader(fs,
        storeFilePath, cacheConf);
    LOG.info("HFile information: " + reader);
    final boolean cacheBlocks = false;
    final boolean pread = false;
    HFileScanner scanner = reader.getScanner(cacheBlocks, pread);
    assertTrue(testDescription, scanner.seekTo());

    long offset = 0;
    HFileBlock prevBlock = null;
    EnumMap<BlockType, Integer> blockCountByType =
        new EnumMap<BlockType, Integer>(BlockType.class);

    DataBlockEncoding encodingInCache =
        encoderType.getEncoder().getDataBlockEncoding();
    while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
      long onDiskSize = -1;
      if (prevBlock != null) {
        onDiskSize = prevBlock.getNextBlockOnDiskSizeWithHeader();
      }
      // Flags: don't cache the block, use pread, this is not a compaction.
      // Also, pass null for expected block type to avoid checking it.
      HFileBlock block = reader.readBlock(offset, onDiskSize, false, true,
          false, null);
      BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(),
          offset, encodingInCache, block.getBlockType());
      boolean isCached = blockCache.getBlock(blockCacheKey, true, false) != null;
      boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType());
      if (shouldBeCached != isCached) {
        throw new AssertionError(
            "shouldBeCached: " + shouldBeCached + "\n" +
            "isCached: " + isCached + "\n" +
            "Test description: " + testDescription + "\n" +
            "block: " + block + "\n" +
            "encodingInCache: " + encodingInCache + "\n" +
            "blockCacheKey: " + blockCacheKey);
      }
      prevBlock = block;
      offset += block.getOnDiskSizeWithHeader();
      BlockType bt = block.getBlockType();
      Integer count = blockCountByType.get(bt);
      blockCountByType.put(bt, (count == null ? 0 : count) + 1);
    }

    LOG.info("Block count by type: " + blockCountByType);
    String countByType = blockCountByType.toString();
    BlockType cachedDataBlockType =
        encoderType.encode ? BlockType.ENCODED_DATA : BlockType.DATA;
    // The exact counts depend only on the fixed Random seed and the
    // uncompressed block size settings, so they are deterministic.
    assertEquals("{" + cachedDataBlockType
        + "=1379, LEAF_INDEX=173, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=24}",
        countByType);

    reader.close();
  }

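  /**
   * Picks a random key type: {@link KeyValue.Type#Put} half of the time, and
   * one of the other valid types (never Minimum or Maximum) otherwise.
   */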
  public static KeyValue.Type generateKeyType(Random rand) {
    if (rand.nextBoolean()) {
      // Let's make half of KVs puts.
      return KeyValue.Type.Put;
    } else {
      KeyValue.Type keyType =
          KeyValue.Type.values()[1 + rand.nextInt(NUM_VALID_KEY_TYPES)];
      if (keyType == KeyValue.Type.Minimum || keyType == KeyValue.Type.Maximum) {
        throw new RuntimeException("Generated an invalid key type: " + keyType
            + ". Probably the layout of KeyValue.Type has changed.");
      }
      return keyType;
    }
  }

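  /**
   * Writes {@value #NUM_KV} KeyValues with random ordered keys and random
   * values, types, and timestamps into a new store file, using the
   * compression, encoding, Bloom filter, and checksum settings under test.
   * Stores the resulting path in {@link #storeFilePath}.
   */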
  public void writeStoreFile() throws IOException {
    Path storeFileParentDir = new Path(TEST_UTIL.getDataTestDir(),
        "test_cache_on_write");
    StoreFile.Writer sfw = new StoreFile.WriterBuilder(conf, cacheConf, fs,
        DATA_BLOCK_SIZE)
            .withOutputDir(storeFileParentDir)
            .withCompression(compress)
            .withDataBlockEncoder(encoder)
            .withComparator(KeyValue.COMPARATOR)
            .withBloomType(BLOOM_TYPE)
            .withMaxKeyCount(NUM_KV)
            .withChecksumType(CKTYPE)
            .withBytesPerChecksum(CKBYTES)
            .build();

    final int rowLen = 32;
    for (int i = 0; i < NUM_KV; ++i) {
      byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i);
      byte[] v = TestHFileWriterV2.randomValue(rand);
      int cfLen = rand.nextInt(k.length - rowLen + 1);
      // Split the random key bytes into row, family, and qualifier parts.
      KeyValue kv = new KeyValue(
          k, 0, rowLen,
          k, rowLen, cfLen,
          k, rowLen + cfLen, k.length - rowLen - cfLen,
          rand.nextLong(),
          generateKeyType(rand),
          v, 0, v.length);
      sfw.append(kv);
    }

    sfw.close();
    storeFilePath = sfw.getPath();
  }

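  /**
   * Verifies that data blocks written during a compaction are not cached on
   * write: after filling and flushing a region five times, clearing the
   * cache, and compacting, no DATA blocks should appear in the cache.
   */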
  @Test
  public void testNotCachingDataBlocksDuringCompaction() throws IOException {
    // TODO: need to change this test if we add a cache size threshold for
    // compactions, or if we implement some other kind of intelligent logic for
    // deciding what blocks to cache-on-write on compaction.
    final String table = "CompactionCacheOnWrite";
    final String cf = "myCF";
    final byte[] cfBytes = Bytes.toBytes(cf);
    final int maxVersions = 3;
    HRegion region = TEST_UTIL.createTestRegion(table,
        new HColumnDescriptor(cf)
            .setCompressionType(compress)
            .setBloomFilterType(BLOOM_TYPE)
            .setMaxVersions(maxVersions)
            .setDataBlockEncoding(encoder.getDataBlockEncoding())
    );
    int rowIdx = 0;
    long ts = EnvironmentEdgeManager.currentTimeMillis();
    for (int iFile = 0; iFile < 5; ++iFile) {
      for (int iRow = 0; iRow < 500; ++iRow) {
        String rowStr = "" + (rowIdx * rowIdx * rowIdx) + "row" + iFile + "_" +
            iRow;
        Put p = new Put(Bytes.toBytes(rowStr));
        ++rowIdx;
        for (int iCol = 0; iCol < 10; ++iCol) {
          String qualStr = "col" + iCol;
          String valueStr = "value_" + rowStr + "_" + qualStr;
          for (int iTS = 0; iTS < 5; ++iTS) {
            p.add(cfBytes, Bytes.toBytes(qualStr), ts++,
                Bytes.toBytes(valueStr));
          }
        }
        region.put(p);
      }
      region.flushcache();
    }
    LruBlockCache blockCache =
        (LruBlockCache) new CacheConfig(conf).getBlockCache();
    blockCache.clearCache();
    assertEquals(0, blockCache.getBlockTypeCountsForTest().size());
    region.compactStores();
    LOG.debug("compactStores() returned");

    Map<BlockType, Integer> blockTypesInCache =
        blockCache.getBlockTypeCountsForTest();
    LOG.debug("Block types in cache: " + blockTypesInCache);
    assertNull(blockTypesInCache.get(BlockType.DATA));
    region.close();
    blockCache.shutdown();
  }

}