/*
 * Copyright 2011 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

/**
 * Tests {@link HFile} cache-on-write functionality for the following block
 * types: data blocks, non-root index blocks, and Bloom filter blocks.
 */
@RunWith(Parameterized.class)
@Category(MediumTests.class)
public class TestCacheOnWrite {

  private static final Log LOG = LogFactory.getLog(TestCacheOnWrite.class);

  private static final HBaseTestingUtility TEST_UTIL =
    new HBaseTestingUtility();
  private Configuration conf;
  private CacheConfig cacheConf;
  private FileSystem fs;
  private Random rand = new Random(12983177L);
  private Path storeFilePath;
  private BlockCache blockCache;
  private String testDescription;

  private final CacheOnWriteType cowType;
  private final Compression.Algorithm compress;
  private final BlockEncoderTestType encoderType;
  private final HFileDataBlockEncoder encoder;

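  // Block and chunk sizes are kept small so the written store file contains
  // many data, index, and Bloom blocks for the test to inspect.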
  private static final int DATA_BLOCK_SIZE = 2048;
  private static final int NUM_KV = 25000;
  private static final int INDEX_BLOCK_SIZE = 512;
  private static final int BLOOM_BLOCK_SIZE = 4096;
  private static final BloomType BLOOM_TYPE = StoreFile.BloomType.ROWCOL;
  private static final ChecksumType CKTYPE = ChecksumType.CRC32;
  private static final int CKBYTES = 512;

  /** The number of valid key types possible in a store file */
  private static final int NUM_VALID_KEY_TYPES =
      KeyValue.Type.values().length - 2;

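  /**
   * Maps each cache-on-write configuration key to the block type(s) whose
   * caching it controls, so the test can verify that only those types end up
   * in the block cache.
   */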
  private static enum CacheOnWriteType {
    DATA_BLOCKS(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY,
        BlockType.DATA, BlockType.ENCODED_DATA),
    BLOOM_BLOCKS(CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY,
        BlockType.BLOOM_CHUNK),
    INDEX_BLOCKS(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY,
        BlockType.LEAF_INDEX, BlockType.INTERMEDIATE_INDEX);

    private final String confKey;
    private final BlockType blockType1;
    private final BlockType blockType2;

    private CacheOnWriteType(String confKey, BlockType blockType) {
      this(confKey, blockType, blockType);
    }

    private CacheOnWriteType(String confKey, BlockType blockType1,
        BlockType blockType2) {
      this.blockType1 = blockType1;
      this.blockType2 = blockType2;
      this.confKey = confKey;
    }

    public boolean shouldBeCached(BlockType blockType) {
      return blockType == blockType1 || blockType == blockType2;
    }

    public void modifyConf(Configuration conf) {
      for (CacheOnWriteType cowType : CacheOnWriteType.values()) {
        conf.setBoolean(cowType.confKey, cowType == this);
      }
    }

  }

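  /** Data block encoding algorithm used whenever encoding is enabled. */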
  private static final DataBlockEncoding ENCODING_ALGO =
      DataBlockEncoding.PREFIX;

  /** Provides fancy names for three combinations of two booleans */
  private static enum BlockEncoderTestType {
    NO_BLOCK_ENCODING(false, false),
    BLOCK_ENCODING_IN_CACHE_ONLY(false, true),
    BLOCK_ENCODING_EVERYWHERE(true, true);

    private final boolean encodeOnDisk;
    private final boolean encodeInCache;

    BlockEncoderTestType(boolean encodeOnDisk, boolean encodeInCache) {
      this.encodeOnDisk = encodeOnDisk;
      this.encodeInCache = encodeInCache;
    }

    public HFileDataBlockEncoder getEncoder() {
      return new HFileDataBlockEncoderImpl(
          encodeOnDisk ? ENCODING_ALGO : DataBlockEncoding.NONE,
          encodeInCache ? ENCODING_ALGO : DataBlockEncoding.NONE);
    }
  }

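  /**
   * Creates one parameterized test instance for a combination of
   * cache-on-write type, compression algorithm, and block encoder setting.
   */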
  public TestCacheOnWrite(CacheOnWriteType cowType,
      Compression.Algorithm compress, BlockEncoderTestType encoderType) {
    this.cowType = cowType;
    this.compress = compress;
    this.encoderType = encoderType;
    this.encoder = encoderType.getEncoder();
    testDescription = "[cacheOnWrite=" + cowType + ", compress=" + compress +
        ", encoderType=" + encoderType + "]";
    System.out.println(testDescription);
  }

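  /** Generates every combination of cache-on-write type, compression, and encoder type. */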
  @Parameters
  public static Collection<Object[]> getParameters() {
    List<Object[]> cowTypes = new ArrayList<Object[]>();
    for (CacheOnWriteType cowType : CacheOnWriteType.values()) {
      for (Compression.Algorithm compress :
           HBaseTestingUtility.COMPRESSION_ALGORITHMS) {
        for (BlockEncoderTestType encoderType :
             BlockEncoderTestType.values()) {
          cowTypes.add(new Object[] { cowType, compress, encoderType });
        }
      }
    }
    return cowTypes;
  }

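  /**
   * Configures small block sizes, enables cache-on-write only for the block
   * type under test, and obtains the block cache to verify against.
   */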
  @Before
  public void setUp() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, INDEX_BLOCK_SIZE);
    conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE,
        BLOOM_BLOCK_SIZE);
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY,
        cowType.shouldBeCached(BlockType.DATA));
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY,
        cowType.shouldBeCached(BlockType.LEAF_INDEX));
    conf.setBoolean(CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY,
        cowType.shouldBeCached(BlockType.BLOOM_CHUNK));
    cowType.modifyConf(conf);
    fs = HFileSystem.get(conf);
    cacheConf = new CacheConfig(conf);
    blockCache = cacheConf.getBlockCache();
  }

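  /** Re-creates the cache configuration and block cache reference after each test run. */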
  @After
  public void tearDown() {
    cacheConf = new CacheConfig(conf);
    blockCache = cacheConf.getBlockCache();
  }

  @Test
  public void testStoreFileCacheOnWrite() throws IOException {
    writeStoreFile();
    readStoreFile();
  }

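  /**
   * Reads back every block of the store file written by {@link #writeStoreFile()}
   * and asserts that exactly the block types selected by the cache-on-write
   * setting under test were placed in the block cache.
   */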
  private void readStoreFile() throws IOException {
    HFileReaderV2 reader = (HFileReaderV2) HFile.createReaderWithEncoding(fs,
        storeFilePath, cacheConf, encoder.getEncodingInCache());
    LOG.info("HFile information: " + reader);
    final boolean cacheBlocks = false;
    final boolean pread = false;
    HFileScanner scanner = reader.getScanner(cacheBlocks, pread);
    assertTrue(testDescription, scanner.seekTo());

    long offset = 0;
    HFileBlock prevBlock = null;
    EnumMap<BlockType, Integer> blockCountByType =
        new EnumMap<BlockType, Integer>(BlockType.class);

    DataBlockEncoding encodingInCache =
        encoderType.getEncoder().getEncodingInCache();
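    // Walk every block up to the load-on-open section of the file, checking
    // the block cache for each one as we go.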
    while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
      long onDiskSize = -1;
      if (prevBlock != null) {
        onDiskSize = prevBlock.getNextBlockOnDiskSizeWithHeader();
      }
      // Flags: don't cache the block, use pread, this is not a compaction.
      // Also, pass null for expected block type to avoid checking it.
      HFileBlock block = reader.readBlock(offset, onDiskSize, false, true,
          false, null);
      BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(),
          offset, encodingInCache, block.getBlockType());
      boolean isCached = blockCache.getBlock(blockCacheKey, true, false) != null;
      boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType());
      if (shouldBeCached != isCached) {
        throw new AssertionError(
            "shouldBeCached: " + shouldBeCached + "\n" +
            "isCached: " + isCached + "\n" +
            "Test description: " + testDescription + "\n" +
            "block: " + block + "\n" +
            "encodingInCache: " + encodingInCache + "\n" +
            "blockCacheKey: " + blockCacheKey);
      }
      prevBlock = block;
      offset += block.getOnDiskSizeWithHeader();
      BlockType bt = block.getBlockType();
      Integer count = blockCountByType.get(bt);
      blockCountByType.put(bt, (count == null ? 0 : count) + 1);
    }

    LOG.info("Block count by type: " + blockCountByType);
    String countByType = blockCountByType.toString();
    BlockType cachedDataBlockType =
        encoderType.encodeInCache ? BlockType.ENCODED_DATA : BlockType.DATA;
    assertEquals("{" + cachedDataBlockType
        + "=1379, LEAF_INDEX=173, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=24}",
        countByType);

    reader.close();
  }

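  /**
   * Picks a random {@link KeyValue.Type}: Put for half of the generated
   * key-values, otherwise a uniformly random valid type (never Minimum or
   * Maximum).
   */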
  public static KeyValue.Type generateKeyType(Random rand) {
    if (rand.nextBoolean()) {
      // Let's make half of KVs puts.
      return KeyValue.Type.Put;
    } else {
      KeyValue.Type keyType =
          KeyValue.Type.values()[1 + rand.nextInt(NUM_VALID_KEY_TYPES)];
      if (keyType == KeyValue.Type.Minimum || keyType == KeyValue.Type.Maximum)
      {
        throw new RuntimeException("Generated an invalid key type: " + keyType
            + ". " + "Probably the layout of KeyValue.Type has changed.");
      }
      return keyType;
    }
  }

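  /**
   * Writes {@link #NUM_KV} random key-values to a new store file using the
   * compression, encoding, and Bloom filter settings under test, triggering
   * cache-on-write for the configured block type.
   */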
  public void writeStoreFile() throws IOException {
    Path storeFileParentDir = new Path(TEST_UTIL.getDataTestDir(),
        "test_cache_on_write");
    StoreFile.Writer sfw = new StoreFile.WriterBuilder(conf, cacheConf, fs,
        DATA_BLOCK_SIZE)
            .withOutputDir(storeFileParentDir)
            .withCompression(compress)
            .withDataBlockEncoder(encoder)
            .withComparator(KeyValue.COMPARATOR)
            .withBloomType(BLOOM_TYPE)
            .withMaxKeyCount(NUM_KV)
            .withChecksumType(CKTYPE)
            .withBytesPerChecksum(CKBYTES)
            .build();

    final int rowLen = 32;
    for (int i = 0; i < NUM_KV; ++i) {
      byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i);
      byte[] v = TestHFileWriterV2.randomValue(rand);
      int cfLen = rand.nextInt(k.length - rowLen + 1);
      KeyValue kv = new KeyValue(
          k, 0, rowLen,
          k, rowLen, cfLen,
          k, rowLen + cfLen, k.length - rowLen - cfLen,
          rand.nextLong(),
          generateKeyType(rand),
          v, 0, v.length);
      sfw.append(kv);
    }

    sfw.close();
    storeFilePath = sfw.getPath();
  }

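  /**
   * Verifies that data blocks written during a compaction are not added to the
   * block cache, regardless of the cache-on-write setting under test.
   */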
  @Test
  public void testNotCachingDataBlocksDuringCompaction() throws IOException {
    // TODO: need to change this test if we add a cache size threshold for
    // compactions, or if we implement some other kind of intelligent logic for
    // deciding what blocks to cache-on-write on compaction.
    final String table = "CompactionCacheOnWrite";
    final String cf = "myCF";
    final byte[] cfBytes = Bytes.toBytes(cf);
    final int maxVersions = 3;
    HRegion region = TEST_UTIL.createTestRegion(table,
        new HColumnDescriptor(cf)
            .setCompressionType(compress)
            .setBloomFilterType(BLOOM_TYPE)
            .setMaxVersions(maxVersions)
            .setDataBlockEncoding(encoder.getEncodingInCache())
            .setEncodeOnDisk(encoder.getEncodingOnDisk() !=
                DataBlockEncoding.NONE)
    );
    int rowIdx = 0;
    long ts = EnvironmentEdgeManager.currentTimeMillis();
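    // Write and flush five batches of rows so the compaction below has
    // multiple store files to merge.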
    for (int iFile = 0; iFile < 5; ++iFile) {
      for (int iRow = 0; iRow < 500; ++iRow) {
        String rowStr = "" + (rowIdx * rowIdx * rowIdx) + "row" + iFile + "_" +
            iRow;
        Put p = new Put(Bytes.toBytes(rowStr));
        ++rowIdx;
        for (int iCol = 0; iCol < 10; ++iCol) {
          String qualStr = "col" + iCol;
          String valueStr = "value_" + rowStr + "_" + qualStr;
          for (int iTS = 0; iTS < 5; ++iTS) {
            p.add(cfBytes, Bytes.toBytes(qualStr), ts++,
                Bytes.toBytes(valueStr));
          }
        }
        region.put(p);
      }
      region.flushcache();
    }
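    // Clear the cache so that any data blocks found in it after the
    // compaction must have been cached while the compaction was writing.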
    LruBlockCache blockCache =
        (LruBlockCache) new CacheConfig(conf).getBlockCache();
    blockCache.clearCache();
    assertEquals(0, blockCache.getBlockTypeCountsForTest().size());
    Map<String, Long> metricsBefore = SchemaMetrics.getMetricsSnapshot();
    region.compactStores();
    LOG.debug("compactStores() returned");
    SchemaMetrics.validateMetricChanges(metricsBefore);
    Map<String, Long> compactionMetrics = SchemaMetrics.diffMetrics(
        metricsBefore, SchemaMetrics.getMetricsSnapshot());
    LOG.debug(SchemaMetrics.formatMetrics(compactionMetrics));
    Map<BlockType, Integer> blockTypesInCache =
        blockCache.getBlockTypeCountsForTest();
    LOG.debug("Block types in cache: " + blockTypesInCache);
    assertNull(blockTypesInCache.get(BlockType.DATA));
    region.close();
    blockCache.shutdown();
  }

  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}