/*
 * Copyright 2011 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestCase;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
import org.junit.Test;
import org.junit.experimental.categories.Category;

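/**
 * Verifies the number of HFile data blocks read by Gets and Scans. Each test
 * table gets one column family per Bloom filter type (ROWCOL, ROW, NONE), so
 * every expectation can be checked against all three configurations.
 */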
@Category(MediumTests.class)
public class TestBlocksRead extends HBaseTestCase {
  static final Log LOG = LogFactory.getLog(TestBlocksRead.class);
  static final BloomType[] BLOOM_TYPE = new BloomType[] { BloomType.ROWCOL,
      BloomType.ROW, BloomType.NONE };

  private static BlockCache blockCache;

  private HBaseConfiguration getConf() {
    HBaseConfiguration conf = new HBaseConfiguration();

    // Disable compactions in this test.
    conf.setInt("hbase.hstore.compactionThreshold", 10000);
    return conf;
  }

  HRegion region = null;
  private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private final String DIR = TEST_UTIL.getDataTestDir("TestBlocksRead").toString();

  /**
   * @see org.apache.hadoop.hbase.HBaseTestCase#setUp()
   */
  @SuppressWarnings("deprecation")
  @Override
  protected void setUp() throws Exception {
    super.setUp();
  }

  @SuppressWarnings("deprecation")
  @Override
  protected void tearDown() throws Exception {
    super.tearDown();
    EnvironmentEdgeManagerTestHelper.reset();
  }

  /**
   * Callers must afterward call {@link HRegion#closeHRegion(HRegion)}.
   * @param tableName
   * @param callingMethod
   * @param conf
   * @param family
   * @throws IOException
   * @return created and initialized region.
   */
  private HRegion initHRegion(byte[] tableName, String callingMethod,
      HBaseConfiguration conf, String family) throws IOException {
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (int i = 0; i < BLOOM_TYPE.length; i++) {
      BloomType bloomType = BLOOM_TYPE[i];
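      // A 1-byte block size forces each KeyValue into its own HFile block,
      // so the block-read counts checked below map one-to-one onto seeks.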
      HColumnDescriptor familyDesc =
          new HColumnDescriptor(family + "_" + bloomType)
              .setBlocksize(1)
              .setBloomFilterType(bloomType);
      htd.addFamily(familyDesc);
    }

    HRegionInfo info = new HRegionInfo(htd.getName(), null, null, false);
    Path path = new Path(DIR + callingMethod);
    HRegion r = HRegion.createHRegion(info, path, conf, htd);
    blockCache = new CacheConfig(conf).getBlockCache();
    return r;
  }

  private void putData(String family, String row, String col, long version)
      throws IOException {
    for (int i = 0; i < BLOOM_TYPE.length; i++) {
      putData(Bytes.toBytes(family + "_" + BLOOM_TYPE[i]), row, col, version,
          version);
    }
  }

  // Generates a value to put for a given row/col/version.
  private static byte[] genValue(String row, String col, long version) {
    return Bytes.toBytes("Value:" + row + "#" + col + "#" + version);
  }

  private void putData(byte[] cf, String row, String col, long versionStart,
      long versionEnd) throws IOException {
    byte[] columnBytes = Bytes.toBytes(col);
    Put put = new Put(Bytes.toBytes(row));
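    // Skip the WAL; durability is irrelevant here and this keeps the test fast.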
    put.setWriteToWAL(false);

    for (long version = versionStart; version <= versionEnd; version++) {
      put.add(cf, columnBytes, version, genValue(row, col, version));
    }
    region.put(put);
  }

  private KeyValue[] getData(String family, String row, List<String> columns,
      int expBlocks) throws IOException {
    return getData(family, row, columns, expBlocks, expBlocks, expBlocks);
  }

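  // Runs the Get against each Bloom-type variant of the family and asserts the
  // number of blocks read per variant; an expected count of -1 skips the check.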
  private KeyValue[] getData(String family, String row, List<String> columns,
      int expBlocksRowCol, int expBlocksRow, int expBlocksNone)
      throws IOException {
    int[] expBlocks = new int[] { expBlocksRowCol, expBlocksRow, expBlocksNone };
    KeyValue[] kvs = null;

    for (int i = 0; i < BLOOM_TYPE.length; i++) {
      BloomType bloomType = BLOOM_TYPE[i];
      byte[] cf = Bytes.toBytes(family + "_" + bloomType);
      long blocksStart = getBlkAccessCount(cf);
      Get get = new Get(Bytes.toBytes(row));

      for (String column : columns) {
        get.addColumn(cf, Bytes.toBytes(column));
      }

      kvs = region.get(get, null).raw();
      long blocksEnd = getBlkAccessCount(cf);
      if (expBlocks[i] != -1) {
        assertEquals("Blocks Read Check for Bloom: " + bloomType, expBlocks[i],
            blocksEnd - blocksStart);
      }
      LOG.info("Blocks Read for Bloom: " + bloomType + " = "
          + (blocksEnd - blocksStart) + ", Expected = " + expBlocks[i]);
    }
    return kvs;
  }

  private KeyValue[] getData(String family, String row, String column,
      int expBlocks) throws IOException {
    return getData(family, row, Arrays.asList(column), expBlocks, expBlocks,
        expBlocks);
  }

  private KeyValue[] getData(String family, String row, String column,
      int expBlocksRowCol, int expBlocksRow, int expBlocksNone)
      throws IOException {
    return getData(family, row, Arrays.asList(column), expBlocksRowCol,
        expBlocksRow, expBlocksNone);
  }

  private void deleteFamily(String family, String row, long version)
      throws IOException {
    Delete del = new Delete(Bytes.toBytes(row));
    del.deleteFamily(Bytes.toBytes(family + "_ROWCOL"), version);
    del.deleteFamily(Bytes.toBytes(family + "_ROW"), version);
    del.deleteFamily(Bytes.toBytes(family + "_NONE"), version);
    region.delete(del, null, true);
  }

  private static void verifyData(KeyValue kv, String expectedRow,
      String expectedCol, long expectedVersion) {
    assertEquals("RowCheck", expectedRow, Bytes.toString(kv.getRow()));
    assertEquals("ColumnCheck", expectedCol, Bytes.toString(kv.getQualifier()));
    assertEquals("TSCheck", expectedVersion, kv.getTimestamp());
    assertEquals("ValueCheck",
        Bytes.toString(genValue(expectedRow, expectedCol, expectedVersion)),
        Bytes.toString(kv.getValue()));
  }

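  // Returns the global count of HFile data blocks read so far; the cf argument
  // is unused and kept only so call sites read symmetrically per family.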
  private static long getBlkAccessCount(byte[] cf) {
    return HFile.dataBlockReadCnt.get();
  }

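  // Number of blocks currently held in the block cache.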
  private static long getBlkCount() {
    return blockCache.getBlockCount();
  }

  /**
   * Test # of blocks read for some simple seek cases.
   *
   * @throws Exception
   */
  @Test
  public void testBlocksRead() throws Exception {
    byte[] TABLE = Bytes.toBytes("testBlocksRead");
    String FAMILY = "cf1";
    KeyValue[] kvs;
    HBaseConfiguration conf = getConf();
    this.region = initHRegion(TABLE, getName(), conf, FAMILY);

    try {
      putData(FAMILY, "row", "col1", 1);
      putData(FAMILY, "row", "col2", 2);
      putData(FAMILY, "row", "col3", 3);
      putData(FAMILY, "row", "col4", 4);
      putData(FAMILY, "row", "col5", 5);
      putData(FAMILY, "row", "col6", 6);
      putData(FAMILY, "row", "col7", 7);
      region.flushcache();

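      // Each KV sits in its own block (blocksize 1), so the expected
      // block-read counts below equal the number of seeks performed.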
      // Expected block reads: 1.
      // The top block contains the KV we are
      // interested in, so only 1 seek is needed.
      kvs = getData(FAMILY, "row", "col1", 1);
      assertEquals(1, kvs.length);
      verifyData(kvs[0], "row", "col1", 1);

      // Expected block reads: 2.
      // The top block and the next block contain the KVs we are
      // interested in, so only 2 seeks are needed.
      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
      assertEquals(2, kvs.length);
      verifyData(kvs[0], "row", "col1", 1);
      verifyData(kvs[1], "row", "col2", 2);

      // Expected block reads: 3.
      // The first 2 seeks are needed to find col2 [HBASE-4443];
      // one additional seek is needed for col3,
      // so 3 seeks are needed in total.
      kvs = getData(FAMILY, "row", Arrays.asList("col2", "col3"), 3);
      assertEquals(2, kvs.length);
      verifyData(kvs[0], "row", "col2", 2);
      verifyData(kvs[1], "row", "col3", 3);

      // Expected block reads: 2. [HBASE-4443]
      kvs = getData(FAMILY, "row", Arrays.asList("col5"), 2);
      assertEquals(1, kvs.length);
      verifyData(kvs[0], "row", "col5", 5);
    } finally {
      HRegion.closeHRegion(this.region);
      this.region = null;
    }
  }

  /**
   * Test # of blocks read (targeted at some of the cases Lazy Seek optimizes).
   *
   * @throws Exception
   */
  @Test
  public void testLazySeekBlocksRead() throws Exception {
    byte[] TABLE = Bytes.toBytes("testLazySeekBlocksRead");
    String FAMILY = "cf1";
    KeyValue[] kvs;
    HBaseConfiguration conf = getConf();
    this.region = initHRegion(TABLE, getName(), conf, FAMILY);

    try {
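      // Build up several store files so that lazy seek can avoid reading the
      // files that cannot contain the newest version of a requested column.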
      // File 1
      putData(FAMILY, "row", "col1", 1);
      putData(FAMILY, "row", "col2", 2);
      region.flushcache();

      // File 2
      putData(FAMILY, "row", "col1", 3);
      putData(FAMILY, "row", "col2", 4);
      region.flushcache();

      // Expected blocks read: 1.
      // File 2's top block contains the "col1" KV we are
      // interested in, so only 1 seek is needed.
      kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1);
      assertEquals(1, kvs.length);
      verifyData(kvs[0], "row", "col1", 3);

      // Expected blocks read: 2.
      // File 2's top block has the "col1" KV we are
      // interested in. We also need "col2", which is in a block
      // of its own, so we need that block as well.
      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
      assertEquals(2, kvs.length);
      verifyData(kvs[0], "row", "col1", 3);
      verifyData(kvs[1], "row", "col2", 4);

      // File 3: Add another column.
      putData(FAMILY, "row", "col3", 5);
      region.flushcache();

      // Expected blocks read: 1.
      // File 3's top block has the "col3" KV we are
      // interested in, so only 1 seek is needed.
      kvs = getData(FAMILY, "row", "col3", 1);
      assertEquals(1, kvs.length);
      verifyData(kvs[0], "row", "col3", 5);

      // Get a column from an older file.
      // For the ROWCOL Bloom filter: Expected blocks read: 1.
      // For the ROW Bloom filter: Expected blocks read: 2.
      // For the NONE Bloom filter: Expected blocks read: 2.
      kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1, 2, 2);
      assertEquals(1, kvs.length);
      verifyData(kvs[0], "row", "col1", 3);

      // File 4: Delete the entire row.
      deleteFamily(FAMILY, "row", 6);
      region.flushcache();

      // For the ROWCOL Bloom filter: Expected blocks read: 2.
      // For the ROW Bloom filter: Expected blocks read: 3.
      // For the NONE Bloom filter: Expected blocks read: 3.
      kvs = getData(FAMILY, "row", "col1", 2, 3, 3);
      assertEquals(0, kvs.length);
      kvs = getData(FAMILY, "row", "col2", 3, 4, 4);
      assertEquals(0, kvs.length);
      kvs = getData(FAMILY, "row", "col3", 2);
      assertEquals(0, kvs.length);
      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 4);
      assertEquals(0, kvs.length);

      // File 5: Delete the entire row again, at a later version.
      deleteFamily(FAMILY, "row", 10);
      region.flushcache();

      // File 6: Some more puts, but with timestamps older than the
      // previous delete.
      putData(FAMILY, "row", "col1", 7);
      putData(FAMILY, "row", "col2", 8);
      putData(FAMILY, "row", "col3", 9);
      region.flushcache();

      // Expected blocks read: 5 (the baseline was 8 before HBASE-4532).
      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 5);
      assertEquals(0, kvs.length);

      // File 7: Put back new data.
      putData(FAMILY, "row", "col1", 11);
      putData(FAMILY, "row", "col2", 12);
      putData(FAMILY, "row", "col3", 13);
      region.flushcache();

      // Expected blocks read: 5. [HBASE-4585]
      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 5);
      assertEquals(3, kvs.length);
      verifyData(kvs[0], "row", "col1", 11);
      verifyData(kvs[1], "row", "col2", 12);
      verifyData(kvs[2], "row", "col3", 13);
    } finally {
      HRegion.closeHRegion(this.region);
      this.region = null;
    }
  }

  /**
   * Test # of blocks stored in the cache, to verify that disabling cache-fill
   * on Scan works.
   * @throws Exception
   */
  @Test
  public void testBlocksStoredWhenCachingDisabled() throws Exception {
    byte[] TABLE = Bytes.toBytes("testBlocksStoredWhenCachingDisabled");
    String FAMILY = "cf1";

    HBaseConfiguration conf = getConf();
    this.region = initHRegion(TABLE, getName(), conf, FAMILY);

    try {
      putData(FAMILY, "row", "col1", 1);
      putData(FAMILY, "row", "col2", 2);
      region.flushcache();

      // Execute a scan with caching turned off.
      // Expected blocks stored: 0.
      long blocksStart = getBlkCount();
      Scan scan = new Scan();
      scan.setCacheBlocks(false);
      RegionScanner rs = region.getScanner(scan);
      List<KeyValue> result = new ArrayList<KeyValue>(2);
      rs.next(result);
      assertEquals(2 * BLOOM_TYPE.length, result.size());
      rs.close();
      long blocksEnd = getBlkCount();

      assertEquals(blocksStart, blocksEnd);

      // Execute the scan again with caching turned on.
      // Expected blocks stored: 2 per family variant.
      blocksStart = blocksEnd;
      scan.setCacheBlocks(true);
      rs = region.getScanner(scan);
      result = new ArrayList<KeyValue>(2);
      rs.next(result);
      assertEquals(2 * BLOOM_TYPE.length, result.size());
      rs.close();
      blocksEnd = getBlkCount();

      assertEquals(2 * BLOOM_TYPE.length, blocksEnd - blocksStart);
    } finally {
      HRegion.closeHRegion(this.region);
      this.region = null;
    }
  }

  @Test
  public void testLazySeekBlocksReadWithDelete() throws Exception {
    byte[] TABLE = Bytes.toBytes("testLazySeekBlocksReadWithDelete");
    String FAMILY = "cf1";
    KeyValue[] kvs;
    HBaseConfiguration conf = getConf();
    this.region = initHRegion(TABLE, getName(), conf, FAMILY);
    try {
      deleteFamily(FAMILY, "row", 200);
      for (int i = 0; i < 100; i++) {
        putData(FAMILY, "row", "col" + i, i);
      }
      putData(FAMILY, "row", "col99", 201);
      region.flushcache();

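      // col0's only version (ts 0) is masked by the family delete at ts 200,
      // while col99 also has a version at ts 201 that survives the delete.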
      kvs = getData(FAMILY, "row", Arrays.asList("col0"), 2);
      assertEquals(0, kvs.length);

      kvs = getData(FAMILY, "row", Arrays.asList("col99"), 2);
      assertEquals(1, kvs.length);
      verifyData(kvs[0], "row", "col99", 201);
    } finally {
      HRegion.closeHRegion(this.region);
      this.region = null;
    }
  }

  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}