View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  import java.util.ArrayList;
24  import java.util.Arrays;
25  import java.util.Collections;
26  import java.util.Comparator;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.TreeSet;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FileSystem;
35  import org.apache.hadoop.fs.Path;
36  import org.apache.hadoop.hbase.TableName;
37  import org.apache.hadoop.hbase.HBaseTestCase;
38  import org.apache.hadoop.hbase.HBaseTestingUtility;
39  import org.apache.hadoop.hbase.HConstants;
40  import org.apache.hadoop.hbase.HRegionInfo;
41  import org.apache.hadoop.hbase.KeyValue;
42  import org.apache.hadoop.hbase.SmallTests;
43  import org.apache.hadoop.hbase.client.Scan;
44  import org.apache.hadoop.hbase.io.HFileLink;
45  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
46  import org.apache.hadoop.hbase.io.hfile.BlockCache;
47  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
48  import org.apache.hadoop.hbase.io.hfile.CacheStats;
49  import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
50  import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
51  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
52  import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
53  import org.apache.hadoop.hbase.util.BloomFilterFactory;
54  import org.apache.hadoop.hbase.util.Bytes;
55  import org.apache.hadoop.hbase.util.ChecksumType;
56  import org.apache.hadoop.hbase.util.FSUtils;
57  import org.junit.experimental.categories.Category;
58  import org.mockito.Mockito;
59  
60  import com.google.common.base.Joiner;
61  import com.google.common.collect.Iterables;
62  import com.google.common.collect.Lists;
63  
64  /**
65   * Test HStoreFile
66   */
67  @Category(SmallTests.class)
68  public class TestStoreFile extends HBaseTestCase {
69    static final Log LOG = LogFactory.getLog(TestStoreFile.class);
70    private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
71    private CacheConfig cacheConf =  new CacheConfig(TEST_UTIL.getConfiguration());
72    private static String ROOT_DIR = TEST_UTIL.getDataTestDir("TestStoreFile").toString();
73    private static final ChecksumType CKTYPE = ChecksumType.CRC32;
74    private static final int CKBYTES = 512;
75    private static String TEST_FAMILY = "cf";
76  
  /**
   * Per-test setup; delegates to {@link HBaseTestCase#setUp()}, which
   * initializes the shared {@code conf}/{@code fs}/{@code testDir} fixtures
   * these tests rely on.
   */
  @Override
  public void setUp() throws Exception {
    super.setUp();
  }
81  
  /**
   * Per-test cleanup; delegates to {@link HBaseTestCase#tearDown()} to
   * release the fixtures created in {@link #setUp()}.
   */
  @Override
  public void tearDown() throws Exception {
    super.tearDown();
  }
86  
87    /**
88     * Write a file and then assert that we can read from top and bottom halves
89     * using two HalfMapFiles.
90     * @throws Exception
91     */
92    public void testBasicHalfMapFile() throws Exception {
93      final HRegionInfo hri =
94          new HRegionInfo(TableName.valueOf("testBasicHalfMapFileTb"));
95      HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(
96        conf, fs, new Path(this.testDir, hri.getTableName().getNameAsString()), hri);
97  
98      StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs, 2 * 1024)
99              .withFilePath(regionFs.createTempName())
100             .build();
101     writeStoreFile(writer);
102 
103     Path sfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
104     StoreFile sf = new StoreFile(this.fs, sfPath, conf, cacheConf,
105         BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
106     checkHalfHFile(regionFs, sf);
107   }
108 
  /**
   * Fills the given writer with the default two-character row set, using this
   * test's name as both family and qualifier, then closes the writer.
   */
  private void writeStoreFile(final StoreFile.Writer writer) throws IOException {
    writeStoreFile(writer, Bytes.toBytes(getName()), Bytes.toBytes(getName()));
  }
112 
  // Pick a split point (roughly halfway through the two-character row space).
  byte[] SPLITKEY = new byte[] { (LAST_CHAR + FIRST_CHAR)/2, FIRST_CHAR};
115 
116   /*
117    * Writes HStoreKey and ImmutableBytes data to passed writer and
118    * then closes it.
119    * @param writer
120    * @throws IOException
121    */
122   public static void writeStoreFile(final StoreFile.Writer writer, byte[] fam, byte[] qualifier)
123   throws IOException {
124     long now = System.currentTimeMillis();
125     try {
126       for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
127         for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
128           byte[] b = new byte[] { (byte) d, (byte) e };
129           writer.append(new KeyValue(b, fam, qualifier, now, b));
130         }
131       }
132     } finally {
133       writer.close();
134     }
135   }
136 
  /**
   * Test that our mechanism of writing store files in one region to reference
   * store files in other regions works.
   * @throws IOException
   */
  public void testReference() throws IOException {
    final HRegionInfo hri = new HRegionInfo(TableName.valueOf("testReferenceTb"));
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(
      conf, fs, new Path(this.testDir, hri.getTableName().getNameAsString()), hri);

    // Make a store file and write data to it.
    StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs, 8 * 1024)
            .withFilePath(regionFs.createTempName())
            .build();
    writeStoreFile(writer);

    Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    StoreFile hsf = new StoreFile(this.fs, hsfPath, conf, cacheConf,
        BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    StoreFile.Reader reader = hsf.createReader();
    // Split on a row, not in middle of row.  Midkey returned by reader
    // may be in middle of row.  Create new one with empty column and
    // timestamp.
    KeyValue kv = KeyValue.createKeyValueFromKey(reader.midkey());
    byte [] midRow = kv.getRow();
    kv = KeyValue.createKeyValueFromKey(reader.getLastKey());
    byte [] finalRow = kv.getRow();
    // Make a reference: a daughter region covering [midRow, end) holding a
    // top-half reference to the committed file.
    HRegionInfo splitHri = new HRegionInfo(hri.getTableName(), null, midRow);
    Path refPath = splitStoreFile(regionFs, splitHri, TEST_FAMILY, hsf, midRow, true);
    StoreFile refHsf = new StoreFile(this.fs, refPath, conf, cacheConf,
        BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    // Now confirm that I can read from the reference and that it only gets
    // keys from top half of the file.
    HFileScanner s = refHsf.createReader().getScanner(false, false);
    // seekTo() positions on the first key; next() advances thereafter.
    for(boolean first = true; (!s.isSeeked() && s.seekTo()) || s.next();) {
      ByteBuffer bb = s.getKey();
      kv = KeyValue.createKeyValueFromKey(bb);
      if (first) {
        // First key visible through the top reference must be the split row.
        assertTrue(Bytes.equals(kv.getRow(), midRow));
        first = false;
      }
    }
    // kv now holds the last key read; it must be the last row of the file.
    assertTrue(Bytes.equals(kv.getRow(), finalRow));
  }
182 
  /**
   * Verifies that a store file opened through an HFileLink (a file whose name
   * encodes the referenced hfile's region and table) is recognized as a link
   * by {@link StoreFileInfo} and yields all rows of the underlying file.
   */
  public void testHFileLink() throws IOException {
    final HRegionInfo hri = new HRegionInfo(TableName.valueOf("testHFileLinkTb"));
    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    FSUtils.setRootDir(testConf, this.testDir);
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(
      testConf, fs, FSUtils.getTableDir(this.testDir, hri.getTableName()), hri);

    // Make a store file and write data to it.
    StoreFile.Writer writer = new StoreFile.WriterBuilder(testConf, cacheConf, this.fs, 8 * 1024)
            .withFilePath(regionFs.createTempName())
            .build();
    writeStoreFile(writer);

    // Commit the real file, then create a link to it under a separate
    // "test-region" directory.
    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    Path dstPath = new Path(regionFs.getTableDir(), new Path("test-region", TEST_FAMILY));
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath = new Path(dstPath,
                  HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // Try to open store file from link
    StoreFileInfo storeFileInfo = new StoreFileInfo(testConf, this.fs, linkFilePath);
    StoreFile hsf = new StoreFile(this.fs, storeFileInfo, testConf, cacheConf,
        BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    assertTrue(storeFileInfo.isLink());

    // Now confirm that I can read from the link.
    // count starts at 1 because seekTo() positions on the first KeyValue
    // and the loop below only counts the subsequent next() calls.
    int count = 1;
    HFileScanner s = hsf.createReader().getScanner(false, false);
    s.seekTo();
    while (s.next()) {
      count++;
    }
    // writeStoreFile wrote exactly one cell per two-character row.
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }
218 
  /**
   * This test creates an hfile and then the dir structures and files to verify that references
   * to hfilelinks (created by snapshot clones) can be properly interpreted.
   */
  public void testReferenceToHFileLink() throws IOException {
    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    FSUtils.setRootDir(testConf, this.testDir);

    // adding legal table name chars to verify regex handles it.
    HRegionInfo hri = new HRegionInfo(TableName.valueOf("_original-evil-name"));
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(
      testConf, fs, FSUtils.getTableDir(this.testDir, hri.getTableName()), hri);

    // Make a store file and write data to it. <root>/<tablename>/<rgn>/<cf>/<file>
    StoreFile.Writer writer = new StoreFile.WriterBuilder(testConf, cacheConf, this.fs, 8 * 1024)
            .withFilePath(regionFs.createTempName())
            .build();
    writeStoreFile(writer);
    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());

    // create link to store file. <root>/clone/region/<cf>/<hfile>-<region>-<table>
    HRegionInfo hriClone = new HRegionInfo(TableName.valueOf("clone"));
    // NOTE(review): the clone region is created under the ORIGINAL table's
    // dir (hri.getTableName(), not hriClone's) — looks intentional for this
    // test's layout, but verify against the comment above if it matters.
    HRegionFileSystem cloneRegionFs = HRegionFileSystem.createRegionOnFileSystem(
      testConf, fs, FSUtils.getTableDir(this.testDir, hri.getTableName()),
        hriClone);
    Path dstPath = cloneRegionFs.getStoreDir(TEST_FAMILY);
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath = new Path(dstPath,
                  HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // create splits of the link.
    // <root>/clone/splitA/<cf>/<reftohfilelink>,
    // <root>/clone/splitB/<cf>/<reftohfilelink>
    HRegionInfo splitHriA = new HRegionInfo(hri.getTableName(), null, SPLITKEY);
    HRegionInfo splitHriB = new HRegionInfo(hri.getTableName(), SPLITKEY, null);
    StoreFile f = new StoreFile(fs, linkFilePath, testConf, cacheConf, BloomType.NONE,
        NoOpDataBlockEncoder.INSTANCE);
    Path pathA = splitStoreFile(cloneRegionFs, splitHriA, TEST_FAMILY, f, SPLITKEY, true); // top
    Path pathB = splitStoreFile(cloneRegionFs, splitHriB, TEST_FAMILY, f, SPLITKEY, false);// bottom

    // OK test the thing
    FSUtils.logFileSystemState(fs, this.testDir, LOG);

    // There is a case where a file with the hfilelink pattern is actually a daughter
    // reference to a hfile link.  This code in StoreFile that handles this case.

    // Try to open store file from link
    StoreFile hsfA = new StoreFile(this.fs, pathA, testConf, cacheConf,
        BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);

    // Now confirm that I can read from the ref to link.
    // count starts at 1: seekTo() lands on the first cell of the top half.
    int count = 1;
    HFileScanner s = hsfA.createReader().getScanner(false, false);
    s.seekTo();
    while (s.next()) {
      count++;
    }
    assertTrue(count > 0); // read some rows here

    // Try to open store file from link
    StoreFile hsfB = new StoreFile(this.fs, pathB, testConf, cacheConf,
        BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);

    // Now confirm that I can read from the ref to link
    HFileScanner sB = hsfB.createReader().getScanner(false, false);
    sB.seekTo();
    
    //count++ as seekTo() will advance the scanner
    count++;
    while (sB.next()) {
      count++;
    }

    // Top half plus bottom half together must cover every written row exactly once.
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }
296 
  /**
   * Splits the given store file at its midkey and verifies that the resulting
   * top/bottom half readers each see exactly their side of the key space.
   * Then repeats the split with keys outside the file's range (below the
   * first key, above the last key) and checks the degenerate cases: the
   * half that would be empty yields a null path, and the other half covers
   * the whole file.  Daughter regions are cleaned up and the backing file is
   * deleted in a finally block.
   */
  private void checkHalfHFile(final HRegionFileSystem regionFs, final StoreFile f)
      throws IOException {
    byte [] midkey = f.createReader().midkey();
    KeyValue midKV = KeyValue.createKeyValueFromKey(midkey);
    byte [] midRow = midKV.getRow();
    // Create top split.
    HRegionInfo topHri = new HRegionInfo(regionFs.getRegionInfo().getTableName(),
        null, midRow);
    Path topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, midRow, true);
    // Create bottom split.
    HRegionInfo bottomHri = new HRegionInfo(regionFs.getRegionInfo().getTableName(),
        midRow, null);
    Path bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, midRow, false);
    // Make readers on top and bottom.
    StoreFile.Reader top = new StoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE,
        NoOpDataBlockEncoder.INSTANCE).createReader();
    StoreFile.Reader bottom = new StoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE,
        NoOpDataBlockEncoder.INSTANCE).createReader();
    ByteBuffer previous = null;
    LOG.info("Midkey: " + midKV.toString());
    ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midkey);
    try {
      // Now make two HalfMapFiles and assert they can read the full backing
      // file, one from the top and the other from the bottom.
      // Test bottom half first.
      // Now test reading from the top.
      boolean first = true;
      ByteBuffer key = null;
      HFileScanner topScanner = top.getScanner(false, false);
      while ((!topScanner.isSeeked() && topScanner.seekTo()) ||
             (topScanner.isSeeked() && topScanner.next())) {
        key = topScanner.getKey();

        // Every key in the top half must be >= midkey.
        if (topScanner.getReader().getComparator().compare(key.array(),
          key.arrayOffset(), key.limit(), midkey, 0, midkey.length) < 0) {
          fail("key=" + Bytes.toStringBinary(key) + " < midkey=" +
              Bytes.toStringBinary(midkey));
        }
        if (first) {
          first = false;
          LOG.info("First in top: " + Bytes.toString(Bytes.toBytes(key)));
        }
      }
      LOG.info("Last in top: " + Bytes.toString(Bytes.toBytes(key)));

      first = true;
      HFileScanner bottomScanner = bottom.getScanner(false, false);
      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) ||
          bottomScanner.next()) {
        previous = bottomScanner.getKey();
        key = bottomScanner.getKey();
        if (first) {
          first = false;
          LOG.info("First in bottom: " +
            Bytes.toString(Bytes.toBytes(previous)));
        }
        // Every key in the bottom half must be strictly below the midkey.
        assertTrue(key.compareTo(bbMidkeyBytes) < 0);
      }
      if (previous != null) {
        LOG.info("Last in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // Next test using a midkey that does not exist in the file.
      // First, do a key that is < than first key. Ensure splits behave
      // properly.
      byte [] badmidkey = Bytes.toBytes("  .");
      assertTrue(fs.exists(f.getPath()));
      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);
      
      // Split key below the first key: the bottom half would be empty, so no
      // reference file is created for it.
      assertNull(bottomPath);
      
      top = new StoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE,
          NoOpDataBlockEncoder.INSTANCE).createReader();
      // Now read from the top.
      first = true;
      topScanner = top.getScanner(false, false);
      while ((!topScanner.isSeeked() && topScanner.seekTo()) ||
          topScanner.next()) {
        key = topScanner.getKey();
        assertTrue(topScanner.getReader().getComparator().compare(key.array(),
          key.arrayOffset(), key.limit(), badmidkey, 0, badmidkey.length) >= 0);
        if (first) {
          first = false;
          KeyValue keyKV = KeyValue.createKeyValueFromKey(key);
          LOG.info("First top when key < bottom: " + keyKV);
          // First row must be "aa" — the top half now spans the whole file.
          String tmp = Bytes.toString(keyKV.getRow());
          for (int i = 0; i < tmp.length(); i++) {
            assertTrue(tmp.charAt(i) == 'a');
          }
        }
      }
      KeyValue keyKV = KeyValue.createKeyValueFromKey(key);
      LOG.info("Last top when key < bottom: " + keyKV);
      // Last row must be "zz".
      String tmp = Bytes.toString(keyKV.getRow());
      for (int i = 0; i < tmp.length(); i++) {
        assertTrue(tmp.charAt(i) == 'z');
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // Test when badkey is > than last key in file ('||' > 'zz').
      badmidkey = Bytes.toBytes("|||");
      topPath = splitStoreFile(regionFs,topHri, TEST_FAMILY, f, badmidkey, true);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);
      // Split key above the last key: the top half would be empty this time.
      assertNull(topPath);
      bottom = new StoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE,
          NoOpDataBlockEncoder.INSTANCE).createReader();
      first = true;
      bottomScanner = bottom.getScanner(false, false);
      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) ||
          bottomScanner.next()) {
        key = bottomScanner.getKey();
        if (first) {
          first = false;
          keyKV = KeyValue.createKeyValueFromKey(key);
          LOG.info("First bottom when key > top: " + keyKV);
          tmp = Bytes.toString(keyKV.getRow());
          for (int i = 0; i < tmp.length(); i++) {
            assertTrue(tmp.charAt(i) == 'a');
          }
        }
      }
      keyKV = KeyValue.createKeyValueFromKey(key);
      LOG.info("Last bottom when key > top: " + keyKV);
      for (int i = 0; i < tmp.length(); i++) {
        assertTrue(Bytes.toString(keyKV.getRow()).charAt(i) == 'z');
      }
    } finally {
      if (top != null) {
        top.close(true); // evict since we are about to delete the file
      }
      if (bottom != null) {
        bottom.close(true); // evict since we are about to delete the file
      }
      fs.delete(f.getPath(), true);
    }
  }
439 
440   private static final String localFormatter = "%010d";
441 
  /**
   * Writes the 1000 even-numbered rows in [0, 2000) through the given writer,
   * reopens the file, and probes all 2000 rows through the bloom filter:
   * asserts zero false negatives and a false-positive count within 2x of the
   * configured error rate.  Deletes the file when done.
   */
  private void bloomWriteRead(StoreFile.Writer writer, FileSystem fs) throws Exception {
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
    Path f = writer.getPath();
    long now = System.currentTimeMillis();
    // Only even rows are written; odd rows probe for false positives below.
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(row.getBytes(), "family".getBytes(),
        "col".getBytes(), now, "value".getBytes());
      writer.append(kv);
    }
    writer.close();

    StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf,
        DataBlockEncoding.NONE);
    reader.loadFileInfo();
    reader.loadBloomfilter();
    StoreFileScanner scanner = reader.getStoreFileScanner(false, false);

    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
      columns.add("family:col".getBytes());

      Scan scan = new Scan(row.getBytes(),row.getBytes());
      scan.addColumn("family".getBytes(), "family:col".getBytes());
      boolean exists = scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE);
      if (i % 2 == 0) {
        // Row was written: a "no" here is a false negative (never allowed).
        if (!exists) falseNeg++;
      } else {
        // Row was not written: a "yes" here is a bloom false positive.
        if (exists) falsePos++;
      }
    }
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
    // Allow 2x the expected error rate for test stability.
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err
        + ", expected no more than " + maxFalsePos + ")",
        falsePos <= maxFalsePos);
  }
485 
486   public void testBloomFilter() throws Exception {
487     FileSystem fs = FileSystem.getLocal(conf);
488     conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
489     conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
490 
491     // write the file
492     Path f = new Path(ROOT_DIR, getName());
493     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs,
494         StoreFile.DEFAULT_BLOCKSIZE_SMALL)
495             .withFilePath(f)
496             .withBloomType(BloomType.ROW)
497             .withMaxKeyCount(2000)
498             .withChecksumType(CKTYPE)
499             .withBytesPerChecksum(CKBYTES)
500             .build();
501     bloomWriteRead(writer, fs);
502   }
503 
  /**
   * Writes 1000 DeleteFamily markers (even-numbered rows) and verifies the
   * delete-family bloom filter: correct marker count, zero false negatives,
   * and false positives within 2x of the configured error rate.
   */
  public void testDeleteFamilyBloomFilter() throws Exception {
    FileSystem fs = FileSystem.getLocal(conf);
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);

    // write the file
    Path f = new Path(ROOT_DIR, getName());

    StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
        fs, StoreFile.DEFAULT_BLOCKSIZE_SMALL)
            .withFilePath(f)
            .withMaxKeyCount(2000)
            .withChecksumType(CKTYPE)
            .withBytesPerChecksum(CKBYTES)
            .build();

    // add delete family markers for the even-numbered rows only
    long now = System.currentTimeMillis();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(row.getBytes(), "family".getBytes(),
          "col".getBytes(), now, KeyValue.Type.DeleteFamily, "value".getBytes());
      writer.append(kv);
    }
    writer.close();

    StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, DataBlockEncoding.NONE);
    reader.loadFileInfo();
    reader.loadBloomfilter();

    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      byte[] rowKey = Bytes.toBytes(row);
      boolean exists = reader.passesDeleteFamilyBloomFilter(rowKey, 0,
          rowKey.length);
      if (i % 2 == 0) {
        // Written row: a miss is a false negative (never allowed).
        if (!exists)
          falseNeg++;
      } else {
        // Unwritten row: a hit is a bloom false positive.
        if (exists)
          falsePos++;
      }
    }
    // 1000 even rows in [0, 2000) were written as DeleteFamily markers.
    assertEquals(1000, reader.getDeleteFamilyCnt());
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
    // Allow 2x the expected error rate for test stability.
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err
        + ", expected no more than " + maxFalsePos, falsePos <= maxFalsePos);
  }
559 
560   /**
561    * Test for HBASE-8012
562    */
563   public void testReseek() throws Exception {
564     // write the file
565     Path f = new Path(ROOT_DIR, getName());
566 
567     // Make a store file and write data to it.
568     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
569          this.fs, 8 * 1024)
570             .withFilePath(f)
571             .build();
572 
573     writeStoreFile(writer);
574     writer.close();
575 
576     StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, DataBlockEncoding.NONE);
577 
578     // Now do reseek with empty KV to position to the beginning of the file
579 
580     KeyValue k = KeyValue.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY);
581     StoreFileScanner s = reader.getStoreFileScanner(false, false);
582     s.reseek(k);
583 
584     assertNotNull("Intial reseek should position at the beginning of the file", s.peek());
585   }
586 
  /**
   * Exercises both ROWCOL and ROW bloom types over the same row/column grid
   * (even rows/columns written, odd ones probed), asserting the bloom key
   * count, zero false negatives, and false positives under 2x the expected
   * error for each type.
   */
  public void testBloomTypes() throws Exception {
    float err = (float) 0.01;
    FileSystem fs = FileSystem.getLocal(conf);
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    int rowCount = 50;
    int colCount = 10;
    int versions = 2;

    // run once using columns and once using rows
    BloomType[] bt = {BloomType.ROWCOL, BloomType.ROW};
    int[] expKeys  = {rowCount*colCount, rowCount};
    // below line deserves commentary.  it is expected bloom false positives
    //  column = rowCount*2*colCount inserts
    //  row-level = only rowCount*2 inserts, but failures will be magnified by
    //              2nd for loop for every column (2*colCount)
    float[] expErr   = {2*rowCount*colCount*err, 2*rowCount*2*colCount*err};

    for (int x : new int[]{0,1}) {
      // write the file
      Path f = new Path(ROOT_DIR, getName() + x);
      StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
          fs, StoreFile.DEFAULT_BLOCKSIZE_SMALL)
              .withFilePath(f)
              .withBloomType(bt[x])
              .withMaxKeyCount(expKeys[x])
              .withChecksumType(CKTYPE)
              .withBytesPerChecksum(CKBYTES)
              .build();

      // Write only the even rows and even columns; odd ones probe the bloom.
      long now = System.currentTimeMillis();
      for (int i = 0; i < rowCount*2; i += 2) { // rows
        for (int j = 0; j < colCount*2; j += 2) {   // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          for (int k= 0; k < versions; ++k) { // versions
            KeyValue kv = new KeyValue(row.getBytes(),
              "family".getBytes(), ("col" + col).getBytes(),
                now-k, Bytes.toBytes((long)-1));
            writer.append(kv);
          }
        }
      }
      writer.close();

      StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf,
          DataBlockEncoding.NONE);
      reader.loadFileInfo();
      reader.loadBloomfilter();
      StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
      // Bloom must contain exactly one key per row (ROW) or row+col (ROWCOL).
      assertEquals(expKeys[x], reader.generalBloomFilter.getKeyCount());

      // check false positives rate
      int falsePos = 0;
      int falseNeg = 0;
      for (int i = 0; i < rowCount*2; ++i) { // rows
        for (int j = 0; j < colCount*2; ++j) {   // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
          columns.add(("col" + col).getBytes());

          Scan scan = new Scan(row.getBytes(),row.getBytes());
          scan.addColumn("family".getBytes(), ("col"+col).getBytes());
          boolean exists =
              scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE);
          boolean shouldRowExist = i % 2 == 0;
          boolean shouldColExist = j % 2 == 0;
          // A ROW bloom cannot discriminate columns, so any column in an
          // existing row counts as a hit.
          shouldColExist = shouldColExist || bt[x] == BloomType.ROW;
          if (shouldRowExist && shouldColExist) {
            if (!exists) falseNeg++;
          } else {
            if (exists) falsePos++;
          }
        }
      }
      reader.close(true); // evict because we are about to delete the file
      fs.delete(f, true);
      System.out.println(bt[x].toString());
      System.out.println("  False negatives: " + falseNeg);
      System.out.println("  False positives: " + falsePos);
      assertEquals(0, falseNeg);
      assertTrue(falsePos < 2*expErr[x]);
    }
  }
673 
  /**
   * Verifies that {@link StoreFile.Comparators#SEQ_ID} sorts store files into
   * exactly the order they are listed in here (assertOrdering shuffles and
   * re-sorts them).  Mock arguments are
   * (bulkLoad, size, bulkTimestamp, seqId, path) — see mockStoreFile().
   */
  public void testSeqIdComparator() {
    assertOrdering(StoreFile.Comparators.SEQ_ID,
        mockStoreFile(true,  100,   1000, -1, "/foo/123"),
        mockStoreFile(true,  100,   1000, -1, "/foo/124"),
        mockStoreFile(true,  99,    1000, -1, "/foo/126"),
        mockStoreFile(true,  98,    2000, -1, "/foo/126"),
        mockStoreFile(false, 3453, -1,     1, "/foo/1"),
        mockStoreFile(false, 2,    -1,     3, "/foo/2"),
        mockStoreFile(false, 1000, -1,     5, "/foo/2"),
        mockStoreFile(false, 76,   -1,     5, "/foo/3"));
  }
685 
686   /**
687    * Assert that the given comparator orders the given storefiles in the
688    * same way that they're passed.
689    */
690   private void assertOrdering(Comparator<StoreFile> comparator, StoreFile ... sfs) {
691     ArrayList<StoreFile> sorted = Lists.newArrayList(sfs);
692     Collections.shuffle(sorted);
693     Collections.sort(sorted, comparator);
694     LOG.debug("sfs: " + Joiner.on(",").join(sfs));
695     LOG.debug("sorted: " + Joiner.on(",").join(sorted));
696     assertTrue(Iterables.elementsEqual(Arrays.asList(sfs), sorted));
697   }
698 
699   /**
700    * Create a mock StoreFile with the given attributes.
701    */
702   private StoreFile mockStoreFile(boolean bulkLoad,
703                                   long size,
704                                   long bulkTimestamp,
705                                   long seqId,
706                                   String path) {
707     StoreFile mock = Mockito.mock(StoreFile.class);
708     StoreFile.Reader reader = Mockito.mock(StoreFile.Reader.class);
709 
710     Mockito.doReturn(size).when(reader).length();
711 
712     Mockito.doReturn(reader).when(mock).getReader();
713     Mockito.doReturn(bulkLoad).when(mock).isBulkLoadResult();
714     Mockito.doReturn(bulkTimestamp).when(mock).getBulkLoadTimestamp();
715     Mockito.doReturn(seqId).when(mock).getMaxSequenceId();
716     Mockito.doReturn(new Path(path)).when(mock).getPath();
717     String name = "mock storefile, bulkLoad=" + bulkLoad +
718       " bulkTimestamp=" + bulkTimestamp +
719       " seqId=" + seqId +
720       " path=" + path;
721     Mockito.doReturn(name).when(mock).toString();
722     return mock;
723   }
724 
725   /**
726    * Generate a list of KeyValues for testing based on given parameters
727    * @param timestamps
728    * @param numRows
729    * @param qualifier
730    * @param family
731    * @return
732    */
733   List<KeyValue> getKeyValueSet(long[] timestamps, int numRows,
734       byte[] qualifier, byte[] family) {
735     List<KeyValue> kvList = new ArrayList<KeyValue>();
736     for (int i=1;i<=numRows;i++) {
737       byte[] b = Bytes.toBytes(i) ;
738       LOG.info(Bytes.toString(b));
739       LOG.info(Bytes.toString(b));
740       for (long timestamp: timestamps)
741       {
742         kvList.add(new KeyValue(b, family, qualifier, timestamp, b));
743       }
744     }
745     return kvList;
746   }
747 
748   /**
749    * Test to ensure correctness when using StoreFile with multiple timestamps
750    * @throws IOException
751    */
752   public void testMultipleTimestamps() throws IOException {
753     byte[] family = Bytes.toBytes("familyname");
754     byte[] qualifier = Bytes.toBytes("qualifier");
755     int numRows = 10;
756     long[] timestamps = new long[] {20,10,5,1};
757     Scan scan = new Scan();
758 
759     // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
760     Path storedir = new Path(new Path(this.testDir, "7e0102"), "familyname");
761     Path dir = new Path(storedir, "1234567890");
762     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
763         this.fs, 8 * 1024)
764             .withOutputDir(dir)
765             .build();
766 
767     List<KeyValue> kvList = getKeyValueSet(timestamps,numRows,
768         family, qualifier);
769 
770     for (KeyValue kv : kvList) {
771       writer.append(kv);
772     }
773     writer.appendMetadata(0, false);
774     writer.close();
775 
776     StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
777         BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
778     StoreFile.Reader reader = hsf.createReader();
779     StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
780     TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
781     columns.add(qualifier);
782 
783     scan.setTimeRange(20, 100);
784     assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
785 
786     scan.setTimeRange(1, 2);
787     assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
788 
789     scan.setTimeRange(8, 10);
790     assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
791 
792     scan.setTimeRange(7, 50);
793     assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
794 
795     // This test relies on the timestamp range optimization
796     scan.setTimeRange(27, 50);
797     assertTrue(!scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
798   }
799 
  /**
   * Exercises cache-on-write and evict-on-close against the block cache's
   * hit/miss/eviction counters. Writes one file with cache-on-write off and
   * one with it on, verifies misses vs. hits on first read of each, verifies
   * a full cached re-read of both, then checks that closing a reader evicts
   * its blocks only when evict-on-close is enabled.
   *
   * NOTE: the start{Hit,Miss,Evicted} counters are advanced after each phase;
   * the statement order below is load-bearing and must not be rearranged.
   */
  public void testCacheOnWriteEvictOnClose() throws Exception {
    Configuration conf = this.conf;

    // Find a home for our files (regiondir ("7e0102") and familyname).
    Path baseDir = new Path(new Path(this.testDir, "7e0102"),"twoCOWEOC");

    // Grab the block cache and get the initial hit/miss counts
    BlockCache bc = new CacheConfig(conf).getBlockCache();
    assertNotNull(bc);
    CacheStats cs = bc.getStats();
    long startHit = cs.getHitCount();
    long startMiss = cs.getMissCount();
    long startEvicted = cs.getEvictedCount();

    // Let's write a StoreFile with three blocks, with cache on write off
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, false);
    CacheConfig cacheConf = new CacheConfig(conf);
    Path pathCowOff = new Path(baseDir, "123456789");
    StoreFile.Writer writer = writeStoreFile(conf, cacheConf, pathCowOff, 3);
    StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
        BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    LOG.debug(hsf.getPath().toString());

    // Read this file, we should see 3 misses
    StoreFile.Reader reader = hsf.createReader();
    reader.loadFileInfo();
    StoreFileScanner scanner = reader.getStoreFileScanner(true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    // Drain the scanner so every data block gets fetched (and cached on read).
    while (scanner.next() != null);
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss + 3, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startMiss += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Now write a StoreFile with three blocks, with cache on write on
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);
    cacheConf = new CacheConfig(conf);
    Path pathCowOn = new Path(baseDir, "123456788");
    writer = writeStoreFile(conf, cacheConf, pathCowOn, 3);
    hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
        BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);

    // Read this file, we should see 3 hits
    reader = hsf.createReader();
    scanner = reader.getStoreFileScanner(true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null);
    assertEquals(startHit + 3, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Let's read back the two files to ensure the blocks exactly match
    hsf = new StoreFile(this.fs, pathCowOff, conf, cacheConf,
        BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    StoreFile.Reader readerOne = hsf.createReader();
    readerOne.loadFileInfo();
    StoreFileScanner scannerOne = readerOne.getStoreFileScanner(true, true);
    scannerOne.seek(KeyValue.LOWESTKEY);
    hsf = new StoreFile(this.fs, pathCowOn, conf, cacheConf,
        BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    StoreFile.Reader readerTwo = hsf.createReader();
    readerTwo.loadFileInfo();
    StoreFileScanner scannerTwo = readerTwo.getStoreFileScanner(true, true);
    scannerTwo.seek(KeyValue.LOWESTKEY);
    KeyValue kv1 = null;
    KeyValue kv2 = null;
    // Both files hold the same KVs; compare them pairwise, key and value.
    while ((kv1 = scannerOne.next()) != null) {
      kv2 = scannerTwo.next();
      assertTrue(kv1.equals(kv2));
      assertTrue(Bytes.compareTo(
          kv1.getBuffer(), kv1.getKeyOffset(), kv1.getKeyLength(), 
          kv2.getBuffer(), kv2.getKeyOffset(), kv2.getKeyLength()) == 0);
      assertTrue(Bytes.compareTo(
          kv1.getBuffer(), kv1.getValueOffset(), kv1.getValueLength(),
          kv2.getBuffer(), kv2.getValueOffset(), kv2.getValueLength()) == 0);
    }
    assertNull(scannerTwo.next());
    // All 6 blocks (3 per file) were already cached above, so 6 new hits.
    assertEquals(startHit + 6, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 6;
    scannerOne.close();
    readerOne.close(cacheConf.shouldEvictOnClose());
    scannerTwo.close();
    readerTwo.close(cacheConf.shouldEvictOnClose());

    // Let's close the first file with evict on close turned on
    conf.setBoolean("hbase.rs.evictblocksonclose", true);
    cacheConf = new CacheConfig(conf);
    hsf = new StoreFile(this.fs, pathCowOff, conf, cacheConf,
        BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    reader = hsf.createReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We should have 3 new evictions
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted + 3, cs.getEvictedCount());
    startEvicted += 3;

    // Let's close the second file with evict on close turned off
    conf.setBoolean("hbase.rs.evictblocksonclose", false);
    cacheConf = new CacheConfig(conf);
    hsf = new StoreFile(this.fs, pathCowOn, conf, cacheConf,
        BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    reader = hsf.createReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We expect no changes
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
  }
918 
919   private Path splitStoreFile(final HRegionFileSystem regionFs, final HRegionInfo hri,
920       final String family, final StoreFile sf, final byte[] splitKey, boolean isTopRef)
921       throws IOException {
922     FileSystem fs = regionFs.getFileSystem();
923     Path path = regionFs.splitStoreFile(hri, family, sf, splitKey, isTopRef);
924     if (null == path) {
925       return null;
926     }
927     Path regionDir = regionFs.commitDaughterRegion(hri);
928     return new Path(new Path(regionDir, family), path.getName());
929   }
930 
931   private StoreFile.Writer writeStoreFile(Configuration conf,
932       CacheConfig cacheConf, Path path, int numBlocks)
933   throws IOException {
934     // Let's put ~5 small KVs in each block, so let's make 5*numBlocks KVs
935     int numKVs = 5 * numBlocks;
936     List<KeyValue> kvs = new ArrayList<KeyValue>(numKVs);
937     byte [] b = Bytes.toBytes("x");
938     int totalSize = 0;
939     for (int i=numKVs;i>0;i--) {
940       KeyValue kv = new KeyValue(b, b, b, i, b);
941       kvs.add(kv);
942       // kv has memstoreTS 0, which takes 1 byte to store.
943       totalSize += kv.getLength() + 1;
944     }
945     int blockSize = totalSize / numBlocks;
946     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs, blockSize)
947             .withFilePath(path)
948             .withMaxKeyCount(2000)
949             .withChecksumType(CKTYPE)
950             .withBytesPerChecksum(CKBYTES)
951             .build();
952     // We'll write N-1 KVs to ensure we don't write an extra block
953     kvs.remove(kvs.size()-1);
954     for (KeyValue kv : kvs) {
955       writer.append(kv);
956     }
957     writer.appendMetadata(0, false);
958     writer.close();
959     return writer;
960   }
961 
962   /**
963    * Check if data block encoding information is saved correctly in HFile's
964    * file info.
965    */
966   public void testDataBlockEncodingMetaData() throws IOException {
967     // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
968     Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
969     Path path = new Path(dir, "1234567890");
970 
971     DataBlockEncoding dataBlockEncoderAlgo =
972         DataBlockEncoding.FAST_DIFF;
973     HFileDataBlockEncoder dataBlockEncoder =
974         new HFileDataBlockEncoderImpl(
975             dataBlockEncoderAlgo,
976             dataBlockEncoderAlgo);
977     cacheConf = new CacheConfig(conf);
978     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs,
979         HConstants.DEFAULT_BLOCKSIZE)
980             .withFilePath(path)
981             .withDataBlockEncoder(dataBlockEncoder)
982             .withMaxKeyCount(2000)
983             .withChecksumType(CKTYPE)
984             .withBytesPerChecksum(CKBYTES)
985             .build();
986     writer.close();
987 
988     StoreFile storeFile = new StoreFile(fs, writer.getPath(), conf,
989         cacheConf, BloomType.NONE, dataBlockEncoder);
990     StoreFile.Reader reader = storeFile.createReader();
991 
992     Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
993     byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
994     assertEquals(dataBlockEncoderAlgo.getNameInBytes(), value);
995   }
996 }
997