View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  import java.util.ArrayList;
24  import java.util.Arrays;
25  import java.util.Collections;
26  import java.util.Comparator;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.TreeSet;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FileSystem;
35  import org.apache.hadoop.fs.Path;
36  import org.apache.hadoop.hbase.TableName;
37  import org.apache.hadoop.hbase.HBaseTestCase;
38  import org.apache.hadoop.hbase.HBaseTestingUtility;
39  import org.apache.hadoop.hbase.HConstants;
40  import org.apache.hadoop.hbase.HRegionInfo;
41  import org.apache.hadoop.hbase.KeyValue;
42  import org.apache.hadoop.hbase.SmallTests;
43  import org.apache.hadoop.hbase.client.Scan;
44  import org.apache.hadoop.hbase.io.HFileLink;
45  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
46  import org.apache.hadoop.hbase.io.hfile.BlockCache;
47  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
48  import org.apache.hadoop.hbase.io.hfile.CacheStats;
49  import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
50  import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
51  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
52  import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
53  import org.apache.hadoop.hbase.util.BloomFilterFactory;
54  import org.apache.hadoop.hbase.util.Bytes;
55  import org.apache.hadoop.hbase.util.ChecksumType;
56  import org.apache.hadoop.hbase.util.FSUtils;
57  import org.junit.experimental.categories.Category;
58  import org.mockito.Mockito;
59  
60  import com.google.common.base.Joiner;
61  import com.google.common.collect.Iterables;
62  import com.google.common.collect.Lists;
63  
64  /**
65   * Test HStoreFile
66   */
67  @Category(SmallTests.class)
68  public class TestStoreFile extends HBaseTestCase {
  static final Log LOG = LogFactory.getLog(TestStoreFile.class);
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  // Cache configuration shared by all tests; built from the test utility's conf.
  private CacheConfig cacheConf =  new CacheConfig(TEST_UTIL.getConfiguration());
  // Root directory under the test data dir where standalone store files are written.
  private static String ROOT_DIR = TEST_UTIL.getDataTestDir("TestStoreFile").toString();
  // Checksum settings used by the bloom-filter writer builders below.
  private static final ChecksumType CKTYPE = ChecksumType.CRC32;
  private static final int CKBYTES = 512;
  // Column family name used when committing store files to a region.
  private static String TEST_FAMILY = "cf";
76  
  /** Delegates to {@link HBaseTestCase#setUp()}; no extra per-test setup is needed here. */
  @Override
  public void setUp() throws Exception {
    super.setUp();
  }
81  
  /** Delegates to {@link HBaseTestCase#tearDown()}; no extra per-test cleanup is needed here. */
  @Override
  public void tearDown() throws Exception {
    super.tearDown();
  }
86  
87    /**
88     * Write a file and then assert that we can read from top and bottom halves
89     * using two HalfMapFiles.
90     * @throws Exception
91     */
92    public void testBasicHalfMapFile() throws Exception {
93      final HRegionInfo hri =
94          new HRegionInfo(TableName.valueOf("testBasicHalfMapFileTb"));
95      HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(
96        conf, fs, new Path(this.testDir, hri.getTable().getNameAsString()), hri);
97  
98      StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs, 2 * 1024)
99              .withFilePath(regionFs.createTempName())
100             .build();
101     writeStoreFile(writer);
102 
103     Path sfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
104     StoreFile sf = new StoreFile(this.fs, sfPath, conf, cacheConf,
105       BloomType.NONE);
106     checkHalfHFile(regionFs, sf);
107   }
108 
  /** Fills {@code writer} using the test name as both family and qualifier, then closes it. */
  private void writeStoreFile(final StoreFile.Writer writer) throws IOException {
    writeStoreFile(writer, Bytes.toBytes(getName()), Bytes.toBytes(getName()));
  }
112 
  // Pick a split point (roughly halfway through the [FIRST_CHAR, LAST_CHAR] row space).
  byte[] SPLITKEY = new byte[] { (LAST_CHAR + FIRST_CHAR)/2, FIRST_CHAR};
115 
116   /*
117    * Writes HStoreKey and ImmutableBytes data to passed writer and
118    * then closes it.
119    * @param writer
120    * @throws IOException
121    */
122   public static void writeStoreFile(final StoreFile.Writer writer, byte[] fam, byte[] qualifier)
123   throws IOException {
124     long now = System.currentTimeMillis();
125     try {
126       for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
127         for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
128           byte[] b = new byte[] { (byte) d, (byte) e };
129           writer.append(new KeyValue(b, fam, qualifier, now, b));
130         }
131       }
132     } finally {
133       writer.close();
134     }
135   }
136 
137   /**
138    * Test that our mechanism of writing store files in one region to reference
139    * store files in other regions works.
140    * @throws IOException
141    */
142   public void testReference() throws IOException {
143     final HRegionInfo hri = new HRegionInfo(TableName.valueOf("testReferenceTb"));
144     HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(
145       conf, fs, new Path(this.testDir, hri.getTable().getNameAsString()), hri);
146 
147     // Make a store file and write data to it.
148     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs, 8 * 1024)
149             .withFilePath(regionFs.createTempName())
150             .build();
151     writeStoreFile(writer);
152 
153     Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
154     StoreFile hsf = new StoreFile(this.fs, hsfPath, conf, cacheConf,
155       BloomType.NONE);
156     StoreFile.Reader reader = hsf.createReader();
157     // Split on a row, not in middle of row.  Midkey returned by reader
158     // may be in middle of row.  Create new one with empty column and
159     // timestamp.
160     KeyValue kv = KeyValue.createKeyValueFromKey(reader.midkey());
161     byte [] midRow = kv.getRow();
162     kv = KeyValue.createKeyValueFromKey(reader.getLastKey());
163     byte [] finalRow = kv.getRow();
164     // Make a reference
165     HRegionInfo splitHri = new HRegionInfo(hri.getTable(), null, midRow);
166     Path refPath = splitStoreFile(regionFs, splitHri, TEST_FAMILY, hsf, midRow, true);
167     StoreFile refHsf = new StoreFile(this.fs, refPath, conf, cacheConf,
168       BloomType.NONE);
169     // Now confirm that I can read from the reference and that it only gets
170     // keys from top half of the file.
171     HFileScanner s = refHsf.createReader().getScanner(false, false);
172     for(boolean first = true; (!s.isSeeked() && s.seekTo()) || s.next();) {
173       ByteBuffer bb = s.getKey();
174       kv = KeyValue.createKeyValueFromKey(bb);
175       if (first) {
176         assertTrue(Bytes.equals(kv.getRow(), midRow));
177         first = false;
178       }
179     }
180     assertTrue(Bytes.equals(kv.getRow(), finalRow));
181   }
182 
  /**
   * Write a store file, create an {@link HFileLink} pointing at it, and verify that
   * a StoreFile opened through the link reads back every row that was written.
   */
  public void testHFileLink() throws IOException {
    final HRegionInfo hri = new HRegionInfo(TableName.valueOf("testHFileLinkTb"));
    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    FSUtils.setRootDir(testConf, this.testDir);
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(
      testConf, fs, FSUtils.getTableDir(this.testDir, hri.getTable()), hri);

    // Make a store file and write data to it.
    StoreFile.Writer writer = new StoreFile.WriterBuilder(testConf, cacheConf, this.fs, 8 * 1024)
            .withFilePath(regionFs.createTempName())
            .build();
    writeStoreFile(writer);

    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    Path dstPath = new Path(regionFs.getTableDir(), new Path("test-region", TEST_FAMILY));
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath = new Path(dstPath,
                  HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // Try to open store file from link
    StoreFileInfo storeFileInfo = new StoreFileInfo(testConf, this.fs, linkFilePath);
    StoreFile hsf = new StoreFile(this.fs, storeFileInfo, testConf, cacheConf,
      BloomType.NONE);
    assertTrue(storeFileInfo.isLink());

    // Now confirm that I can read from the link.
    // count starts at 1 because seekTo() positions on the first cell and the
    // while loop only counts the cells reached via next().
    int count = 1;
    HFileScanner s = hsf.createReader().getScanner(false, false);
    s.seekTo();
    while (s.next()) {
      count++;
    }
    // writeStoreFile wrote one cell per two-char row over the full char range.
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }
218 
219   /**
220    * This test creates an hfile and then the dir structures and files to verify that references
221    * to hfilelinks (created by snapshot clones) can be properly interpreted.
222    */
223   public void testReferenceToHFileLink() throws IOException {
224     // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
225     Configuration testConf = new Configuration(this.conf);
226     FSUtils.setRootDir(testConf, this.testDir);
227 
228     // adding legal table name chars to verify regex handles it.
229     HRegionInfo hri = new HRegionInfo(TableName.valueOf("_original-evil-name"));
230     HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(
231       testConf, fs, FSUtils.getTableDir(this.testDir, hri.getTable()), hri);
232 
233     // Make a store file and write data to it. <root>/<tablename>/<rgn>/<cf>/<file>
234     StoreFile.Writer writer = new StoreFile.WriterBuilder(testConf, cacheConf, this.fs, 8 * 1024)
235             .withFilePath(regionFs.createTempName())
236             .build();
237     writeStoreFile(writer);
238     Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
239 
240     // create link to store file. <root>/clone/region/<cf>/<hfile>-<region>-<table>
241     HRegionInfo hriClone = new HRegionInfo(TableName.valueOf("clone"));
242     HRegionFileSystem cloneRegionFs = HRegionFileSystem.createRegionOnFileSystem(
243       testConf, fs, FSUtils.getTableDir(this.testDir, hri.getTable()),
244         hriClone);
245     Path dstPath = cloneRegionFs.getStoreDir(TEST_FAMILY);
246     HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
247     Path linkFilePath = new Path(dstPath,
248                   HFileLink.createHFileLinkName(hri, storeFilePath.getName()));
249 
250     // create splits of the link.
251     // <root>/clone/splitA/<cf>/<reftohfilelink>,
252     // <root>/clone/splitB/<cf>/<reftohfilelink>
253     HRegionInfo splitHriA = new HRegionInfo(hri.getTable(), null, SPLITKEY);
254     HRegionInfo splitHriB = new HRegionInfo(hri.getTable(), SPLITKEY, null);
255     StoreFile f = new StoreFile(fs, linkFilePath, testConf, cacheConf, BloomType.NONE);
256     Path pathA = splitStoreFile(cloneRegionFs, splitHriA, TEST_FAMILY, f, SPLITKEY, true); // top
257     Path pathB = splitStoreFile(cloneRegionFs, splitHriB, TEST_FAMILY, f, SPLITKEY, false);// bottom
258 
259     // OK test the thing
260     FSUtils.logFileSystemState(fs, this.testDir, LOG);
261 
262     // There is a case where a file with the hfilelink pattern is actually a daughter
263     // reference to a hfile link.  This code in StoreFile that handles this case.
264 
265     // Try to open store file from link
266     StoreFile hsfA = new StoreFile(this.fs, pathA, testConf, cacheConf,
267       BloomType.NONE);
268 
269     // Now confirm that I can read from the ref to link
270     int count = 1;
271     HFileScanner s = hsfA.createReader().getScanner(false, false);
272     s.seekTo();
273     while (s.next()) {
274       count++;
275     }
276     assertTrue(count > 0); // read some rows here
277 
278     // Try to open store file from link
279     StoreFile hsfB = new StoreFile(this.fs, pathB, testConf, cacheConf,
280       BloomType.NONE);
281 
282     // Now confirm that I can read from the ref to link
283     HFileScanner sB = hsfB.createReader().getScanner(false, false);
284     sB.seekTo();
285     
286     //count++ as seekTo() will advance the scanner
287     count++;
288     while (sB.next()) {
289       count++;
290     }
291 
292     // read the rest of the rows
293     assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
294   }
295 
  /**
   * Splits {@code f} into top/bottom half references at its midkey and verifies:
   * (1) every key in the top half is >= midkey and every key in the bottom half is
   * < midkey; (2) splitting on a key below the first key yields only a top half
   * (bottom path is null) containing the whole file; (3) splitting on a key above
   * the last key yields only a bottom half (top path is null) containing the whole
   * file. Cleans up daughter regions between phases and deletes the file at the end.
   *
   * @param regionFs region filesystem the parent file lives in
   * @param f the store file to split and read back
   * @throws IOException on any filesystem or reader failure
   */
  private void checkHalfHFile(final HRegionFileSystem regionFs, final StoreFile f)
      throws IOException {
    byte [] midkey = f.createReader().midkey();
    KeyValue midKV = KeyValue.createKeyValueFromKey(midkey);
    byte [] midRow = midKV.getRow();
    // Create top split.
    HRegionInfo topHri = new HRegionInfo(regionFs.getRegionInfo().getTable(),
        null, midRow);
    Path topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, midRow, true);
    // Create bottom split.
    HRegionInfo bottomHri = new HRegionInfo(regionFs.getRegionInfo().getTable(),
        midRow, null);
    Path bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, midRow, false);
    // Make readers on top and bottom.
    StoreFile.Reader top = new StoreFile(this.fs, topPath, conf, cacheConf,
      BloomType.NONE).createReader();
    StoreFile.Reader bottom = new StoreFile(this.fs, bottomPath, conf, cacheConf,
      BloomType.NONE).createReader();
    ByteBuffer previous = null;
    LOG.info("Midkey: " + midKV.toString());
    ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midkey);
    try {
      // Now make two HalfMapFiles and assert they can read the full backing
      // file, one from the top and the other from the bottom.
      // Test bottom half first.
      // Now test reading from the top.
      boolean first = true;
      ByteBuffer key = null;
      HFileScanner topScanner = top.getScanner(false, false);
      while ((!topScanner.isSeeked() && topScanner.seekTo()) ||
             (topScanner.isSeeked() && topScanner.next())) {
        key = topScanner.getKey();

        // Every key in the top half must compare >= midkey.
        if (topScanner.getReader().getComparator().compareFlatKey(key.array(),
          key.arrayOffset(), key.limit(), midkey, 0, midkey.length) < 0) {
          fail("key=" + Bytes.toStringBinary(key) + " < midkey=" +
              Bytes.toStringBinary(midkey));
        }
        if (first) {
          first = false;
          LOG.info("First in top: " + Bytes.toString(Bytes.toBytes(key)));
        }
      }
      LOG.info("Last in top: " + Bytes.toString(Bytes.toBytes(key)));

      first = true;
      HFileScanner bottomScanner = bottom.getScanner(false, false);
      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) ||
          bottomScanner.next()) {
        previous = bottomScanner.getKey();
        key = bottomScanner.getKey();
        if (first) {
          first = false;
          LOG.info("First in bottom: " +
            Bytes.toString(Bytes.toBytes(previous)));
        }
        // Every key in the bottom half must be strictly below the midkey.
        assertTrue(key.compareTo(bbMidkeyBytes) < 0);
      }
      if (previous != null) {
        LOG.info("Last in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // Next test using a midkey that does not exist in the file.
      // First, do a key that is < than first key. Ensure splits behave
      // properly.
      byte [] badmidkey = Bytes.toBytes("  .");
      assertTrue(fs.exists(f.getPath()));
      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);

      // No bottom half exists when the split key precedes every key in the file.
      assertNull(bottomPath);

      top = new StoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE).createReader();
      // Now read from the top.
      first = true;
      topScanner = top.getScanner(false, false);
      while ((!topScanner.isSeeked() && topScanner.seekTo()) ||
          topScanner.next()) {
        key = topScanner.getKey();
        assertTrue(topScanner.getReader().getComparator().compareFlatKey(key.array(),
          key.arrayOffset(), key.limit(), badmidkey, 0, badmidkey.length) >= 0);
        if (first) {
          first = false;
          KeyValue keyKV = KeyValue.createKeyValueFromKey(key);
          LOG.info("First top when key < bottom: " + keyKV);
          // First row written by writeStoreFile is "aa": every char is 'a'.
          String tmp = Bytes.toString(keyKV.getRow());
          for (int i = 0; i < tmp.length(); i++) {
            assertTrue(tmp.charAt(i) == 'a');
          }
        }
      }
      KeyValue keyKV = KeyValue.createKeyValueFromKey(key);
      LOG.info("Last top when key < bottom: " + keyKV);
      // Last row written is "zz": every char is 'z'.
      String tmp = Bytes.toString(keyKV.getRow());
      for (int i = 0; i < tmp.length(); i++) {
        assertTrue(tmp.charAt(i) == 'z');
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // Test when badkey is > than last key in file ('||' > 'zz').
      badmidkey = Bytes.toBytes("|||");
      topPath = splitStoreFile(regionFs,topHri, TEST_FAMILY, f, badmidkey, true);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);
      // No top half exists when the split key follows every key in the file.
      assertNull(topPath);
      bottom = new StoreFile(this.fs, bottomPath, conf, cacheConf,
        BloomType.NONE).createReader();
      first = true;
      bottomScanner = bottom.getScanner(false, false);
      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) ||
          bottomScanner.next()) {
        key = bottomScanner.getKey();
        if (first) {
          first = false;
          keyKV = KeyValue.createKeyValueFromKey(key);
          LOG.info("First bottom when key > top: " + keyKV);
          tmp = Bytes.toString(keyKV.getRow());
          for (int i = 0; i < tmp.length(); i++) {
            assertTrue(tmp.charAt(i) == 'a');
          }
        }
      }
      keyKV = KeyValue.createKeyValueFromKey(key);
      LOG.info("Last bottom when key > top: " + keyKV);
      for (int i = 0; i < tmp.length(); i++) {
        assertTrue(Bytes.toString(keyKV.getRow()).charAt(i) == 'z');
      }
    } finally {
      if (top != null) {
        top.close(true); // evict since we are about to delete the file
      }
      if (bottom != null) {
        bottom.close(true); // evict since we are about to delete the file
      }
      fs.delete(f.getPath(), true);
    }
  }
437 
438   private static final String localFormatter = "%010d";
439 
440   private void bloomWriteRead(StoreFile.Writer writer, FileSystem fs) throws Exception {
441     float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
442     Path f = writer.getPath();
443     long now = System.currentTimeMillis();
444     for (int i = 0; i < 2000; i += 2) {
445       String row = String.format(localFormatter, i);
446       KeyValue kv = new KeyValue(row.getBytes(), "family".getBytes(),
447         "col".getBytes(), now, "value".getBytes());
448       writer.append(kv);
449     }
450     writer.close();
451 
452     StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf);
453     reader.loadFileInfo();
454     reader.loadBloomfilter();
455     StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
456 
457     // check false positives rate
458     int falsePos = 0;
459     int falseNeg = 0;
460     for (int i = 0; i < 2000; i++) {
461       String row = String.format(localFormatter, i);
462       TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
463       columns.add("family:col".getBytes());
464 
465       Scan scan = new Scan(row.getBytes(),row.getBytes());
466       scan.addColumn("family".getBytes(), "family:col".getBytes());
467       boolean exists = scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE);
468       if (i % 2 == 0) {
469         if (!exists) falseNeg++;
470       } else {
471         if (exists) falsePos++;
472       }
473     }
474     reader.close(true); // evict because we are about to delete the file
475     fs.delete(f, true);
476     assertEquals("False negatives: " + falseNeg, 0, falseNeg);
477     int maxFalsePos = (int) (2 * 2000 * err);
478     assertTrue("Too many false positives: " + falsePos + " (err=" + err
479         + ", expected no more than " + maxFalsePos + ")",
480         falsePos <= maxFalsePos);
481   }
482 
483   public void testBloomFilter() throws Exception {
484     FileSystem fs = FileSystem.getLocal(conf);
485     conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
486     conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
487 
488     // write the file
489     Path f = new Path(ROOT_DIR, getName());
490     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs,
491         StoreFile.DEFAULT_BLOCKSIZE_SMALL)
492             .withFilePath(f)
493             .withBloomType(BloomType.ROW)
494             .withMaxKeyCount(2000)
495             .withChecksumType(CKTYPE)
496             .withBytesPerChecksum(CKBYTES)
497             .build();
498     bloomWriteRead(writer, fs);
499   }
500 
  /**
   * Verifies the delete-family bloom filter: 1000 DeleteFamily markers are written
   * on even row indices; probing all 2000 indices must produce no false negatives
   * and a false-positive count within twice the configured error rate.
   */
  public void testDeleteFamilyBloomFilter() throws Exception {
    FileSystem fs = FileSystem.getLocal(conf);
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);

    // write the file
    Path f = new Path(ROOT_DIR, getName());

    StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
        fs, StoreFile.DEFAULT_BLOCKSIZE_SMALL)
            .withFilePath(f)
            .withMaxKeyCount(2000)
            .withChecksumType(CKTYPE)
            .withBytesPerChecksum(CKBYTES)
            .build();

    // add delete family
    // NOTE(review): String.getBytes() uses the platform-default charset; the
    // rest of this class uses Bytes.toBytes (UTF-8) — equivalent for these
    // ASCII literals but worth normalizing.
    long now = System.currentTimeMillis();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(row.getBytes(), "family".getBytes(),
          "col".getBytes(), now, KeyValue.Type.DeleteFamily, "value".getBytes());
      writer.append(kv);
    }
    writer.close();

    StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf);
    reader.loadFileInfo();
    reader.loadBloomfilter();

    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      byte[] rowKey = Bytes.toBytes(row);
      boolean exists = reader.passesDeleteFamilyBloomFilter(rowKey, 0,
          rowKey.length);
      if (i % 2 == 0) {
        // Even rows were written: a miss is a false negative.
        if (!exists)
          falseNeg++;
      } else {
        // Odd rows were not written: a hit is a false positive.
        if (exists)
          falsePos++;
      }
    }
    assertEquals(1000, reader.getDeleteFamilyCnt());
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err
        + ", expected no more than " + maxFalsePos, falsePos <= maxFalsePos);
  }
556 
557   /**
558    * Test for HBASE-8012
559    */
560   public void testReseek() throws Exception {
561     // write the file
562     Path f = new Path(ROOT_DIR, getName());
563 
564     // Make a store file and write data to it.
565     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
566          this.fs, 8 * 1024)
567             .withFilePath(f)
568             .build();
569 
570     writeStoreFile(writer);
571     writer.close();
572 
573     StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf);
574 
575     // Now do reseek with empty KV to position to the beginning of the file
576 
577     KeyValue k = KeyValue.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY);
578     StoreFileScanner s = reader.getStoreFileScanner(false, false);
579     s.reseek(k);
580 
581     assertNotNull("Intial reseek should position at the beginning of the file", s.peek());
582   }
583 
  /**
   * Exercises ROWCOL and ROW bloom types: writes cells on even row/column indices
   * (two versions each), then probes every row/column combination and checks that
   * the bloom key count matches expectations, there are no false negatives, and
   * false positives stay under twice the expected error budget per type.
   */
  public void testBloomTypes() throws Exception {
    float err = (float) 0.01;
    FileSystem fs = FileSystem.getLocal(conf);
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    int rowCount = 50;
    int colCount = 10;
    int versions = 2;

    // run once using columns and once using rows
    BloomType[] bt = {BloomType.ROWCOL, BloomType.ROW};
    int[] expKeys  = {rowCount*colCount, rowCount};
    // below line deserves commentary.  it is expected bloom false positives
    //  column = rowCount*2*colCount inserts
    //  row-level = only rowCount*2 inserts, but failures will be magnified by
    //              2nd for loop for every column (2*colCount)
    float[] expErr   = {2*rowCount*colCount*err, 2*rowCount*2*colCount*err};

    for (int x : new int[]{0,1}) {
      // write the file
      Path f = new Path(ROOT_DIR, getName() + x);
      StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
          fs, StoreFile.DEFAULT_BLOCKSIZE_SMALL)
              .withFilePath(f)
              .withBloomType(bt[x])
              .withMaxKeyCount(expKeys[x])
              .withChecksumType(CKTYPE)
              .withBytesPerChecksum(CKBYTES)
              .build();

      long now = System.currentTimeMillis();
      for (int i = 0; i < rowCount*2; i += 2) { // rows
        for (int j = 0; j < colCount*2; j += 2) {   // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          for (int k= 0; k < versions; ++k) { // versions
            KeyValue kv = new KeyValue(row.getBytes(),
              "family".getBytes(), ("col" + col).getBytes(),
                now-k, Bytes.toBytes((long)-1));
            writer.append(kv);
          }
        }
      }
      writer.close();

      StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf);
      reader.loadFileInfo();
      reader.loadBloomfilter();
      StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
      // Bloom key count must equal the number of distinct inserted keys for this type.
      assertEquals(expKeys[x], reader.generalBloomFilter.getKeyCount());

      // check false positives rate
      int falsePos = 0;
      int falseNeg = 0;
      for (int i = 0; i < rowCount*2; ++i) { // rows
        for (int j = 0; j < colCount*2; ++j) {   // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
          columns.add(("col" + col).getBytes());

          Scan scan = new Scan(row.getBytes(),row.getBytes());
          scan.addColumn("family".getBytes(), ("col"+col).getBytes());
          boolean exists =
              scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE);
          boolean shouldRowExist = i % 2 == 0;
          boolean shouldColExist = j % 2 == 0;
          // A ROW bloom cannot discriminate columns, so any column "exists".
          shouldColExist = shouldColExist || bt[x] == BloomType.ROW;
          if (shouldRowExist && shouldColExist) {
            if (!exists) falseNeg++;
          } else {
            if (exists) falsePos++;
          }
        }
      }
      reader.close(true); // evict because we are about to delete the file
      fs.delete(f, true);
      System.out.println(bt[x].toString());
      System.out.println("  False negatives: " + falseNeg);
      System.out.println("  False positives: " + falsePos);
      assertEquals(0, falseNeg);
      assertTrue(falsePos < 2*expErr[x]);
    }
  }
669 
  /**
   * Checks that {@code Comparators.SEQ_ID} orders bulk-loaded files (by size, then
   * bulk timestamp, then path) ahead of regular files (by sequence id, then size,
   * then path); the mocks below are listed in the expected sort order.
   */
  public void testSeqIdComparator() {
    assertOrdering(StoreFile.Comparators.SEQ_ID,
        mockStoreFile(true,  100,   1000, -1, "/foo/123"),
        mockStoreFile(true,  100,   1000, -1, "/foo/124"),
        mockStoreFile(true,  99,    1000, -1, "/foo/126"),
        mockStoreFile(true,  98,    2000, -1, "/foo/126"),
        mockStoreFile(false, 3453, -1,     1, "/foo/1"),
        mockStoreFile(false, 2,    -1,     3, "/foo/2"),
        mockStoreFile(false, 1000, -1,     5, "/foo/2"),
        mockStoreFile(false, 76,   -1,     5, "/foo/3"));
  }
681 
682   /**
683    * Assert that the given comparator orders the given storefiles in the
684    * same way that they're passed.
685    */
686   private void assertOrdering(Comparator<StoreFile> comparator, StoreFile ... sfs) {
687     ArrayList<StoreFile> sorted = Lists.newArrayList(sfs);
688     Collections.shuffle(sorted);
689     Collections.sort(sorted, comparator);
690     LOG.debug("sfs: " + Joiner.on(",").join(sfs));
691     LOG.debug("sorted: " + Joiner.on(",").join(sorted));
692     assertTrue(Iterables.elementsEqual(Arrays.asList(sfs), sorted));
693   }
694 
695   /**
696    * Create a mock StoreFile with the given attributes.
697    */
698   private StoreFile mockStoreFile(boolean bulkLoad,
699                                   long size,
700                                   long bulkTimestamp,
701                                   long seqId,
702                                   String path) {
703     StoreFile mock = Mockito.mock(StoreFile.class);
704     StoreFile.Reader reader = Mockito.mock(StoreFile.Reader.class);
705 
706     Mockito.doReturn(size).when(reader).length();
707 
708     Mockito.doReturn(reader).when(mock).getReader();
709     Mockito.doReturn(bulkLoad).when(mock).isBulkLoadResult();
710     Mockito.doReturn(bulkTimestamp).when(mock).getBulkLoadTimestamp();
711     Mockito.doReturn(seqId).when(mock).getMaxSequenceId();
712     Mockito.doReturn(new Path(path)).when(mock).getPath();
713     String name = "mock storefile, bulkLoad=" + bulkLoad +
714       " bulkTimestamp=" + bulkTimestamp +
715       " seqId=" + seqId +
716       " path=" + path;
717     Mockito.doReturn(name).when(mock).toString();
718     return mock;
719   }
720 
721   /**
722    * Generate a list of KeyValues for testing based on given parameters
723    * @param timestamps
724    * @param numRows
725    * @param qualifier
726    * @param family
727    * @return
728    */
729   List<KeyValue> getKeyValueSet(long[] timestamps, int numRows,
730       byte[] qualifier, byte[] family) {
731     List<KeyValue> kvList = new ArrayList<KeyValue>();
732     for (int i=1;i<=numRows;i++) {
733       byte[] b = Bytes.toBytes(i) ;
734       LOG.info(Bytes.toString(b));
735       LOG.info(Bytes.toString(b));
736       for (long timestamp: timestamps)
737       {
738         kvList.add(new KeyValue(b, family, qualifier, timestamp, b));
739       }
740     }
741     return kvList;
742   }
743 
744   /**
745    * Test to ensure correctness when using StoreFile with multiple timestamps
746    * @throws IOException
747    */
748   public void testMultipleTimestamps() throws IOException {
749     byte[] family = Bytes.toBytes("familyname");
750     byte[] qualifier = Bytes.toBytes("qualifier");
751     int numRows = 10;
752     long[] timestamps = new long[] {20,10,5,1};
753     Scan scan = new Scan();
754 
755     // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
756     Path storedir = new Path(new Path(this.testDir, "7e0102"), "familyname");
757     Path dir = new Path(storedir, "1234567890");
758     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
759         this.fs, 8 * 1024)
760             .withOutputDir(dir)
761             .build();
762 
763     List<KeyValue> kvList = getKeyValueSet(timestamps,numRows,
764         family, qualifier);
765 
766     for (KeyValue kv : kvList) {
767       writer.append(kv);
768     }
769     writer.appendMetadata(0, false);
770     writer.close();
771 
772     StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
773       BloomType.NONE);
774     StoreFile.Reader reader = hsf.createReader();
775     StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
776     TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
777     columns.add(qualifier);
778 
779     scan.setTimeRange(20, 100);
780     assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
781 
782     scan.setTimeRange(1, 2);
783     assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
784 
785     scan.setTimeRange(8, 10);
786     assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
787 
788     scan.setTimeRange(7, 50);
789     assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
790 
791     // This test relies on the timestamp range optimization
792     scan.setTimeRange(27, 50);
793     assertTrue(!scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
794   }
795 
  /**
   * End-to-end check of the cache-on-write and evict-on-close settings.
   * Writes two 3-block store files (one with cache-on-write off, one with it
   * on), then verifies the block cache's hit/miss/evicted deltas at each
   * step: 3 misses reading the uncached file, 3 hits reading the cached one,
   * 6 more hits re-reading both (the first read populated the cache), 3
   * evictions when closing with evict-on-close on, and no change when
   * closing with it off.
   *
   * NOTE(review): the delta assertions depend on the exact read order and on
   * each file holding exactly 3 blocks — do not reorder the steps.
   */
  public void testCacheOnWriteEvictOnClose() throws Exception {
    Configuration conf = this.conf;

    // Find a home for our files (regiondir ("7e0102") and familyname).
    Path baseDir = new Path(new Path(this.testDir, "7e0102"),"twoCOWEOC");

    // Grab the block cache and get the initial hit/miss counts
    BlockCache bc = new CacheConfig(conf).getBlockCache();
    assertNotNull(bc);
    CacheStats cs = bc.getStats();
    long startHit = cs.getHitCount();
    long startMiss = cs.getMissCount();
    long startEvicted = cs.getEvictedCount();

    // Let's write a StoreFile with three blocks, with cache on write off
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, false);
    CacheConfig cacheConf = new CacheConfig(conf);
    Path pathCowOff = new Path(baseDir, "123456789");
    StoreFile.Writer writer = writeStoreFile(conf, cacheConf, pathCowOff, 3);
    StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
      BloomType.NONE);
    LOG.debug(hsf.getPath().toString());

    // Read this file, we should see 3 misses
    StoreFile.Reader reader = hsf.createReader();
    reader.loadFileInfo();
    StoreFileScanner scanner = reader.getStoreFileScanner(true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    // Drain the scanner so every data block gets fetched (and cached on read).
    while (scanner.next() != null);
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss + 3, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startMiss += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Now write a StoreFile with three blocks, with cache on write on
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);
    cacheConf = new CacheConfig(conf);
    Path pathCowOn = new Path(baseDir, "123456788");
    writer = writeStoreFile(conf, cacheConf, pathCowOn, 3);
    hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
      BloomType.NONE);

    // Read this file, we should see 3 hits
    reader = hsf.createReader();
    scanner = reader.getStoreFileScanner(true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null);
    assertEquals(startHit + 3, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Let's read back the two files to ensure the blocks exactly match
    hsf = new StoreFile(this.fs, pathCowOff, conf, cacheConf,
      BloomType.NONE);
    StoreFile.Reader readerOne = hsf.createReader();
    readerOne.loadFileInfo();
    StoreFileScanner scannerOne = readerOne.getStoreFileScanner(true, true);
    scannerOne.seek(KeyValue.LOWESTKEY);
    hsf = new StoreFile(this.fs, pathCowOn, conf, cacheConf,
      BloomType.NONE);
    StoreFile.Reader readerTwo = hsf.createReader();
    readerTwo.loadFileInfo();
    StoreFileScanner scannerTwo = readerTwo.getStoreFileScanner(true, true);
    scannerTwo.seek(KeyValue.LOWESTKEY);
    KeyValue kv1 = null;
    KeyValue kv2 = null;
    // Walk both files in lockstep, comparing each KeyValue's key and value bytes.
    while ((kv1 = scannerOne.next()) != null) {
      kv2 = scannerTwo.next();
      assertTrue(kv1.equals(kv2));
      assertTrue(Bytes.compareTo(
          kv1.getBuffer(), kv1.getKeyOffset(), kv1.getKeyLength(), 
          kv2.getBuffer(), kv2.getKeyOffset(), kv2.getKeyLength()) == 0);
      assertTrue(Bytes.compareTo(
          kv1.getBuffer(), kv1.getValueOffset(), kv1.getValueLength(),
          kv2.getBuffer(), kv2.getValueOffset(), kv2.getValueLength()) == 0);
    }
    assertNull(scannerTwo.next());
    // Both files are now fully cached, so re-reading them was 6 hits total.
    assertEquals(startHit + 6, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 6;
    scannerOne.close();
    readerOne.close(cacheConf.shouldEvictOnClose());
    scannerTwo.close();
    readerTwo.close(cacheConf.shouldEvictOnClose());

    // Let's close the first file with evict on close turned on
    conf.setBoolean("hbase.rs.evictblocksonclose", true);
    cacheConf = new CacheConfig(conf);
    hsf = new StoreFile(this.fs, pathCowOff, conf, cacheConf,
      BloomType.NONE);
    reader = hsf.createReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We should have 3 new evictions
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted + 3, cs.getEvictedCount());
    startEvicted += 3;

    // Let's close the second file with evict on close turned off
    conf.setBoolean("hbase.rs.evictblocksonclose", false);
    cacheConf = new CacheConfig(conf);
    hsf = new StoreFile(this.fs, pathCowOn, conf, cacheConf,
      BloomType.NONE);
    reader = hsf.createReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We expect no changes
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
  }
914 
915   private Path splitStoreFile(final HRegionFileSystem regionFs, final HRegionInfo hri,
916       final String family, final StoreFile sf, final byte[] splitKey, boolean isTopRef)
917       throws IOException {
918     FileSystem fs = regionFs.getFileSystem();
919     Path path = regionFs.splitStoreFile(hri, family, sf, splitKey, isTopRef);
920     if (null == path) {
921       return null;
922     }
923     Path regionDir = regionFs.commitDaughterRegion(hri);
924     return new Path(new Path(regionDir, family), path.getName());
925   }
926 
927   private StoreFile.Writer writeStoreFile(Configuration conf,
928       CacheConfig cacheConf, Path path, int numBlocks)
929   throws IOException {
930     // Let's put ~5 small KVs in each block, so let's make 5*numBlocks KVs
931     int numKVs = 5 * numBlocks;
932     List<KeyValue> kvs = new ArrayList<KeyValue>(numKVs);
933     byte [] b = Bytes.toBytes("x");
934     int totalSize = 0;
935     for (int i=numKVs;i>0;i--) {
936       KeyValue kv = new KeyValue(b, b, b, i, b);
937       kvs.add(kv);
938       // kv has memstoreTS 0, which takes 1 byte to store.
939       totalSize += kv.getLength() + 1;
940     }
941     int blockSize = totalSize / numBlocks;
942     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs, blockSize)
943             .withFilePath(path)
944             .withMaxKeyCount(2000)
945             .withChecksumType(CKTYPE)
946             .withBytesPerChecksum(CKBYTES)
947             .build();
948     // We'll write N-1 KVs to ensure we don't write an extra block
949     kvs.remove(kvs.size()-1);
950     for (KeyValue kv : kvs) {
951       writer.append(kv);
952     }
953     writer.appendMetadata(0, false);
954     writer.close();
955     return writer;
956   }
957 
958   /**
959    * Check if data block encoding information is saved correctly in HFile's
960    * file info.
961    */
962   public void testDataBlockEncodingMetaData() throws IOException {
963     // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
964     Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
965     Path path = new Path(dir, "1234567890");
966 
967     DataBlockEncoding dataBlockEncoderAlgo =
968         DataBlockEncoding.FAST_DIFF;
969     HFileDataBlockEncoder dataBlockEncoder =
970         new HFileDataBlockEncoderImpl(
971             dataBlockEncoderAlgo);
972     cacheConf = new CacheConfig(conf);
973     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs,
974         HConstants.DEFAULT_BLOCKSIZE)
975             .withFilePath(path)
976             .withDataBlockEncoder(dataBlockEncoder)
977             .withMaxKeyCount(2000)
978             .withChecksumType(CKTYPE)
979             .withBytesPerChecksum(CKBYTES)
980             .build();
981     writer.close();
982 
983     StoreFile storeFile = new StoreFile(fs, writer.getPath(), conf,
984         cacheConf, BloomType.NONE);
985     StoreFile.Reader reader = storeFile.createReader();
986 
987     Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
988     byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
989     assertEquals(dataBlockEncoderAlgo.getNameInBytes(), value);
990   }
991 }
992