/**
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestCase;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.Reference.Range;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.CacheStats;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.FSUtils;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;

import com.google.common.base.Joiner;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

/**
 * Test StoreFile.
 */
@Category(SmallTests.class)
public class TestStoreFile extends HBaseTestCase {
  static final Log LOG = LogFactory.getLog(TestStoreFile.class);
  private CacheConfig cacheConf = new CacheConfig(conf);
  private String ROOT_DIR;
  private Map<String, Long> startingMetrics;

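  // Checksum settings applied by the StoreFile writers in these tests:
  // CRC32, with one checksum computed per 512-byte chunk.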
  private static final ChecksumType CKTYPE = ChecksumType.CRC32;
  private static final int CKBYTES = 512;

  @Override
  public void setUp() throws Exception {
    super.setUp();
    startingMetrics = SchemaMetrics.getMetricsSnapshot();
    ROOT_DIR = new Path(this.testDir, "TestStoreFile").toString();
  }

  @Override
  public void tearDown() throws Exception {
    super.tearDown();
    SchemaMetrics.validateMetricChanges(startingMetrics);
  }

  /**
   * Write a file and then assert that we can read from the top and bottom
   * halves using two HalfStoreFileReaders.
   * @throws Exception
   */
  public void testBasicHalfMapFile() throws Exception {
    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path outputDir = new Path(new Path(this.testDir, "7e0102"),
        "familyname");
    StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
        this.fs, 2 * 1024)
            .withOutputDir(outputDir)
            .build();
    writeStoreFile(writer);
    checkHalfHFile(new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
        StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE));
  }

  private void writeStoreFile(final StoreFile.Writer writer) throws IOException {
    writeStoreFile(writer, Bytes.toBytes(getName()), Bytes.toBytes(getName()));
  }

  // Pick a split point (roughly halfway).
  byte[] SPLITKEY = new byte[] { (LAST_CHAR + FIRST_CHAR) / 2, FIRST_CHAR };
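  // FIRST_CHAR and LAST_CHAR come from HBaseTestCase ('a' and 'z'), so the
  // first byte above works out to ('a' + 'z') / 2 = 'm'; the split row is
  // "ma", roughly halfway through the "aa".."zz" keyspace written below.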

  /*
   * Writes KeyValue data (one KeyValue per two-character row) to the passed
   * writer and then closes it.
   * @param writer
   * @throws IOException
   */
  public static void writeStoreFile(final StoreFile.Writer writer, byte[] fam, byte[] qualifier)
      throws IOException {
    long now = System.currentTimeMillis();
    try {
      for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
        for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
          byte[] b = new byte[] { (byte) d, (byte) e };
          writer.append(new KeyValue(b, fam, qualifier, now, b));
        }
      }
    } finally {
      writer.close();
    }
  }

  /**
   * Test that our mechanism of writing store files in one region to reference
   * store files in other regions works.
   * @throws IOException
   */
  public void testReference() throws IOException {
    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path storedir = new Path(new Path(this.testDir, "7e0102"), "familyname");
    // Make a store file and write data to it.
    StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
        this.fs, 8 * 1024)
            .withOutputDir(storedir)
            .build();
    writeStoreFile(writer);
    StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
        StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    StoreFile.Reader reader = hsf.createReader();
    // Split on a row, not in the middle of a row.  The midkey returned by the
    // reader may be in the middle of a row.  Create a new one with an empty
    // column and timestamp.
    KeyValue kv = KeyValue.createKeyValueFromKey(reader.midkey());
    byte [] midRow = kv.getRow();
    kv = KeyValue.createKeyValueFromKey(reader.getLastKey());
    byte [] finalRow = kv.getRow();
    // Make a reference
    Path refPath = StoreFile.split(fs, storedir, hsf, midRow, Range.top);
    StoreFile refHsf = new StoreFile(this.fs, refPath, conf, cacheConf,
        StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    // Now confirm that I can read from the reference and that it only gets
    // keys from the top half of the file.
    HFileScanner s = refHsf.createReader().getScanner(false, false);
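    // seekTo() positions the scanner on the first key of the reference; each
    // later iteration advances with next(), so every key is visited once.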
    for (boolean first = true; (!s.isSeeked() && s.seekTo()) || s.next();) {
      ByteBuffer bb = s.getKey();
      kv = KeyValue.createKeyValueFromKey(bb);
      if (first) {
        assertTrue(Bytes.equals(kv.getRow(), midRow));
        first = false;
      }
    }
    assertTrue(Bytes.equals(kv.getRow(), finalRow));
  }

  public void testHFileLink() throws IOException {
    final String columnFamily = "f";

    Configuration testConf = new Configuration(this.conf);
    FSUtils.setRootDir(testConf, this.testDir);

    HRegionInfo hri = new HRegionInfo(Bytes.toBytes("table-link"));
    Path storedir = new Path(new Path(this.testDir,
      new Path(hri.getTableNameAsString(), hri.getEncodedName())), columnFamily);

    // Make a store file and write data to it.  writeStoreFile closes the
    // writer.
    StoreFile.Writer writer = new StoreFile.WriterBuilder(testConf, cacheConf,
         this.fs, 8 * 1024)
            .withOutputDir(storedir)
            .build();
    Path storeFilePath = writer.getPath();
    writeStoreFile(writer);

    Path dstPath = new Path(this.testDir, new Path("test-region", columnFamily));
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath = new Path(dstPath,
                  HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // Try to open store file from link
    StoreFile hsf = new StoreFile(this.fs, linkFilePath, testConf, cacheConf,
        StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    assertTrue(hsf.isLink());

    // Now confirm that I can read from the link
    int count = 1; // seekTo() below lands on the first key
    HFileScanner s = hsf.createReader().getScanner(false, false);
    s.seekTo();
    while (s.next()) {
      count++;
    }
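    // writeStoreFile wrote one KeyValue per two-character row, so with
    // FIRST_CHAR..LAST_CHAR being 'a'..'z' we expect 26 * 26 = 676 rows back.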
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }

  /**
   * Validate that we can handle valid table names with '.', '_', and '-' chars.
   */
  public void testStoreFileNames() {
    String[] legalHFileLink = { "MyTable_02=abc012-def345", "MyTable_02.300=abc012-def345",
      "MyTable_02-400=abc012-def345", "MyTable_02-400.200=abc012-def345",
      "MyTable_02=abc012-def345_SeqId_1_", "MyTable_02=abc012-def345_SeqId_20_" };
    for (String name: legalHFileLink) {
      assertTrue("should be a valid link: " + name, HFileLink.isHFileLink(name));
      assertTrue("should be a valid StoreFile: " + name, StoreFile.validateStoreFileName(name));
      assertFalse("should not be a valid reference: " + name, StoreFile.isReference(name));

      String refName = name + ".6789";
      assertTrue("should be a valid link reference: " + refName, StoreFile.isReference(refName));
      assertTrue("should be a valid StoreFile: " + refName, StoreFile.validateStoreFileName(refName));
    }

    String[] illegalHFileLink = { ".MyTable_02=abc012-def345", "-MyTable_02.300=abc012-def345",
      "MyTable_02-400=abc0_12-def345", "MyTable_02-400.200=abc012-def345...." };
    for (String name: illegalHFileLink) {
      assertFalse("should not be a valid link: " + name, HFileLink.isHFileLink(name));
    }
  }

  /**
   * This test creates an hfile and then the dir structures and files to verify that references
   * to hfilelinks (created by snapshot clones) can be properly interpreted.
   */
  public void testReferenceToHFileLink() throws IOException {
    final String columnFamily = "f";

    Path rootDir = FSUtils.getRootDir(conf);

    String tablename = "_original-evil-name"; // adding legal table name chars to verify regex handles it.
    HRegionInfo hri = new HRegionInfo(Bytes.toBytes(tablename));
    // store dir = <root>/<tablename>/<rgn>/<cf>
    Path storedir = new Path(new Path(rootDir,
      new Path(hri.getTableNameAsString(), hri.getEncodedName())), columnFamily);

    // Make a store file and write data to it. <root>/<tablename>/<rgn>/<cf>/<file>
    // writeStoreFile closes the writer.
    StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
         this.fs, 8 * 1024)
            .withOutputDir(storedir)
            .build();
    Path storeFilePath = writer.getPath();
    writeStoreFile(writer);

    // create link to store file. <root>/clone/region/<cf>/<hfile>-<region>-<table>
    String target = "clone";
    Path dstPath = new Path(rootDir, new Path(new Path(target, "7e0102"), columnFamily));
    HFileLink.create(conf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath = new Path(dstPath,
                  HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // create splits of the link.
    // <root>/clone/splitA/<cf>/<reftohfilelink>,
    // <root>/clone/splitB/<cf>/<reftohfilelink>
    Path splitDirA = new Path(new Path(rootDir,
        new Path(target, "571A")), columnFamily);
    Path splitDirB = new Path(new Path(rootDir,
        new Path(target, "571B")), columnFamily);
    StoreFile f = new StoreFile(fs, linkFilePath, conf, cacheConf, BloomType.NONE,
        NoOpDataBlockEncoder.INSTANCE);
    byte[] splitRow = SPLITKEY;
    Path pathA = StoreFile.split(fs, splitDirA, f, splitRow, Range.top); // top
    Path pathB = StoreFile.split(fs, splitDirB, f, splitRow, Range.bottom); // bottom

    // OK test the thing
    FSUtils.logFileSystemState(fs, rootDir, LOG);

    // There is a case where a file with the hfilelink pattern is actually a
    // daughter reference to an hfile link.  StoreFile contains code that
    // handles this case.

    // Try to open store file from link
    StoreFile hsfA = new StoreFile(this.fs, pathA, conf, cacheConf,
        StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);

    // Now confirm that I can read from the ref to link
    int count = 1; // seekTo() below lands on the first key
    HFileScanner s = hsfA.createReader().getScanner(false, false);
    s.seekTo();
    while (s.next()) {
      count++;
    }
    assertTrue(count > 0); // read some rows here

    // Try to open store file from link
    StoreFile hsfB = new StoreFile(this.fs, pathB, conf, cacheConf,
        StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);

    // Now confirm that I can read from the ref to link
    HFileScanner sB = hsfB.createReader().getScanner(false, false);
    sB.seekTo();

    // seekTo() positions sB on the first key of the bottom half, so count it
    // here rather than in the loop below.
    count++;
    while (sB.next()) {
      count++;
    }

    // read the rest of the rows
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }

  private void checkHalfHFile(final StoreFile f) throws IOException {
    byte [] midkey = f.createReader().midkey();
    KeyValue midKV = KeyValue.createKeyValueFromKey(midkey);
    byte [] midRow = midKV.getRow();
    // Create top split.
    Path topDir = Store.getStoreHomedir(this.testDir, "1",
      Bytes.toBytes(f.getPath().getParent().getName()));
    if (this.fs.exists(topDir)) {
      this.fs.delete(topDir, true);
    }
    Path topPath = StoreFile.split(this.fs, topDir, f, midRow, Range.top);
    // Create bottom split.
    Path bottomDir = Store.getStoreHomedir(this.testDir, "2",
      Bytes.toBytes(f.getPath().getParent().getName()));
    if (this.fs.exists(bottomDir)) {
      this.fs.delete(bottomDir, true);
    }
    Path bottomPath = StoreFile.split(this.fs, bottomDir,
      f, midRow, Range.bottom);
    // Make readers on top and bottom.
    StoreFile.Reader top =
        new StoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE,
            NoOpDataBlockEncoder.INSTANCE).createReader();
    StoreFile.Reader bottom = new StoreFile(this.fs, bottomPath,
        conf, cacheConf, BloomType.NONE,
        NoOpDataBlockEncoder.INSTANCE).createReader();
    ByteBuffer previous = null;
    LOG.info("Midkey: " + midKV.toString());
    ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midkey);
    try {
      // Now make two half readers and assert they can read the full backing
      // file, one from the top and the other from the bottom.
      // Test the top half first: every key must be >= the midkey.
      boolean first = true;
      ByteBuffer key = null;
      HFileScanner topScanner = top.getScanner(false, false);
      while ((!topScanner.isSeeked() && topScanner.seekTo()) ||
          (topScanner.isSeeked() && topScanner.next())) {
        key = topScanner.getKey();

        if (topScanner.getReader().getComparator().compare(key.array(),
          key.arrayOffset(), key.limit(), midkey, 0, midkey.length) < 0) {
          fail("key=" + Bytes.toStringBinary(key) + " < midkey=" +
              Bytes.toStringBinary(midkey));
        }
        if (first) {
          first = false;
          LOG.info("First in top: " + Bytes.toString(Bytes.toBytes(key)));
        }
      }
      LOG.info("Last in top: " + Bytes.toString(Bytes.toBytes(key)));

      // Then the bottom half: every key must be < the midkey.
      first = true;
      HFileScanner bottomScanner = bottom.getScanner(false, false);
      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) ||
          bottomScanner.next()) {
        previous = bottomScanner.getKey();
        key = bottomScanner.getKey();
        if (first) {
          first = false;
          LOG.info("First in bottom: " +
            Bytes.toString(Bytes.toBytes(previous)));
        }
        assertTrue(key.compareTo(bbMidkeyBytes) < 0);
      }
      if (previous != null) {
        LOG.info("Last in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
      }
      // Remove references.
      this.fs.delete(topPath, false);
      this.fs.delete(bottomPath, false);

      // Next test using a midkey that does not exist in the file.
      // First, do a key that is < the first key.  Ensure splits behave
      // properly.
      byte [] badmidkey = Bytes.toBytes("  .");
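      // "  ." sorts before the first row "aa" (' ' is 0x20, 'a' is 0x61), so
      // the whole file belongs to the top half and no bottom half exists.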
      topPath = StoreFile.split(this.fs, topDir, f, badmidkey, Range.top);
      bottomPath = StoreFile.split(this.fs, bottomDir, f, badmidkey,
        Range.bottom);

      assertNull(bottomPath);

      top = new StoreFile(this.fs, topPath, conf, cacheConf,
          StoreFile.BloomType.NONE,
          NoOpDataBlockEncoder.INSTANCE).createReader();
      // Now read from the top.
      first = true;
      topScanner = top.getScanner(false, false);
      while ((!topScanner.isSeeked() && topScanner.seekTo()) ||
          topScanner.next()) {
        key = topScanner.getKey();
        assertTrue(topScanner.getReader().getComparator().compare(key.array(),
          key.arrayOffset(), key.limit(), badmidkey, 0, badmidkey.length) >= 0);
        if (first) {
          first = false;
          KeyValue keyKV = KeyValue.createKeyValueFromKey(key);
          LOG.info("First top when key < bottom: " + keyKV);
          String tmp = Bytes.toString(keyKV.getRow());
          for (int i = 0; i < tmp.length(); i++) {
            assertTrue(tmp.charAt(i) == 'a');
          }
        }
      }
      KeyValue keyKV = KeyValue.createKeyValueFromKey(key);
      LOG.info("Last top when key < bottom: " + keyKV);
      String tmp = Bytes.toString(keyKV.getRow());
      for (int i = 0; i < tmp.length(); i++) {
        assertTrue(tmp.charAt(i) == 'z');
      }
      // Remove references.
      this.fs.delete(topPath, false);

      // Test when badmidkey is > the last key in the file ('|||' > 'zz'), so
      // the whole file belongs to the bottom half and no top half exists.
      badmidkey = Bytes.toBytes("|||");
      topPath = StoreFile.split(this.fs, topDir, f, badmidkey, Range.top);
      bottomPath = StoreFile.split(this.fs, bottomDir, f, badmidkey,
        Range.bottom);

      assertNull(topPath);

      bottom = new StoreFile(this.fs, bottomPath, conf, cacheConf,
          StoreFile.BloomType.NONE,
          NoOpDataBlockEncoder.INSTANCE).createReader();
      first = true;
      bottomScanner = bottom.getScanner(false, false);
      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) ||
          bottomScanner.next()) {
        key = bottomScanner.getKey();
        if (first) {
          first = false;
          keyKV = KeyValue.createKeyValueFromKey(key);
          LOG.info("First bottom when key > top: " + keyKV);
          tmp = Bytes.toString(keyKV.getRow());
          for (int i = 0; i < tmp.length(); i++) {
            assertTrue(tmp.charAt(i) == 'a');
          }
        }
      }
      keyKV = KeyValue.createKeyValueFromKey(key);
      LOG.info("Last bottom when key > top: " + keyKV);
      tmp = Bytes.toString(keyKV.getRow());
      for (int i = 0; i < tmp.length(); i++) {
        assertTrue(tmp.charAt(i) == 'z');
      }
    } finally {
      if (top != null) {
        top.close(true); // evict since we are about to delete the file
      }
      if (bottom != null) {
        bottom.close(true); // evict since we are about to delete the file
      }
      fs.delete(f.getPath(), true);
    }
  }

  private static final String localFormatter = "%010d";

  private void bloomWriteRead(StoreFile.Writer writer, FileSystem fs)
      throws Exception {
    float err = conf.getFloat(
        BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
    Path f = writer.getPath();
    long now = System.currentTimeMillis();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(row.getBytes(), "family".getBytes(),
        "col".getBytes(), now, "value".getBytes());
      writer.append(kv);
    }
    writer.close();

    StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf,
        DataBlockEncoding.NONE);
    reader.loadFileInfo();
    reader.loadBloomfilter();
    StoreFileScanner scanner = reader.getStoreFileScanner(false, false);

    // check the false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
      columns.add("col".getBytes());

      Scan scan = new Scan(row.getBytes(), row.getBytes());
      scan.addColumn("family".getBytes(), "col".getBytes());
      boolean exists = scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE);
      if (i % 2 == 0) {
        if (!exists) falseNeg++;
      } else {
        if (exists) falsePos++;
      }
    }
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
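    // Only the 1000 odd (never written) rows can produce false positives.
    // Allow up to twice the configured error rate across all 2000 probes as
    // headroom for small-sample variance.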
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err
        + ", expected no more than " + maxFalsePos + ")",
        falsePos <= maxFalsePos);
  }

  public void testBloomFilter() throws Exception {
    FileSystem fs = FileSystem.getLocal(conf);
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE,
        (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    // write the file
    Path f = new Path(ROOT_DIR, getName());
    StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs,
        StoreFile.DEFAULT_BLOCKSIZE_SMALL)
            .withFilePath(f)
            .withBloomType(StoreFile.BloomType.ROW)
            .withMaxKeyCount(2000)
            .withChecksumType(CKTYPE)
            .withBytesPerChecksum(CKBYTES)
            .build();
    bloomWriteRead(writer, fs);
  }

  public void testDeleteFamilyBloomFilter() throws Exception {
    FileSystem fs = FileSystem.getLocal(conf);
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE,
        (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE,
        0);

    // write the file
    Path f = new Path(ROOT_DIR, getName());

    StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
        fs, StoreFile.DEFAULT_BLOCKSIZE_SMALL)
            .withFilePath(f)
            .withMaxKeyCount(2000)
            .withChecksumType(CKTYPE)
            .withBytesPerChecksum(CKBYTES)
            .build();

    // add delete family markers
    long now = System.currentTimeMillis();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(row.getBytes(), "family".getBytes(),
          "col".getBytes(), now, KeyValue.Type.DeleteFamily, "value".getBytes());
      writer.append(kv);
    }
    writer.close();

    StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf,
        DataBlockEncoding.NONE);
    reader.loadFileInfo();
    reader.loadBloomfilter();

    // check the false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      byte[] rowKey = Bytes.toBytes(row);
      boolean exists = reader.passesDeleteFamilyBloomFilter(rowKey, 0,
          rowKey.length);
      if (i % 2 == 0) {
        if (!exists) falseNeg++;
      } else {
        if (exists) falsePos++;
      }
    }
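    // i stepped by 2 from 0 to 1998, so exactly 1000 DeleteFamily markers
    // were written above.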
    assertEquals(1000, reader.getDeleteFamilyCnt());
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err
        + ", expected no more than " + maxFalsePos + ")", falsePos <= maxFalsePos);
  }

  public void testBloomTypes() throws Exception {
    float err = (float) 0.01;
    FileSystem fs = FileSystem.getLocal(conf);
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    int rowCount = 50;
    int colCount = 10;
    int versions = 2;

    // run once using columns and once using rows
    StoreFile.BloomType[] bt =
      {StoreFile.BloomType.ROWCOL, StoreFile.BloomType.ROW};
    int[] expKeys = {rowCount * colCount, rowCount};
    // The line below deserves commentary: it is the expected number of bloom
    // false positives.
    //   column-level: rowCount*2*colCount lookups
    //   row-level:    only rowCount*2 lookups, but each false positive is
    //                 magnified by the inner column loop (2*colCount)
    float[] expErr = {2 * rowCount * colCount * err, 2 * rowCount * 2 * colCount * err};

    for (int x : new int[]{0, 1}) {
      // write the file
      Path f = new Path(ROOT_DIR, getName() + x);
      StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
          fs, StoreFile.DEFAULT_BLOCKSIZE_SMALL)
              .withFilePath(f)
              .withBloomType(bt[x])
              .withMaxKeyCount(expKeys[x])
              .withChecksumType(CKTYPE)
              .withBytesPerChecksum(CKBYTES)
              .build();

      long now = System.currentTimeMillis();
      for (int i = 0; i < rowCount * 2; i += 2) { // rows
        for (int j = 0; j < colCount * 2; j += 2) { // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          for (int k = 0; k < versions; ++k) { // versions
            KeyValue kv = new KeyValue(row.getBytes(),
              "family".getBytes(), ("col" + col).getBytes(),
                now - k, Bytes.toBytes((long) -1));
            writer.append(kv);
          }
        }
      }
      writer.close();

      StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf,
          DataBlockEncoding.NONE);
      reader.loadFileInfo();
      reader.loadBloomfilter();
      StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
      assertEquals(expKeys[x], reader.generalBloomFilter.getKeyCount());

      // check the false positives rate
      int falsePos = 0;
      int falseNeg = 0;
      for (int i = 0; i < rowCount * 2; ++i) { // rows
        for (int j = 0; j < colCount * 2; ++j) { // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
          columns.add(("col" + col).getBytes());

          Scan scan = new Scan(row.getBytes(), row.getBytes());
          scan.addColumn("family".getBytes(), ("col" + col).getBytes());
          boolean exists =
              scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE);
          boolean shouldRowExist = i % 2 == 0;
          boolean shouldColExist = j % 2 == 0;
          shouldColExist = shouldColExist || bt[x] == StoreFile.BloomType.ROW;
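          // A ROW bloom can only rule out whole rows, not individual columns,
          // so every probed column must be treated as potentially present.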
          if (shouldRowExist && shouldColExist) {
            if (!exists) falseNeg++;
          } else {
            if (exists) falsePos++;
          }
        }
      }
      reader.close(true); // evict because we are about to delete the file
      fs.delete(f, true);
      System.out.println(bt[x].toString());
      System.out.println("  False negatives: " + falseNeg);
      System.out.println("  False positives: " + falsePos);
      assertEquals(0, falseNeg);
      assertTrue(falsePos < 2 * expErr[x]);
    }
  }

  public void testBloomEdgeCases() throws Exception {
    float err = (float) 0.005;
    FileSystem fs = FileSystem.getLocal(conf);
    Path f = new Path(ROOT_DIR, getName());
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
    conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_MAX_KEYS, 1000);

    // This test only runs for HFile format version 1.
    conf.setInt(HFile.FORMAT_VERSION_KEY, 1);

    // this should not create a bloom because the max keys setting is too small
    StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs,
        StoreFile.DEFAULT_BLOCKSIZE_SMALL)
            .withFilePath(f)
            .withBloomType(StoreFile.BloomType.ROW)
            .withMaxKeyCount(2000)
            .withChecksumType(CKTYPE)
            .withBytesPerChecksum(CKBYTES)
            .build();
    assertFalse(writer.hasGeneralBloom());
    writer.close();
    fs.delete(f, true);

    conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_MAX_KEYS,
        Integer.MAX_VALUE);

    // TODO: commented out because we run out of java heap space on trunk
    // the below config caused IllegalArgumentException in our production cluster
    // however, the resulting byteSize is < MAX_INT, so this should work properly
    writer = new StoreFile.WriterBuilder(conf, cacheConf, fs,
        StoreFile.DEFAULT_BLOCKSIZE_SMALL)
            .withFilePath(f)
            .withBloomType(StoreFile.BloomType.ROW)
            .withMaxKeyCount(27244696)
            .build();
    assertTrue(writer.hasGeneralBloom());
    bloomWriteRead(writer, fs);

    // this, however, is too large and should not create a bloom
    // because Java can't create a contiguous array > MAX_INT
    writer = new StoreFile.WriterBuilder(conf, cacheConf, fs,
        StoreFile.DEFAULT_BLOCKSIZE_SMALL)
            .withFilePath(f)
            .withBloomType(StoreFile.BloomType.ROW)
            .withMaxKeyCount(Integer.MAX_VALUE)
            .withChecksumType(CKTYPE)
            .withBytesPerChecksum(CKBYTES)
            .build();
    assertFalse(writer.hasGeneralBloom());
    writer.close();
    fs.delete(f, true);
  }

  public void testFlushTimeComparator() {
    assertOrdering(StoreFile.Comparators.FLUSH_TIME,
        mockStoreFile(true, 1000, -1, "/foo/123"),
        mockStoreFile(true, 1000, -1, "/foo/126"),
        mockStoreFile(true, 2000, -1, "/foo/126"),
        mockStoreFile(false, -1, 1, "/foo/1"),
        mockStoreFile(false, -1, 3, "/foo/2"),
        mockStoreFile(false, -1, 5, "/foo/2"),
        mockStoreFile(false, -1, 5, "/foo/3"));
  }

  /**
   * Assert that the given comparator orders the given storefiles in the
   * same way that they're passed in.
   */
  private void assertOrdering(Comparator<StoreFile> comparator, StoreFile ... sfs) {
    ArrayList<StoreFile> sorted = Lists.newArrayList(sfs);
    Collections.shuffle(sorted);
    Collections.sort(sorted, comparator);
    LOG.debug("sfs: " + Joiner.on(",").join(sfs));
    LOG.debug("sorted: " + Joiner.on(",").join(sorted));
    assertTrue(Iterables.elementsEqual(Arrays.asList(sfs), sorted));
  }

  /**
   * Create a mock StoreFile with the given attributes.
   */
  private StoreFile mockStoreFile(boolean bulkLoad, long bulkTimestamp,
      long seqId, String path) {
    StoreFile mock = Mockito.mock(StoreFile.class);
    Mockito.doReturn(bulkLoad).when(mock).isBulkLoadResult();
    Mockito.doReturn(bulkTimestamp).when(mock).getBulkLoadTimestamp();
    if (bulkLoad) {
      // Bulk load files will throw if you ask for their sequence ID
      Mockito.doThrow(new IllegalAccessError("bulk load"))
        .when(mock).getMaxSequenceId();
    } else {
      Mockito.doReturn(seqId).when(mock).getMaxSequenceId();
    }
    Mockito.doReturn(new Path(path)).when(mock).getPath();
    String name = "mock storefile, bulkLoad=" + bulkLoad +
      " bulkTimestamp=" + bulkTimestamp +
      " seqId=" + seqId +
      " path=" + path;
    Mockito.doReturn(name).when(mock).toString();
    return mock;
  }

  /**
   * Generate a list of KeyValues for testing based on the given parameters.
   * @param timestamps the timestamps to use; one KeyValue is generated per
   *          timestamp for each row
   * @param numRows number of rows to generate
   * @param family column family to use
   * @param qualifier column qualifier to use
   * @return the generated list of KeyValues
   */
  List<KeyValue> getKeyValueSet(long[] timestamps, int numRows,
      byte[] family, byte[] qualifier) {
    List<KeyValue> kvList = new ArrayList<KeyValue>();
    for (int i = 1; i <= numRows; i++) {
      byte[] b = Bytes.toBytes(i);
      LOG.info(Bytes.toString(b));
      for (long timestamp : timestamps) {
        kvList.add(new KeyValue(b, family, qualifier, timestamp, b));
      }
    }
    return kvList;
  }

  /**
   * Test to ensure correctness when using StoreFile with multiple timestamps
   * @throws IOException
   */
  public void testMultipleTimestamps() throws IOException {
    byte[] family = Bytes.toBytes("familyname");
    byte[] qualifier = Bytes.toBytes("qualifier");
    int numRows = 10;
    long[] timestamps = new long[] {20, 10, 5, 1};
    Scan scan = new Scan();

    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path storedir = new Path(new Path(this.testDir, "7e0102"), "familyname");
    Path dir = new Path(storedir, "1234567890");
    StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
        this.fs, 8 * 1024)
            .withOutputDir(dir)
            .build();

    List<KeyValue> kvList = getKeyValueSet(timestamps, numRows,
        family, qualifier);

    for (KeyValue kv : kvList) {
      writer.append(kv);
    }
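    // Record a max sequence id of 0 and majorCompaction=false in the file
    // metadata before closing.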
    writer.appendMetadata(0, false);
    writer.close();

    StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
        StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    StoreFile.Reader reader = hsf.createReader();
    StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
    TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
    columns.add(qualifier);

    scan.setTimeRange(20, 100);
    assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));

    scan.setTimeRange(1, 2);
    assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));

    scan.setTimeRange(8, 10);
    assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));

    scan.setTimeRange(7, 50);
    assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));

    // This test relies on the timestamp range optimization: every timestamp
    // in the file is <= 20, so a scan over [27, 50) can skip the file.
    scan.setTimeRange(27, 50);
    assertTrue(!scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
  }

  public void testCacheOnWriteEvictOnClose() throws Exception {
    Configuration conf = this.conf;

    // Find a home for our files (regiondir ("7e0102") and familyname).
    Path baseDir = new Path(new Path(this.testDir, "7e0102"), "twoCOWEOC");

    // Grab the block cache and get the initial hit/miss counts
    BlockCache bc = new CacheConfig(conf).getBlockCache();
    assertNotNull(bc);
    CacheStats cs = bc.getStats();
    long startHit = cs.getHitCount();
    long startMiss = cs.getMissCount();
    long startEvicted = cs.getEvictedCount();

    // Let's write a StoreFile with three blocks, with cache on write off
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, false);
    CacheConfig cacheConf = new CacheConfig(conf);
    Path pathCowOff = new Path(baseDir, "123456789");
    StoreFile.Writer writer = writeStoreFile(conf, cacheConf, pathCowOff, 3);
    StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
        StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    LOG.debug(hsf.getPath().toString());

    // Read this file, we should see 3 misses
    StoreFile.Reader reader = hsf.createReader();
    reader.loadFileInfo();
    StoreFileScanner scanner = reader.getStoreFileScanner(true, true);
    scanner.seek(KeyValue.LOWESTKEY);
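    // Drain the scanner so that every block in the file gets read.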
    while (scanner.next() != null);
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss + 3, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startMiss += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Now write a StoreFile with three blocks, with cache on write on
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);
    cacheConf = new CacheConfig(conf);
    Path pathCowOn = new Path(baseDir, "123456788");
    writer = writeStoreFile(conf, cacheConf, pathCowOn, 3);
    hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
        StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);

    // Read this file, we should see 3 hits
    reader = hsf.createReader();
    scanner = reader.getStoreFileScanner(true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null);
    assertEquals(startHit + 3, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Let's read back the two files to ensure the blocks exactly match
    hsf = new StoreFile(this.fs, pathCowOff, conf, cacheConf,
        StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    StoreFile.Reader readerOne = hsf.createReader();
    readerOne.loadFileInfo();
    StoreFileScanner scannerOne = readerOne.getStoreFileScanner(true, true);
    scannerOne.seek(KeyValue.LOWESTKEY);
    hsf = new StoreFile(this.fs, pathCowOn, conf, cacheConf,
        StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    StoreFile.Reader readerTwo = hsf.createReader();
    readerTwo.loadFileInfo();
    StoreFileScanner scannerTwo = readerTwo.getStoreFileScanner(true, true);
    scannerTwo.seek(KeyValue.LOWESTKEY);
    KeyValue kv1 = null;
    KeyValue kv2 = null;
    while ((kv1 = scannerOne.next()) != null) {
      kv2 = scannerTwo.next();
      assertTrue(kv1.equals(kv2));
      assertTrue(Bytes.compareTo(
          kv1.getBuffer(), kv1.getKeyOffset(), kv1.getKeyLength(),
          kv2.getBuffer(), kv2.getKeyOffset(), kv2.getKeyLength()) == 0);
      assertTrue(Bytes.compareTo(
          kv1.getBuffer(), kv1.getValueOffset(), kv1.getValueLength(),
          kv2.getBuffer(), kv2.getValueOffset(), kv2.getValueLength()) == 0);
    }
    assertNull(scannerTwo.next());
    assertEquals(startHit + 6, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 6;
    scannerOne.close();
    readerOne.close(cacheConf.shouldEvictOnClose());
    scannerTwo.close();
    readerTwo.close(cacheConf.shouldEvictOnClose());

    // Let's close the first file with evict on close turned on
    conf.setBoolean("hbase.rs.evictblocksonclose", true);
    cacheConf = new CacheConfig(conf);
    hsf = new StoreFile(this.fs, pathCowOff, conf, cacheConf,
        StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    reader = hsf.createReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We should have 3 new evictions
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted + 3, cs.getEvictedCount());
    startEvicted += 3;

    // Let's close the second file with evict on close turned off
    conf.setBoolean("hbase.rs.evictblocksonclose", false);
    cacheConf = new CacheConfig(conf);
    hsf = new StoreFile(this.fs, pathCowOn, conf, cacheConf,
        StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    reader = hsf.createReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We expect no changes
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
  }

  private StoreFile.Writer writeStoreFile(Configuration conf,
      CacheConfig cacheConf, Path path, int numBlocks)
      throws IOException {
    // Let's put ~5 small KVs in each block, so let's make 5*numBlocks KVs
    int numKVs = 5 * numBlocks;
    List<KeyValue> kvs = new ArrayList<KeyValue>(numKVs);
    byte [] b = Bytes.toBytes("x");
    int totalSize = 0;
    for (int i = numKVs; i > 0; i--) {
      KeyValue kv = new KeyValue(b, b, b, i, b);
      kvs.add(kv);
      // kv has memstoreTS 0, which takes 1 byte to store.
      totalSize += kv.getLength() + 1;
    }
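    // Sizing blocks at totalSize / numBlocks makes the writer cross a block
    // boundary roughly every five KVs, yielding numBlocks blocks in total.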
    int blockSize = totalSize / numBlocks;
    StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs,
        blockSize)
            .withFilePath(path)
            .withMaxKeyCount(2000)
            .withChecksumType(CKTYPE)
            .withBytesPerChecksum(CKBYTES)
            .build();
    // We'll write N-1 KVs to ensure we don't write an extra block
    kvs.remove(kvs.size() - 1);
    for (KeyValue kv : kvs) {
      writer.append(kv);
    }
    writer.appendMetadata(0, false);
    writer.close();
    return writer;
  }

  /**
   * Check that data block encoding information is saved correctly in the
   * HFile's file info.
   */
  public void testDataBlockEncodingMetaData() throws IOException {
    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");

    DataBlockEncoding dataBlockEncoderAlgo =
        DataBlockEncoding.FAST_DIFF;
    HFileDataBlockEncoder dataBlockEncoder =
        new HFileDataBlockEncoderImpl(
            dataBlockEncoderAlgo,
            dataBlockEncoderAlgo);
    cacheConf = new CacheConfig(conf);
    StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs,
        HFile.DEFAULT_BLOCKSIZE)
            .withFilePath(path)
            .withDataBlockEncoder(dataBlockEncoder)
            .withMaxKeyCount(2000)
            .withChecksumType(CKTYPE)
            .withBytesPerChecksum(CKBYTES)
            .build();
    writer.close();

    StoreFile storeFile = new StoreFile(fs, writer.getPath(), conf,
        cacheConf, BloomType.NONE, dataBlockEncoder);
    StoreFile.Reader reader = storeFile.createReader();

    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);

    // Compare the byte[] contents rather than references.
    assertTrue(Arrays.equals(dataBlockEncoderAlgo.getNameInBytes(), value));
  }

  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}