
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Ordering;

import java.io.DataInput;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Map;
import java.util.SortedSet;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.TimeRange;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.HFileWriterV2;
import org.apache.hadoop.hbase.regionserver.compactions.Compactor;
import org.apache.hadoop.hbase.util.BloomFilter;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.WritableUtils;

/**
 * A Store data file.  Stores usually have one or more of these files.  They
 * are produced by flushing the memstore to disk.  To create one, instantiate
 * a writer using {@link StoreFile.WriterBuilder} and append data. Be sure to
 * add any metadata before calling close on the Writer (use the appendMetadata
 * convenience methods). On close, a StoreFile is sitting in the Filesystem.
 * To refer to it, create a StoreFile instance passing filesystem and path.
 * To read, call {@link #createReader()}.
 * <p>StoreFiles may also reference store files in another Store.
 *
 * The reason for this seemingly odd pattern, where the writer and the reader
 * are different instances, is that we write once but read many more times.
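 *
 * <p>A minimal write-then-read sketch; the {@code conf}, {@code cacheConf},
 * {@code fs}, {@code dir}, {@code fileContext}, {@code kv} and
 * {@code maxSequenceId} names below are assumed to be supplied by the caller:
 * <pre>{@code
 * StoreFile.Writer w = new StoreFile.WriterBuilder(conf, cacheConf, fs)
 *     .withOutputDir(dir)
 *     .withFileContext(fileContext)
 *     .build();
 * try {
 *   w.append(kv);                           // repeat for each KeyValue
 *   w.appendMetadata(maxSequenceId, false); // add metadata before close
 * } finally {
 *   w.close();
 * }
 * StoreFile sf = new StoreFile(fs, w.getPath(), conf, cacheConf, BloomType.NONE);
 * StoreFile.Reader r = sf.createReader();
 * }</pre>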
 */
@InterfaceAudience.LimitedPrivate("Coprocessor")
public class StoreFile {
  static final Log LOG = LogFactory.getLog(StoreFile.class.getName());

  // Keys for fileinfo values in HFile

  /** Max Sequence ID in FileInfo */
  public static final byte [] MAX_SEQ_ID_KEY = Bytes.toBytes("MAX_SEQ_ID_KEY");

  /** Major compaction flag in FileInfo */
  public static final byte[] MAJOR_COMPACTION_KEY =
      Bytes.toBytes("MAJOR_COMPACTION_KEY");

  /** Minor compaction flag in FileInfo */
  public static final byte[] EXCLUDE_FROM_MINOR_COMPACTION_KEY =
      Bytes.toBytes("EXCLUDE_FROM_MINOR_COMPACTION");

  /** Bloom filter Type in FileInfo */
  public static final byte[] BLOOM_FILTER_TYPE_KEY =
      Bytes.toBytes("BLOOM_FILTER_TYPE");

  /** Delete Family Count in FileInfo */
  public static final byte[] DELETE_FAMILY_COUNT =
      Bytes.toBytes("DELETE_FAMILY_COUNT");

  /** Last Bloom filter key in FileInfo */
  private static final byte[] LAST_BLOOM_KEY = Bytes.toBytes("LAST_BLOOM_KEY");

  /** Key for Timerange information in metadata */
  public static final byte[] TIMERANGE_KEY = Bytes.toBytes("TIMERANGE");

  /** Key for timestamp of earliest-put in metadata */
  public static final byte[] EARLIEST_PUT_TS = Bytes.toBytes("EARLIEST_PUT_TS");

  private final StoreFileInfo fileInfo;
  private final FileSystem fs;

  // Block cache configuration and reference.
  private final CacheConfig cacheConf;

  // Keys for metadata stored in backing HFile.
  // Set when we obtain a Reader.
  private long sequenceid = -1;

  // max of the MemstoreTS in the KVs in this store
  // Set when we obtain a Reader.
  private long maxMemstoreTS = -1;

  public long getMaxMemstoreTS() {
    return maxMemstoreTS;
  }

  public void setMaxMemstoreTS(long maxMemstoreTS) {
    this.maxMemstoreTS = maxMemstoreTS;
  }

  // If true, this file was a product of a major compaction.  It's set
  // whenever you get a Reader.
  private AtomicBoolean majorCompaction = null;

  // If true, this file should not be included in minor compactions.
  // It's set whenever you get a Reader.
  private boolean excludeFromMinorCompaction = false;

  /** Meta key set when store file is a result of a bulk load */
  public static final byte[] BULKLOAD_TASK_KEY =
    Bytes.toBytes("BULKLOAD_SOURCE_TASK");
  public static final byte[] BULKLOAD_TIME_KEY =
    Bytes.toBytes("BULKLOAD_TIMESTAMP");

  /**
   * Map of the metadata entries in the corresponding HFile
   */
  private Map<byte[], byte[]> metadataMap;

  // StoreFile.Reader
  private volatile Reader reader;

  /**
   * Bloom filter type specified in column family configuration. Does not
   * necessarily correspond to the Bloom filter type present in the HFile.
   */
  private final BloomType cfBloomType;

  // the last modification time stamp
  private long modificationTimeStamp = 0L;

  /**
   * Constructor, loads a reader and its indices, etc. May allocate a
   * substantial amount of RAM depending on the underlying files (10-20MB?).
   *
   * @param fs  The current file system to use.
   * @param p  The path of the file.
   * @param conf  The current configuration.
   * @param cacheConf  The cache configuration and block cache reference.
   * @param cfBloomType The bloom type to use for this store file as specified
   *          by column family configuration. This may or may not be the same
   *          as the Bloom filter type actually present in the HFile, because
   *          column family configuration might change. If this is
   *          {@link BloomType#NONE}, the existing Bloom filter is ignored.
   * @throws IOException When opening the reader fails.
   */
  public StoreFile(final FileSystem fs, final Path p, final Configuration conf,
        final CacheConfig cacheConf, final BloomType cfBloomType) throws IOException {
    this(fs, new StoreFileInfo(conf, fs, p), conf, cacheConf, cfBloomType);
  }

  /**
   * Constructor, loads a reader and its indices, etc. May allocate a
   * substantial amount of RAM depending on the underlying files (10-20MB?).
   *
   * @param fs  The current file system to use.
   * @param fileInfo  The store file information.
   * @param conf  The current configuration.
   * @param cacheConf  The cache configuration and block cache reference.
   * @param cfBloomType The bloom type to use for this store file as specified
   *          by column family configuration. This may or may not be the same
   *          as the Bloom filter type actually present in the HFile, because
   *          column family configuration might change. If this is
   *          {@link BloomType#NONE}, the existing Bloom filter is ignored.
   * @throws IOException When opening the reader fails.
   */
  public StoreFile(final FileSystem fs, final StoreFileInfo fileInfo, final Configuration conf,
      final CacheConfig cacheConf, final BloomType cfBloomType) throws IOException {
    this.fs = fs;
    this.fileInfo = fileInfo;
    this.cacheConf = cacheConf;

    if (BloomFilterFactory.isGeneralBloomEnabled(conf)) {
      this.cfBloomType = cfBloomType;
    } else {
      LOG.info("Ignoring bloom filter check for file " + this.getPath() + ": " +
          "cfBloomType=" + cfBloomType + " (disabled in config)");
      this.cfBloomType = BloomType.NONE;
    }

    // cache the modification time stamp of this store file
    this.modificationTimeStamp = fileInfo.getModificationTime();
  }

  /**
   * Clone
   * @param other The StoreFile to clone from
   */
  public StoreFile(final StoreFile other) {
    this.fs = other.fs;
    this.fileInfo = other.fileInfo;
    this.cacheConf = other.cacheConf;
    this.cfBloomType = other.cfBloomType;
    this.modificationTimeStamp = other.modificationTimeStamp;
  }

  /**
   * Clone a StoreFile for opening a private reader.
   */
  public StoreFile cloneForReader() {
    return new StoreFile(this);
  }

  /**
   * @return the StoreFileInfo associated with this StoreFile.
   */
  public StoreFileInfo getFileInfo() {
    return this.fileInfo;
  }

  /**
   * @return Path or null if this StoreFile was made with a Stream.
   */
  public Path getPath() {
    return this.fileInfo.getPath();
  }

  /**
   * @return the qualified path of this StoreFile
   */
  public Path getQualifiedPath() {
    return this.fileInfo.getPath().makeQualified(fs);
  }

  /**
   * @return True if this is a StoreFile Reference; call
   * after {@link #open(boolean)} else may get wrong answer.
   */
  public boolean isReference() {
    return this.fileInfo.isReference();
  }

  /**
   * @return True if this is an HFile.
   */
  public boolean isHFile() {
    return this.fileInfo.isHFile(this.fileInfo.getPath());
  }

  /**
   * @return True if this file was made by a major compaction.
   */
  public boolean isMajorCompaction() {
    if (this.majorCompaction == null) {
      throw new NullPointerException("This has not been set yet");
    }
    return this.majorCompaction.get();
  }

  /**
   * @return True if this file should not be part of a minor compaction.
   */
  public boolean excludeFromMinorCompaction() {
    return this.excludeFromMinorCompaction;
  }

  /**
   * @return This file's maximum edit sequence id.
   */
  public long getMaxSequenceId() {
    return this.sequenceid;
  }

  public long getModificationTimeStamp() {
    return modificationTimeStamp;
  }

  public byte[] getMetadataValue(byte[] key) {
    return metadataMap.get(key);
  }

  /**
   * Return the largest memstoreTS found across all storefiles in
   * the given list. Store files that were created by a mapreduce
   * bulk load are ignored, as they do not correspond to any specific
   * put operation, and thus do not have a memstoreTS associated with them.
   * @return 0 if no non-bulk-load files are provided or this is a Store that
   * does not yet have any store files.
   */
  public static long getMaxMemstoreTSInList(Collection<StoreFile> sfs) {
    long max = 0;
    for (StoreFile sf : sfs) {
      if (!sf.isBulkLoadResult()) {
        max = Math.max(max, sf.getMaxMemstoreTS());
      }
    }
    return max;
  }

  /**
   * Return the highest sequence ID found across all storefiles in
   * the given list.
   * @param sfs collection of store files to examine
   * @return 0 if no files are provided or this is a Store that
   * does not yet have any store files.
   */
  public static long getMaxSequenceIdInList(Collection<StoreFile> sfs) {
    long max = 0;
    for (StoreFile sf : sfs) {
      max = Math.max(max, sf.getMaxSequenceId());
    }
    return max;
  }

  public CacheConfig getCacheConf() {
    return this.cacheConf;
  }

  /**
   * Check if this storefile was created by bulk load.
   * When an hfile is bulk loaded into HBase, we append
   * {@code '_SeqId_<id-when-loaded>'} to the hfile name, unless
   * "hbase.mapreduce.bulkload.assign.sequenceNumbers" is
   * explicitly turned off.
   * If "hbase.mapreduce.bulkload.assign.sequenceNumbers"
   * is turned off, fall back to BULKLOAD_TIME_KEY.
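   * For example, a bulk-loaded hfile might be named something like
   * {@code 2f9a21dfb7674bfb9328d5a2d7bc34c8_SeqId_4_} (a hypothetical name),
   * where 4 is the sequence id assigned at load time.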
   * @return true if this storefile was created by bulk load.
   */
  public boolean isBulkLoadResult() {
    boolean bulkLoadedHFile = false;
    String fileName = this.getPath().getName();
    int startPos = fileName.indexOf("SeqId_");
    if (startPos != -1) {
      bulkLoadedHFile = true;
    }
    return metadataMap.containsKey(BULKLOAD_TIME_KEY) || bulkLoadedHFile;
  }

  /**
   * Return the timestamp at which this bulk load file was generated.
   */
  public long getBulkLoadTimestamp() {
    byte[] bulkLoadTimestamp = metadataMap.get(BULKLOAD_TIME_KEY);
    return (bulkLoadTimestamp == null) ? 0 : Bytes.toLong(bulkLoadTimestamp);
  }

  /**
   * @return the cached value of HDFS blocks distribution. The cached value is
   * calculated when store file is opened.
   */
  public HDFSBlocksDistribution getHDFSBlockDistribution() {
    return this.fileInfo.getHDFSBlockDistribution();
  }

  /**
   * Opens reader on this store file.  Called by Constructor.
   * @return Reader for the store file.
   * @throws IOException
   * @see #closeReader(boolean)
   */
  private Reader open(boolean canUseDropBehind) throws IOException {
    if (this.reader != null) {
      throw new IllegalAccessError("Already open");
    }

    // Open the StoreFile.Reader
    this.reader = fileInfo.open(this.fs, this.cacheConf, canUseDropBehind);

    // Load up indices and fileinfo. This also loads Bloom filter type.
    metadataMap = Collections.unmodifiableMap(this.reader.loadFileInfo());

    // Read in our metadata.
    byte [] b = metadataMap.get(MAX_SEQ_ID_KEY);
    if (b != null) {
      // By convention, if halfhfile, top half has a sequence number > bottom
      // half. That's why we add one in below. It's done for the case the two
      // halves are ever merged back together --rare.  Without it, on open of
      // the store, since store files are distinguished by sequence id, the one
      // half would subsume the other.
      this.sequenceid = Bytes.toLong(b);
      if (fileInfo.isTopReference()) {
        this.sequenceid += 1;
      }
    }

    if (isBulkLoadResult()) {
      // generate the sequenceId from the fileName
      // fileName is of the form <randomName>_SeqId_<id-when-loaded>_
      String fileName = this.getPath().getName();
      // Use lastIndexOf() to get the last, most recent bulk load seqId.
      int startPos = fileName.lastIndexOf("SeqId_");
      if (startPos != -1) {
        this.sequenceid = Long.parseLong(fileName.substring(startPos + 6,
            fileName.indexOf('_', startPos + 6)));
        // Handle reference files as done above.
        if (fileInfo.isTopReference()) {
          this.sequenceid += 1;
        }
      }
      this.reader.setBulkLoaded(true);
    }
    this.reader.setSequenceID(this.sequenceid);

    b = metadataMap.get(HFileWriterV2.MAX_MEMSTORE_TS_KEY);
    if (b != null) {
      this.maxMemstoreTS = Bytes.toLong(b);
    }

    b = metadataMap.get(MAJOR_COMPACTION_KEY);
    if (b != null) {
      boolean mc = Bytes.toBoolean(b);
      if (this.majorCompaction == null) {
        this.majorCompaction = new AtomicBoolean(mc);
      } else {
        this.majorCompaction.set(mc);
      }
    } else {
      // Presume it is not major compacted if it doesn't explicitly say so.
      // HFileOutputFormat explicitly sets the major compacted key.
      this.majorCompaction = new AtomicBoolean(false);
    }

    b = metadataMap.get(EXCLUDE_FROM_MINOR_COMPACTION_KEY);
    this.excludeFromMinorCompaction = (b != null && Bytes.toBoolean(b));

    BloomType hfileBloomType = reader.getBloomFilterType();
    if (cfBloomType != BloomType.NONE) {
      reader.loadBloomfilter(BlockType.GENERAL_BLOOM_META);
      if (hfileBloomType != cfBloomType) {
        LOG.info("HFile Bloom filter type for "
            + reader.getHFileReader().getName() + ": " + hfileBloomType
            + ", but " + cfBloomType + " specified in column family "
            + "configuration");
      }
    } else if (hfileBloomType != BloomType.NONE) {
      LOG.info("Bloom filter turned off by CF config for "
          + reader.getHFileReader().getName());
    }

    // load delete family bloom filter
    reader.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META);

    try {
      this.reader.timeRange = TimeRangeTracker.getTimeRange(metadataMap.get(TIMERANGE_KEY));
    } catch (IllegalArgumentException e) {
      LOG.error("Error reading timestamp range data from meta -- " +
          "proceeding without", e);
      this.reader.timeRange = null;
    }
    return this.reader;
  }

  public Reader createReader() throws IOException {
    return createReader(false);
  }

  /**
   * @return Reader for StoreFile. Creates one if necessary.
   * @throws IOException
   */
  public Reader createReader(boolean canUseDropBehind) throws IOException {
    if (this.reader == null) {
      try {
        this.reader = open(canUseDropBehind);
      } catch (IOException e) {
        try {
          boolean evictOnClose =
              cacheConf != null ? cacheConf.shouldEvictOnClose() : true;
          this.closeReader(evictOnClose);
        } catch (IOException ee) {
          // Ignore; we are propagating the original exception from open().
        }
        throw e;
      }
    }
    return this.reader;
  }

  /**
   * @return Current reader.  Must call createReader first else returns null.
   * @see #createReader()
   */
  public Reader getReader() {
    return this.reader;
  }

  /**
   * @param evictOnClose whether to evict blocks belonging to this file
   * @throws IOException
   */
  public synchronized void closeReader(boolean evictOnClose)
      throws IOException {
    if (this.reader != null) {
      this.reader.close(evictOnClose);
      this.reader = null;
    }
  }

  /**
   * Delete this file
   * @throws IOException
   */
  public void deleteReader() throws IOException {
    boolean evictOnClose =
        cacheConf != null ? cacheConf.shouldEvictOnClose() : true;
    closeReader(evictOnClose);
    this.fs.delete(getPath(), true);
  }

  @Override
  public String toString() {
    return this.fileInfo.toString();
  }

  /**
   * @return a detailed description of this StoreFile, suitable for debug output
   */
  public String toStringDetailed() {
    StringBuilder sb = new StringBuilder();
    sb.append(this.getPath().toString());
    sb.append(", isReference=").append(isReference());
    sb.append(", isBulkLoadResult=").append(isBulkLoadResult());
    if (isBulkLoadResult()) {
      sb.append(", bulkLoadTS=").append(getBulkLoadTimestamp());
    } else {
      sb.append(", seqid=").append(getMaxSequenceId());
    }
    sb.append(", majorCompaction=").append(isMajorCompaction());

    return sb.toString();
  }

  public static class WriterBuilder {
    private final Configuration conf;
    private final CacheConfig cacheConf;
    private final FileSystem fs;

    private KeyValue.KVComparator comparator = KeyValue.COMPARATOR;
    private BloomType bloomType = BloomType.NONE;
    private long maxKeyCount = 0;
    private Path dir;
    private Path filePath;
    private InetSocketAddress[] favoredNodes;
    private HFileContext fileContext;
    private boolean shouldDropCacheBehind = false;

    public WriterBuilder(Configuration conf, CacheConfig cacheConf,
        FileSystem fs) {
      this.conf = conf;
      this.cacheConf = cacheConf;
      this.fs = fs;
    }

    /**
     * Use either this method or {@link #withFilePath}, but not both.
     * @param dir Path to column family directory. The directory is created if
     *          it does not exist. The file is given a unique name within this
     *          directory.
     * @return this (for chained invocation)
     */
    public WriterBuilder withOutputDir(Path dir) {
      Preconditions.checkNotNull(dir);
      this.dir = dir;
      return this;
    }

    /**
     * Use either this method or {@link #withOutputDir}, but not both.
     * @param filePath the StoreFile path to write
     * @return this (for chained invocation)
     */
    public WriterBuilder withFilePath(Path filePath) {
      Preconditions.checkNotNull(filePath);
      this.filePath = filePath;
      return this;
    }

    /**
     * @param favoredNodes an array of favored nodes or possibly null
     * @return this (for chained invocation)
     */
    public WriterBuilder withFavoredNodes(InetSocketAddress[] favoredNodes) {
      this.favoredNodes = favoredNodes;
      return this;
    }

    public WriterBuilder withComparator(KeyValue.KVComparator comparator) {
      Preconditions.checkNotNull(comparator);
      this.comparator = comparator;
      return this;
    }

    public WriterBuilder withBloomType(BloomType bloomType) {
      Preconditions.checkNotNull(bloomType);
      this.bloomType = bloomType;
      return this;
    }

    /**
     * @param maxKeyCount estimated maximum number of keys we expect to add
     * @return this (for chained invocation)
     */
    public WriterBuilder withMaxKeyCount(long maxKeyCount) {
      this.maxKeyCount = maxKeyCount;
      return this;
    }

    public WriterBuilder withFileContext(HFileContext fileContext) {
      this.fileContext = fileContext;
      return this;
    }

    public WriterBuilder withShouldDropCacheBehind(boolean shouldDropCacheBehind) {
      this.shouldDropCacheBehind = shouldDropCacheBehind;
      return this;
    }

    /**
     * Create a store file writer. The client is responsible for closing the
     * file when done. If adding metadata, do so BEFORE closing, using
     * {@link Writer#appendMetadata}.
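     *
     * <p>A sketch of typical use; {@code conf}, {@code cacheConf}, {@code fs},
     * {@code dir}, {@code estimatedKeys} and {@code fileContext} are assumed
     * to be supplied by the caller:
     * <pre>{@code
     * StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs)
     *     .withOutputDir(dir)
     *     .withBloomType(BloomType.ROW)
     *     .withMaxKeyCount(estimatedKeys)
     *     .withFileContext(fileContext)
     *     .build();
     * }</pre>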
     */
    public Writer build() throws IOException {
      if ((dir == null ? 0 : 1) + (filePath == null ? 0 : 1) != 1) {
        throw new IllegalArgumentException("Either specify parent directory " +
            "or file path");
      }

      if (dir == null) {
        dir = filePath.getParent();
      }

      if (!fs.exists(dir)) {
        fs.mkdirs(dir);
      }

      if (filePath == null) {
        filePath = getUniqueFile(fs, dir);
        if (!BloomFilterFactory.isGeneralBloomEnabled(conf)) {
          bloomType = BloomType.NONE;
        }
      }

      if (comparator == null) {
        comparator = KeyValue.COMPARATOR;
      }
      return new Writer(fs, filePath,
          conf, cacheConf, comparator, bloomType, maxKeyCount, favoredNodes, fileContext);
    }
  }

  /**
   * @param fs the FileSystem to create the file in
   * @param dir Directory to create file in.
   * @return random filename inside passed <code>dir</code>
   */
  public static Path getUniqueFile(final FileSystem fs, final Path dir)
      throws IOException {
    if (!fs.getFileStatus(dir).isDir()) {
      throw new IOException("Expecting " + dir.toString() +
        " to be a directory");
    }
    return new Path(dir, UUID.randomUUID().toString().replaceAll("-", ""));
  }

  public Long getMinimumTimestamp() {
    return getReader().timeRange == null ? null : getReader().timeRange.getMin();
  }

  public Long getMaximumTimestamp() {
    return getReader().timeRange == null ? null : getReader().timeRange.getMax();
  }

  /**
   * Gets the approximate mid-point of this file that is optimal for use in splitting it.
   * @param comparator Comparator used to compare KVs.
   * @return The split point row, or null if splitting is not possible, or reader is null.
   */
  @SuppressWarnings("deprecation")
  byte[] getFileSplitPoint(KVComparator comparator) throws IOException {
    if (this.reader == null) {
      LOG.warn("Storefile " + this + " Reader is null; cannot get split point");
      return null;
    }
    // Get first, last, and mid keys.  Midkey is the key that starts block
    // in middle of hfile.  Has column and timestamp.  Need to return just
    // the row we want to split on as midkey.
    byte [] midkey = this.reader.midkey();
    if (midkey != null) {
      KeyValue mk = KeyValue.createKeyValueFromKey(midkey, 0, midkey.length);
      byte [] fk = this.reader.getFirstKey();
      KeyValue firstKey = KeyValue.createKeyValueFromKey(fk, 0, fk.length);
      byte [] lk = this.reader.getLastKey();
      KeyValue lastKey = KeyValue.createKeyValueFromKey(lk, 0, lk.length);
      // if the midkey is the same as the first or last keys, we cannot (ever) split this region.
      if (comparator.compareRows(mk, firstKey) == 0 || comparator.compareRows(mk, lastKey) == 0) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("cannot split because midkey is the same as first or last row");
        }
        return null;
      }
      return mk.getRow();
    }
    return null;
  }

  /**
   * A StoreFile writer.  Use this to write HBase Store Files. It is package
   * local because it is an implementation detail of the HBase regionserver.
   */
  public static class Writer implements Compactor.CellSink {
    private final BloomFilterWriter generalBloomFilterWriter;
    private final BloomFilterWriter deleteFamilyBloomFilterWriter;
    private final BloomType bloomType;
    private byte[] lastBloomKey;
    private int lastBloomKeyOffset, lastBloomKeyLen;
    private KVComparator kvComparator;
    private KeyValue lastKv = null;
    private long earliestPutTs = HConstants.LATEST_TIMESTAMP;
    private KeyValue lastDeleteFamilyKV = null;
    private long deleteFamilyCnt = 0;

    /** Checksum type */
    protected ChecksumType checksumType;

    /** Bytes per Checksum */
    protected int bytesPerChecksum;

    TimeRangeTracker timeRangeTracker = new TimeRangeTracker();

    /**
     * timeRangeTrackerSet is used to figure out whether we were passed a filled-out
     * TimeRangeTracker or not. When flushing a memstore, we set into this Writer
     * the TimeRangeTracker that the memstore accumulated during updates, and use
     * this variable to indicate that we do not need to recalculate the
     * timeRangeTracker bounds; that was done already as part of add-to-memstore.
     * A completed TimeRangeTracker is not set in the case of compactions, when it
     * is recalculated.
     */
    boolean timeRangeTrackerSet = false;

    protected HFile.Writer writer;

    /**
     * Creates an HFile.Writer that also writes helpful meta data.
     * @param fs file system to write to
     * @param path file name to create
     * @param conf user configuration
     * @param comparator key comparator
     * @param bloomType bloom filter setting
     * @param maxKeys the expected maximum number of keys to be added. Was used
     *        for Bloom filter size in {@link HFile} format version 1.
     * @param favoredNodes an array of favored nodes or possibly null
     * @param fileContext The HFile context
     * @throws IOException problem writing to FS
     */
    private Writer(FileSystem fs, Path path,
        final Configuration conf,
        CacheConfig cacheConf,
        final KVComparator comparator, BloomType bloomType, long maxKeys,
        InetSocketAddress[] favoredNodes, HFileContext fileContext)
            throws IOException {
      writer = HFile.getWriterFactory(conf, cacheConf)
          .withPath(fs, path)
          .withComparator(comparator)
          .withFavoredNodes(favoredNodes)
          .withFileContext(fileContext)
          .create();

      this.kvComparator = comparator;

      generalBloomFilterWriter = BloomFilterFactory.createGeneralBloomAtWrite(
          conf, cacheConf, bloomType,
          (int) Math.min(maxKeys, Integer.MAX_VALUE), writer);

      if (generalBloomFilterWriter != null) {
        this.bloomType = bloomType;
        if (LOG.isTraceEnabled()) LOG.trace("Bloom filter type for " + path + ": " +
          this.bloomType + ", " + generalBloomFilterWriter.getClass().getSimpleName());
      } else {
        // Not using Bloom filters.
        this.bloomType = BloomType.NONE;
      }

      // initialize delete family Bloom filter when there is NO RowCol Bloom
      // filter
      if (this.bloomType != BloomType.ROWCOL) {
        this.deleteFamilyBloomFilterWriter = BloomFilterFactory
            .createDeleteBloomAtWrite(conf, cacheConf,
                (int) Math.min(maxKeys, Integer.MAX_VALUE), writer);
      } else {
        deleteFamilyBloomFilterWriter = null;
      }
      if (deleteFamilyBloomFilterWriter != null) {
        if (LOG.isTraceEnabled()) LOG.trace("Delete Family Bloom filter type for " + path + ": "
            + deleteFamilyBloomFilterWriter.getClass().getSimpleName());
      }
    }

    /**
     * Writes meta data.
     * Call before {@link #close()} since it's written as meta data to this file.
     * @param maxSequenceId Maximum sequence id.
     * @param majorCompaction True if this file is product of a major compaction
     * @throws IOException problem writing to FS
     */
    public void appendMetadata(final long maxSequenceId, final boolean majorCompaction)
    throws IOException {
      writer.appendFileInfo(MAX_SEQ_ID_KEY, Bytes.toBytes(maxSequenceId));
      writer.appendFileInfo(MAJOR_COMPACTION_KEY,
          Bytes.toBytes(majorCompaction));
      appendTrackedTimestampsToMetadata();
    }

    /**
     * Add TimestampRange and earliest put timestamp to Metadata
     */
    public void appendTrackedTimestampsToMetadata() throws IOException {
      appendFileInfo(TIMERANGE_KEY, WritableUtils.toByteArray(timeRangeTracker));
      appendFileInfo(EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs));
    }

    /**
     * Set TimeRangeTracker.
     * Called when flushing to pass us a pre-calculated TimeRangeTracker, one made
     * during updates to memstore so we don't have to make one ourselves as Cells
     * get appended. Call before the first append. If this method is not called,
     * we will calculate our own range of the Cells that comprise this StoreFile
     * (and write it at the end as metadata). It is good to have this passed in
     * because it is expensive to make.
     */
    public void setTimeRangeTracker(final TimeRangeTracker trt) {
      this.timeRangeTracker = trt;
      timeRangeTrackerSet = true;
    }

    /**
     * Record the earliest Put timestamp.
     *
     * If the timeRangeTracker is not set,
     * update TimeRangeTracker to include the timestamp of this key
     * @param kv the KeyValue being appended
     */
    public void trackTimestamps(final KeyValue kv) {
      if (KeyValue.Type.Put.getCode() == kv.getTypeByte()) {
        earliestPutTs = Math.min(earliestPutTs, kv.getTimestamp());
      }
      if (!timeRangeTrackerSet) {
        timeRangeTracker.includeTimestamp(kv);
      }
    }

    private void appendGeneralBloomfilter(final KeyValue kv) throws IOException {
      if (this.generalBloomFilterWriter != null) {
        // only add to the bloom filter on a new, unique key
        boolean newKey = true;
        if (this.lastKv != null) {
          switch (bloomType) {
          case ROW:
            newKey = !kvComparator.matchingRows(kv, lastKv);
            break;
          case ROWCOL:
            newKey = !kvComparator.matchingRowColumn(kv, lastKv);
            break;
          case NONE:
            newKey = false;
            break;
          default:
            throw new IOException("Invalid Bloom filter type: " + bloomType +
                " (ROW or ROWCOL expected)");
          }
        }
        if (newKey) {
          /*
           * http://2.bp.blogspot.com/_Cib_A77V54U/StZMrzaKufI/AAAAAAAAADo/ZhK7bGoJdMQ/s400/KeyValue.png
           * Key = RowLen + Row + FamilyLen + Column [Family + Qualifier] + TimeStamp
           *
           * 2 Types of Filtering:
           *  1. Row = Row
           *  2. RowCol = Row + Qualifier
           */
          byte[] bloomKey;
          int bloomKeyOffset, bloomKeyLen;

          switch (bloomType) {
          case ROW:
            bloomKey = kv.getBuffer();
            bloomKeyOffset = kv.getRowOffset();
            bloomKeyLen = kv.getRowLength();
            break;
          case ROWCOL:
            // merge(row, qualifier)
            // TODO: could save one buffer copy in case of compound Bloom
            // filters when this involves creating a KeyValue
            bloomKey = generalBloomFilterWriter.createBloomKey(kv.getBuffer(),
                kv.getRowOffset(), kv.getRowLength(), kv.getBuffer(),
                kv.getQualifierOffset(), kv.getQualifierLength());
            bloomKeyOffset = 0;
            bloomKeyLen = bloomKey.length;
            break;
          default:
            throw new IOException("Invalid Bloom filter type: " + bloomType +
                " (ROW or ROWCOL expected)");
          }
          generalBloomFilterWriter.add(bloomKey, bloomKeyOffset, bloomKeyLen);
          if (lastBloomKey != null
              && generalBloomFilterWriter.getComparator().compareFlatKey(bloomKey,
                  bloomKeyOffset, bloomKeyLen, lastBloomKey,
                  lastBloomKeyOffset, lastBloomKeyLen) <= 0) {
            throw new IOException("Non-increasing Bloom keys: "
                + Bytes.toStringBinary(bloomKey, bloomKeyOffset, bloomKeyLen)
                + " after "
                + Bytes.toStringBinary(lastBloomKey, lastBloomKeyOffset,
                    lastBloomKeyLen));
          }
          lastBloomKey = bloomKey;
          lastBloomKeyOffset = bloomKeyOffset;
          lastBloomKeyLen = bloomKeyLen;
          this.lastKv = kv;
        }
      }
    }

    private void appendDeleteFamilyBloomFilter(final KeyValue kv)
        throws IOException {
      if (!kv.isDeleteFamily() && !kv.isDeleteFamilyVersion()) {
        return;
      }

      // increase the count of delete family markers in the store file
      deleteFamilyCnt++;
      if (null != this.deleteFamilyBloomFilterWriter) {
        boolean newKey = true;
        if (lastDeleteFamilyKV != null) {
          newKey = !kvComparator.matchingRows(kv, lastDeleteFamilyKV);
        }
        if (newKey) {
          this.deleteFamilyBloomFilterWriter.add(kv.getBuffer(),
              kv.getRowOffset(), kv.getRowLength());
          this.lastDeleteFamilyKV = kv;
        }
      }
    }

    public void append(final KeyValue kv) throws IOException {
      appendGeneralBloomfilter(kv);
      appendDeleteFamilyBloomFilter(kv);
      writer.append(kv);
      trackTimestamps(kv);
    }

    public Path getPath() {
      return this.writer.getPath();
    }

    boolean hasGeneralBloom() {
      return this.generalBloomFilterWriter != null;
    }

    /**
     * For unit testing only.
     *
     * @return the Bloom filter used by this writer.
     */
    BloomFilterWriter getGeneralBloomWriter() {
      return generalBloomFilterWriter;
    }

    private boolean closeBloomFilter(BloomFilterWriter bfw) throws IOException {
      boolean haveBloom = (bfw != null && bfw.getKeyCount() > 0);
      if (haveBloom) {
        bfw.compactBloom();
      }
      return haveBloom;
    }

    private boolean closeGeneralBloomFilter() throws IOException {
      boolean hasGeneralBloom = closeBloomFilter(generalBloomFilterWriter);

      // add the general Bloom filter writer and append file info
      if (hasGeneralBloom) {
        writer.addGeneralBloomFilter(generalBloomFilterWriter);
        writer.appendFileInfo(BLOOM_FILTER_TYPE_KEY,
            Bytes.toBytes(bloomType.toString()));
        if (lastBloomKey != null) {
          writer.appendFileInfo(LAST_BLOOM_KEY, Arrays.copyOfRange(
              lastBloomKey, lastBloomKeyOffset, lastBloomKeyOffset
                  + lastBloomKeyLen));
        }
      }
      return hasGeneralBloom;
    }

    private boolean closeDeleteFamilyBloomFilter() throws IOException {
      boolean hasDeleteFamilyBloom = closeBloomFilter(deleteFamilyBloomFilterWriter);

      // add the delete family Bloom filter writer
      if (hasDeleteFamilyBloom) {
        writer.addDeleteFamilyBloomFilter(deleteFamilyBloomFilterWriter);
      }

      // append file info about the number of delete family kvs
      // even if there is no delete family Bloom.
      writer.appendFileInfo(DELETE_FAMILY_COUNT,
          Bytes.toBytes(this.deleteFamilyCnt));

      return hasDeleteFamilyBloom;
    }

    public void close() throws IOException {
      boolean hasGeneralBloom = this.closeGeneralBloomFilter();
      boolean hasDeleteFamilyBloom = this.closeDeleteFamilyBloomFilter();

      writer.close();

      // Log final Bloom filter statistics. This needs to be done after close()
      // because compound Bloom filters might be finalized as part of closing.
      if (StoreFile.LOG.isTraceEnabled()) {
        StoreFile.LOG.trace((hasGeneralBloom ? "" : "NO ") + "General Bloom and " +
          (hasDeleteFamilyBloom ? "" : "NO ") + "DeleteFamily" + " was added to HFile " +
          getPath());
      }
    }

    public void appendFileInfo(byte[] key, byte[] value) throws IOException {
      writer.appendFileInfo(key, value);
    }

    /** For use in testing, e.g. {@link org.apache.hadoop.hbase.regionserver.CreateRandomStoreFile}
     */
    HFile.Writer getHFileWriter() {
      return writer;
    }
  }

  /**
   * Reader for a StoreFile.
   */
  public static class Reader {
    static final Log LOG = LogFactory.getLog(Reader.class.getName());

    protected BloomFilter generalBloomFilter = null;
    protected BloomFilter deleteFamilyBloomFilter = null;
    protected BloomType bloomFilterType;
    private final HFile.Reader reader;
    protected TimeRange timeRange;
    protected long sequenceID = -1;
    private byte[] lastBloomKey;
    private long deleteFamilyCnt = -1;
    private boolean bulkLoadResult = false;

    public Reader(FileSystem fs, Path path, CacheConfig cacheConf, Configuration conf)
        throws IOException {
      reader = HFile.createReader(fs, path, cacheConf, conf);
      bloomFilterType = BloomType.NONE;
    }

    public Reader(FileSystem fs, Path path, FSDataInputStreamWrapper in, long size,
        CacheConfig cacheConf, Configuration conf) throws IOException {
      reader = HFile.createReader(fs, path, in, size, cacheConf, conf);
      bloomFilterType = BloomType.NONE;
    }

    /**
     * ONLY USE DEFAULT CONSTRUCTOR FOR UNIT TESTS
     */
    Reader() {
      this.reader = null;
    }

    public KVComparator getComparator() {
      return reader.getComparator();
    }

    /**
     * Get a scanner to scan over this StoreFile. Do not use
     * this overload if using this scanner for compactions.
     *
     * @param cacheBlocks should this scanner cache blocks?
     * @param pread use pread (for highly concurrent small readers)
     * @return a scanner
     */
    public StoreFileScanner getStoreFileScanner(boolean cacheBlocks,
                                               boolean pread) {
      return getStoreFileScanner(cacheBlocks, pread, false,
        // 0 is passed as readpoint because this method is only used by tests
        // where a StoreFile is directly operated upon
        0);
    }

    /**
     * Get a scanner to scan over this StoreFile.
     * Bulk loaded files may or may not have mvcc info.
     * We will consistently ignore MVCC info in bulk loaded files;
     * they will be visible to scanners immediately following bulk load.
     *
     * @param cacheBlocks should this scanner cache blocks?
     * @param pread use pread (for highly concurrent small readers)
     * @param isCompaction is scanner being used for compaction?
     * @param readPt the read point (MVCC) to enforce while scanning
     * @return a scanner
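     *
     * <p>A minimal read sketch; the store file {@code sf}, the seek key
     * {@code firstKV} and the read point {@code readPt} are assumed to be
     * supplied by the caller:
     * <pre>{@code
     * StoreFileScanner s = sf.createReader().getStoreFileScanner(true, false, false, readPt);
     * try {
     *   if (s.seek(firstKV)) {
     *     for (KeyValue kv = s.next(); kv != null; kv = s.next()) {
     *       // process kv
     *     }
     *   }
     * } finally {
     *   s.close();
     * }
     * }</pre>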
     */
    public StoreFileScanner getStoreFileScanner(boolean cacheBlocks,
                                               boolean pread,
                                               boolean isCompaction, long readPt) {
      return new StoreFileScanner(this,
                                 getScanner(cacheBlocks, pread, isCompaction),
                                 !isCompaction, reader.hasMVCCInfo() && !this.bulkLoadResult,
                                 readPt);
    }

    /**
     * Warning: Do not write further code which depends on this call. Instead
     * use getStoreFileScanner(), which returns a StoreFileScanner; that is the
     * preferred way to scan a store with higher-level concepts.
     *
     * @param cacheBlocks should we cache the blocks?
     * @param pread use pread (for concurrent small readers)
     * @return the underlying HFileScanner
     */
    @Deprecated
    public HFileScanner getScanner(boolean cacheBlocks, boolean pread) {
      return getScanner(cacheBlocks, pread, false);
    }

    /**
     * Warning: Do not write further code which depends on this call. Instead
     * use getStoreFileScanner(), which returns a StoreFileScanner; that is the
     * preferred way to scan a store with higher-level concepts.
     *
     * @param cacheBlocks
     *          should we cache the blocks?
     * @param pread
     *          use pread (for concurrent small readers)
     * @param isCompaction
     *          is scanner being used for compaction?
     * @return the underlying HFileScanner
     */
    @Deprecated
    public HFileScanner getScanner(boolean cacheBlocks, boolean pread,
        boolean isCompaction) {
      return reader.getScanner(cacheBlocks, pread, isCompaction);
    }

    public void close(boolean evictOnClose) throws IOException {
      reader.close(evictOnClose);
    }

    /**
     * Check if this storeFile may contain keys within the TimeRange that
     * have not expired (i.e. not older than oldestUnexpiredTS).
     * @param tr the time range being queried
     * @param oldestUnexpiredTS the oldest timestamp that is not expired, as
     *          determined by the column family's TTL
     * @return false if queried keys definitely don't exist in this StoreFile
     */
    boolean passesTimerangeFilter(TimeRange tr, long oldestUnexpiredTS) {
      return this.timeRange == null ? true :
        this.timeRange.includesTimeRange(tr) && this.timeRange.getMax() >= oldestUnexpiredTS;
    }

    /**
     * Checks whether the given scan passes the Bloom filter (if present). Only
     * checks Bloom filters for single-row or single-row-column scans. Bloom
     * filter checking for multi-gets is implemented as part of the store
     * scanner system (see {@link StoreFileScanner#seekExactly}) and uses
     * the lower-level API {@link #passesGeneralBloomFilter(byte[], int, int, byte[],
     * int, int)}.
     *
     * @param scan the scan specification. Used to determine the row, and to
     *          check whether this is a single-row ("get") scan.
     * @param columns the set of columns. Only used for row-column Bloom
     *          filters.
     * @return true if the scan with the given column set passes the Bloom
     *         filter, or if the Bloom filter is not applicable for the scan.
     *         False if the Bloom filter is applicable and the scan fails it.
     */
    boolean passesBloomFilter(Scan scan,
        final SortedSet<byte[]> columns) {
      // Multi-column non-get scans will use Bloom filters through the
      // lower-level API function that this function calls.
      if (!scan.isGetScan()) {
        return true;
      }

      byte[] row = scan.getStartRow();
      switch (this.bloomFilterType) {
        case ROW:
          return passesGeneralBloomFilter(row, 0, row.length, null, 0, 0);

        case ROWCOL:
          if (columns != null && columns.size() == 1) {
            byte[] column = columns.first();
            return passesGeneralBloomFilter(row, 0, row.length, column, 0,
                column.length);
          }

          // For multi-column queries the Bloom filter is checked from the
          // seekExact operation.
          return true;

        default:
          return true;
      }
    }

    public boolean passesDeleteFamilyBloomFilter(byte[] row, int rowOffset,
        int rowLen) {
      // Cache Bloom filter as a local variable in case it is set to null by
      // another thread on an IO error.
      BloomFilter bloomFilter = this.deleteFamilyBloomFilter;

      // Empty file or there is no delete family at all
      if (reader.getTrailer().getEntryCount() == 0 || deleteFamilyCnt == 0) {
        return false;
      }

      if (bloomFilter == null) {
        return true;
      }

      try {
        if (!bloomFilter.supportsAutoLoading()) {
          return true;
        }
        return bloomFilter.contains(row, rowOffset, rowLen, null);
      } catch (IllegalArgumentException e) {
        LOG.error("Bad Delete Family bloom filter data -- proceeding without",
            e);
        setDeleteFamilyBloomFilterFaulty();
      }

      return true;
    }

    /**
     * A method for checking Bloom filters. Called directly from
     * StoreFileScanner in case of a multi-column query.
     *
     * @param row row key bytes
     * @param rowOffset offset of the row key in the array
     * @param rowLen length of the row key
     * @param col column qualifier bytes, or null for row-only filters
     * @param colOffset offset of the qualifier in the array
     * @param colLen length of the qualifier
     * @return True if passes
     */
    public boolean passesGeneralBloomFilter(byte[] row, int rowOffset,
        int rowLen, byte[] col, int colOffset, int colLen) {
      // Cache Bloom filter as a local variable in case it is set to null by
      // another thread on an IO error.
      BloomFilter bloomFilter = this.generalBloomFilter;
      if (bloomFilter == null) {
        return true;
      }

      byte[] key;
      switch (bloomFilterType) {
        case ROW:
          if (col != null) {
            throw new RuntimeException("Row-only Bloom filter called with " +
                "column specified");
          }
          if (rowOffset != 0 || rowLen != row.length) {
            throw new AssertionError("For row-only Bloom filters the row "
                + "must occupy the whole array");
          }
          key = row;
          break;

        case ROWCOL:
          key = bloomFilter.createBloomKey(row, rowOffset, rowLen, col,
              colOffset, colLen);
          break;

        default:
          return true;
      }

      // Empty file
      if (reader.getTrailer().getEntryCount() == 0) {
        return false;
      }

      try {
        boolean shouldCheckBloom;
        ByteBuffer bloom;
        if (bloomFilter.supportsAutoLoading()) {
          bloom = null;
          shouldCheckBloom = true;
        } else {
          bloom = reader.getMetaBlock(HFile.BLOOM_FILTER_DATA_KEY,
              true);
          shouldCheckBloom = bloom != null;
        }

        if (shouldCheckBloom) {
          boolean exists;

          // Whether the primary Bloom key is greater than the last Bloom key
          // from the file info. For row-column Bloom filters this is not yet
          // a sufficient condition to return false.
          boolean keyIsAfterLast = lastBloomKey != null
              && bloomFilter.getComparator().compareFlatKey(key, lastBloomKey) > 0;

          if (bloomFilterType == BloomType.ROWCOL) {
            // Since a Row Delete is essentially a DeleteFamily applied to all
            // columns, a file might be skipped if using row+col Bloom filter.
            // In order to ensure this file is included an additional check is
            // required looking only for a row bloom.
            byte[] rowBloomKey = bloomFilter.createBloomKey(row, rowOffset, rowLen,
                null, 0, 0);

            if (keyIsAfterLast
                && bloomFilter.getComparator().compareFlatKey(rowBloomKey,
                    lastBloomKey) > 0) {
              exists = false;
            } else {
              exists =
                  bloomFilter.contains(key, 0, key.length, bloom) ||
                  bloomFilter.contains(rowBloomKey, 0, rowBloomKey.length,
                      bloom);
            }
          } else {
            exists = !keyIsAfterLast
                && bloomFilter.contains(key, 0, key.length, bloom);
          }

          return exists;
        }
      } catch (IOException e) {
        LOG.error("Error reading bloom filter data -- proceeding without",
            e);
        setGeneralBloomFilterFaulty();
      } catch (IllegalArgumentException e) {
        LOG.error("Bad bloom filter data -- proceeding without", e);
        setGeneralBloomFilterFaulty();
      }

      return true;
    }

    /**
     * Checks whether the given scan rowkey range overlaps with the current
     * storefile's key range.
     * @param scan the scan specification. Used to determine the rowkey range.
     * @return true if there is overlap, false otherwise
     */
    public boolean passesKeyRangeFilter(Scan scan) {
      if (this.getFirstKey() == null || this.getLastKey() == null) {
        // the file is empty
        return false;
      }
      if (Bytes.equals(scan.getStartRow(), HConstants.EMPTY_START_ROW)
          && Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW)) {
        return true;
      }
      KeyValue smallestScanKeyValue = scan.isReversed() ? KeyValue
          .createFirstOnRow(scan.getStopRow()) : KeyValue.createFirstOnRow(scan
          .getStartRow());
      KeyValue largestScanKeyValue = scan.isReversed() ? KeyValue
          .createLastOnRow(scan.getStartRow()) : KeyValue.createLastOnRow(scan
          .getStopRow());
      // Non-overlapping if the file starts after the scan's end (unless the
      // scan is open-ended) or ends before the scan's start.
      boolean nonOverLapping = (getComparator().compareFlatKey(
          this.getFirstKey(), largestScanKeyValue.getKey()) > 0 && !Bytes
          .equals(scan.isReversed() ? scan.getStartRow() : scan.getStopRow(),
              HConstants.EMPTY_END_ROW))
          || getComparator().compareFlatKey(this.getLastKey(),
              smallestScanKeyValue.getKey()) < 0;
      return !nonOverLapping;
    }

    public Map<byte[], byte[]> loadFileInfo() throws IOException {
      Map<byte [], byte []> fi = reader.loadFileInfo();

      byte[] b = fi.get(BLOOM_FILTER_TYPE_KEY);
      if (b != null) {
        bloomFilterType = BloomType.valueOf(Bytes.toString(b));
      }

      lastBloomKey = fi.get(LAST_BLOOM_KEY);
      byte[] cnt = fi.get(DELETE_FAMILY_COUNT);
      if (cnt != null) {
        deleteFamilyCnt = Bytes.toLong(cnt);
      }

      return fi;
    }

    public void loadBloomfilter() {
      this.loadBloomfilter(BlockType.GENERAL_BLOOM_META);
      this.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META);
    }

1418     private void loadBloomfilter(BlockType blockType) {
1419       try {
1420         if (blockType == BlockType.GENERAL_BLOOM_META) {
1421           if (this.generalBloomFilter != null)
1422             return; // Bloom has been loaded
1423 
1424           DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
1425           if (bloomMeta != null) {
1426             // sanity check for NONE Bloom filter
1427             if (bloomFilterType == BloomType.NONE) {
1428               throw new IOException(
1429                   "valid bloom filter type not found in FileInfo");
1430             } else {
1431               generalBloomFilter = BloomFilterFactory.createFromMeta(bloomMeta,
1432                   reader);
1433               if (LOG.isTraceEnabled()) {
1434                 LOG.trace("Loaded " + bloomFilterType.toString() + " "
1435                   + generalBloomFilter.getClass().getSimpleName()
1436                   + " metadata for " + reader.getName());
1437               }
1438             }
1439           }
1440         } else if (blockType == BlockType.DELETE_FAMILY_BLOOM_META) {
1441           if (this.deleteFamilyBloomFilter != null)
1442             return; // Bloom has been loaded
1443 
1444           DataInput bloomMeta = reader.getDeleteBloomFilterMetadata();
1445           if (bloomMeta != null) {
1446             deleteFamilyBloomFilter = BloomFilterFactory.createFromMeta(
1447                 bloomMeta, reader);
1448             LOG.info("Loaded Delete Family Bloom ("
1449                 + deleteFamilyBloomFilter.getClass().getSimpleName()
1450                 + ") metadata for " + reader.getName());
1451           }
1452         } else {
1453           throw new RuntimeException("Block Type: " + blockType.toString()
1454               + " is not supported for Bloom filter");
1455         }
1456       } catch (IOException e) {
1457         LOG.error("Error reading bloom filter meta for " + blockType
1458             + " -- proceeding without", e);
1459         setBloomFilterFaulty(blockType);
1460       } catch (IllegalArgumentException e) {
1461         LOG.error("Bad bloom filter meta " + blockType
1462             + " -- proceeding without", e);
1463         setBloomFilterFaulty(blockType);
1464       }
1465     }
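
         // The loader above is idempotent: once generalBloomFilter or
         // deleteFamilyBloomFilter is set, a repeat call returns early, so
         // callers normally just use the no-arg overload.  Sketch:
         //
         //   storeFileReader.loadBloomfilter();  // loads both, if present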
1466 
1467     private void setBloomFilterFaulty(BlockType blockType) {
1468       if (blockType == BlockType.GENERAL_BLOOM_META) {
1469         setGeneralBloomFilterFaulty();
1470       } else if (blockType == BlockType.DELETE_FAMILY_BLOOM_META) {
1471         setDeleteFamilyBloomFilterFaulty();
1472       }
1473     }
1474 
1475     /**
1476      * Returns the number of Bloom filter entries in this store file, or an
1477      * estimate thereof if the Bloom filter is not loaded. This is always an
1478      * upper bound on the number of Bloom filter entries.
1479      *
1480      * @return an estimate of the number of Bloom filter entries in this file
1481      */
1482     public long getFilterEntries() {
1483       return generalBloomFilter != null ? generalBloomFilter.getKeyCount()
1484           : reader.getEntries();
1485     }
1486 
1487     public void setGeneralBloomFilterFaulty() {
1488       generalBloomFilter = null;
1489     }
1490 
1491     public void setDeleteFamilyBloomFilterFaulty() {
1492       this.deleteFamilyBloomFilter = null;
1493     }
1494 
1495     public byte[] getLastKey() {
1496       return reader.getLastKey();
1497     }
1498 
1499     public byte[] getLastRowKey() {
1500       return reader.getLastRowKey();
1501     }
1502 
1503     public byte[] midkey() throws IOException {
1504       return reader.midkey();
1505     }
1506 
1507     public long length() {
1508       return reader.length();
1509     }
1510 
1511     public long getTotalUncompressedBytes() {
1512       return reader.getTrailer().getTotalUncompressedBytes();
1513     }
1514 
1515     public long getEntries() {
1516       return reader.getEntries();
1517     }
1518 
1519     public long getDeleteFamilyCnt() {
1520       return deleteFamilyCnt;
1521     }
1522 
1523     public byte[] getFirstKey() {
1524       return reader.getFirstKey();
1525     }
1526 
1527     public long indexSize() {
1528       return reader.indexSize();
1529     }
1530 
1531     public BloomType getBloomFilterType() {
1532       return this.bloomFilterType;
1533     }
1534 
1535     public long getSequenceID() {
1536       return sequenceID;
1537     }
1538 
1539     public void setSequenceID(long sequenceID) {
1540       this.sequenceID = sequenceID;
1541     }
1542 
1543     BloomFilter getGeneralBloomFilter() {
1544       return generalBloomFilter;
1545     }
1546 
1547     long getUncompressedDataIndexSize() {
1548       return reader.getTrailer().getUncompressedDataIndexSize();
1549     }
1550 
1551     public long getTotalBloomSize() {
1552       if (generalBloomFilter == null)
1553         return 0;
1554       return generalBloomFilter.getByteSize();
1555     }
1556 
1557     public int getHFileVersion() {
1558       return reader.getTrailer().getMajorVersion();
1559     }
1560 
1561     public int getHFileMinorVersion() {
1562       return reader.getTrailer().getMinorVersion();
1563     }
1564 
1565     public HFile.Reader getHFileReader() {
1566       return reader;
1567     }
1568 
1569     void disableBloomFilterForTesting() {
1570       generalBloomFilter = null;
1571       this.deleteFamilyBloomFilter = null;
1572     }
1573 
1574     public long getMaxTimestamp() {
1575       return timeRange == null ? Long.MAX_VALUE : timeRange.getMax();
1576     }
1577 
1578     public void setBulkLoaded(boolean bulkLoadResult) {
1579       this.bulkLoadResult = bulkLoadResult;
1580     }
1581 
1582     public boolean isBulkLoaded() {
1583       return this.bulkLoadResult;
1584     }
1585   }
1586 
1587   /**
1588    * Useful comparators for comparing StoreFiles.
1589    */
1590   public abstract static class Comparators {
1591     /**
1592      * Comparator that compares based on the Sequence Ids of
1593      * the StoreFiles. Bulk loads that did not request a seq ID
1594      * are given a seq id of -1; thus, they are placed before all
1595      * non-bulk loads and before bulk loads with a sequence Id. Among
1596      * these files, file size (descending) determines the ordering, then
1597      * bulkLoadTime; if there are ties, the path name is the tie-breaker.
1598      */
1599     public static final Comparator<StoreFile> SEQ_ID =
1600       Ordering.compound(ImmutableList.of(
1601           Ordering.natural().onResultOf(new GetSeqId()),
1602           Ordering.natural().onResultOf(new GetFileSize()).reverse(),
1603           Ordering.natural().onResultOf(new GetBulkTime()),
1604           Ordering.natural().onResultOf(new GetPathName())
1605       ));
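
         // Hedged usage sketch (variable names illustrative; List/ArrayList
         // imports assumed): sorting a store's files so the newest data,
         // i.e. the highest sequence id, sorts last:
         //
         //   List<StoreFile> files = new ArrayList<StoreFile>(candidates);
         //   Collections.sort(files, StoreFile.Comparators.SEQ_ID);
         //   StoreFile newest = files.get(files.size() - 1);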
1606 
1607     /**
1608      * Comparator for time-aware compaction. SeqId is still the first
1609      *   ordering criterion to maintain MVCC.
1610      */
1611     public static final Comparator<StoreFile> SEQ_ID_MAX_TIMESTAMP =
1612       Ordering.compound(ImmutableList.of(
1613         Ordering.natural().onResultOf(new GetSeqId()),
1614         Ordering.natural().onResultOf(new GetMaxTimestamp()),
1615         Ordering.natural().onResultOf(new GetFileSize()).reverse(),
1616         Ordering.natural().onResultOf(new GetBulkTime()),
1617         Ordering.natural().onResultOf(new GetPathName())
1618       ));
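
         // Ordering.compound evaluates its comparators in list order,
         // falling through to the next only on a tie: files with equal max
         // sequence ids are ordered by max timestamp, then by file size
         // (descending), bulk load time, and finally path name.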
1619 
1620     private static class GetSeqId implements Function<StoreFile, Long> {
1621       @Override
1622       public Long apply(StoreFile sf) {
1623         return sf.getMaxSequenceId();
1624       }
1625     }
1626 
1627     private static class GetFileSize implements Function<StoreFile, Long> {
1628       @Override
1629       public Long apply(StoreFile sf) {
1630         return sf.getReader().length();
1631       }
1632     }
1633 
1634     private static class GetBulkTime implements Function<StoreFile, Long> {
1635       @Override
1636       public Long apply(StoreFile sf) {
1637         if (!sf.isBulkLoadResult()) return Long.MAX_VALUE;
1638         return sf.getBulkLoadTimestamp();
1639       }
1640     }
1641 
1642     private static class GetPathName implements Function<StoreFile, String> {
1643       @Override
1644       public String apply(StoreFile sf) {
1645         return sf.getPath().getName();
1646       }
1647     }
1648 
1649     private static class GetMaxTimestamp implements Function<StoreFile, Long> {
1650       @Override
1651       public Long apply(StoreFile sf) {
1652         return sf.getMaximumTimestamp() == null ? (Long) Long.MAX_VALUE : sf.getMaximumTimestamp();
1653       }
1654     }
1655   }
1656 }