View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.util;
21  
22  import java.io.DataInput;
23  import java.io.IOException;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.classification.InterfaceAudience;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.hbase.KeyValue;
30  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
31  import org.apache.hadoop.hbase.io.hfile.HFile;
32  import org.apache.hadoop.hbase.regionserver.StoreFile;
33  import org.apache.hadoop.hbase.regionserver.BloomType;
34  
35  /**
36   * Handles Bloom filter initialization based on configuration and serialized
37   * metadata in the reader and writer of {@link StoreFile}.
38   */
39  @InterfaceAudience.Private
40  public final class BloomFilterFactory {
41  
42    private static final Log LOG =
43        LogFactory.getLog(BloomFilterFactory.class.getName());
44  
45    /** This class should not be instantiated. */
46    private BloomFilterFactory() {}
47  
48    /**
49     * Specifies the target error rate to use when selecting the number of keys
50     * per Bloom filter.
51     */
52    public static final String IO_STOREFILE_BLOOM_ERROR_RATE =
53        "io.storefile.bloom.error.rate";
54  
55    /**
56     * Maximum folding factor allowed. The Bloom filter will be shrunk by
57     * the factor of up to 2 ** this times if we oversize it initially.
58     */
59    public static final String IO_STOREFILE_BLOOM_MAX_FOLD =
60        "io.storefile.bloom.max.fold";
61  
62    /**
63     * For default (single-block) Bloom filters this specifies the maximum number
64     * of keys.
65     */
66    public static final String IO_STOREFILE_BLOOM_MAX_KEYS =
67        "io.storefile.bloom.max.keys";
68  
69    /** Master switch to enable Bloom filters */
70    public static final String IO_STOREFILE_BLOOM_ENABLED =
71        "io.storefile.bloom.enabled";
72  
73    /** Master switch to enable Delete Family Bloom filters */
74    public static final String IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED =
75        "io.storefile.delete.family.bloom.enabled";
76  
77    /**
78     * Target Bloom block size. Bloom filter blocks of approximately this size
79     * are interleaved with data blocks.
80     */
81    public static final String IO_STOREFILE_BLOOM_BLOCK_SIZE =
82        "io.storefile.bloom.block.size";
83  
84    /** Maximum number of times a Bloom filter can be "folded" if oversized */
85    private static final int MAX_ALLOWED_FOLD_FACTOR = 7;
86  
87    /**
88     * Instantiates the correct Bloom filter class based on the version provided
89     * in the meta block data.
90     *
91     * @param meta the byte array holding the Bloom filter's metadata, including
92     *          version information
93     * @param reader the {@link HFile} reader to use to lazily load Bloom filter
94     *          blocks
95     * @return an instance of the correct type of Bloom filter
96     * @throws IllegalArgumentException
97     */
98    public static BloomFilter
99        createFromMeta(DataInput meta, HFile.Reader reader)
100       throws IllegalArgumentException, IOException {
101     int version = meta.readInt();
102     switch (version) {
103       case ByteBloomFilter.VERSION:
104         // This is only possible in a version 1 HFile. We are ignoring the
105         // passed comparator because raw byte comparators are always used
106         // in version 1 Bloom filters.
107         return new ByteBloomFilter(meta);
108 
109       case CompoundBloomFilterBase.VERSION:
110         return new CompoundBloomFilter(meta, reader);
111 
112       default:
113         throw new IllegalArgumentException(
114           "Bad bloom filter format version " + version
115         );
116     }
117   }
118 
119   /**
120    * @return true if general Bloom (Row or RowCol) filters are enabled in the
121    * given configuration
122    */
123   public static boolean isGeneralBloomEnabled(Configuration conf) {
124     return conf.getBoolean(IO_STOREFILE_BLOOM_ENABLED, true);
125   }
126 
127   /**
128    * @return true if Delete Family Bloom filters are enabled in the given configuration
129    */
130   public static boolean isDeleteFamilyBloomEnabled(Configuration conf) {
131     return conf.getBoolean(IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED, true);
132   }
133 
134   /**
135    * @return the Bloom filter error rate in the given configuration
136    */
137   public static float getErrorRate(Configuration conf) {
138     return conf.getFloat(IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
139   }
140 
141   /**
142    * @return the value for Bloom filter max fold in the given configuration
143    */
144   public static int getMaxFold(Configuration conf) {
145     return conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, MAX_ALLOWED_FOLD_FACTOR);
146   }
147 
148   /** @return the compound Bloom filter block size from the configuration */
149   public static int getBloomBlockSize(Configuration conf) {
150     return conf.getInt(IO_STOREFILE_BLOOM_BLOCK_SIZE, 128 * 1024);
151   }
152 
153   /**
154   * @return max key for the Bloom filter from the configuration
155   */
156   public static int getMaxKeys(Configuration conf) {
157     return conf.getInt(IO_STOREFILE_BLOOM_MAX_KEYS, 128 * 1000 * 1000);
158   }
159 
160   /**
161    * Creates a new general (Row or RowCol) Bloom filter at the time of
162    * {@link org.apache.hadoop.hbase.regionserver.StoreFile} writing.
163    *
164    * @param conf
165    * @param cacheConf
166    * @param bloomType
167    * @param maxKeys an estimate of the number of keys we expect to insert.
168    *        Irrelevant if compound Bloom filters are enabled.
169    * @param writer the HFile writer
170    * @return the new Bloom filter, or null in case Bloom filters are disabled
171    *         or when failed to create one.
172    */
173   public static BloomFilterWriter createGeneralBloomAtWrite(Configuration conf,
174       CacheConfig cacheConf, BloomType bloomType, int maxKeys,
175       HFile.Writer writer) {
176     if (!isGeneralBloomEnabled(conf)) {
177       LOG.trace("Bloom filters are disabled by configuration for "
178           + writer.getPath()
179           + (conf == null ? " (configuration is null)" : ""));
180       return null;
181     } else if (bloomType == BloomType.NONE) {
182       LOG.trace("Bloom filter is turned off for the column family");
183       return null;
184     }
185 
186     float err = getErrorRate(conf);
187 
188     // In case of row/column Bloom filter lookups, each lookup is an OR if two
189     // separate lookups. Therefore, if each lookup's false positive rate is p,
190     // the resulting false positive rate is err = 1 - (1 - p)^2, and
191     // p = 1 - sqrt(1 - err).
192     if (bloomType == BloomType.ROWCOL) {
193       err = (float) (1 - Math.sqrt(1 - err));
194     }
195 
196     int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD,
197         MAX_ALLOWED_FOLD_FACTOR);
198 
199     // Do we support compound bloom filters?
200     if (HFile.getFormatVersion(conf) > HFile.MIN_FORMAT_VERSION) {
201       // In case of compound Bloom filters we ignore the maxKeys hint.
202       CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(
203           getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold,
204           cacheConf.shouldCacheBloomsOnWrite(), bloomType == BloomType.ROWCOL
205               ? KeyValue.KEY_COMPARATOR : Bytes.BYTES_RAWCOMPARATOR);
206       writer.addInlineBlockWriter(bloomWriter);
207       return bloomWriter;
208     } else {
209       // A single-block Bloom filter. Only used when testing HFile format
210       // version 1.
211       int tooBig = conf.getInt(IO_STOREFILE_BLOOM_MAX_KEYS,
212           128 * 1000 * 1000);
213 
214       if (maxKeys <= 0) {
215         LOG.warn("Invalid maximum number of keys specified: " + maxKeys
216             + ", not using Bloom filter");
217         return null;
218       } else if (maxKeys < tooBig) {
219         BloomFilterWriter bloom = new ByteBloomFilter((int) maxKeys, err,
220             Hash.getHashType(conf), maxFold);
221         bloom.allocBloom();
222         return bloom;
223       } else {
224         LOG.debug("Skipping bloom filter because max keysize too large: "
225             + maxKeys);
226       }
227     }
228     return null;
229   }
230 
231   /**
232    * Creates a new Delete Family Bloom filter at the time of
233    * {@link org.apache.hadoop.hbase.regionserver.StoreFile} writing.
234    * @param conf
235    * @param cacheConf
236    * @param maxKeys an estimate of the number of keys we expect to insert.
237    *        Irrelevant if compound Bloom filters are enabled.
238    * @param writer the HFile writer
239    * @return the new Bloom filter, or null in case Bloom filters are disabled
240    *         or when failed to create one.
241    */
242   public static BloomFilterWriter createDeleteBloomAtWrite(Configuration conf,
243       CacheConfig cacheConf, int maxKeys, HFile.Writer writer) {
244     if (!isDeleteFamilyBloomEnabled(conf)) {
245       LOG.info("Delete Bloom filters are disabled by configuration for "
246           + writer.getPath()
247           + (conf == null ? " (configuration is null)" : ""));
248       return null;
249     }
250 
251     float err = getErrorRate(conf);
252 
253     if (HFile.getFormatVersion(conf) > HFile.MIN_FORMAT_VERSION) {
254       int maxFold = getMaxFold(conf);
255       // In case of compound Bloom filters we ignore the maxKeys hint.
256       CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(
257           getBloomBlockSize(conf), err, Hash.getHashType(conf),
258           maxFold,
259           cacheConf.shouldCacheBloomsOnWrite(), Bytes.BYTES_RAWCOMPARATOR);
260       writer.addInlineBlockWriter(bloomWriter);
261       return bloomWriter;
262     } else {
263       LOG.info("Delete Family Bloom filter is not supported in HFile V1");
264       return null;
265     }
266   }
267 };