/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.DataInput;
import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.regionserver.BloomType;

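/**
 * Handles Bloom filter initialization based on configuration and serialized
 * metadata: {@link #createFromMeta} instantiates a filter when reading an
 * existing store file, while {@link #createGeneralBloomAtWrite} and
 * {@link #createDeleteBloomAtWrite} create filter writers for new files.
 */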
@InterfaceAudience.Private
public final class BloomFilterFactory {

  private static final Log LOG =
      LogFactory.getLog(BloomFilterFactory.class.getName());

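  /** Private constructor to keep this utility class from being instantiated. */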
  private BloomFilterFactory() {}

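  /** Specifies the target false positive rate to use for Bloom filters. */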
  public static final String IO_STOREFILE_BLOOM_ERROR_RATE =
      "io.storefile.bloom.error.rate";

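  /**
   * Maximum folding factor allowed. An oversized Bloom filter can be shrunk
   * ("folded") by a factor of up to 2 to this power.
   */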
  public static final String IO_STOREFILE_BLOOM_MAX_FOLD =
      "io.storefile.bloom.max.fold";

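  /**
   * For single-block Bloom filters this specifies the maximum number of keys;
   * above this the filter is not created.
   */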
  public static final String IO_STOREFILE_BLOOM_MAX_KEYS =
      "io.storefile.bloom.max.keys";

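  /** Master switch to enable general Bloom filters. */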
  public static final String IO_STOREFILE_BLOOM_ENABLED =
      "io.storefile.bloom.enabled";

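  /** Master switch to enable Delete Family Bloom filters. */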
  public static final String IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED =
      "io.storefile.delete.family.bloom.enabled";

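  /**
   * Target size, in bytes, of each block of a compound Bloom filter
   * (see {@link CompoundBloomFilterWriter}).
   */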
  public static final String IO_STOREFILE_BLOOM_BLOCK_SIZE =
      "io.storefile.bloom.block.size";

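  /** Default upper bound on the Bloom filter folding factor. */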
  private static final int MAX_ALLOWED_FOLD_FACTOR = 7;

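  /**
   * Instantiates the correct Bloom filter class based on the version stored
   * in the meta block data.
   *
   * @param meta the stream holding the Bloom filter's serialized metadata,
   *          starting with the version number
   * @param reader the {@link HFile} reader used by compound Bloom filters to
   *          load filter blocks on demand
   * @return an instance of the correct type of Bloom filter
   * @throws IllegalArgumentException if the version number is not recognized
   * @throws IOException if the metadata cannot be read
   */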
  public static BloomFilter
      createFromMeta(DataInput meta, HFile.Reader reader)
      throws IllegalArgumentException, IOException {
    int version = meta.readInt();
    switch (version) {
      case ByteBloomFilter.VERSION:
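        // A single-block Bloom filter, fully described by the meta data;
        // no reader is needed to load additional blocks.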
        return new ByteBloomFilter(meta);

      case CompoundBloomFilterBase.VERSION:
        return new CompoundBloomFilter(meta, reader);

      default:
        throw new IllegalArgumentException(
            "Bad bloom filter format version " + version);
    }
  }

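  /**
   * @return true if general Bloom filters (as opposed to Delete Family Bloom
   *         filters) are enabled in the given configuration
   */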
  public static boolean isGeneralBloomEnabled(Configuration conf) {
    return conf.getBoolean(IO_STOREFILE_BLOOM_ENABLED, true);
  }

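  /**
   * @return true if Delete Family Bloom filters are enabled in the given
   *         configuration
   */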
  public static boolean isDeleteFamilyBloomEnabled(Configuration conf) {
    return conf.getBoolean(IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED, true);
  }

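  /** @return the target Bloom filter error rate from the configuration (default 1%) */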
  public static float getErrorRate(Configuration conf) {
    return conf.getFloat(IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
  }

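  /** @return the maximum Bloom filter folding factor from the configuration */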
  public static int getMaxFold(Configuration conf) {
    return conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, MAX_ALLOWED_FOLD_FACTOR);
  }

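  /** @return the compound Bloom filter block size, in bytes, from the configuration */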
  public static int getBloomBlockSize(Configuration conf) {
    return conf.getInt(IO_STOREFILE_BLOOM_BLOCK_SIZE, 128 * 1024);
  }

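  /** @return the maximum number of keys allowed for a single-block Bloom filter */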
  public static int getMaxKeys(Configuration conf) {
    return conf.getInt(IO_STOREFILE_BLOOM_MAX_KEYS, 128 * 1000 * 1000);
  }

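  /**
   * Creates a new general (Row or RowCol) Bloom filter at store file write
   * time.
   *
   * @param conf the configuration to read Bloom filter settings from
   * @param cacheConf cache configuration, used to decide whether Bloom blocks
   *          should be cached on write
   * @param bloomType the type of Bloom filter to create
   * @param maxKeys an estimate of the number of keys we expect to insert.
   *          Not used when compound Bloom filters are created.
   * @param writer the HFile writer the Bloom filter is attached to
   * @return the new Bloom filter writer, or null if Bloom filters are
   *         disabled or one could not be created
   */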
  public static BloomFilterWriter createGeneralBloomAtWrite(Configuration conf,
      CacheConfig cacheConf, BloomType bloomType, int maxKeys,
      HFile.Writer writer) {
    if (!isGeneralBloomEnabled(conf)) {
      LOG.trace("Bloom filters are disabled by configuration for "
          + writer.getPath()
          + (conf == null ? " (configuration is null)" : ""));
      return null;
    } else if (bloomType == BloomType.NONE) {
      LOG.trace("Bloom filter is turned off for the column family");
      return null;
    }

    float err = getErrorRate(conf);

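    // For row/column Bloom filters, each lookup is an OR of two separate
    // lookups. If each lookup's false positive rate is p, the combined rate
    // is err = 1 - (1 - p)^2, so each lookup needs p = 1 - sqrt(1 - err).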
    if (bloomType == BloomType.ROWCOL) {
      err = (float) (1 - Math.sqrt(1 - err));
    }

    int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD,
        MAX_ALLOWED_FOLD_FACTOR);

    if (HFile.getFormatVersion(conf) > HFile.MIN_FORMAT_VERSION) {
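      // For HFile format versions above the minimum, use a compound
      // (multi-block) Bloom filter; the maxKeys estimate is not needed here.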
      CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(
          getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold,
          cacheConf.shouldCacheBloomsOnWrite(), bloomType == BloomType.ROWCOL
              ? KeyValue.KEY_COMPARATOR : Bytes.BYTES_RAWCOMPARATOR);
      writer.addInlineBlockWriter(bloomWriter);
      return bloomWriter;
    } else {
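      // A single-block Bloom filter, used only with the minimum HFile format
      // version; it is sized up front and capped by IO_STOREFILE_BLOOM_MAX_KEYS.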
      int tooBig = conf.getInt(IO_STOREFILE_BLOOM_MAX_KEYS,
          128 * 1000 * 1000);

      if (maxKeys <= 0) {
        LOG.warn("Invalid maximum number of keys specified: " + maxKeys
            + ", not using Bloom filter");
        return null;
      } else if (maxKeys < tooBig) {
        BloomFilterWriter bloom = new ByteBloomFilter((int) maxKeys, err,
            Hash.getHashType(conf), maxFold);
        bloom.allocBloom();
        return bloom;
      } else {
        LOG.debug("Skipping Bloom filter because the maximum number of keys "
            + "is too large: " + maxKeys);
      }
    }
    return null;
  }

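  /**
   * Creates a new Delete Family Bloom filter at store file write time.
   *
   * @param conf the configuration to read Bloom filter settings from
   * @param cacheConf cache configuration, used to decide whether Bloom blocks
   *          should be cached on write
   * @param maxKeys an estimate of the number of keys we expect to insert.
   *          Not used when compound Bloom filters are created.
   * @param writer the HFile writer the Bloom filter is attached to
   * @return the new Bloom filter writer, or null if Delete Family Bloom
   *         filters are disabled or not supported by the HFile format version
   */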
  public static BloomFilterWriter createDeleteBloomAtWrite(Configuration conf,
      CacheConfig cacheConf, int maxKeys, HFile.Writer writer) {
    if (!isDeleteFamilyBloomEnabled(conf)) {
      LOG.info("Delete Bloom filters are disabled by configuration for "
          + writer.getPath()
          + (conf == null ? " (configuration is null)" : ""));
      return null;
    }

    float err = getErrorRate(conf);

    if (HFile.getFormatVersion(conf) > HFile.MIN_FORMAT_VERSION) {
      int maxFold = getMaxFold(conf);
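      // Compound (multi-block) Bloom filter keyed on the row only, so a raw
      // byte comparator is used; the maxKeys estimate is not needed here.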
      CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(
          getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold,
          cacheConf.shouldCacheBloomsOnWrite(), Bytes.BYTES_RAWCOMPARATOR);
      writer.addInlineBlockWriter(bloomWriter);
      return bloomWriter;
    } else {
      LOG.info("Delete Family Bloom filter is not supported in HFile V1");
      return null;
    }
  }
}