1 /*
2 * Copyright 2010 The Apache Software Foundation
3 *
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20 package org.apache.hadoop.hbase.util;
21
22 import org.apache.hadoop.io.Writable;
23
24 import java.nio.ByteBuffer;
25
26 /**
27 * Defines the general behavior of a bloom filter.
28 * <p>
29 * The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by
30 * the networking research community in the past decade thanks to the bandwidth efficiencies that it
31 * offers for the transmission of set membership information between networked hosts. A sender encodes
32 * the information into a bit vector, the Bloom filter, that is more compact than a conventional
33 * representation. Computation and space costs for construction are linear in the number of elements.
34 * The receiver uses the filter to test whether various elements are members of the set. Though the
35 * filter will occasionally return a false positive, it will never return a false negative. When creating
36 * the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size.
37 *
38 * <p>
39 * Originally created by
40 * <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
41 *
42 * <p>
43 * It must be extended in order to define the real behavior.
44 */
45 public interface BloomFilter {
46 /**
47 * Allocate memory for the bloom filter data. Note that bloom data isn't
48 * allocated by default because it can grow large & reads would be better
49 * managed by the LRU cache.
50 */
51 void allocBloom();
52
53 /**
54 * Add the specified binary to the bloom filter.
55 *
56 * @param buf data to be added to the bloom
57 */
58 void add(byte []buf);
59
60 /**
61 * Add the specified binary to the bloom filter.
62 *
63 * @param buf data to be added to the bloom
64 * @param offset offset into the data to be added
65 * @param len length of the data to be added
66 */
67 void add(byte []buf, int offset, int len);
68
69 /**
70 * Check if the specified key is contained in the bloom filter.
71 *
72 * @param buf data to check for existence of
73 * @param bloom bloom filter data to search
74 * @return true if matched by bloom, false if not
75 */
76 boolean contains(byte [] buf, ByteBuffer bloom);
77
78 /**
79 * Check if the specified key is contained in the bloom filter.
80 *
81 * @param buf data to check for existence of
82 * @param offset offset into the data
83 * @param length length of the data
84 * @param bloom bloom filter data to search
85 * @return true if matched by bloom, false if not
86 */
87 boolean contains(byte [] buf, int offset, int length, ByteBuffer bloom);
88
89 /**
90 * @return The number of keys added to the bloom
91 */
92 int getKeyCount();
93
94 /**
95 * @return The max number of keys that can be inserted
96 * to maintain the desired error rate
97 */
98 public int getMaxKeys();
99
100 /**
101 * @return Size of the bloom, in bytes
102 */
103 public int getByteSize();
104
105 /**
106 * Compact the bloom before writing metadata & data to disk
107 */
108 void compactBloom();
109
110 /**
111 * Get a writable interface into bloom filter meta data.
112 * @return writable class
113 */
114 Writable getMetaWriter();
115
116 /**
117 * Get a writable interface into bloom filter data (actual bloom).
118 * @return writable class
119 */
120 Writable getDataWriter();
121 }