View Javadoc

1   /*
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.util;
21  
22  import org.apache.hadoop.io.Writable;
23  
24  import java.nio.ByteBuffer;
25  
26  /**
27   * Defines the general behavior of a bloom filter.
28   * <p>
29   * The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by
30   * the networking research community in the past decade thanks to the bandwidth efficiencies that it
31   * offers for the transmission of set membership information between networked hosts.  A sender encodes
32   * the information into a bit vector, the Bloom filter, that is more compact than a conventional
33   * representation. Computation and space costs for construction are linear in the number of elements.
34   * The receiver uses the filter to test whether various elements are members of the set. Though the
35   * filter will occasionally return a false positive, it will never return a false negative. When creating
36   * the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size.
37   *
38   * <p>
39   * Originally created by
40   * <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
41   *
42   * <p>
43   * It must be extended in order to define the real behavior.
44   */
45  public interface BloomFilter {
46    /**
47     * Allocate memory for the bloom filter data.  Note that bloom data isn't
48     * allocated by default because it can grow large & reads would be better
49     * managed by the LRU cache.
50     */
51    void allocBloom();
52  
53    /**
54     * Add the specified binary to the bloom filter.
55     *
56     * @param buf data to be added to the bloom
57     */
58    void add(byte []buf);
59  
60    /**
61     * Add the specified binary to the bloom filter.
62     *
63     * @param buf data to be added to the bloom
64     * @param offset offset into the data to be added
65     * @param len length of the data to be added
66     */
67    void add(byte []buf, int offset, int len);
68  
69    /**
70     * Check if the specified key is contained in the bloom filter.
71     *
72     * @param buf data to check for existence of
73     * @param bloom bloom filter data to search
74     * @return true if matched by bloom, false if not
75     */
76    boolean contains(byte [] buf, ByteBuffer bloom);
77  
78    /**
79     * Check if the specified key is contained in the bloom filter.
80     *
81     * @param buf data to check for existence of
82     * @param offset offset into the data
83     * @param length length of the data
84     * @param bloom bloom filter data to search
85     * @return true if matched by bloom, false if not
86     */
87    boolean contains(byte [] buf, int offset, int length, ByteBuffer bloom);
88  
89    /**
90     * @return The number of keys added to the bloom
91     */
92    int getKeyCount();
93  
94    /**
95     * @return The max number of keys that can be inserted
96     *         to maintain the desired error rate
97     */
98    public int getMaxKeys();
99  
100   /**
101    * @return Size of the bloom, in bytes
102    */
103   public int getByteSize();
104 
105   /**
106    * Compact the bloom before writing metadata & data to disk
107    */
108   void compactBloom();
109 
110   /**
111    * Get a writable interface into bloom filter meta data.
112    * @return writable class
113    */
114   Writable getMetaWriter();
115 
116   /**
117    * Get a writable interface into bloom filter data (actual bloom).
118    * @return writable class
119    */
120   Writable getDataWriter();
121 }