1 /* 2 * Copyright 2010 The Apache Software Foundation 3 * 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 package org.apache.hadoop.hbase.util; 21 22 import org.apache.hadoop.io.Writable; 23 24 import java.nio.ByteBuffer; 25 26 /** 27 * Defines the general behavior of a bloom filter. 28 * <p> 29 * The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by 30 * the networking research community in the past decade thanks to the bandwidth efficiencies that it 31 * offers for the transmission of set membership information between networked hosts. A sender encodes 32 * the information into a bit vector, the Bloom filter, that is more compact than a conventional 33 * representation. Computation and space costs for construction are linear in the number of elements. 34 * The receiver uses the filter to test whether various elements are members of the set. Though the 35 * filter will occasionally return a false positive, it will never return a false negative. When creating 36 * the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. 37 * 38 * <p> 39 * Originally created by 40 * <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>. 41 * 42 * <p> 43 * It must be extended in order to define the real behavior. 44 */ 45 public interface BloomFilter { 46 /** 47 * Allocate memory for the bloom filter data. Note that bloom data isn't 48 * allocated by default because it can grow large & reads would be better 49 * managed by the LRU cache. 50 */ 51 void allocBloom(); 52 53 /** 54 * Add the specified binary to the bloom filter. 55 * 56 * @param buf data to be added to the bloom 57 */ 58 void add(byte []buf); 59 60 /** 61 * Add the specified binary to the bloom filter. 62 * 63 * @param buf data to be added to the bloom 64 * @param offset offset into the data to be added 65 * @param len length of the data to be added 66 */ 67 void add(byte []buf, int offset, int len); 68 69 /** 70 * Check if the specified key is contained in the bloom filter. 71 * 72 * @param buf data to check for existence of 73 * @param bloom bloom filter data to search 74 * @return true if matched by bloom, false if not 75 */ 76 boolean contains(byte [] buf, ByteBuffer bloom); 77 78 /** 79 * Check if the specified key is contained in the bloom filter. 80 * 81 * @param buf data to check for existence of 82 * @param offset offset into the data 83 * @param length length of the data 84 * @param bloom bloom filter data to search 85 * @return true if matched by bloom, false if not 86 */ 87 boolean contains(byte [] buf, int offset, int length, ByteBuffer bloom); 88 89 /** 90 * @return The number of keys added to the bloom 91 */ 92 int getKeyCount(); 93 94 /** 95 * @return The max number of keys that can be inserted 96 * to maintain the desired error rate 97 */ 98 public int getMaxKeys(); 99 100 /** 101 * @return Size of the bloom, in bytes 102 */ 103 public int getByteSize(); 104 105 /** 106 * Compact the bloom before writing metadata & data to disk 107 */ 108 void compactBloom(); 109 110 /** 111 * Get a writable interface into bloom filter meta data. 112 * @return writable class 113 */ 114 Writable getMetaWriter(); 115 116 /** 117 * Get a writable interface into bloom filter data (actual bloom). 118 * @return writable class 119 */ 120 Writable getDataWriter(); 121 }