1   /*
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.util;
22  
23  import java.io.ByteArrayOutputStream;
24  import java.io.DataOutputStream;
25  import java.nio.ByteBuffer;
26  
27  import junit.framework.TestCase;
28  import org.apache.hadoop.hbase.SmallTests;
29  import org.junit.experimental.categories.Category;
30  
31  @Category(SmallTests.class)
32  public class TestByteBloomFilter extends TestCase {
33  
34    public void testBasicBloom() throws Exception {
35      ByteBloomFilter bf1 = new ByteBloomFilter(1000, (float)0.01, Hash.MURMUR_HASH, 0);
36      ByteBloomFilter bf2 = new ByteBloomFilter(1000, (float)0.01, Hash.MURMUR_HASH, 0);
37      bf1.allocBloom();
38      bf2.allocBloom();
39  
40      // test 1: verify no fundamental false negatives or positives
41      byte[] key1 = {1,2,3,4,5,6,7,8,9};
42      byte[] key2 = {1,2,3,4,5,6,7,8,7};
43  
44      bf1.add(key1);
45      bf2.add(key2);
46  
47      assertTrue(bf1.contains(key1));
48      assertFalse(bf1.contains(key2));
49      assertFalse(bf2.contains(key1));
50      assertTrue(bf2.contains(key2));
51  
52      byte [] bkey = {1,2,3,4};
53      byte [] bval = "this is a much larger byte array".getBytes();
54  
55      bf1.add(bkey);
56      bf1.add(bval, 1, bval.length-1);
57  
58      assertTrue( bf1.contains(bkey) );
59      assertTrue( bf1.contains(bval, 1, bval.length-1) );
60      assertFalse( bf1.contains(bval) );
61      assertFalse( bf1.contains(bval) );
62  
63      // test 2: serialization & deserialization.
64      // (convert bloom to byte array & read byte array back in as input)
65      ByteArrayOutputStream bOut = new ByteArrayOutputStream();
66      bf1.writeBloom(new DataOutputStream(bOut));
67      ByteBuffer bb = ByteBuffer.wrap(bOut.toByteArray());
68      ByteBloomFilter newBf1 = new ByteBloomFilter(1000, (float)0.01,
69          Hash.MURMUR_HASH, 0);
70      assertTrue(newBf1.contains(key1, bb));
71      assertFalse(newBf1.contains(key2, bb));
72      assertTrue( newBf1.contains(bkey, bb) );
73      assertTrue( newBf1.contains(bval, 1, bval.length-1, bb) );
74      assertFalse( newBf1.contains(bval, bb) );
75      assertFalse( newBf1.contains(bval, bb) );
76  
77      System.out.println("Serialized as " + bOut.size() + " bytes");
78      assertTrue(bOut.size() - bf1.byteSize < 10); //... allow small padding
79    }
80  
81    public void testBloomFold() throws Exception {
82      // test: foldFactor < log(max/actual)
83      ByteBloomFilter b = new ByteBloomFilter(1003, (float) 0.01,
84          Hash.MURMUR_HASH, 2);
85      b.allocBloom();
86      long origSize = b.getByteSize();
87      assertEquals(1204, origSize);
88      for (int i = 0; i < 12; ++i) {
89        b.add(Bytes.toBytes(i));
90      }
91      b.compactBloom();
92      assertEquals(origSize>>2, b.getByteSize());
93      int falsePositives = 0;
94      for (int i = 0; i < 25; ++i) {
95        if (b.contains(Bytes.toBytes(i))) {
96          if(i >= 12) falsePositives++;
97        } else {
98          assertFalse(i < 12);
99        }
100     }
101     assertTrue(falsePositives <= 1);
102 
103     // test: foldFactor > log(max/actual)
104   }
105 
106   public void testBloomPerf() throws Exception {
107     // add
108     float err = (float)0.01;
109     ByteBloomFilter b = new ByteBloomFilter(10*1000*1000, (float)err, Hash.MURMUR_HASH, 3);
110     b.allocBloom();
111     long startTime =  System.currentTimeMillis();
112     long origSize = b.getByteSize();
113     for (int i = 0; i < 1*1000*1000; ++i) {
114       b.add(Bytes.toBytes(i));
115     }
116     long endTime = System.currentTimeMillis();
117     System.out.println("Total Add time = " + (endTime - startTime) + "ms");
118 
119     // fold
120     startTime = System.currentTimeMillis();
121     b.compactBloom();
122     endTime = System.currentTimeMillis();
123     System.out.println("Total Fold time = " + (endTime - startTime) + "ms");
124     assertTrue(origSize >= b.getByteSize()<<3);
125 
126     // test
127     startTime = System.currentTimeMillis();
128     int falsePositives = 0;
129     for (int i = 0; i < 2*1000*1000; ++i) {
130 
131       if (b.contains(Bytes.toBytes(i))) {
132         if(i >= 1*1000*1000) falsePositives++;
133       } else {
134         assertFalse(i < 1*1000*1000);
135       }
136     }
137     endTime = System.currentTimeMillis();
138     System.out.println("Total Contains time = " + (endTime - startTime) + "ms");
139     System.out.println("False Positive = " + falsePositives);
140     assertTrue(falsePositives <= (1*1000*1000)*err);
141 
142     // test: foldFactor > log(max/actual)
143   }
144 
145   public void testSizing() {
146     int bitSize = 8 * 128 * 1024; // 128 KB
147     double errorRate = 0.025; // target false positive rate
148 
149     // How many keys can we store in a Bloom filter of this size maintaining
150     // the given false positive rate, not taking into account that the n
151     long maxKeys = ByteBloomFilter.idealMaxKeys(bitSize, errorRate);
152     assertEquals(136570, maxKeys);
153 
154     // A reverse operation: how many bits would we need to store this many keys
155     // and keep the same low false positive rate?
156     long bitSize2 = ByteBloomFilter.computeBitSize(maxKeys, errorRate);
157 
158     // The bit size comes out a little different due to rounding.
159     assertTrue(Math.abs(bitSize2 - bitSize) * 1.0 / bitSize < 1e-5);
160   }
161 
162   public void testFoldableByteSize() {
163     assertEquals(128, ByteBloomFilter.computeFoldableByteSize(1000, 5));
164     assertEquals(640, ByteBloomFilter.computeFoldableByteSize(5001, 4));
165   }
166 
167 
168   @org.junit.Rule
169   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
170     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
171 }
172