1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.apache.hadoop.hbase.util;
22
23 import java.io.ByteArrayOutputStream;
24 import java.io.DataOutputStream;
25 import java.nio.ByteBuffer;
26
27 import junit.framework.TestCase;
28 import org.apache.hadoop.hbase.SmallTests;
29 import org.junit.experimental.categories.Category;
30
31 @Category(SmallTests.class)
32 public class TestByteBloomFilter extends TestCase {
33
34 public void testBasicBloom() throws Exception {
35 ByteBloomFilter bf1 = new ByteBloomFilter(1000, (float)0.01, Hash.MURMUR_HASH, 0);
36 ByteBloomFilter bf2 = new ByteBloomFilter(1000, (float)0.01, Hash.MURMUR_HASH, 0);
37 bf1.allocBloom();
38 bf2.allocBloom();
39
40
41 byte[] key1 = {1,2,3,4,5,6,7,8,9};
42 byte[] key2 = {1,2,3,4,5,6,7,8,7};
43
44 bf1.add(key1);
45 bf2.add(key2);
46
47 assertTrue(bf1.contains(key1));
48 assertFalse(bf1.contains(key2));
49 assertFalse(bf2.contains(key1));
50 assertTrue(bf2.contains(key2));
51
52 byte [] bkey = {1,2,3,4};
53 byte [] bval = "this is a much larger byte array".getBytes();
54
55 bf1.add(bkey);
56 bf1.add(bval, 1, bval.length-1);
57
58 assertTrue( bf1.contains(bkey) );
59 assertTrue( bf1.contains(bval, 1, bval.length-1) );
60 assertFalse( bf1.contains(bval) );
61 assertFalse( bf1.contains(bval) );
62
63
64
65 ByteArrayOutputStream bOut = new ByteArrayOutputStream();
66 bf1.writeBloom(new DataOutputStream(bOut));
67 ByteBuffer bb = ByteBuffer.wrap(bOut.toByteArray());
68 ByteBloomFilter newBf1 = new ByteBloomFilter(1000, (float)0.01,
69 Hash.MURMUR_HASH, 0);
70 assertTrue(newBf1.contains(key1, bb));
71 assertFalse(newBf1.contains(key2, bb));
72 assertTrue( newBf1.contains(bkey, bb) );
73 assertTrue( newBf1.contains(bval, 1, bval.length-1, bb) );
74 assertFalse( newBf1.contains(bval, bb) );
75 assertFalse( newBf1.contains(bval, bb) );
76
77 System.out.println("Serialized as " + bOut.size() + " bytes");
78 assertTrue(bOut.size() - bf1.byteSize < 10);
79 }
80
81 public void testBloomFold() throws Exception {
82
83 ByteBloomFilter b = new ByteBloomFilter(1003, (float) 0.01,
84 Hash.MURMUR_HASH, 2);
85 b.allocBloom();
86 long origSize = b.getByteSize();
87 assertEquals(1204, origSize);
88 for (int i = 0; i < 12; ++i) {
89 b.add(Bytes.toBytes(i));
90 }
91 b.compactBloom();
92 assertEquals(origSize>>2, b.getByteSize());
93 int falsePositives = 0;
94 for (int i = 0; i < 25; ++i) {
95 if (b.contains(Bytes.toBytes(i))) {
96 if(i >= 12) falsePositives++;
97 } else {
98 assertFalse(i < 12);
99 }
100 }
101 assertTrue(falsePositives <= 1);
102
103
104 }
105
106 public void testBloomPerf() throws Exception {
107
108 float err = (float)0.01;
109 ByteBloomFilter b = new ByteBloomFilter(10*1000*1000, (float)err, Hash.MURMUR_HASH, 3);
110 b.allocBloom();
111 long startTime = System.currentTimeMillis();
112 long origSize = b.getByteSize();
113 for (int i = 0; i < 1*1000*1000; ++i) {
114 b.add(Bytes.toBytes(i));
115 }
116 long endTime = System.currentTimeMillis();
117 System.out.println("Total Add time = " + (endTime - startTime) + "ms");
118
119
120 startTime = System.currentTimeMillis();
121 b.compactBloom();
122 endTime = System.currentTimeMillis();
123 System.out.println("Total Fold time = " + (endTime - startTime) + "ms");
124 assertTrue(origSize >= b.getByteSize()<<3);
125
126
127 startTime = System.currentTimeMillis();
128 int falsePositives = 0;
129 for (int i = 0; i < 2*1000*1000; ++i) {
130
131 if (b.contains(Bytes.toBytes(i))) {
132 if(i >= 1*1000*1000) falsePositives++;
133 } else {
134 assertFalse(i < 1*1000*1000);
135 }
136 }
137 endTime = System.currentTimeMillis();
138 System.out.println("Total Contains time = " + (endTime - startTime) + "ms");
139 System.out.println("False Positive = " + falsePositives);
140 assertTrue(falsePositives <= (1*1000*1000)*err);
141
142
143 }
144
145 public void testSizing() {
146 int bitSize = 8 * 128 * 1024;
147 double errorRate = 0.025;
148
149
150
151 long maxKeys = ByteBloomFilter.idealMaxKeys(bitSize, errorRate);
152 assertEquals(136570, maxKeys);
153
154
155
156 long bitSize2 = ByteBloomFilter.computeBitSize(maxKeys, errorRate);
157
158
159 assertTrue(Math.abs(bitSize2 - bitSize) * 1.0 / bitSize < 1e-5);
160 }
161
162 public void testFoldableByteSize() {
163 assertEquals(128, ByteBloomFilter.computeFoldableByteSize(1000, 5));
164 assertEquals(640, ByteBloomFilter.computeFoldableByteSize(5001, 4));
165 }
166
167
168 @org.junit.Rule
169 public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
170 new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
171 }
172