1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.util;
21
22 import java.io.ByteArrayOutputStream;
23 import java.io.DataOutputStream;
24 import java.nio.ByteBuffer;
25
26 import junit.framework.TestCase;
27 import org.apache.hadoop.hbase.SmallTests;
28 import org.junit.experimental.categories.Category;
29
30 @Category(SmallTests.class)
31 public class TestByteBloomFilter extends TestCase {
32
33 public void testBasicBloom() throws Exception {
34 ByteBloomFilter bf1 = new ByteBloomFilter(1000, (float)0.01, Hash.MURMUR_HASH, 0);
35 ByteBloomFilter bf2 = new ByteBloomFilter(1000, (float)0.01, Hash.MURMUR_HASH, 0);
36 bf1.allocBloom();
37 bf2.allocBloom();
38
39
40 byte[] key1 = {1,2,3,4,5,6,7,8,9};
41 byte[] key2 = {1,2,3,4,5,6,7,8,7};
42
43 bf1.add(key1);
44 bf2.add(key2);
45
46 assertTrue(bf1.contains(key1));
47 assertFalse(bf1.contains(key2));
48 assertFalse(bf2.contains(key1));
49 assertTrue(bf2.contains(key2));
50
51 byte [] bkey = {1,2,3,4};
52 byte [] bval = "this is a much larger byte array".getBytes();
53
54 bf1.add(bkey);
55 bf1.add(bval, 1, bval.length-1);
56
57 assertTrue( bf1.contains(bkey) );
58 assertTrue( bf1.contains(bval, 1, bval.length-1) );
59 assertFalse( bf1.contains(bval) );
60 assertFalse( bf1.contains(bval) );
61
62
63
64 ByteArrayOutputStream bOut = new ByteArrayOutputStream();
65 bf1.writeBloom(new DataOutputStream(bOut));
66 ByteBuffer bb = ByteBuffer.wrap(bOut.toByteArray());
67 ByteBloomFilter newBf1 = new ByteBloomFilter(1000, (float)0.01,
68 Hash.MURMUR_HASH, 0);
69 assertTrue(newBf1.contains(key1, bb));
70 assertFalse(newBf1.contains(key2, bb));
71 assertTrue( newBf1.contains(bkey, bb) );
72 assertTrue( newBf1.contains(bval, 1, bval.length-1, bb) );
73 assertFalse( newBf1.contains(bval, bb) );
74 assertFalse( newBf1.contains(bval, bb) );
75
76 System.out.println("Serialized as " + bOut.size() + " bytes");
77 assertTrue(bOut.size() - bf1.byteSize < 10);
78 }
79
80 public void testBloomFold() throws Exception {
81
82 ByteBloomFilter b = new ByteBloomFilter(1003, (float) 0.01,
83 Hash.MURMUR_HASH, 2);
84 b.allocBloom();
85 long origSize = b.getByteSize();
86 assertEquals(1204, origSize);
87 for (int i = 0; i < 12; ++i) {
88 b.add(Bytes.toBytes(i));
89 }
90 b.compactBloom();
91 assertEquals(origSize>>2, b.getByteSize());
92 int falsePositives = 0;
93 for (int i = 0; i < 25; ++i) {
94 if (b.contains(Bytes.toBytes(i))) {
95 if(i >= 12) falsePositives++;
96 } else {
97 assertFalse(i < 12);
98 }
99 }
100 assertTrue(falsePositives <= 1);
101
102
103 }
104
105 public void testBloomPerf() throws Exception {
106
107 float err = (float)0.01;
108 ByteBloomFilter b = new ByteBloomFilter(10*1000*1000, (float)err, Hash.MURMUR_HASH, 3);
109 b.allocBloom();
110 long startTime = System.currentTimeMillis();
111 long origSize = b.getByteSize();
112 for (int i = 0; i < 1*1000*1000; ++i) {
113 b.add(Bytes.toBytes(i));
114 }
115 long endTime = System.currentTimeMillis();
116 System.out.println("Total Add time = " + (endTime - startTime) + "ms");
117
118
119 startTime = System.currentTimeMillis();
120 b.compactBloom();
121 endTime = System.currentTimeMillis();
122 System.out.println("Total Fold time = " + (endTime - startTime) + "ms");
123 assertTrue(origSize >= b.getByteSize()<<3);
124
125
126 startTime = System.currentTimeMillis();
127 int falsePositives = 0;
128 for (int i = 0; i < 2*1000*1000; ++i) {
129
130 if (b.contains(Bytes.toBytes(i))) {
131 if(i >= 1*1000*1000) falsePositives++;
132 } else {
133 assertFalse(i < 1*1000*1000);
134 }
135 }
136 endTime = System.currentTimeMillis();
137 System.out.println("Total Contains time = " + (endTime - startTime) + "ms");
138 System.out.println("False Positive = " + falsePositives);
139 assertTrue(falsePositives <= (1*1000*1000)*err);
140
141
142 }
143
144 public void testSizing() {
145 int bitSize = 8 * 128 * 1024;
146 double errorRate = 0.025;
147
148
149
150 long maxKeys = ByteBloomFilter.idealMaxKeys(bitSize, errorRate);
151 assertEquals(136570, maxKeys);
152
153
154
155 long bitSize2 = ByteBloomFilter.computeBitSize(maxKeys, errorRate);
156
157
158 assertTrue(Math.abs(bitSize2 - bitSize) * 1.0 / bitSize < 1e-5);
159 }
160
161 public void testFoldableByteSize() {
162 assertEquals(128, ByteBloomFilter.computeFoldableByteSize(1000, 5));
163 assertEquals(640, ByteBloomFilter.computeFoldableByteSize(5001, 4));
164 }
165
166
167 }
168