suricata
util-bloomfilter-counting.c
Go to the documentation of this file.
1 /* Copyright (C) 2007-2010 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Victor Julien <victor@inliniac.net>
22  *
23  * Counting Bloom Filter implementation. Can be used with 8, 16, 32 bits
24  * counters.
25  */
26 
27 #include "suricata-common.h"
29 #include "util-unittest.h"
30 
31 /* type: 1, 2 or 4 for 8, 16, or 32 bit counters
32  *
33  */
34 BloomFilterCounting *BloomFilterCountingInit(uint32_t size, uint8_t type, uint8_t iter, uint32_t (*Hash)(const void *, uint16_t, uint8_t, uint32_t)) {
35  BloomFilterCounting *bf = NULL;
36 
37  if (iter == 0)
38  goto error;
39 
40  if (Hash == NULL || size == 0) {
41  //printf("ERROR: BloomFilterCountingInit no Hash function\n");
42  goto error;
43  }
44 
45  if (type != 1 && type != 2 && type != 4) {
46  //printf("ERROR: BloomFilterCountingInit only 1, 2 and 4 bytes are supported\n");
47  goto error;
48  }
49 
50  /* setup the filter */
51  bf = SCMalloc(sizeof(BloomFilterCounting));
52  if (unlikely(bf == NULL))
53  goto error;
54  memset(bf,0,sizeof(BloomFilterCounting));
55  bf->type = type; /* size of the type: 1, 2, 4 */
56  bf->array_size = size;
57  bf->hash_iterations = iter;
58  bf->Hash = Hash;
59 
60  /* setup the bitarray */
61  bf->array = SCMalloc(bf->array_size * bf->type);
62  if (bf->array == NULL)
63  goto error;
64  memset(bf->array,0,bf->array_size * bf->type);
65 
66  return bf;
67 
68 error:
69  if (bf != NULL) {
70  if (bf->array != NULL)
71  SCFree(bf->array);
72 
73  SCFree(bf);
74  }
75  return NULL;
76 }
77 
79 {
80  if (bf != NULL) {
81  if (bf->array != NULL)
82  SCFree(bf->array);
83 
84  SCFree(bf);
85  }
86 }
87 
89 {
90  printf("\n------ Counting Bloom Filter Stats ------\n");
91  printf("Buckets: %" PRIu32 "\n", bf->array_size);
92  printf("Counter size: %" PRIu32 "\n", bf->type);
93  printf("Memory size: %" PRIu32 " bytes\n", bf->array_size * bf->type);
94  printf("Hash function pointer: %p\n", bf->Hash);
95  printf("Hash functions: %" PRIu32 "\n", bf->hash_iterations);
96  printf("-----------------------------------------\n");
97 }
98 
99 int BloomFilterCountingAdd(BloomFilterCounting *bf, const void *data, uint16_t datalen)
100 {
101  uint8_t iter = 0;
102  uint32_t hash = 0;
103 
104  if (bf == NULL || data == NULL || datalen == 0)
105  return -1;
106 
107  for (iter = 0; iter < bf->hash_iterations; iter++) {
108  hash = bf->Hash(data, datalen, iter, bf->array_size) * bf->type;
109  if (bf->type == 1) {
110  uint8_t *u8 = (uint8_t *)&bf->array[hash];
111  if ((*u8) != 255)
112  (*u8)++;
113  } else if (bf->type == 2) {
114  uint16_t *u16 = (uint16_t *)&bf->array[hash];
115  if ((*u16) != 65535)
116  (*u16)++;
117  } else if (bf->type == 4) {
118  uint32_t *u32 = (uint32_t *)&bf->array[hash];
119  if ((*u32) != 4294967295UL)
120  (*u32)++;
121  }
122  }
123 
124  return 0;
125 }
126 
127 int BloomFilterCountingRemove(BloomFilterCounting *bf, const void *data, uint16_t datalen)
128 {
129  uint8_t iter = 0;
130  uint32_t hash = 0;
131 
132  if (bf == NULL || data == NULL || datalen == 0)
133  return -1;
134 
135  /* only remove data that was actually added */
136  if (BloomFilterCountingTest(bf, data, datalen) == 0) {
137  printf("ERROR: BloomFilterCountingRemove tried to remove data "
138  "that was never added to the set or was already removed.\n");
139  return -1;
140  }
141 
142  /* decrease counters for every iteration */
143  for (iter = 0; iter < bf->hash_iterations; iter++) {
144  hash = bf->Hash(data, datalen, iter, bf->array_size) * bf->type;
145  if (bf->type == 1) {
146  uint8_t *u8 = (uint8_t *)&bf->array[hash];
147  if ((*u8) > 0)
148  (*u8)--;
149  else {
150  printf("ERROR: BloomFilterCountingRemove tried to decrease a "
151  "counter below zero.\n");
152  return -1;
153  }
154  } else if (bf->type == 2) {
155  uint16_t *u16 = (uint16_t *)&bf->array[hash];
156  if ((*u16) > 0)
157  (*u16)--;
158  else {
159  printf("ERROR: BloomFilterCountingRemove tried to decrease a "
160  "counter below zero.\n");
161  return -1;
162  }
163  } else if (bf->type == 4) {
164  uint32_t *u32 = (uint32_t *)&bf->array[hash];
165  if ((*u32) > 0)
166  (*u32)--;
167  else {
168  printf("ERROR: BloomFilterCountingRemove tried to decrease a "
169  "counter below zero.\n");
170  return -1;
171  }
172  }
173  }
174 
175  return 0;
176 }
177 
178 /* Test if data matches our filter and is likely to be in the set
179  *
180  * returns 0: for no match
181  * 1: match
182  */
183 int BloomFilterCountingTest(BloomFilterCounting *bf, const void *data, uint16_t datalen)
184 {
185  uint8_t iter = 0;
186  uint32_t hash = 0;
187  int hit = 1;
188 
189  /* check each hash iteration */
190  for (iter = 0; iter < bf->hash_iterations; iter++) {
191  hash = bf->Hash(data, datalen, iter, bf->array_size) * bf->type;
192  if (bf->type == 1) {
193  uint8_t *u8 = (uint8_t *)&bf->array[hash];
194  if ((*u8) == 0x00) {
195  hit = 0;
196  break;
197  }
198  } else if (bf->type == 2) {
199  uint16_t *u16 = (uint16_t *)&bf->array[hash];
200  if ((*u16) == 0x0000) {
201  hit = 0;
202  break;
203  }
204  } else if (bf->type == 4) {
205  uint32_t *u32 = (uint32_t *)&bf->array[hash];
206  if ((*u32) == 0x00000000) {
207  hit = 0;
208  break;
209  }
210  }
211  }
212 
213  return hit;
214 }
215 
216 /*
217  * ONLY TESTS BELOW THIS COMMENT
218  */
219 
220 #ifdef UNITTESTS
221 static uint32_t BloomHash(const void *data, uint16_t datalen, uint8_t iter, uint32_t hash_size)
222 {
223  uint8_t *d = (uint8_t *)data;
224  uint32_t i;
225  uint32_t hash = 0;
226 
227  for (i = 0; i < datalen; i++) {
228  if (i == 0) hash += (((uint32_t)*d++));
229  else if (i == 1) hash += (((uint32_t)*d++) * datalen);
230  else hash *= (((uint32_t)*d++) * i);
231  }
232 
233  hash *= (iter + datalen);
234  hash %= hash_size;
235  return hash;
236 }
237 
238 static int BloomFilterCountingTestInit01 (void)
239 {
240  BloomFilterCounting *bf = BloomFilterCountingInit(1024, 4, 4, BloomHash);
241  if (bf == NULL)
242  return 0;
243 
245  return 1;
246 }
247 
248 /* no hash function, so it should fail */
249 static int BloomFilterCountingTestInit02 (void)
250 {
251  BloomFilterCounting *bf = BloomFilterCountingInit(1024, 4, 4, NULL);
252  if (bf == NULL)
253  return 1;
254 
256  return 0;
257 }
258 
259 static int BloomFilterCountingTestInit03 (void)
260 {
261  int result = 0;
262  BloomFilterCounting *bf = BloomFilterCountingInit(1024, 4, 4, BloomHash);
263  if (bf == NULL)
264  return 0;
265 
266  if (bf->Hash == BloomHash)
267  result = 1;
268 
270  return result;
271 }
272 
273 static int BloomFilterCountingTestInit04 (void)
274 {
275  BloomFilterCounting *bf = BloomFilterCountingInit(1024, 0, 4, BloomHash);
276  if (bf == NULL)
277  return 1;
278 
280  return 0;
281 }
282 
283 static int BloomFilterCountingTestInit05 (void)
284 {
285  BloomFilterCounting *bf = BloomFilterCountingInit(0, 4, 4, BloomHash);
286  if (bf == NULL)
287  return 1;
288 
290  return 0;
291 }
292 
293 static int BloomFilterCountingTestInit06 (void)
294 {
295  BloomFilterCounting *bf = BloomFilterCountingInit(32, 3, 4, BloomHash);
296  if (bf == NULL)
297  return 1;
298 
300  return 0;
301 }
302 
303 static int BloomFilterCountingTestAdd01 (void)
304 {
305  int result = 0;
306  BloomFilterCounting *bf = BloomFilterCountingInit(1024, 4, 4, BloomHash);
307  if (bf == NULL)
308  return 0;
309 
310  int r = BloomFilterCountingAdd(bf, "test", 0);
311  if (r == 0)
312  goto end;
313 
314  /* all is good! */
315  result = 1;
316 end:
317  if (bf != NULL) BloomFilterCountingFree(bf);
318  return result;
319 }
320 
321 static int BloomFilterCountingTestAdd02 (void)
322 {
323  int result = 0;
324  BloomFilterCounting *bf = BloomFilterCountingInit(1024, 4, 4, BloomHash);
325  if (bf == NULL)
326  return 0;
327 
328  int r = BloomFilterCountingAdd(bf, NULL, 4);
329  if (r == 0)
330  goto end;
331 
332  /* all is good! */
333  result = 1;
334 end:
335  if (bf != NULL) BloomFilterCountingFree(bf);
336  return result;
337 }
338 
339 static int BloomFilterCountingTestFull01 (void)
340 {
341  int result = 0;
342  BloomFilterCounting *bf = BloomFilterCountingInit(32, 4, 4, BloomHash);
343  if (bf == NULL) {
344  printf("init failed: ");
345  goto end;
346  }
347 
348  int r = BloomFilterCountingAdd(bf, "test", 4);
349  if (r != 0) {
350  printf("first add: ");
351  goto end;
352  }
353 
354  r = BloomFilterCountingTest(bf, "test", 4);
355  if (r != 1) {
356  printf("2nd add: ");
357  goto end;
358  }
359 
360  r = BloomFilterCountingRemove(bf, "test", 4);
361  if (r != 0) {
362  printf("3rd add: ");
363  goto end;
364  }
365 
366  /* all is good! */
367  result = 1;
368 end:
369  if (bf != NULL)
371  return result;
372 }
373 
374 static int BloomFilterCountingTestFull02 (void)
375 {
376  int result = 0;
377  BloomFilterCounting *bf = BloomFilterCountingInit(32, 4, 4, BloomHash);
378  if (bf == NULL)
379  goto end;
380 
381  int r = BloomFilterCountingTest(bf, "test", 4);
382  if (r != 0)
383  goto end;
384 
385  /* all is good! */
386  result = 1;
387 end:
388  if (bf != NULL) BloomFilterCountingFree(bf);
389  return result;
390 }
391 #endif
392 
394 {
395 #ifdef UNITTESTS
396  UtRegisterTest("BloomFilterCountingTestInit01",
397  BloomFilterCountingTestInit01);
398  UtRegisterTest("BloomFilterCountingTestInit02",
399  BloomFilterCountingTestInit02);
400  UtRegisterTest("BloomFilterCountingTestInit03",
401  BloomFilterCountingTestInit03);
402  UtRegisterTest("BloomFilterCountingTestInit04",
403  BloomFilterCountingTestInit04);
404  UtRegisterTest("BloomFilterCountingTestInit05",
405  BloomFilterCountingTestInit05);
406  UtRegisterTest("BloomFilterCountingTestInit06",
407  BloomFilterCountingTestInit06);
408 
409  UtRegisterTest("BloomFilterCountingTestAdd01",
410  BloomFilterCountingTestAdd01);
411  UtRegisterTest("BloomFilterCountingTestAdd02",
412  BloomFilterCountingTestAdd02);
413 
414  UtRegisterTest("BloomFilterCountingTestFull01",
415  BloomFilterCountingTestFull01);
416  UtRegisterTest("BloomFilterCountingTestFull02",
417  BloomFilterCountingTestFull02);
418 #endif
419 }
420 
uint32_t(* Hash)(const void *, uint16_t, uint8_t, uint32_t)
void BloomFilterCountingRegisterTests(void)
#define unlikely(expr)
Definition: util-optimize.h:35
int BloomFilterCountingTest(BloomFilterCounting *bf, const void *data, uint16_t datalen)
void BloomFilterCountingFree(BloomFilterCounting *bf)
uint16_t type
void UtRegisterTest(const char *name, int(*TestFn)(void))
Register unit test.
int BloomFilterCountingRemove(BloomFilterCounting *bf, const void *data, uint16_t datalen)
int BloomFilterCountingAdd(BloomFilterCounting *bf, const void *data, uint16_t datalen)
#define SCMalloc(a)
Definition: util-mem.h:174
#define SCFree(a)
Definition: util-mem.h:236
BloomFilterCounting * BloomFilterCountingInit(uint32_t size, uint8_t type, uint8_t iter, uint32_t(*Hash)(const void *, uint16_t, uint8_t, uint32_t))
void BloomFilterCountingPrint(BloomFilterCounting *bf)