suricata
util-spm-bm.c
Go to the documentation of this file.
1 /* Copyright (C) 2007-2014 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
/**
 * \file
 *
 * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
 *
 * Boyer Moore simple pattern matcher implementation
 *
 * The Boyer Moore algorithm has really good performance. It needs two arrays
 * of context for each pattern that hold the applicable shifts on the text
 * to search in, based on characters not available in the pattern
 * and combinations of characters that start a suffix of the pattern.
 * If possible, we should store the context of patterns that we are going
 * to search for multiple times, so we don't spend time on rebuilding them.
 */
32 
33 #include "suricata-common.h"
34 #include "suricata.h"
35 
36 #include "util-spm-bm.h"
37 #include "util-spm.h"
38 #include "util-debug.h"
39 #include "util-error.h"
40 #include "util-memcpy.h"
41 
/* Forward declarations for table builders that are defined later in this file. */

/* Case-sensitive tables. */
static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc);
static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs);

/* Case-insensitive tables. */
static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc);
static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m, uint16_t *suff);
static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs);
48 
49 /**
50  * \brief Given a BmCtx structure, recreate the pre/suffixes for
51  * nocase
52  *
53  * \retval BmCtx pointer to the already created BmCtx (with BoyerMooreCtxInit())
54  * \param str pointer to the pattern string
55  * \param size length of the string
56  */
57 void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len)
58 {
59  /* Store the content as lower case to make searching faster */
60  memcpy_tolower(needle, needle, needle_len);
61 
62  /* Prepare bad chars with nocase chars */
63  PreBmBcNocase(needle, needle_len, bm_ctx->bmBc);
64 
65  /* Prepare good Suffixes with nocase chars */
66  PreBmGsNocase(needle, needle_len, bm_ctx->bmGs);
67 }
68 
69 /**
70  * \brief Setup a Booyer Moore context.
71  *
72  * \param str pointer to the pattern string
73  * \param size length of the string
74  * \retval BmCtx pointer to the newly created Context for the pattern
75  * \initonly BoyerMoore contexts should be created at init
76  */
77 BmCtx *BoyerMooreCtxInit(const uint8_t *needle, uint16_t needle_len)
78 {
79  BmCtx *new = SCMalloc(sizeof(BmCtx));
80  if (unlikely(new == NULL)) {
81  SCLogError(SC_ERR_FATAL, "Fatal error encountered in BoyerMooreCtxInit. Exiting...");
82  exit(EXIT_FAILURE);
83  }
84 
85  /* Prepare bad chars */
86  PreBmBc(needle, needle_len, new->bmBc);
87 
88  new->bmGs = SCMalloc(sizeof(uint16_t) * (needle_len + 1));
89  if (new->bmGs == NULL) {
90  exit(EXIT_FAILURE);
91  }
92 
93  /* Prepare good Suffixes */
94  if (PreBmGs(needle, needle_len, new->bmGs) == -1) {
95  SCLogError(SC_ERR_FATAL, "Fatal error encountered in BooyerMooreCtxInit. Exiting...");
96  exit(EXIT_FAILURE);
97  }
98 
99 
100  return new;
101 }
102 
103 /**
104  * \brief Setup a Booyer Moore context for nocase search
105  *
106  * \param str pointer to the pattern string
107  * \param size length of the string
108  * \retval BmCtx pointer to the newly created Context for the pattern
109  * \initonly BoyerMoore contexts should be created at init
110  */
111 BmCtx *BoyerMooreNocaseCtxInit(uint8_t *needle, uint16_t needle_len)
112 {
113  BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needle_len);
114 
115  BoyerMooreCtxToNocase(bm_ctx, needle, needle_len);
116 
117  return bm_ctx;
118 }
119 
120 /**
121  * \brief Free the memory allocated to Booyer Moore context.
122  *
123  * \param bmCtx pointer to the Context for the pattern
124  */
126 {
127  SCEnter();
128  if (bmctx == NULL)
129  SCReturn;
130 
131  if (bmctx->bmGs != NULL)
132  SCFree(bmctx->bmGs);
133 
134  SCFree(bmctx);
135 
136  SCReturn;
137 }
/**
 * \brief Build the bad-character shift table for a pattern.
 *
 * Every entry is first set to the pattern length. Then, for each pattern
 * byte except the last one, the entry for that byte value is set to its
 * distance from the final pattern position; a later occurrence of the same
 * byte overwrites an earlier one.
 *
 * \param x pointer to the pattern
 * \param m length of the pattern
 * \param bmBc output table; all 256 entries are written
 */
static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc)
{
    const int32_t last = m - 1;
    uint16_t *entry = bmBc;
    uint16_t *const table_end = bmBc + 256;
    int32_t pos;

    while (entry != table_end) {
        *entry++ = m;
    }

    for (pos = 0; pos < last; pos++) {
        bmBc[x[pos]] = (uint16_t)(last - pos);
    }
}
157 
/**
 * \brief Compute the suffix-length table used to derive the good-suffix shifts.
 *
 * On return suff[m - 1] is m, and for every other i suff[i] is the number of
 * bytes, counted backwards from x[i], that match the end of the pattern.
 * An empty pattern (m == 0) leaves suff untouched.
 *
 * \param x pointer to the pattern
 * \param m length of the pattern
 * \param suff output array; entries 0 .. m - 1 are written
 */
static void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff)
{
    int32_t f = 0, g, i;

    /* With m == 0 the store to suff[m - 1] below would hit suff[-1]. */
    if (m == 0)
        return;

    suff[m - 1] = m;
    g = m - 1;
    for (i = m - 2; i >= 0; --i) {
        if (i > g && suff[i + m - 1 - f] < i - g) {
            /* Still inside the window matched earlier: reuse that result. */
            suff[i] = suff[i + m - 1 - f];
        } else {
            if (i < g)
                g = i;
            f = i;
            /* Extend the match leftwards against the pattern's tail. */
            while (g >= 0 && x[g] == x[g + m - 1 - f])
                --g;
            suff[i] = f - g;
        }
    }
}
183 
/**
 * \brief Build the good-suffix shift table for a pattern.
 *
 * The suffix-length table is computed with BoyerMooreSuffixes() into a
 * stack array of m + 1 entries and then turned into shifts in bmGs.
 *
 * \param x pointer to the pattern
 * \param m length of the pattern
 * \param bmGs output table; entries 0 .. m - 1 are written
 * \retval 0 always; no path in this function returns -1
 */
static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs)
{
    uint16_t suff[m + 1];
    const int32_t last = m - 1;
    int32_t idx;
    int32_t fill = 0;

    BoyerMooreSuffixes(x, m, suff);

    /* Start with the largest possible shift in every slot. */
    for (idx = 0; idx < m; idx++) {
        bmGs[idx] = m;
    }

    /* Walk from the longest candidate down; idx == -1 is the final pass and
     * must not index suff[]. */
    for (idx = last; idx >= -1; idx--) {
        if (idx != -1 && suff[idx] != idx + 1) {
            continue;
        }
        const int32_t shift = last - idx;
        while (fill < shift) {
            if (bmGs[fill] == m) {
                bmGs[fill] = (uint16_t)shift;
            }
            fill++;
        }
    }

    /* Entries derived directly from the suffix lengths override the above. */
    for (idx = 0; idx < last; idx++) {
        bmGs[last - suff[idx]] = (uint16_t)(last - idx);
    }

    return 0;
}
214 
/**
 * \brief Build the bad-character shift table for a nocase pattern.
 *
 * Every entry is first set to the pattern length. Then, for each pattern
 * byte except the last one, the entry indexed by u8_tolower() of that byte
 * is set to its distance from the final pattern position.
 *
 * \param x pointer to the pattern
 * \param m length of the pattern
 * \param bmBc output table; all 256 entries are written
 */
static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc)
{
    const int32_t last = m - 1;
    int32_t slot = 0;
    int32_t pos = 0;

    while (slot < 256) {
        bmBc[slot] = m;
        slot++;
    }

    while (pos < last) {
        bmBc[u8_tolower(x[pos])] = (uint16_t)(last - pos);
        pos++;
    }
}
234 
/**
 * \brief Compute the suffix-length table for a nocase pattern.
 *
 * Same computation as BoyerMooreSuffixes(), except that pattern bytes are
 * compared after u8_tolower(). An empty pattern (m == 0) leaves suff
 * untouched.
 *
 * \param x pointer to the pattern
 * \param m length of the pattern
 * \param suff output array; entries 0 .. m - 1 are written
 */
static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
        uint16_t *suff)
{
    int32_t f = 0, g, i;

    /* With m == 0 the store to suff[m - 1] below would hit suff[-1]. */
    if (m == 0) {
        return;
    }

    suff[m - 1] = m;
    g = m - 1;
    for (i = m - 2; i >= 0; --i) {
        if (i > g && suff[i + m - 1 - f] < i - g) {
            /* Still inside the window matched earlier: reuse that result. */
            suff[i] = suff[i + m - 1 - f];
        } else {
            if (i < g) {
                g = i;
            }
            f = i;
            /* Extend the match leftwards against the pattern's tail. */
            while (g >= 0 && u8_tolower(x[g]) == u8_tolower(x[g + m - 1 - f])) {
                --g;
            }
            suff[i] = f - g;
        }
    }
}
257 
/**
 * \brief Build the good-suffix shift table for a nocase pattern.
 *
 * The suffix-length table is computed with BoyerMooreSuffixesNocase() into
 * a stack array of m + 1 entries and then turned into shifts in bmGs.
 *
 * \param x pointer to the pattern
 * \param m length of the pattern
 * \param bmGs output table; entries 0 .. m - 1 are written
 */
static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs)
{
    int32_t i, j;
    uint16_t suff[m + 1];

    BoyerMooreSuffixesNocase(x, m, suff);

    /* Start with the largest possible shift in every slot. */
    for (i = 0; i < m; ++i) {
        bmGs[i] = m;
    }

    j = 0;
    /* The loop stops at i == 0, so no i == -1 special case is needed here:
     * such a pass could only store m into slots that already hold m. */
    for (i = m - 1; i >= 0; --i) {
        if (suff[i] == i + 1) {
            for (; j < m - 1 - i; ++j) {
                if (bmGs[j] == m) {
                    bmGs[j] = m - 1 - i;
                }
            }
        }
    }

    /* Entries derived directly from the suffix lengths override the above. */
    for (i = 0; i <= m - 2; ++i) {
        bmGs[m - 1 - suff[i]] = m - 1 - i;
    }
}
290 
291 /**
292  * \brief Boyer Moore search algorithm
293  * Is better as the pattern length increases and for big buffers to search in.
294  * The algorithm needs a context of two arrays already prepared
295  * by prep_bad_chars() and prep_good_suffix()
296  *
297  * \param y pointer to the buffer to search in
298  * \param n length limit of the buffer
299  * \param x pointer to the pattern we ar searching for
300  * \param m length limit of the needle
301  * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
302  * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
303  *
304  * \retval ptr to start of the match; NULL if no match
305  */
306 uint8_t *BoyerMoore(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
307 {
308  uint16_t *bmGs = bm_ctx->bmGs;
309  uint16_t *bmBc = bm_ctx->bmBc;
310 
311  int i, j, m1, m2;
312  int32_t int_n;
313 #if 0
314  printf("\nBad:\n");
315  for (i=0;i<ALPHABET_SIZE;i++)
316  printf("%c,%d ", i, bmBc[i]);
317 
318  printf("\ngood:\n");
319  for (i=0;i<m;i++)
320  printf("%c, %d ", x[i],bmBc[i]);
321  printf("\n");
322 #endif
323  // force casting to int32_t (if possible)
324  int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
325  j = 0;
326  while (j <= int_n - m ) {
327  for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
328 
329  if (i < 0) {
330  return (uint8_t *)(y + j);
331  //j += bmGs[0];
332  } else {
333 // printf("%c", y[i+j]);
334  j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i)? m1: m2;
335 // printf("%d, %d\n", m1, m2);
336  }
337  }
338  return NULL;
339 }
340 
341 
342 /**
343  * \brief Boyer Moore search algorithm
344  * Is better as the pattern length increases and for big buffers to search in.
345  * The algorithm needs a context of two arrays already prepared
346  * by prep_bad_chars() and prep_good_suffix()
347  *
348  * \param y pointer to the buffer to search in
349  * \param n length limit of the buffer
350  * \param x pointer to the pattern we ar searching for
351  * \param m length limit of the needle
352  * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
353  * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
354  *
355  * \retval ptr to start of the match; NULL if no match
356  */
357 uint8_t *BoyerMooreNocase(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
358 {
359  uint16_t *bmGs = bm_ctx->bmGs;
360  uint16_t *bmBc = bm_ctx->bmBc;
361  int i, j, m1, m2;
362  int32_t int_n;
363 #if 0
364  printf("\nBad:\n");
365  for (i=0;i<ALPHABET_SIZE;i++)
366  printf("%c,%d ", i, bmBc[i]);
367 
368  printf("\ngood:\n");
369  for (i=0;i<m;i++)
370  printf("%c, %d ", x[i],bmBc[i]);
371  printf("\n");
372 #endif
373  // force casting to int32_t (if possible)
374  int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
375  j = 0;
376  while (j <= int_n - m ) {
377  /* x is stored in lowercase. */
378  for (i = m - 1; i >= 0 && x[i] == u8_tolower(y[i + j]); --i);
379 
380  if (i < 0) {
381  return (uint8_t *)(y + j);
382  } else {
383  j += (m1 = bmGs[i]) > (m2 = bmBc[u8_tolower(y[i + j])] - m + 1 + i)?
384  m1: m2;
385  }
386  }
387  return NULL;
388 }
389 
390 typedef struct SpmBmCtx_ {
392  uint8_t *needle;
393  uint16_t needle_len;
394  int nocase;
395 } SpmBmCtx;
396 
397 static SpmCtx *BMInitCtx(const uint8_t *needle, uint16_t needle_len, int nocase,
398  SpmGlobalThreadCtx *global_thread_ctx)
399 {
400  SpmCtx *ctx = SCMalloc(sizeof(SpmCtx));
401  if (ctx == NULL) {
402  SCLogDebug("Unable to alloc SpmCtx.");
403  return NULL;
404  }
405  memset(ctx, 0, sizeof(*ctx));
406  ctx->matcher = SPM_BM;
407 
408  SpmBmCtx *sctx = SCMalloc(sizeof(SpmBmCtx));
409  if (sctx == NULL) {
410  SCLogDebug("Unable to alloc SpmBmCtx.");
411  SCFree(ctx);
412  return NULL;
413  }
414  memset(sctx, 0, sizeof(*sctx));
415 
416  sctx->needle = SCMalloc(needle_len);
417  if (sctx->needle == NULL) {
418  SCLogDebug("Unable to alloc string.");
419  SCFree(sctx);
420  SCFree(ctx);
421  return NULL;
422  }
423  memcpy(sctx->needle, needle, needle_len);
424  sctx->needle_len = needle_len;
425 
426  if (nocase) {
427  sctx->bm_ctx = BoyerMooreNocaseCtxInit(sctx->needle, sctx->needle_len);
428  sctx->nocase = 1;
429  } else {
430  sctx->bm_ctx = BoyerMooreCtxInit(sctx->needle, sctx->needle_len);
431  sctx->nocase = 0;
432  }
433 
434  ctx->ctx = sctx;
435  return ctx;
436 }
437 
438 static void BMDestroyCtx(SpmCtx *ctx)
439 {
440  if (ctx == NULL) {
441  return;
442  }
443 
444  SpmBmCtx *sctx = ctx->ctx;
445  if (sctx != NULL) {
447  if (sctx->needle != NULL) {
448  SCFree(sctx->needle);
449  }
450  SCFree(sctx);
451  }
452 
453  SCFree(ctx);
454 }
455 
456 static uint8_t *BMScan(const SpmCtx *ctx, SpmThreadCtx *thread_ctx,
457  const uint8_t *haystack, uint32_t haystack_len)
458 {
459  const SpmBmCtx *sctx = ctx->ctx;
460 
461  if (sctx->nocase) {
462  return BoyerMooreNocase(sctx->needle, sctx->needle_len, haystack,
463  haystack_len, sctx->bm_ctx);
464  } else {
465  return BoyerMoore(sctx->needle, sctx->needle_len, haystack,
466  haystack_len, sctx->bm_ctx);
467  }
468 }
469 
470 static SpmGlobalThreadCtx *BMInitGlobalThreadCtx(void)
471 {
472  SpmGlobalThreadCtx *global_thread_ctx = SCMalloc(sizeof(SpmGlobalThreadCtx));
473  if (global_thread_ctx == NULL) {
474  SCLogDebug("Unable to alloc SpmThreadCtx.");
475  return NULL;
476  }
477  memset(global_thread_ctx, 0, sizeof(*global_thread_ctx));
478  global_thread_ctx->matcher = SPM_BM;
479  return global_thread_ctx;
480 }
481 
482 static void BMDestroyGlobalThreadCtx(SpmGlobalThreadCtx *global_thread_ctx)
483 {
484  if (global_thread_ctx == NULL) {
485  return;
486  }
487  SCFree(global_thread_ctx);
488 }
489 
490 static void BMDestroyThreadCtx(SpmThreadCtx *thread_ctx)
491 {
492  if (thread_ctx == NULL) {
493  return;
494  }
495  SCFree(thread_ctx);
496 }
497 
498 static SpmThreadCtx *BMMakeThreadCtx(const SpmGlobalThreadCtx *global_thread_ctx) {
499  SpmThreadCtx *thread_ctx = SCMalloc(sizeof(SpmThreadCtx));
500  if (thread_ctx == NULL) {
501  SCLogDebug("Unable to alloc SpmThreadCtx.");
502  return NULL;
503  }
504  memset(thread_ctx, 0, sizeof(*thread_ctx));
505  thread_ctx->matcher = SPM_BM;
506  return thread_ctx;
507 }
508 
509 void SpmBMRegister(void)
510 {
511  spm_table[SPM_BM].name = "bm";
512  spm_table[SPM_BM].InitGlobalThreadCtx = BMInitGlobalThreadCtx;
513  spm_table[SPM_BM].DestroyGlobalThreadCtx = BMDestroyGlobalThreadCtx;
514  spm_table[SPM_BM].MakeThreadCtx = BMMakeThreadCtx;
515  spm_table[SPM_BM].DestroyThreadCtx = BMDestroyThreadCtx;
516  spm_table[SPM_BM].InitCtx = BMInitCtx;
517  spm_table[SPM_BM].DestroyCtx = BMDestroyCtx;
518  spm_table[SPM_BM].Scan = BMScan;
519 }
const char * name
Definition: util-spm.h:62
#define SCLogDebug(...)
Definition: util-debug.h:335
uint16_t matcher
Definition: util-spm.h:43
BmCtx * BoyerMooreCtxInit(const uint8_t *needle, uint16_t needle_len)
Setup a Booyer Moore context.
Definition: util-spm-bm.c:77
struct SpmBmCtx_ SpmBmCtx
#define unlikely(expr)
Definition: util-optimize.h:35
void(* DestroyCtx)(SpmCtx *)
Definition: util-spm.h:69
SpmCtx *(* InitCtx)(const uint8_t *needle, uint16_t needle_len, int nocase, SpmGlobalThreadCtx *g_thread_ctx)
Definition: util-spm.h:67
uint16_t bmBc[ALPHABET_SIZE]
Definition: util-spm-bm.h:35
BmCtx * bm_ctx
Definition: util-spm-bm.c:391
uint8_t *(* Scan)(const SpmCtx *ctx, SpmThreadCtx *thread_ctx, const uint8_t *haystack, uint32_t haystack_len)
Definition: util-spm.h:70
uint16_t matcher
Definition: util-spm.h:57
SpmGlobalThreadCtx *(* InitGlobalThreadCtx)(void)
Definition: util-spm.h:63
SpmTableElmt spm_table[SPM_TABLE_SIZE]
Definition: util-spm.h:74
uint8_t * BoyerMooreNocase(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
Boyer Moore search algorithm Is better as the pattern length increases and for big buffers to search ...
Definition: util-spm-bm.c:357
void BoyerMooreCtxDeInit(BmCtx *bmctx)
Free the memory allocated to Booyer Moore context.
Definition: util-spm-bm.c:125
#define u8_tolower(c)
Definition: suricata.h:180
void(* DestroyThreadCtx)(SpmThreadCtx *thread_ctx)
Definition: util-spm.h:66
#define SCLogError(err_code,...)
Macro used to log ERROR messages.
Definition: util-debug.h:294
SpmThreadCtx *(* MakeThreadCtx)(const SpmGlobalThreadCtx *g_thread_ctx)
Definition: util-spm.h:65
#define SCEnter(...)
Definition: util-debug.h:337
void(* DestroyGlobalThreadCtx)(SpmGlobalThreadCtx *g_thread_ctx)
Definition: util-spm.h:64
uint8_t * needle
Definition: util-spm-bm.c:392
void * ctx
Definition: util-spm.h:44
void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len)
Given a BmCtx structure, recreate the pre/suffixes for nocase.
Definition: util-spm-bm.c:57
#define SCMalloc(a)
Definition: util-mem.h:166
#define ALPHABET_SIZE
Definition: util-spm-bm.h:31
#define SCFree(a)
Definition: util-mem.h:228
void SpmBMRegister(void)
Definition: util-spm-bm.c:509
SCMutex m
Definition: flow-hash.h:105
BmCtx * BoyerMooreNocaseCtxInit(uint8_t *needle, uint16_t needle_len)
Setup a Booyer Moore context for nocase search.
Definition: util-spm-bm.c:111
uint16_t * bmGs
Definition: util-spm-bm.h:36
uint8_t * BoyerMoore(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
Boyer Moore search algorithm Is better as the pattern length increases and for big buffers to search ...
Definition: util-spm-bm.c:306
#define SCReturn
Definition: util-debug.h:339
uint16_t needle_len
Definition: util-spm-bm.c:393
uint16_t matcher
Definition: util-spm.h:50