suricata
util-spm-bm.c
Go to the documentation of this file.
1 /* Copyright (C) 2007-2014 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
22  *
23  * Boyer Moore simple pattern matcher implementation
24  *
25  * Boyer Moore algorithm has a really good performance. It need two arrays
26  * of context for each pattern that hold applicable shifts on the text
27  * to search in, based on characters not available in the pattern
28  * and combinations of characters that start a suffix of the pattern.
29  * If possible, we should store the context of patterns that we are going
30  * to search for multiple times, so we don't spend time on rebuilding them.
31  */
32 
33 #include "suricata-common.h"
34 #include "suricata.h"
35 
36 #include "util-spm-bm.h"
37 #include "util-spm.h"
38 #include "util-debug.h"
39 #include "util-error.h"
40 #include "util-memcpy.h"
41 #include "util-validate.h"
42 
43 static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs);
44 static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc);
45 static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc);
46 static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
47  uint16_t *suff);
48 static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs);
49 
50 /**
51  * \brief Given a BmCtx structure, recreate the pre/suffixes for
52  * nocase
53  *
54  * \retval BmCtx pointer to the already created BmCtx (with BoyerMooreCtxInit())
55  * \param str pointer to the pattern string
56  * \param size length of the string
57  */
58 void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len)
59 {
60  /* Store the content as lower case to make searching faster */
61  memcpy_tolower(needle, needle, needle_len);
62 
63  /* Prepare bad chars with nocase chars */
64  PreBmBcNocase(needle, needle_len, bm_ctx->bmBc);
65 
66  /* Prepare good Suffixes with nocase chars */
67  PreBmGsNocase(needle, needle_len, bm_ctx->bmGs);
68 }
69 
70 /**
71  * \brief Setup a Boyer Moore context.
72  *
73  * \param str pointer to the pattern string
74  * \param size length of the string
75  * \retval BmCtx pointer to the newly created Context for the pattern
76  * \initonly BoyerMoore contexts should be created at init
77  */
78 BmCtx *BoyerMooreCtxInit(const uint8_t *needle, uint16_t needle_len)
79 {
80  BmCtx *new = SCMalloc(sizeof(BmCtx) + sizeof(uint16_t) * (needle_len + 1));
81  if (unlikely(new == NULL)) {
82  FatalError("Fatal error encountered in BoyerMooreCtxInit. Exiting...");
83  }
84 
85  /* Prepare bad chars */
86  PreBmBc(needle, needle_len, new->bmBc);
87 
88  /* Prepare good Suffixes */
89  if (PreBmGs(needle, needle_len, new->bmGs) == -1) {
90  FatalError("Fatal error encountered in BoyerMoreCtxInit. Exiting...");
91  }
92 
93 
94  return new;
95 }
96 
97 /**
98  * \brief Setup a Boyer Moore context for nocase search
99  *
100  * \param str pointer to the pattern string
101  * \param size length of the string
102  * \retval BmCtx pointer to the newly created Context for the pattern
103  * \initonly BoyerMoore contexts should be created at init
104  */
105 BmCtx *BoyerMooreNocaseCtxInit(uint8_t *needle, uint16_t needle_len)
106 {
107  BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needle_len);
108 
109  BoyerMooreCtxToNocase(bm_ctx, needle, needle_len);
110 
111  return bm_ctx;
112 }
113 
114 /**
115  * \brief Free the memory allocated to Boyer Moore context.
116  *
117  * \param bmCtx pointer to the Context for the pattern
118  */
120 {
121  SCEnter();
122  if (bmctx == NULL)
123  SCReturn;
124 
125  SCFree(bmctx);
126 
127  SCReturn;
128 }
129 /**
130  * \brief Array setup function for bad characters that split the pattern
131  * Remember that the result array should be the length of ALPHABET_SIZE
132  *
133  * \param str pointer to the pattern string
134  * \param size length of the string
135  * \param result pointer to an empty array that will hold the badchars
136  */
137 static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc)
138 {
139  uint16_t i;
140 
141  for (i = 0; i < 256; ++i) {
142  bmBc[i] = m;
143  }
144  for (i = 0; i < m - 1; ++i) {
145  bmBc[(unsigned char)x[i]] = m - i - 1;
146  }
147 }
148 
149 /**
150  * \brief Array setup function for building prefixes (shift for valid prefixes) for boyermoore context
151  *
152  * \param x pointer to the pattern string
153  * \param m length of the string
154  * \param suff pointer to an empty array that will hold the prefixes (shifts)
155  */
156 static void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff)
157 {
158  int32_t f = 0, g, i;
159  suff[m - 1] = m;
160  g = m - 1;
161  for (i = m - 2; i >= 0; --i) {
162  if (i > g && suff[i + m - 1 - f] < i - g)
163  suff[i] = suff[i + m - 1 - f];
164  else {
165  if (i < g)
166  g = i;
167  f = i;
168  while (g >= 0 && x[g] == x[g + m - 1 - f])
169  --g;
170  DEBUG_VALIDATE_BUG_ON(f - g < 0 || f - g > UINT16_MAX);
171  suff[i] = (uint16_t)(f - g);
172  }
173  }
174 }
175 
176 /**
177  * \brief Array setup function for building prefixes (shift for valid prefixes) for boyermoore context
178  *
179  * \param x pointer to the pattern string
180  * \param m length of the string
181  * \param bmGs pointer to an empty array that will hold the prefixes (shifts)
182  * \retval 0 ok, -1 failed
183  */
184 static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs)
185 {
186  int32_t i, j;
187  uint16_t suff[m + 1];
188 
189  BoyerMooreSuffixes(x, m, suff);
190 
191  for (i = 0; i < m; ++i)
192  bmGs[i] = m;
193 
194  j = 0;
195 
196  for (i = m - 1; i >= -1; --i)
197  if (i == -1 || suff[i] == i + 1)
198  for (; j < m - 1 - i; ++j)
199  if (bmGs[j] == m)
200  bmGs[j] = (uint16_t)(m - 1 - i);
201 
202  for (i = 0; i <= m - 2; ++i)
203  bmGs[m - 1 - suff[i]] = (uint16_t)(m - 1 - i);
204  return 0;
205 }
206 
207 /**
208  * \brief Array setup function for bad characters that split the pattern
209  * Remember that the result array should be the length of ALPHABET_SIZE
210  *
211  * \param str pointer to the pattern string
212  * \param size length of the string
213  * \param result pointer to an empty array that will hold the badchars
214  */
215 static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc)
216 {
217  uint16_t i;
218 
219  for (i = 0; i < 256; ++i) {
220  bmBc[i] = m;
221  }
222  for (i = 0; i < m - 1; ++i) {
223  bmBc[u8_tolower(x[i])] = m - 1 - i;
224  bmBc[u8_toupper(x[i])] = m - 1 - i;
225  }
226 }
227 
228 static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
229  uint16_t *suff)
230 {
231  int32_t f = 0, g, i;
232 
233  suff[m - 1] = m;
234  g = m - 1;
235  for (i = m - 2; i >= 0; --i) {
236  if (i > g && suff[i + m - 1 - f] < i - g) {
237  suff[i] = suff[i + m - 1 - f];
238  } else {
239  if (i < g) {
240  g = i;
241  }
242  f = i;
243  while (g >= 0 && u8_tolower(x[g]) == u8_tolower(x[g + m - 1 - f])) {
244  --g;
245  }
246  DEBUG_VALIDATE_BUG_ON(f - g < 0 || f - g > UINT16_MAX);
247  suff[i] = (uint16_t)(f - g);
248  }
249  }
250 }
251 
252 /**
253  * \brief Array setup function for building prefixes (shift for valid prefixes)
254  * for boyermoore context case less
255  *
256  * \param x pointer to the pattern string
257  * \param m length of the string
258  * \param bmGs pointer to an empty array that will hold the prefixes (shifts)
259  */
260 static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs)
261 {
262  uint16_t i, j;
263  uint16_t suff[m + 1];
264 
265  BoyerMooreSuffixesNocase(x, m, suff);
266 
267  for (i = 0; i < m; ++i) {
268  bmGs[i] = m;
269  }
270  j = 0;
271  for (i = m; i > 0; --i) {
272  if (suff[i - 1] == i) {
273  for (; j < m - i; ++j) {
274  if (bmGs[j] == m) {
275  bmGs[j] = m - i;
276  }
277  }
278  }
279  }
280  for (i = 0; i <= m - 2; ++i) {
281  bmGs[m - 1 - suff[i]] = m - 1 - i;
282  }
283 }
284 
285 /**
286  * \brief Boyer Moore search algorithm
287  * Is better as the pattern length increases and for big buffers to search in.
288  * The algorithm needs a context of two arrays already prepared
289  * by prep_bad_chars() and prep_good_suffix()
290  *
291  * \param y pointer to the buffer to search in
292  * \param n length limit of the buffer
293  * \param x pointer to the pattern we ar searching for
294  * \param m length limit of the needle
295  * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
296  * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
297  *
298  * \retval ptr to start of the match; NULL if no match
299  */
300 uint8_t *BoyerMoore(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
301 {
302  uint16_t *bmGs = bm_ctx->bmGs;
303  uint16_t *bmBc = bm_ctx->bmBc;
304 
305  int i, j, m1, m2;
306  int32_t int_n;
307 #if 0
308  printf("\nBad:\n");
309  for (i=0;i<ALPHABET_SIZE;i++)
310  printf("%c,%d ", i, bmBc[i]);
311 
312  printf("\ngood:\n");
313  for (i=0;i<m;i++)
314  printf("%c, %d ", x[i],bmBc[i]);
315  printf("\n");
316 #endif
317  // force casting to int32_t (if possible)
318  int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
319  j = 0;
320  while (j <= int_n - m ) {
321  for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
322 
323  if (i < 0) {
324  return (uint8_t *)(y + j);
325  //j += bmGs[0];
326  } else {
327 // printf("%c", y[i+j]);
328  j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i)? m1: m2;
329 // printf("%d, %d\n", m1, m2);
330  }
331  }
332  return NULL;
333 }
334 
335 
336 /**
337  * \brief Boyer Moore search algorithm
338  * Is better as the pattern length increases and for big buffers to search in.
339  * The algorithm needs a context of two arrays already prepared
340  * by prep_bad_chars() and prep_good_suffix()
341  *
342  * \param y pointer to the buffer to search in
343  * \param n length limit of the buffer
344  * \param x pointer to the pattern we ar searching for
345  * \param m length limit of the needle
346  * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
347  * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
348  *
349  * \retval ptr to start of the match; NULL if no match
350  */
351 uint8_t *BoyerMooreNocase(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
352 {
353  uint16_t *bmGs = bm_ctx->bmGs;
354  uint16_t *bmBc = bm_ctx->bmBc;
355  int i, j, m1, m2;
356  int32_t int_n;
357 #if 0
358  printf("\nBad:\n");
359  for (i=0;i<ALPHABET_SIZE;i++)
360  printf("%c,%d ", i, bmBc[i]);
361 
362  printf("\ngood:\n");
363  for (i=0;i<m;i++)
364  printf("%c, %d ", x[i],bmBc[i]);
365  printf("\n");
366 #endif
367  // force casting to int32_t (if possible)
368  int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
369  j = 0;
370  while (j <= int_n - m ) {
371  /* x is stored in lowercase. */
372  for (i = m - 1; i >= 0 && x[i] == u8_tolower(y[i + j]); --i);
373 
374  if (i < 0) {
375  return (uint8_t *)(y + j);
376  } else {
377  j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i)?
378  m1: m2;
379  }
380  }
381  return NULL;
382 }
383 
384 typedef struct SpmBmCtx_ {
386  uint8_t *needle;
387  uint16_t needle_len;
388  int nocase;
390 
391 static SpmCtx *BMInitCtx(const uint8_t *needle, uint16_t needle_len, int nocase,
392  SpmGlobalThreadCtx *global_thread_ctx)
393 {
394  SpmCtx *ctx = SCMalloc(sizeof(SpmCtx));
395  if (ctx == NULL) {
396  SCLogDebug("Unable to alloc SpmCtx.");
397  return NULL;
398  }
399  memset(ctx, 0, sizeof(*ctx));
400  ctx->matcher = SPM_BM;
401 
402  SpmBmCtx *sctx = SCMalloc(sizeof(SpmBmCtx));
403  if (sctx == NULL) {
404  SCLogDebug("Unable to alloc SpmBmCtx.");
405  SCFree(ctx);
406  return NULL;
407  }
408  memset(sctx, 0, sizeof(*sctx));
409 
410  sctx->needle = SCMalloc(needle_len);
411  if (sctx->needle == NULL) {
412  SCLogDebug("Unable to alloc string.");
413  SCFree(sctx);
414  SCFree(ctx);
415  return NULL;
416  }
417  memcpy(sctx->needle, needle, needle_len);
418  sctx->needle_len = needle_len;
419 
420  if (nocase) {
421  sctx->bm_ctx = BoyerMooreNocaseCtxInit(sctx->needle, sctx->needle_len);
422  sctx->nocase = 1;
423  } else {
424  sctx->bm_ctx = BoyerMooreCtxInit(sctx->needle, sctx->needle_len);
425  sctx->nocase = 0;
426  }
427 
428  ctx->ctx = sctx;
429  return ctx;
430 }
431 
432 static void BMDestroyCtx(SpmCtx *ctx)
433 {
434  if (ctx == NULL) {
435  return;
436  }
437 
438  SpmBmCtx *sctx = ctx->ctx;
439  if (sctx != NULL) {
441  if (sctx->needle != NULL) {
442  SCFree(sctx->needle);
443  }
444  SCFree(sctx);
445  }
446 
447  SCFree(ctx);
448 }
449 
450 static uint8_t *BMScan(const SpmCtx *ctx, SpmThreadCtx *thread_ctx,
451  const uint8_t *haystack, uint32_t haystack_len)
452 {
453  const SpmBmCtx *sctx = ctx->ctx;
454 
455  if (sctx->nocase) {
456  return BoyerMooreNocase(sctx->needle, sctx->needle_len, haystack,
457  haystack_len, sctx->bm_ctx);
458  } else {
459  return BoyerMoore(sctx->needle, sctx->needle_len, haystack,
460  haystack_len, sctx->bm_ctx);
461  }
462 }
463 
464 static SpmGlobalThreadCtx *BMInitGlobalThreadCtx(void)
465 {
466  SpmGlobalThreadCtx *global_thread_ctx = SCMalloc(sizeof(SpmGlobalThreadCtx));
467  if (global_thread_ctx == NULL) {
468  SCLogDebug("Unable to alloc SpmThreadCtx.");
469  return NULL;
470  }
471  memset(global_thread_ctx, 0, sizeof(*global_thread_ctx));
472  global_thread_ctx->matcher = SPM_BM;
473  return global_thread_ctx;
474 }
475 
476 static void BMDestroyGlobalThreadCtx(SpmGlobalThreadCtx *global_thread_ctx)
477 {
478  if (global_thread_ctx == NULL) {
479  return;
480  }
481  SCFree(global_thread_ctx);
482 }
483 
484 static void BMDestroyThreadCtx(SpmThreadCtx *thread_ctx)
485 {
486  if (thread_ctx == NULL) {
487  return;
488  }
489  SCFree(thread_ctx);
490 }
491 
492 static SpmThreadCtx *BMMakeThreadCtx(const SpmGlobalThreadCtx *global_thread_ctx) {
493  SpmThreadCtx *thread_ctx = SCMalloc(sizeof(SpmThreadCtx));
494  if (thread_ctx == NULL) {
495  SCLogDebug("Unable to alloc SpmThreadCtx.");
496  return NULL;
497  }
498  memset(thread_ctx, 0, sizeof(*thread_ctx));
499  thread_ctx->matcher = SPM_BM;
500  return thread_ctx;
501 }
502 
503 void SpmBMRegister(void)
504 {
505  spm_table[SPM_BM].name = "bm";
506  spm_table[SPM_BM].InitGlobalThreadCtx = BMInitGlobalThreadCtx;
507  spm_table[SPM_BM].DestroyGlobalThreadCtx = BMDestroyGlobalThreadCtx;
508  spm_table[SPM_BM].MakeThreadCtx = BMMakeThreadCtx;
509  spm_table[SPM_BM].DestroyThreadCtx = BMDestroyThreadCtx;
510  spm_table[SPM_BM].InitCtx = BMInitCtx;
511  spm_table[SPM_BM].DestroyCtx = BMDestroyCtx;
512  spm_table[SPM_BM].Scan = BMScan;
513 }
SpmBmCtx_
Definition: util-spm-bm.c:384
SpmBmCtx
struct SpmBmCtx_ SpmBmCtx
SpmBmCtx_::needle_len
uint16_t needle_len
Definition: util-spm-bm.c:387
SpmBmCtx_::bm_ctx
BmCtx * bm_ctx
Definition: util-spm-bm.c:385
SpmGlobalThreadCtx_::matcher
uint16_t matcher
Definition: util-spm.h:48
BmCtx_::bmGs
uint16_t bmGs[]
Definition: util-spm-bm.h:36
BoyerMooreCtxInit
BmCtx * BoyerMooreCtxInit(const uint8_t *needle, uint16_t needle_len)
Setup a Boyer Moore context.
Definition: util-spm-bm.c:78
unlikely
#define unlikely(expr)
Definition: util-optimize.h:35
SpmTableElmt_::Scan
uint8_t *(* Scan)(const SpmCtx *ctx, SpmThreadCtx *thread_ctx, const uint8_t *haystack, uint32_t haystack_len)
Definition: util-spm.h:68
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:269
u8_toupper
#define u8_toupper(c)
Definition: suricata-common.h:432
SpmBMRegister
void SpmBMRegister(void)
Definition: util-spm-bm.c:503
SpmTableElmt_::InitCtx
SpmCtx *(* InitCtx)(const uint8_t *needle, uint16_t needle_len, int nocase, SpmGlobalThreadCtx *g_thread_ctx)
Definition: util-spm.h:65
SpmTableElmt_::name
const char * name
Definition: util-spm.h:60
util-memcpy.h
SpmTableElmt_::MakeThreadCtx
SpmThreadCtx *(* MakeThreadCtx)(const SpmGlobalThreadCtx *g_thread_ctx)
Definition: util-spm.h:63
u8_tolower
#define u8_tolower(c)
Definition: suricata-common.h:431
BoyerMoore
uint8_t * BoyerMoore(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
Boyer Moore search algorithm Is better as the pattern length increases and for big buffers to search ...
Definition: util-spm-bm.c:300
m
SCMutex m
Definition: flow-hash.h:6
SPM_BM
@ SPM_BM
Definition: util-spm.h:30
ALPHABET_SIZE
#define ALPHABET_SIZE
Definition: util-spm-bm.h:30
util-debug.h
util-error.h
BmCtx_
Definition: util-spm-bm.h:33
SCEnter
#define SCEnter(...)
Definition: util-debug.h:271
BoyerMooreCtxToNocase
void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len)
Given a BmCtx structure, recreate the pre/suffixes for nocase.
Definition: util-spm-bm.c:58
SpmBmCtx_::nocase
int nocase
Definition: util-spm-bm.c:388
BmCtx_::bmBc
uint16_t bmBc[ALPHABET_SIZE]
Definition: util-spm-bm.h:34
BoyerMooreNocase
uint8_t * BoyerMooreNocase(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
Boyer Moore search algorithm Is better as the pattern length increases and for big buffers to search ...
Definition: util-spm-bm.c:351
SpmCtx_::matcher
uint16_t matcher
Definition: util-spm.h:41
SpmTableElmt_::InitGlobalThreadCtx
SpmGlobalThreadCtx *(* InitGlobalThreadCtx)(void)
Definition: util-spm.h:61
SCReturn
#define SCReturn
Definition: util-debug.h:273
SpmBmCtx_::needle
uint8_t * needle
Definition: util-spm-bm.c:386
SpmThreadCtx_::matcher
uint16_t matcher
Definition: util-spm.h:55
SpmCtx_
Definition: util-spm.h:40
suricata-common.h
SpmTableElmt_::DestroyCtx
void(* DestroyCtx)(SpmCtx *)
Definition: util-spm.h:67
util-spm.h
util-spm-bm.h
FatalError
#define FatalError(...)
Definition: util-debug.h:502
SpmCtx_::ctx
void * ctx
Definition: util-spm.h:42
util-validate.h
SpmGlobalThreadCtx_
Definition: util-spm.h:47
SCMalloc
#define SCMalloc(sz)
Definition: util-mem.h:47
SCFree
#define SCFree(p)
Definition: util-mem.h:61
spm_table
SpmTableElmt spm_table[SPM_TABLE_SIZE]
Definition: util-spm.c:62
suricata.h
BoyerMooreNocaseCtxInit
BmCtx * BoyerMooreNocaseCtxInit(uint8_t *needle, uint16_t needle_len)
Setup a Boyer Moore context for nocase search.
Definition: util-spm-bm.c:105
DEBUG_VALIDATE_BUG_ON
#define DEBUG_VALIDATE_BUG_ON(exp)
Definition: util-validate.h:104
SpmTableElmt_::DestroyThreadCtx
void(* DestroyThreadCtx)(SpmThreadCtx *thread_ctx)
Definition: util-spm.h:64
BoyerMooreCtxDeInit
void BoyerMooreCtxDeInit(BmCtx *bmctx)
Free the memory allocated to Boyer Moore context.
Definition: util-spm-bm.c:119
SpmTableElmt_::DestroyGlobalThreadCtx
void(* DestroyGlobalThreadCtx)(SpmGlobalThreadCtx *g_thread_ctx)
Definition: util-spm.h:62
SpmThreadCtx_
Definition: util-spm.h:54