suricata
util-spm-bm.c
Go to the documentation of this file.
1 /* Copyright (C) 2007-2014 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
/**
 * \file
 *
 * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
 *
 * Boyer Moore simple pattern matcher implementation
 *
 * The Boyer Moore algorithm has really good performance. It needs two arrays
 * of context for each pattern that hold the applicable shifts on the text
 * to search in, based on characters not available in the pattern
 * and combinations of characters that start a suffix of the pattern.
 * If possible, we should store the context of patterns that we are going
 * to search for multiple times, so we don't spend time on rebuilding them.
 */
32 
33 #include "suricata-common.h"
34 #include "suricata.h"
35 
36 #include "util-spm-bm.h"
37 #include "util-spm.h"
38 #include "util-debug.h"
39 #include "util-error.h"
40 #include "util-memcpy.h"
41 #include "util-validate.h"
42 
/* Forward declarations of the table-building helpers that are defined
 * further down in this file. */

/* case-sensitive tables */
static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc);
static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs);

/* case-insensitive tables */
static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc);
static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs);
static void BoyerMooreSuffixesNocase(
        const uint8_t *x, uint16_t m, uint16_t *suff);
49 
50 /**
51  * \brief Given a BmCtx structure, recreate the pre/suffixes for
52  * nocase
53  *
54  * \retval BmCtx pointer to the already created BmCtx (with BoyerMooreCtxInit())
55  * \param str pointer to the pattern string
56  * \param size length of the string
57  */
58 void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len)
59 {
60  /* Store the content as lower case to make searching faster */
61  memcpy_tolower(needle, needle, needle_len);
62 
63  /* Prepare bad chars with nocase chars */
64  PreBmBcNocase(needle, needle_len, bm_ctx->bmBc);
65 
66  /* Prepare good Suffixes with nocase chars */
67  PreBmGsNocase(needle, needle_len, bm_ctx->bmGs);
68 }
69 
70 /**
71  * \brief Setup a Booyer Moore context.
72  *
73  * \param str pointer to the pattern string
74  * \param size length of the string
75  * \retval BmCtx pointer to the newly created Context for the pattern
76  * \initonly BoyerMoore contexts should be created at init
77  */
78 BmCtx *BoyerMooreCtxInit(const uint8_t *needle, uint16_t needle_len)
79 {
80  BmCtx *new = SCMalloc(sizeof(BmCtx) + sizeof(uint16_t) * (needle_len + 1));
81  if (unlikely(new == NULL)) {
83  "Fatal error encountered in BoyerMooreCtxInit. Exiting...");
84  }
85 
86  /* Prepare bad chars */
87  PreBmBc(needle, needle_len, new->bmBc);
88 
89  /* Prepare good Suffixes */
90  if (PreBmGs(needle, needle_len, new->bmGs) == -1) {
92  "Fatal error encountered in BooyerMooreCtxInit. Exiting...");
93  }
94 
95 
96  return new;
97 }
98 
99 /**
100  * \brief Setup a Booyer Moore context for nocase search
101  *
102  * \param str pointer to the pattern string
103  * \param size length of the string
104  * \retval BmCtx pointer to the newly created Context for the pattern
105  * \initonly BoyerMoore contexts should be created at init
106  */
107 BmCtx *BoyerMooreNocaseCtxInit(uint8_t *needle, uint16_t needle_len)
108 {
109  BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needle_len);
110 
111  BoyerMooreCtxToNocase(bm_ctx, needle, needle_len);
112 
113  return bm_ctx;
114 }
115 
116 /**
117  * \brief Free the memory allocated to Booyer Moore context.
118  *
119  * \param bmCtx pointer to the Context for the pattern
120  */
122 {
123  SCEnter();
124  if (bmctx == NULL)
125  SCReturn;
126 
127  SCFree(bmctx);
128 
129  SCReturn;
130 }
/**
 * \brief Build the bad-character shift table for a case-sensitive pattern.
 *
 * Every one of the 256 entries starts out as the pattern length. Each
 * pattern byte except the final one then gets its distance to the end of
 * the pattern; a later occurrence of the same byte overwrites an earlier one.
 *
 * \param x    pointer to the pattern bytes
 * \param m    length of the pattern
 * \param bmBc output table with room for 256 entries
 */
static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc)
{
    uint16_t slot = 0;
    uint16_t pos;

    /* Bytes that never occur in the pattern shift by the full length */
    while (slot < 256) {
        bmBc[slot] = m;
        ++slot;
    }

    /* The last pattern byte is deliberately left out of this pass */
    for (pos = 0; pos < m - 1; ++pos) {
        const uint8_t byte = x[pos];
        bmBc[byte] = m - 1 - pos;
    }
}
150 
/**
 * \brief Compute the suffix-length table used to build the good-suffix
 *        shifts of a Boyer Moore context.
 *
 * For each position i, suff[i] receives the number of bytes that match when
 * comparing backwards from x[i] against the pattern compared backwards from
 * its last byte. suff[m - 1] is always m.
 *
 * \param x    pointer to the pattern bytes
 * \param m    length of the pattern; for m == 0 nothing is written
 * \param suff output array with room for at least m entries
 */
static void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff)
{
    int32_t f = 0, g, i;

    /* An empty pattern has no suffixes. Without this guard the store to
     * suff[m - 1] below would land on suff[-1]. */
    if (m == 0) {
        return;
    }

    suff[m - 1] = m;
    g = m - 1;
    for (i = m - 2; i >= 0; --i) {
        if (i > g && suff[i + m - 1 - f] < i - g) {
            /* Reuse a value already computed further right */
            suff[i] = suff[i + m - 1 - f];
        } else {
            if (i < g) {
                g = i;
            }
            f = i;
            /* Extend the match to the left for as long as bytes agree */
            while (g >= 0 && x[g] == x[g + m - 1 - f]) {
                --g;
            }
            DEBUG_VALIDATE_BUG_ON(f - g < 0 || f - g > UINT16_MAX);
            suff[i] = (uint16_t)(f - g);
        }
    }
}
177 
/**
 * \brief Build the good-suffix shift table of a Boyer Moore context.
 *
 * Uses the suffix lengths from BoyerMooreSuffixes() to fill bmGs with one
 * shift per pattern position.
 *
 * \param x    pointer to the pattern bytes
 * \param m    length of the pattern
 * \param bmGs output array with room for at least m entries
 * \retval 0 always; no failure path exists in this implementation
 */
static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs)
{
    const int32_t len = m;
    int32_t pos;
    int32_t fill = 0;
    uint16_t suff[m + 1];

    BoyerMooreSuffixes(x, m, suff);

    /* Start from the maximum shift everywhere */
    for (pos = 0; pos < len; ++pos) {
        bmGs[pos] = m;
    }

    /* Walk right to left, with one extra pass at pos == -1. Positions whose
     * suffix length reaches the start of the pattern lower the shift of all
     * still-untouched entries below len - 1 - pos. */
    for (pos = len - 1; pos >= -1; --pos) {
        if (pos != -1 && suff[pos] != pos + 1) {
            continue;
        }
        const int32_t shift = len - 1 - pos;
        while (fill < shift) {
            if (bmGs[fill] == m) {
                bmGs[fill] = (uint16_t)shift;
            }
            ++fill;
        }
    }

    /* Shifts derived directly from each suffix length; later positions
     * overwrite earlier ones. */
    for (pos = 0; pos <= len - 2; ++pos) {
        bmGs[len - 1 - suff[pos]] = (uint16_t)(len - 1 - pos);
    }
    return 0;
}
208 
/**
 * \brief Build the bad-character shift table for a case-insensitive pattern.
 *
 * Works like the case-sensitive variant, but each pattern byte (except the
 * final one) stores its shift under both its u8_tolower() and u8_toupper()
 * value.
 *
 * \param x    pointer to the pattern bytes
 * \param m    length of the pattern
 * \param bmBc output table with room for 256 entries
 */
static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc)
{
    uint16_t idx;

    /* Bytes that never occur in the pattern shift by the full length */
    for (idx = 0; idx < 256; ++idx) {
        bmBc[idx] = m;
    }

    for (idx = 0; idx < m - 1; ++idx) {
        const uint16_t shift = (uint16_t)(m - 1 - idx);

        bmBc[u8_tolower(x[idx])] = shift;
        bmBc[u8_toupper(x[idx])] = shift;
    }
}
229 
/**
 * \brief Case-insensitive variant of the suffix-length computation used to
 *        build the good-suffix shifts.
 *
 * Pattern bytes are compared after folding both sides with u8_tolower().
 * suff[m - 1] is always m.
 *
 * \param x    pointer to the pattern bytes
 * \param m    length of the pattern; for m == 0 nothing is written
 * \param suff output array with room for at least m entries
 */
static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
        uint16_t *suff)
{
    int32_t f = 0, g, i;

    /* An empty pattern has no suffixes. Without this guard the store to
     * suff[m - 1] below would land on suff[-1]. */
    if (m == 0) {
        return;
    }

    suff[m - 1] = m;
    g = m - 1;
    for (i = m - 2; i >= 0; --i) {
        if (i > g && suff[i + m - 1 - f] < i - g) {
            /* Reuse a value already computed further right */
            suff[i] = suff[i + m - 1 - f];
        } else {
            if (i < g) {
                g = i;
            }
            f = i;
            /* Extend the match to the left for as long as bytes agree */
            while (g >= 0 && u8_tolower(x[g]) == u8_tolower(x[g + m - 1 - f])) {
                --g;
            }
            DEBUG_VALIDATE_BUG_ON(f - g < 0 || f - g > UINT16_MAX);
            suff[i] = (uint16_t)(f - g);
        }
    }
}
253 
/**
 * \brief Build the good-suffix shift table of a case-insensitive Boyer Moore
 *        context.
 *
 * Uses the suffix lengths from BoyerMooreSuffixesNocase() to fill bmGs with
 * one shift per pattern position.
 *
 * \param x    pointer to the pattern bytes
 * \param m    length of the pattern
 * \param bmGs output array with room for at least m entries
 */
static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs)
{
    uint16_t suff[m + 1];
    uint16_t fill = 0;
    uint16_t k;

    BoyerMooreSuffixesNocase(x, m, suff);

    /* Start from the maximum shift everywhere */
    k = 0;
    while (k < m) {
        bmGs[k] = m;
        ++k;
    }

    /* Walk right to left. Positions whose suffix length reaches the start
     * of the pattern lower the shift of all still-untouched entries below
     * m - k. */
    for (k = m; k > 0; --k) {
        if (suff[k - 1] != k) {
            continue;
        }
        const uint16_t shift = (uint16_t)(m - k);
        while (fill < shift) {
            if (bmGs[fill] == m) {
                bmGs[fill] = shift;
            }
            ++fill;
        }
    }

    /* Shifts derived directly from each suffix length; later positions
     * overwrite earlier ones. */
    for (k = 0; k <= m - 2; ++k) {
        bmGs[m - 1 - suff[k]] = (uint16_t)(m - 1 - k);
    }
}
286 
287 /**
288  * \brief Boyer Moore search algorithm
289  * Is better as the pattern length increases and for big buffers to search in.
290  * The algorithm needs a context of two arrays already prepared
291  * by prep_bad_chars() and prep_good_suffix()
292  *
293  * \param y pointer to the buffer to search in
294  * \param n length limit of the buffer
295  * \param x pointer to the pattern we ar searching for
296  * \param m length limit of the needle
297  * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
298  * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
299  *
300  * \retval ptr to start of the match; NULL if no match
301  */
302 uint8_t *BoyerMoore(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
303 {
304  uint16_t *bmGs = bm_ctx->bmGs;
305  uint16_t *bmBc = bm_ctx->bmBc;
306 
307  int i, j, m1, m2;
308  int32_t int_n;
309 #if 0
310  printf("\nBad:\n");
311  for (i=0;i<ALPHABET_SIZE;i++)
312  printf("%c,%d ", i, bmBc[i]);
313 
314  printf("\ngood:\n");
315  for (i=0;i<m;i++)
316  printf("%c, %d ", x[i],bmBc[i]);
317  printf("\n");
318 #endif
319  // force casting to int32_t (if possible)
320  int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
321  j = 0;
322  while (j <= int_n - m ) {
323  for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
324 
325  if (i < 0) {
326  return (uint8_t *)(y + j);
327  //j += bmGs[0];
328  } else {
329 // printf("%c", y[i+j]);
330  j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i)? m1: m2;
331 // printf("%d, %d\n", m1, m2);
332  }
333  }
334  return NULL;
335 }
336 
337 
338 /**
339  * \brief Boyer Moore search algorithm
340  * Is better as the pattern length increases and for big buffers to search in.
341  * The algorithm needs a context of two arrays already prepared
342  * by prep_bad_chars() and prep_good_suffix()
343  *
344  * \param y pointer to the buffer to search in
345  * \param n length limit of the buffer
346  * \param x pointer to the pattern we ar searching for
347  * \param m length limit of the needle
348  * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
349  * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
350  *
351  * \retval ptr to start of the match; NULL if no match
352  */
353 uint8_t *BoyerMooreNocase(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
354 {
355  uint16_t *bmGs = bm_ctx->bmGs;
356  uint16_t *bmBc = bm_ctx->bmBc;
357  int i, j, m1, m2;
358  int32_t int_n;
359 #if 0
360  printf("\nBad:\n");
361  for (i=0;i<ALPHABET_SIZE;i++)
362  printf("%c,%d ", i, bmBc[i]);
363 
364  printf("\ngood:\n");
365  for (i=0;i<m;i++)
366  printf("%c, %d ", x[i],bmBc[i]);
367  printf("\n");
368 #endif
369  // force casting to int32_t (if possible)
370  int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
371  j = 0;
372  while (j <= int_n - m ) {
373  /* x is stored in lowercase. */
374  for (i = m - 1; i >= 0 && x[i] == u8_tolower(y[i + j]); --i);
375 
376  if (i < 0) {
377  return (uint8_t *)(y + j);
378  } else {
379  j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i)?
380  m1: m2;
381  }
382  }
383  return NULL;
384 }
385 
386 typedef struct SpmBmCtx_ {
388  uint8_t *needle;
389  uint16_t needle_len;
390  int nocase;
392 
393 static SpmCtx *BMInitCtx(const uint8_t *needle, uint16_t needle_len, int nocase,
394  SpmGlobalThreadCtx *global_thread_ctx)
395 {
396  SpmCtx *ctx = SCMalloc(sizeof(SpmCtx));
397  if (ctx == NULL) {
398  SCLogDebug("Unable to alloc SpmCtx.");
399  return NULL;
400  }
401  memset(ctx, 0, sizeof(*ctx));
402  ctx->matcher = SPM_BM;
403 
404  SpmBmCtx *sctx = SCMalloc(sizeof(SpmBmCtx));
405  if (sctx == NULL) {
406  SCLogDebug("Unable to alloc SpmBmCtx.");
407  SCFree(ctx);
408  return NULL;
409  }
410  memset(sctx, 0, sizeof(*sctx));
411 
412  sctx->needle = SCMalloc(needle_len);
413  if (sctx->needle == NULL) {
414  SCLogDebug("Unable to alloc string.");
415  SCFree(sctx);
416  SCFree(ctx);
417  return NULL;
418  }
419  memcpy(sctx->needle, needle, needle_len);
420  sctx->needle_len = needle_len;
421 
422  if (nocase) {
423  sctx->bm_ctx = BoyerMooreNocaseCtxInit(sctx->needle, sctx->needle_len);
424  sctx->nocase = 1;
425  } else {
426  sctx->bm_ctx = BoyerMooreCtxInit(sctx->needle, sctx->needle_len);
427  sctx->nocase = 0;
428  }
429 
430  ctx->ctx = sctx;
431  return ctx;
432 }
433 
434 static void BMDestroyCtx(SpmCtx *ctx)
435 {
436  if (ctx == NULL) {
437  return;
438  }
439 
440  SpmBmCtx *sctx = ctx->ctx;
441  if (sctx != NULL) {
443  if (sctx->needle != NULL) {
444  SCFree(sctx->needle);
445  }
446  SCFree(sctx);
447  }
448 
449  SCFree(ctx);
450 }
451 
452 static uint8_t *BMScan(const SpmCtx *ctx, SpmThreadCtx *thread_ctx,
453  const uint8_t *haystack, uint32_t haystack_len)
454 {
455  const SpmBmCtx *sctx = ctx->ctx;
456 
457  if (sctx->nocase) {
458  return BoyerMooreNocase(sctx->needle, sctx->needle_len, haystack,
459  haystack_len, sctx->bm_ctx);
460  } else {
461  return BoyerMoore(sctx->needle, sctx->needle_len, haystack,
462  haystack_len, sctx->bm_ctx);
463  }
464 }
465 
466 static SpmGlobalThreadCtx *BMInitGlobalThreadCtx(void)
467 {
468  SpmGlobalThreadCtx *global_thread_ctx = SCMalloc(sizeof(SpmGlobalThreadCtx));
469  if (global_thread_ctx == NULL) {
470  SCLogDebug("Unable to alloc SpmThreadCtx.");
471  return NULL;
472  }
473  memset(global_thread_ctx, 0, sizeof(*global_thread_ctx));
474  global_thread_ctx->matcher = SPM_BM;
475  return global_thread_ctx;
476 }
477 
478 static void BMDestroyGlobalThreadCtx(SpmGlobalThreadCtx *global_thread_ctx)
479 {
480  if (global_thread_ctx == NULL) {
481  return;
482  }
483  SCFree(global_thread_ctx);
484 }
485 
486 static void BMDestroyThreadCtx(SpmThreadCtx *thread_ctx)
487 {
488  if (thread_ctx == NULL) {
489  return;
490  }
491  SCFree(thread_ctx);
492 }
493 
494 static SpmThreadCtx *BMMakeThreadCtx(const SpmGlobalThreadCtx *global_thread_ctx) {
495  SpmThreadCtx *thread_ctx = SCMalloc(sizeof(SpmThreadCtx));
496  if (thread_ctx == NULL) {
497  SCLogDebug("Unable to alloc SpmThreadCtx.");
498  return NULL;
499  }
500  memset(thread_ctx, 0, sizeof(*thread_ctx));
501  thread_ctx->matcher = SPM_BM;
502  return thread_ctx;
503 }
504 
505 void SpmBMRegister(void)
506 {
507  spm_table[SPM_BM].name = "bm";
508  spm_table[SPM_BM].InitGlobalThreadCtx = BMInitGlobalThreadCtx;
509  spm_table[SPM_BM].DestroyGlobalThreadCtx = BMDestroyGlobalThreadCtx;
510  spm_table[SPM_BM].MakeThreadCtx = BMMakeThreadCtx;
511  spm_table[SPM_BM].DestroyThreadCtx = BMDestroyThreadCtx;
512  spm_table[SPM_BM].InitCtx = BMInitCtx;
513  spm_table[SPM_BM].DestroyCtx = BMDestroyCtx;
514  spm_table[SPM_BM].Scan = BMScan;
515 }
SpmBmCtx_
Definition: util-spm-bm.c:386
SpmBmCtx
struct SpmBmCtx_ SpmBmCtx
SPM_BM
@ SPM_BM
Definition: util-spm.h:30
SpmBmCtx_::needle_len
uint16_t needle_len
Definition: util-spm-bm.c:389
SpmBmCtx_::bm_ctx
BmCtx * bm_ctx
Definition: util-spm-bm.c:387
SpmGlobalThreadCtx_::matcher
uint16_t matcher
Definition: util-spm.h:48
BmCtx_::bmGs
uint16_t bmGs[]
Definition: util-spm-bm.h:36
BoyerMooreCtxInit
BmCtx * BoyerMooreCtxInit(const uint8_t *needle, uint16_t needle_len)
Setup a Booyer Moore context.
Definition: util-spm-bm.c:78
unlikely
#define unlikely(expr)
Definition: util-optimize.h:35
SpmTableElmt_::Scan
uint8_t *(* Scan)(const SpmCtx *ctx, SpmThreadCtx *thread_ctx, const uint8_t *haystack, uint32_t haystack_len)
Definition: util-spm.h:68
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:296
SpmBMRegister
void SpmBMRegister(void)
Definition: util-spm-bm.c:505
u8_tolower
#define u8_tolower(c)
Definition: suricata.h:177
SpmTableElmt_::InitCtx
SpmCtx *(* InitCtx)(const uint8_t *needle, uint16_t needle_len, int nocase, SpmGlobalThreadCtx *g_thread_ctx)
Definition: util-spm.h:65
SpmTableElmt_::name
const char * name
Definition: util-spm.h:60
util-memcpy.h
SpmTableElmt_::MakeThreadCtx
SpmThreadCtx *(* MakeThreadCtx)(const SpmGlobalThreadCtx *g_thread_ctx)
Definition: util-spm.h:63
BoyerMoore
uint8_t * BoyerMoore(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
Boyer Moore search algorithm Is better as the pattern length increases and for big buffers to search ...
Definition: util-spm-bm.c:302
m
SCMutex m
Definition: flow-hash.h:6
ALPHABET_SIZE
#define ALPHABET_SIZE
Definition: util-spm-bm.h:30
util-debug.h
util-error.h
u8_toupper
#define u8_toupper(c)
Definition: suricata.h:178
BmCtx_
Definition: util-spm-bm.h:33
SCEnter
#define SCEnter(...)
Definition: util-debug.h:298
BoyerMooreCtxToNocase
void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len)
Given a BmCtx structure, recreate the pre/suffixes for nocase.
Definition: util-spm-bm.c:58
SpmBmCtx_::nocase
int nocase
Definition: util-spm-bm.c:390
BmCtx_::bmBc
uint16_t bmBc[ALPHABET_SIZE]
Definition: util-spm-bm.h:34
BoyerMooreNocase
uint8_t * BoyerMooreNocase(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
Boyer Moore search algorithm Is better as the pattern length increases and for big buffers to search ...
Definition: util-spm-bm.c:353
SpmCtx_::matcher
uint16_t matcher
Definition: util-spm.h:41
SpmTableElmt_::InitGlobalThreadCtx
SpmGlobalThreadCtx *(* InitGlobalThreadCtx)(void)
Definition: util-spm.h:61
SCReturn
#define SCReturn
Definition: util-debug.h:300
SpmBmCtx_::needle
uint8_t * needle
Definition: util-spm-bm.c:388
SpmThreadCtx_::matcher
uint16_t matcher
Definition: util-spm.h:55
SpmCtx_
Definition: util-spm.h:40
suricata-common.h
SpmTableElmt_::DestroyCtx
void(* DestroyCtx)(SpmCtx *)
Definition: util-spm.h:67
util-spm.h
util-spm-bm.h
FatalError
#define FatalError(x,...)
Definition: util-debug.h:530
SpmCtx_::ctx
void * ctx
Definition: util-spm.h:42
util-validate.h
SpmGlobalThreadCtx_
Definition: util-spm.h:47
SCMalloc
#define SCMalloc(sz)
Definition: util-mem.h:47
SCFree
#define SCFree(p)
Definition: util-mem.h:61
SC_ERR_FATAL
@ SC_ERR_FATAL
Definition: util-error.h:203
spm_table
SpmTableElmt spm_table[SPM_TABLE_SIZE]
Definition: util-spm.c:62
suricata.h
BoyerMooreNocaseCtxInit
BmCtx * BoyerMooreNocaseCtxInit(uint8_t *needle, uint16_t needle_len)
Setup a Booyer Moore context for nocase search.
Definition: util-spm-bm.c:107
DEBUG_VALIDATE_BUG_ON
#define DEBUG_VALIDATE_BUG_ON(exp)
Definition: util-validate.h:111
SpmTableElmt_::DestroyThreadCtx
void(* DestroyThreadCtx)(SpmThreadCtx *thread_ctx)
Definition: util-spm.h:64
BoyerMooreCtxDeInit
void BoyerMooreCtxDeInit(BmCtx *bmctx)
Free the memory allocated to Booyer Moore context.
Definition: util-spm-bm.c:121
SpmTableElmt_::DestroyGlobalThreadCtx
void(* DestroyGlobalThreadCtx)(SpmGlobalThreadCtx *g_thread_ctx)
Definition: util-spm.h:62
SpmThreadCtx_
Definition: util-spm.h:54