suricata
util-spm-bm.c
Go to the documentation of this file.
1 /* Copyright (C) 2007-2014 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
22  *
23  * Boyer Moore simple pattern matcher implementation
24  *
25  * The Boyer-Moore algorithm performs very well. It needs two arrays
26  * of context for each pattern that hold the applicable shifts on the text
27  * to search in, based on characters not available in the pattern
28  * and combinations of characters that start a suffix of the pattern.
29  * If possible, we should store the context of patterns that we are going
30  * to search for multiple times, so we don't spend time on rebuilding them.
31  */
32 
33 #include "suricata-common.h"
34 #include "suricata.h"
35 
36 #include "util-spm-bm.h"
37 #include "util-spm.h"
38 #include "util-debug.h"
39 #include "util-error.h"
40 #include "util-memcpy.h"
41 
/* Forward declarations of the file-local table builders that are defined
 * further down but referenced by the context setup functions below. */
42 static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs);
43 static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc);
44 static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc);
45 static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
46  uint16_t *suff);
47 static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs);
48 
49 /**
50  * \brief Given a BmCtx structure, recreate the pre/suffixes for
51  * nocase
52  *
53  * \retval BmCtx pointer to the already created BmCtx (with BoyerMooreCtxInit())
54  * \param str pointer to the pattern string
55  * \param size length of the string
56  */
57 void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len)
58 {
59  /* Store the content as lower case to make searching faster */
60  memcpy_tolower(needle, needle, needle_len);
61 
62  /* Prepare bad chars with nocase chars */
63  PreBmBcNocase(needle, needle_len, bm_ctx->bmBc);
64 
65  /* Prepare good Suffixes with nocase chars */
66  PreBmGsNocase(needle, needle_len, bm_ctx->bmGs);
67 }
68 
69 /**
70  * \brief Setup a Booyer Moore context.
71  *
72  * \param str pointer to the pattern string
73  * \param size length of the string
74  * \retval BmCtx pointer to the newly created Context for the pattern
75  * \initonly BoyerMoore contexts should be created at init
76  */
77 BmCtx *BoyerMooreCtxInit(const uint8_t *needle, uint16_t needle_len)
78 {
79  BmCtx *new = SCMalloc(sizeof(BmCtx) + sizeof(uint16_t) * (needle_len + 1));
80  if (unlikely(new == NULL)) {
81  SCLogError(SC_ERR_FATAL, "Fatal error encountered in BoyerMooreCtxInit. Exiting...");
82  exit(EXIT_FAILURE);
83  }
84 
85  /* Prepare bad chars */
86  PreBmBc(needle, needle_len, new->bmBc);
87 
88  /* Prepare good Suffixes */
89  if (PreBmGs(needle, needle_len, new->bmGs) == -1) {
90  SCLogError(SC_ERR_FATAL, "Fatal error encountered in BooyerMooreCtxInit. Exiting...");
91  exit(EXIT_FAILURE);
92  }
93 
94 
95  return new;
96 }
97 
98 /**
99  * \brief Setup a Booyer Moore context for nocase search
100  *
101  * \param str pointer to the pattern string
102  * \param size length of the string
103  * \retval BmCtx pointer to the newly created Context for the pattern
104  * \initonly BoyerMoore contexts should be created at init
105  */
106 BmCtx *BoyerMooreNocaseCtxInit(uint8_t *needle, uint16_t needle_len)
107 {
108  BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needle_len);
109 
110  BoyerMooreCtxToNocase(bm_ctx, needle, needle_len);
111 
112  return bm_ctx;
113 }
114 
115 /**
116  * \brief Free the memory allocated to Booyer Moore context.
117  *
118  * \param bmCtx pointer to the Context for the pattern
119  */
121 {
122  SCEnter();
123  if (bmctx == NULL)
124  SCReturn;
125 
126  SCFree(bmctx);
127 
128  SCReturn;
129 }
/**
 * \brief Build the bad character shift table for a case-sensitive pattern.
 *
 * Every one of the 256 entries starts out as the pattern length. Each byte
 * of the pattern except the last one then gets the distance from its
 * position to the end of the pattern; later occurrences overwrite earlier
 * ones.
 *
 * \param x    pointer to the pattern
 * \param m    length of the pattern in bytes
 * \param bmBc table to fill; the first 256 entries are written
 */
static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc)
{
    const int32_t last = m - 1;
    int32_t idx;

    /* Default shift: the whole pattern length. */
    for (idx = 0; idx < 256; idx++) {
        bmBc[idx] = m;
    }

    /* The final pattern byte is skipped on purpose (idx stops before last). */
    for (idx = 0; idx < last; idx++) {
        bmBc[(unsigned char)x[idx]] = last - idx;
    }
}
149 
/**
 * \brief Compute the suffix length table used to derive the good suffix
 *        shifts of a case-sensitive pattern.
 *
 * After the call, suff[i] holds the length of the longest run of bytes
 * ending at position i that matches the end of the pattern; suff[m - 1] is
 * set to m. For an empty pattern nothing is written.
 *
 * \param x    pointer to the pattern
 * \param m    length of the pattern in bytes
 * \param suff output array; entries 0 .. m - 1 are written
 */
static void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff)
{
    int32_t f = 0, g, i;

    /* An empty pattern has no suffixes. Without this guard the assignment
     * below would write to suff[-1], outside the caller's array. */
    if (m == 0) {
        return;
    }

    suff[m - 1] = m;
    g = m - 1;
    for (i = m - 2; i >= 0; --i) {
        if (i > g && suff[i + m - 1 - f] < i - g) {
            /* Position i lies inside the window matched earlier, so the
             * value computed for the mirrored position can be reused. */
            suff[i] = suff[i + m - 1 - f];
        } else {
            if (i < g) {
                g = i;
            }
            f = i;
            /* Extend the match leftwards against the end of the pattern. */
            while (g >= 0 && x[g] == x[g + m - 1 - f]) {
                --g;
            }
            suff[i] = f - g;
        }
    }
}
175 
/**
 * \brief Build the good suffix shift table for a case-sensitive pattern.
 *
 * The suffix lengths are computed with BoyerMooreSuffixes() into a local
 * array of m + 1 entries and then turned into shift distances.
 *
 * \param x    pointer to the pattern
 * \param m    length of the pattern in bytes
 * \param bmGs output array; entries 0 .. m - 1 are written
 * \retval 0 always; no code path in this function returns -1
 */
static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs)
{
    const int32_t len = m;
    uint16_t suff[m + 1];
    int32_t pos;
    int32_t fill = 0;

    BoyerMooreSuffixes(x, m, suff);

    /* Start with the largest shift everywhere: the full pattern length. */
    for (pos = 0; pos < len; pos++) {
        bmGs[pos] = m;
    }

    /* Walk from the end of the pattern down to the sentinel position -1.
     * A position qualifies when the pattern prefix ending there is also a
     * suffix (suff[pos] == pos + 1); the sentinel always qualifies. */
    for (pos = len - 1; pos >= -1; pos--) {
        if (pos != -1 && suff[pos] != pos + 1) {
            continue;
        }
        const int32_t shift = len - 1 - pos;
        while (fill < shift) {
            if (bmGs[fill] == m) {
                bmGs[fill] = shift;
            }
            fill++;
        }
    }

    /* Shifts for suffixes that reoccur elsewhere inside the pattern. */
    for (pos = 0; pos <= len - 2; pos++) {
        bmGs[len - 1 - suff[pos]] = len - 1 - pos;
    }
    return 0;
}
206 
/**
 * \brief Build the bad character shift table for a case-insensitive pattern.
 *
 * Same construction as PreBmBc(), except that every pattern byte is passed
 * through u8_tolower() before it is used as the table index.
 *
 * \param x    pointer to the pattern
 * \param m    length of the pattern in bytes
 * \param bmBc table to fill; the first 256 entries are written
 */
static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc)
{
    const int32_t last = m - 1;
    int32_t idx;

    /* Default shift: the whole pattern length. */
    for (idx = 0; idx < 256; idx++) {
        bmBc[idx] = m;
    }

    /* The final pattern byte is skipped on purpose (idx stops before last). */
    for (idx = 0; idx < last; idx++) {
        bmBc[u8_tolower((unsigned char)x[idx])] = last - idx;
    }
}
226 
/**
 * \brief Compute the suffix length table used to derive the good suffix
 *        shifts of a case-insensitive pattern.
 *
 * Works like BoyerMooreSuffixes() but compares bytes after folding both
 * sides with u8_tolower(). For an empty pattern nothing is written.
 *
 * \param x    pointer to the pattern
 * \param m    length of the pattern in bytes
 * \param suff output array; entries 0 .. m - 1 are written
 */
static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
                                     uint16_t *suff)
{
    int32_t f = 0, g, i;

    /* An empty pattern has no suffixes. Without this guard the assignment
     * below would write to suff[-1], outside the caller's array. */
    if (m == 0) {
        return;
    }

    suff[m - 1] = m;
    g = m - 1;
    for (i = m - 2; i >= 0; --i) {
        if (i > g && suff[i + m - 1 - f] < i - g) {
            /* Position i lies inside the window matched earlier, so the
             * value computed for the mirrored position can be reused. */
            suff[i] = suff[i + m - 1 - f];
        } else {
            if (i < g) {
                g = i;
            }
            f = i;
            /* Extend the match leftwards against the end of the pattern,
             * ignoring case. */
            while (g >= 0 && u8_tolower(x[g]) == u8_tolower(x[g + m - 1 - f])) {
                --g;
            }
            suff[i] = f - g;
        }
    }
}
249 
/**
 * \brief Build the good suffix shift table for a case-insensitive pattern.
 *
 * The suffix lengths are computed with BoyerMooreSuffixesNocase() into a
 * local array of m + 1 entries and then turned into shift distances.
 *
 * \param x    pointer to the pattern
 * \param m    length of the pattern in bytes
 * \param bmGs output array; entries 0 .. m - 1 are written
 */
static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs)
{
    const int32_t len = m;
    uint16_t suff[m + 1];
    int32_t pos;
    int32_t fill = 0;

    BoyerMooreSuffixesNocase(x, m, suff);

    /* Start with the largest shift everywhere: the full pattern length. */
    for (pos = 0; pos < len; pos++) {
        bmGs[pos] = m;
    }

    /* A position qualifies when the pattern prefix ending there is also a
     * suffix of the pattern. The loop never reaches -1, so no sentinel
     * check is needed. */
    for (pos = len - 1; pos >= 0; pos--) {
        if (suff[pos] != pos + 1) {
            continue;
        }
        const int32_t shift = len - 1 - pos;
        while (fill < shift) {
            if (bmGs[fill] == m) {
                bmGs[fill] = shift;
            }
            fill++;
        }
    }

    /* Shifts for suffixes that reoccur elsewhere inside the pattern. */
    for (pos = 0; pos <= len - 2; pos++) {
        bmGs[len - 1 - suff[pos]] = len - 1 - pos;
    }
}
282 
283 /**
284  * \brief Boyer Moore search algorithm
285  * Is better as the pattern length increases and for big buffers to search in.
286  * The algorithm needs a context of two arrays already prepared
287  * by prep_bad_chars() and prep_good_suffix()
288  *
289  * \param y pointer to the buffer to search in
290  * \param n length limit of the buffer
291  * \param x pointer to the pattern we ar searching for
292  * \param m length limit of the needle
293  * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
294  * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
295  *
296  * \retval ptr to start of the match; NULL if no match
297  */
298 uint8_t *BoyerMoore(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
299 {
300  uint16_t *bmGs = bm_ctx->bmGs;
301  uint16_t *bmBc = bm_ctx->bmBc;
302 
303  int i, j, m1, m2;
304  int32_t int_n;
305 #if 0
306  printf("\nBad:\n");
307  for (i=0;i<ALPHABET_SIZE;i++)
308  printf("%c,%d ", i, bmBc[i]);
309 
310  printf("\ngood:\n");
311  for (i=0;i<m;i++)
312  printf("%c, %d ", x[i],bmBc[i]);
313  printf("\n");
314 #endif
315  // force casting to int32_t (if possible)
316  int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
317  j = 0;
318  while (j <= int_n - m ) {
319  for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
320 
321  if (i < 0) {
322  return (uint8_t *)(y + j);
323  //j += bmGs[0];
324  } else {
325 // printf("%c", y[i+j]);
326  j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i)? m1: m2;
327 // printf("%d, %d\n", m1, m2);
328  }
329  }
330  return NULL;
331 }
332 
333 
334 /**
335  * \brief Boyer Moore search algorithm
336  * Is better as the pattern length increases and for big buffers to search in.
337  * The algorithm needs a context of two arrays already prepared
338  * by prep_bad_chars() and prep_good_suffix()
339  *
340  * \param y pointer to the buffer to search in
341  * \param n length limit of the buffer
342  * \param x pointer to the pattern we ar searching for
343  * \param m length limit of the needle
344  * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
345  * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
346  *
347  * \retval ptr to start of the match; NULL if no match
348  */
349 uint8_t *BoyerMooreNocase(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
350 {
351  uint16_t *bmGs = bm_ctx->bmGs;
352  uint16_t *bmBc = bm_ctx->bmBc;
353  int i, j, m1, m2;
354  int32_t int_n;
355 #if 0
356  printf("\nBad:\n");
357  for (i=0;i<ALPHABET_SIZE;i++)
358  printf("%c,%d ", i, bmBc[i]);
359 
360  printf("\ngood:\n");
361  for (i=0;i<m;i++)
362  printf("%c, %d ", x[i],bmBc[i]);
363  printf("\n");
364 #endif
365  // force casting to int32_t (if possible)
366  int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
367  j = 0;
368  while (j <= int_n - m ) {
369  /* x is stored in lowercase. */
370  for (i = m - 1; i >= 0 && x[i] == u8_tolower(y[i + j]); --i);
371 
372  if (i < 0) {
373  return (uint8_t *)(y + j);
374  } else {
375  j += (m1 = bmGs[i]) > (m2 = bmBc[u8_tolower(y[i + j])] - m + 1 + i)?
376  m1: m2;
377  }
378  }
379  return NULL;
380 }
381 
382 typedef struct SpmBmCtx_ {
384  uint8_t *needle;
385  uint16_t needle_len;
386  int nocase;
387 } SpmBmCtx;
388 
389 static SpmCtx *BMInitCtx(const uint8_t *needle, uint16_t needle_len, int nocase,
390  SpmGlobalThreadCtx *global_thread_ctx)
391 {
392  SpmCtx *ctx = SCMalloc(sizeof(SpmCtx));
393  if (ctx == NULL) {
394  SCLogDebug("Unable to alloc SpmCtx.");
395  return NULL;
396  }
397  memset(ctx, 0, sizeof(*ctx));
398  ctx->matcher = SPM_BM;
399 
400  SpmBmCtx *sctx = SCMalloc(sizeof(SpmBmCtx));
401  if (sctx == NULL) {
402  SCLogDebug("Unable to alloc SpmBmCtx.");
403  SCFree(ctx);
404  return NULL;
405  }
406  memset(sctx, 0, sizeof(*sctx));
407 
408  sctx->needle = SCMalloc(needle_len);
409  if (sctx->needle == NULL) {
410  SCLogDebug("Unable to alloc string.");
411  SCFree(sctx);
412  SCFree(ctx);
413  return NULL;
414  }
415  memcpy(sctx->needle, needle, needle_len);
416  sctx->needle_len = needle_len;
417 
418  if (nocase) {
419  sctx->bm_ctx = BoyerMooreNocaseCtxInit(sctx->needle, sctx->needle_len);
420  sctx->nocase = 1;
421  } else {
422  sctx->bm_ctx = BoyerMooreCtxInit(sctx->needle, sctx->needle_len);
423  sctx->nocase = 0;
424  }
425 
426  ctx->ctx = sctx;
427  return ctx;
428 }
429 
430 static void BMDestroyCtx(SpmCtx *ctx)
431 {
432  if (ctx == NULL) {
433  return;
434  }
435 
436  SpmBmCtx *sctx = ctx->ctx;
437  if (sctx != NULL) {
439  if (sctx->needle != NULL) {
440  SCFree(sctx->needle);
441  }
442  SCFree(sctx);
443  }
444 
445  SCFree(ctx);
446 }
447 
448 static uint8_t *BMScan(const SpmCtx *ctx, SpmThreadCtx *thread_ctx,
449  const uint8_t *haystack, uint32_t haystack_len)
450 {
451  const SpmBmCtx *sctx = ctx->ctx;
452 
453  if (sctx->nocase) {
454  return BoyerMooreNocase(sctx->needle, sctx->needle_len, haystack,
455  haystack_len, sctx->bm_ctx);
456  } else {
457  return BoyerMoore(sctx->needle, sctx->needle_len, haystack,
458  haystack_len, sctx->bm_ctx);
459  }
460 }
461 
462 static SpmGlobalThreadCtx *BMInitGlobalThreadCtx(void)
463 {
464  SpmGlobalThreadCtx *global_thread_ctx = SCMalloc(sizeof(SpmGlobalThreadCtx));
465  if (global_thread_ctx == NULL) {
466  SCLogDebug("Unable to alloc SpmThreadCtx.");
467  return NULL;
468  }
469  memset(global_thread_ctx, 0, sizeof(*global_thread_ctx));
470  global_thread_ctx->matcher = SPM_BM;
471  return global_thread_ctx;
472 }
473 
474 static void BMDestroyGlobalThreadCtx(SpmGlobalThreadCtx *global_thread_ctx)
475 {
476  if (global_thread_ctx == NULL) {
477  return;
478  }
479  SCFree(global_thread_ctx);
480 }
481 
482 static void BMDestroyThreadCtx(SpmThreadCtx *thread_ctx)
483 {
484  if (thread_ctx == NULL) {
485  return;
486  }
487  SCFree(thread_ctx);
488 }
489 
490 static SpmThreadCtx *BMMakeThreadCtx(const SpmGlobalThreadCtx *global_thread_ctx) {
491  SpmThreadCtx *thread_ctx = SCMalloc(sizeof(SpmThreadCtx));
492  if (thread_ctx == NULL) {
493  SCLogDebug("Unable to alloc SpmThreadCtx.");
494  return NULL;
495  }
496  memset(thread_ctx, 0, sizeof(*thread_ctx));
497  thread_ctx->matcher = SPM_BM;
498  return thread_ctx;
499 }
500 
501 void SpmBMRegister(void)
502 {
503  spm_table[SPM_BM].name = "bm";
504  spm_table[SPM_BM].InitGlobalThreadCtx = BMInitGlobalThreadCtx;
505  spm_table[SPM_BM].DestroyGlobalThreadCtx = BMDestroyGlobalThreadCtx;
506  spm_table[SPM_BM].MakeThreadCtx = BMMakeThreadCtx;
507  spm_table[SPM_BM].DestroyThreadCtx = BMDestroyThreadCtx;
508  spm_table[SPM_BM].InitCtx = BMInitCtx;
509  spm_table[SPM_BM].DestroyCtx = BMDestroyCtx;
510  spm_table[SPM_BM].Scan = BMScan;
511 }
const char * name
Definition: util-spm.h:62
#define SCLogDebug(...)
Definition: util-debug.h:335
uint16_t matcher
Definition: util-spm.h:43
BmCtx * BoyerMooreCtxInit(const uint8_t *needle, uint16_t needle_len)
Setup a Booyer Moore context.
Definition: util-spm-bm.c:77
struct SpmBmCtx_ SpmBmCtx
#define unlikely(expr)
Definition: util-optimize.h:35
void(* DestroyCtx)(SpmCtx *)
Definition: util-spm.h:69
uint16_t bmGs[]
Definition: util-spm-bm.h:37
SpmCtx *(* InitCtx)(const uint8_t *needle, uint16_t needle_len, int nocase, SpmGlobalThreadCtx *g_thread_ctx)
Definition: util-spm.h:67
uint16_t bmBc[ALPHABET_SIZE]
Definition: util-spm-bm.h:35
BmCtx * bm_ctx
Definition: util-spm-bm.c:383
uint8_t *(* Scan)(const SpmCtx *ctx, SpmThreadCtx *thread_ctx, const uint8_t *haystack, uint32_t haystack_len)
Definition: util-spm.h:70
uint16_t matcher
Definition: util-spm.h:57
SpmGlobalThreadCtx *(* InitGlobalThreadCtx)(void)
Definition: util-spm.h:63
SpmTableElmt spm_table[SPM_TABLE_SIZE]
Definition: util-spm.h:74
uint8_t * BoyerMooreNocase(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
Boyer Moore search algorithm Is better as the pattern length increases and for big buffers to search ...
Definition: util-spm-bm.c:349
void BoyerMooreCtxDeInit(BmCtx *bmctx)
Free the memory allocated to Booyer Moore context.
Definition: util-spm-bm.c:120
#define u8_tolower(c)
Definition: suricata.h:181
void(* DestroyThreadCtx)(SpmThreadCtx *thread_ctx)
Definition: util-spm.h:66
#define SCLogError(err_code,...)
Macro used to log ERROR messages.
Definition: util-debug.h:294
SpmThreadCtx *(* MakeThreadCtx)(const SpmGlobalThreadCtx *g_thread_ctx)
Definition: util-spm.h:65
#define SCEnter(...)
Definition: util-debug.h:337
void(* DestroyGlobalThreadCtx)(SpmGlobalThreadCtx *g_thread_ctx)
Definition: util-spm.h:64
uint8_t * needle
Definition: util-spm-bm.c:384
void * ctx
Definition: util-spm.h:44
void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len)
Given a BmCtx structure, recreate the pre/suffixes for nocase.
Definition: util-spm-bm.c:57
#define SCMalloc(a)
Definition: util-mem.h:222
#define ALPHABET_SIZE
Definition: util-spm-bm.h:31
#define SCFree(a)
Definition: util-mem.h:322
void SpmBMRegister(void)
Definition: util-spm-bm.c:501
SCMutex m
Definition: flow-hash.h:105
BmCtx * BoyerMooreNocaseCtxInit(uint8_t *needle, uint16_t needle_len)
Setup a Booyer Moore context for nocase search.
Definition: util-spm-bm.c:106
uint8_t * BoyerMoore(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
Boyer Moore search algorithm Is better as the pattern length increases and for big buffers to search ...
Definition: util-spm-bm.c:298
#define SCReturn
Definition: util-debug.h:339
uint16_t needle_len
Definition: util-spm-bm.c:385
uint16_t matcher
Definition: util-spm.h:50