suricata
util-spm-bm.c
Go to the documentation of this file.
1 /* Copyright (C) 2007-2014 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
22  *
23  * Boyer Moore simple pattern matcher implementation
24  *
25  * Boyer Moore algorithm has a really good performance. It need two arrays
26  * of context for each pattern that hold applicable shifts on the text
27  * to search in, based on characters not available in the pattern
28  * and combinations of characters that start a suffix of the pattern.
29  * If possible, we should store the context of patterns that we are going
30  * to search for multiple times, so we don't spend time on rebuilding them.
31  */
32 
33 #include "suricata-common.h"
34 #include "suricata.h"
35 
36 #include "util-spm-bm.h"
37 #include "util-spm.h"
38 #include "util-debug.h"
39 #include "util-error.h"
40 #include "util-memcpy.h"
41 #include "util-validate.h"
42 
43 static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs);
44 static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc);
45 static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc);
46 static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
47  uint16_t *suff);
48 static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs);
49 
50 /**
51  * \brief Given a BmCtx structure, recreate the pre/suffixes for
52  * nocase
53  *
54  * \retval BmCtx pointer to the already created BmCtx (with BoyerMooreCtxInit())
55  * \param str pointer to the pattern string
56  * \param size length of the string
57  */
58 void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len)
59 {
60  /* Store the content as lower case to make searching faster */
61  memcpy_tolower(needle, needle, needle_len);
62 
63  /* Prepare bad chars with nocase chars */
64  PreBmBcNocase(needle, needle_len, bm_ctx->bmBc);
65 
66  /* Prepare good Suffixes with nocase chars */
67  PreBmGsNocase(needle, needle_len, bm_ctx->bmGs);
68 }
69 
70 /**
71  * \brief Setup a Boyer Moore context.
72  *
73  * \param str pointer to the pattern string
74  * \param size length of the string
75  * \retval BmCtx pointer to the newly created Context for the pattern
76  * \initonly BoyerMoore contexts should be created at init
77  */
78 BmCtx *BoyerMooreCtxInit(const uint8_t *needle, uint16_t needle_len)
79 {
80  BmCtx *new = SCMalloc(sizeof(BmCtx) + sizeof(uint16_t) * (needle_len + 1));
81  if (unlikely(new == NULL)) {
82  FatalError("Fatal error encountered in BoyerMooreCtxInit. Exiting...");
83  }
84 
85  /* Prepare bad chars */
86  PreBmBc(needle, needle_len, new->bmBc);
87 
88  /* Prepare good Suffixes */
89  if (PreBmGs(needle, needle_len, new->bmGs) == -1) {
90  FatalError("Fatal error encountered in BoyerMoreCtxInit. Exiting...");
91  }
92 
93 
94  return new;
95 }
96 
97 /**
98  * \brief Setup a Boyer Moore context for nocase search
99  *
100  * \param str pointer to the pattern string
101  * \param size length of the string
102  * \retval BmCtx pointer to the newly created Context for the pattern
103  * \initonly BoyerMoore contexts should be created at init
104  */
105 BmCtx *BoyerMooreNocaseCtxInit(uint8_t *needle, uint16_t needle_len)
106 {
107  BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needle_len);
108 
109  BoyerMooreCtxToNocase(bm_ctx, needle, needle_len);
110 
111  return bm_ctx;
112 }
113 
114 /**
115  * \brief Free the memory allocated to Boyer Moore context.
116  *
117  * \param bmCtx pointer to the Context for the pattern
118  */
120 {
121  SCEnter();
122  if (bmctx == NULL)
123  SCReturn;
124 
125  SCFree(bmctx);
126 
127  SCReturn;
128 }
129 /**
130  * \brief Array setup function for bad characters that split the pattern
131  * Remember that the result array should be the length of ALPHABET_SIZE
132  *
133  * \param str pointer to the pattern string
134  * \param size length of the string
135  * \param result pointer to an empty array that will hold the badchars
136  */
137 static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc)
138 {
139  uint16_t i;
140 
141  for (i = 0; i < 256; ++i) {
142  bmBc[i] = m;
143  }
144  for (i = 0; i < m - 1; ++i) {
145  bmBc[(unsigned char)x[i]] = m - i - 1;
146  }
147 }
148 
149 /**
150  * \brief Array setup function for building prefixes (shift for valid prefixes) for boyermoore context
151  *
152  * \param x pointer to the pattern string
153  * \param m length of the string
154  * \param suff pointer to an empty array that will hold the prefixes (shifts)
155  */
156 static void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff)
157 {
158  int32_t f = 0, g, i;
159  suff[m - 1] = m;
160  g = m - 1;
161  for (i = m - 2; i >= 0; --i) {
162  if (i > g && suff[i + m - 1 - f] < i - g)
163  suff[i] = suff[i + m - 1 - f];
164  else {
165  if (i < g)
166  g = i;
167  f = i;
168  while (g >= 0 && x[g] == x[g + m - 1 - f])
169  --g;
170  DEBUG_VALIDATE_BUG_ON(f - g < 0 || f - g > UINT16_MAX);
171  suff[i] = (uint16_t)(f - g);
172  }
173  }
174 }
175 
176 /**
177  * \brief Array setup function for building prefixes (shift for valid prefixes) for boyermoore context
178  *
179  * \param x pointer to the pattern string
180  * \param m length of the string
181  * \param bmGs pointer to an empty array that will hold the prefixes (shifts)
182  * \retval 0 ok, -1 failed
183  */
184 static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs)
185 {
186  int32_t i, j;
187  uint16_t suff[m + 1];
188 
189  BoyerMooreSuffixes(x, m, suff);
190 
191  for (i = 0; i < m; ++i)
192  bmGs[i] = m;
193 
194  j = 0;
195 
196  for (i = m - 1; i >= -1; --i)
197  if (i == -1 || suff[i] == i + 1)
198  for (; j < m - 1 - i; ++j)
199  if (bmGs[j] == m)
200  bmGs[j] = (uint16_t)(m - 1 - i);
201 
202  for (i = 0; i <= m - 2; ++i)
203  bmGs[m - 1 - suff[i]] = (uint16_t)(m - 1 - i);
204  return 0;
205 }
206 
207 /**
208  * \brief Array setup function for bad characters that split the pattern
209  * Remember that the result array should be the length of ALPHABET_SIZE
210  *
211  * \param str pointer to the pattern string
212  * \param size length of the string
213  * \param result pointer to an empty array that will hold the badchars
214  */
215 static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc)
216 {
217  uint16_t i;
218 
219  for (i = 0; i < 256; ++i) {
220  bmBc[i] = m;
221  }
222  for (i = 0; i < m - 1; ++i) {
223  bmBc[u8_tolower(x[i])] = m - 1 - i;
224  bmBc[u8_toupper(x[i])] = m - 1 - i;
225  }
226 }
227 
228 static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
229  uint16_t *suff)
230 {
231  int32_t f = 0, g, i;
232 
233  suff[m - 1] = m;
234  g = m - 1;
235  for (i = m - 2; i >= 0; --i) {
236  if (i > g && suff[i + m - 1 - f] < i - g) {
237  suff[i] = suff[i + m - 1 - f];
238  } else {
239  if (i < g) {
240  g = i;
241  }
242  f = i;
243  while (g >= 0 && u8_tolower(x[g]) == u8_tolower(x[g + m - 1 - f])) {
244  --g;
245  }
246  DEBUG_VALIDATE_BUG_ON(f - g < 0 || f - g > UINT16_MAX);
247  suff[i] = (uint16_t)(f - g);
248  }
249  }
250 }
251 
252 /**
253  * \brief Array setup function for building prefixes (shift for valid prefixes)
254  * for boyermoore context case less
255  *
256  * \param x pointer to the pattern string
257  * \param m length of the string
258  * \param bmGs pointer to an empty array that will hold the prefixes (shifts)
259  */
260 static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs)
261 {
262  uint16_t i, j;
263  uint16_t suff[m + 1];
264 
265  BoyerMooreSuffixesNocase(x, m, suff);
266 
267  for (i = 0; i < m; ++i) {
268  bmGs[i] = m;
269  }
270  j = 0;
271  for (i = m; i > 0; --i) {
272  if (suff[i - 1] == i) {
273  for (; j < m - i; ++j) {
274  if (bmGs[j] == m) {
275  bmGs[j] = m - i;
276  }
277  }
278  }
279  }
280  for (i = 0; i <= m - 2; ++i) {
281  bmGs[m - 1 - suff[i]] = m - 1 - i;
282  }
283 }
284 
285 /**
286  * \brief Boyer Moore search algorithm
287  * Is better as the pattern length increases and for big buffers to search in.
288  * The algorithm needs a context of two arrays already prepared
289  * by prep_bad_chars() and prep_good_suffix()
290  *
291  * \param y pointer to the buffer to search in
292  * \param n length limit of the buffer
293  * \param x pointer to the pattern we ar searching for
294  * \param m length limit of the needle
295  * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
296  * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
297  *
298  * \retval ptr to start of the match; NULL if no match
299  */
300 uint8_t *BoyerMoore(
301  const uint8_t *x, const uint16_t m, const uint8_t *y, const uint32_t n, const BmCtx *bm_ctx)
302 {
303  const uint16_t *bmGs = bm_ctx->bmGs;
304  const uint16_t *bmBc = bm_ctx->bmBc;
305 
306  int i, j, m1, m2;
307  // force casting to int32_t (if possible)
308  const int32_t int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
309  j = 0;
310  while (j <= int_n - m ) {
311  for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
312 
313  if (i < 0) {
314  return (uint8_t *)(y + j);
315  } else {
316  j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i) ? m1 : m2;
317  }
318  }
319  return NULL;
320 }
321 
322 
323 /**
324  * \brief Boyer Moore search algorithm
325  * Is better as the pattern length increases and for big buffers to search in.
326  * The algorithm needs a context of two arrays already prepared
327  * by prep_bad_chars() and prep_good_suffix()
328  *
329  * \param y pointer to the buffer to search in
330  * \param n length limit of the buffer
331  * \param x pointer to the pattern we ar searching for
332  * \param m length limit of the needle
333  * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
334  * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
335  *
336  * \retval ptr to start of the match; NULL if no match
337  */
339  const uint8_t *x, const uint16_t m, const uint8_t *y, const uint32_t n, const BmCtx *bm_ctx)
340 {
341  const uint16_t *bmGs = bm_ctx->bmGs;
342  const uint16_t *bmBc = bm_ctx->bmBc;
343  int i, j, m1, m2;
344  // force casting to int32_t (if possible)
345  const int32_t int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
346  j = 0;
347  while (j <= int_n - m ) {
348  /* x is stored in lowercase. */
349  for (i = m - 1; i >= 0 && x[i] == u8_tolower(y[i + j]); --i);
350 
351  if (i < 0) {
352  return (uint8_t *)(y + j);
353  } else {
354  j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i) ? m1 : m2;
355  }
356  }
357  return NULL;
358 }
359 
360 typedef struct SpmBmCtx_ {
362  uint8_t *needle;
363  uint16_t needle_len;
364  int nocase;
366 
367 static SpmCtx *BMInitCtx(const uint8_t *needle, uint16_t needle_len, int nocase,
368  SpmGlobalThreadCtx *global_thread_ctx)
369 {
370  SpmCtx *ctx = SCCalloc(1, sizeof(SpmCtx));
371  if (ctx == NULL) {
372  SCLogDebug("Unable to alloc SpmCtx.");
373  return NULL;
374  }
375  ctx->matcher = SPM_BM;
376 
377  SpmBmCtx *sctx = SCCalloc(1, sizeof(SpmBmCtx));
378  if (sctx == NULL) {
379  SCLogDebug("Unable to alloc SpmBmCtx.");
380  SCFree(ctx);
381  return NULL;
382  }
383 
384  sctx->needle = SCMalloc(needle_len);
385  if (sctx->needle == NULL) {
386  SCLogDebug("Unable to alloc string.");
387  SCFree(sctx);
388  SCFree(ctx);
389  return NULL;
390  }
391  memcpy(sctx->needle, needle, needle_len);
392  sctx->needle_len = needle_len;
393 
394  if (nocase) {
395  sctx->bm_ctx = BoyerMooreNocaseCtxInit(sctx->needle, sctx->needle_len);
396  sctx->nocase = 1;
397  } else {
398  sctx->bm_ctx = BoyerMooreCtxInit(sctx->needle, sctx->needle_len);
399  sctx->nocase = 0;
400  }
401 
402  ctx->ctx = sctx;
403  return ctx;
404 }
405 
406 static void BMDestroyCtx(SpmCtx *ctx)
407 {
408  if (ctx == NULL) {
409  return;
410  }
411 
412  SpmBmCtx *sctx = ctx->ctx;
413  if (sctx != NULL) {
415  if (sctx->needle != NULL) {
416  SCFree(sctx->needle);
417  }
418  SCFree(sctx);
419  }
420 
421  SCFree(ctx);
422 }
423 
424 static uint8_t *BMScan(const SpmCtx *ctx, SpmThreadCtx *thread_ctx,
425  const uint8_t *haystack, uint32_t haystack_len)
426 {
427  const SpmBmCtx *sctx = ctx->ctx;
428 
429  if (sctx->nocase) {
430  return BoyerMooreNocase(sctx->needle, sctx->needle_len, haystack,
431  haystack_len, sctx->bm_ctx);
432  } else {
433  return BoyerMoore(sctx->needle, sctx->needle_len, haystack,
434  haystack_len, sctx->bm_ctx);
435  }
436 }
437 
438 static SpmGlobalThreadCtx *BMInitGlobalThreadCtx(void)
439 {
440  SpmGlobalThreadCtx *global_thread_ctx = SCCalloc(1, sizeof(SpmGlobalThreadCtx));
441  if (global_thread_ctx == NULL) {
442  SCLogDebug("Unable to alloc SpmThreadCtx.");
443  return NULL;
444  }
445  global_thread_ctx->matcher = SPM_BM;
446  return global_thread_ctx;
447 }
448 
449 static void BMDestroyGlobalThreadCtx(SpmGlobalThreadCtx *global_thread_ctx)
450 {
451  if (global_thread_ctx == NULL) {
452  return;
453  }
454  SCFree(global_thread_ctx);
455 }
456 
457 static void BMDestroyThreadCtx(SpmThreadCtx *thread_ctx)
458 {
459  if (thread_ctx == NULL) {
460  return;
461  }
462  SCFree(thread_ctx);
463 }
464 
465 static SpmThreadCtx *BMMakeThreadCtx(const SpmGlobalThreadCtx *global_thread_ctx) {
466  SpmThreadCtx *thread_ctx = SCCalloc(1, sizeof(SpmThreadCtx));
467  if (thread_ctx == NULL) {
468  SCLogDebug("Unable to alloc SpmThreadCtx.");
469  return NULL;
470  }
471  thread_ctx->matcher = SPM_BM;
472  return thread_ctx;
473 }
474 
475 void SpmBMRegister(void)
476 {
477  spm_table[SPM_BM].name = "bm";
478  spm_table[SPM_BM].InitGlobalThreadCtx = BMInitGlobalThreadCtx;
479  spm_table[SPM_BM].DestroyGlobalThreadCtx = BMDestroyGlobalThreadCtx;
480  spm_table[SPM_BM].MakeThreadCtx = BMMakeThreadCtx;
481  spm_table[SPM_BM].DestroyThreadCtx = BMDestroyThreadCtx;
482  spm_table[SPM_BM].InitCtx = BMInitCtx;
483  spm_table[SPM_BM].DestroyCtx = BMDestroyCtx;
484  spm_table[SPM_BM].Scan = BMScan;
485 }
SpmBmCtx_
Definition: util-spm-bm.c:360
SpmBmCtx
struct SpmBmCtx_ SpmBmCtx
SPM_BM
@ SPM_BM
Definition: util-spm.h:30
SpmCtx_::matcher
uint8_t matcher
Definition: util-spm.h:41
SpmBmCtx_::needle_len
uint16_t needle_len
Definition: util-spm-bm.c:363
SpmBmCtx_::bm_ctx
BmCtx * bm_ctx
Definition: util-spm-bm.c:361
BmCtx_::bmGs
uint16_t bmGs[]
Definition: util-spm-bm.h:36
BoyerMooreCtxInit
BmCtx * BoyerMooreCtxInit(const uint8_t *needle, uint16_t needle_len)
Setup a Boyer Moore context.
Definition: util-spm-bm.c:78
unlikely
#define unlikely(expr)
Definition: util-optimize.h:35
SpmTableElmt_::Scan
uint8_t *(* Scan)(const SpmCtx *ctx, SpmThreadCtx *thread_ctx, const uint8_t *haystack, uint32_t haystack_len)
Definition: util-spm.h:68
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:269
u8_toupper
#define u8_toupper(c)
Definition: suricata-common.h:437
SpmBMRegister
void SpmBMRegister(void)
Definition: util-spm-bm.c:475
SpmTableElmt_::InitCtx
SpmCtx *(* InitCtx)(const uint8_t *needle, uint16_t needle_len, int nocase, SpmGlobalThreadCtx *g_thread_ctx)
Definition: util-spm.h:65
SpmTableElmt_::name
const char * name
Definition: util-spm.h:60
util-memcpy.h
BoyerMooreNocase
uint8_t * BoyerMooreNocase(const uint8_t *x, const uint16_t m, const uint8_t *y, const uint32_t n, const BmCtx *bm_ctx)
Boyer Moore search algorithm Is better as the pattern length increases and for big buffers to search ...
Definition: util-spm-bm.c:338
SpmTableElmt_::MakeThreadCtx
SpmThreadCtx *(* MakeThreadCtx)(const SpmGlobalThreadCtx *g_thread_ctx)
Definition: util-spm.h:63
u8_tolower
#define u8_tolower(c)
Definition: suricata-common.h:436
m
SCMutex m
Definition: flow-hash.h:6
util-debug.h
util-error.h
BoyerMoore
uint8_t * BoyerMoore(const uint8_t *x, const uint16_t m, const uint8_t *y, const uint32_t n, const BmCtx *bm_ctx)
Boyer Moore search algorithm Is better as the pattern length increases and for big buffers to search ...
Definition: util-spm-bm.c:300
BmCtx_
Definition: util-spm-bm.h:33
SCEnter
#define SCEnter(...)
Definition: util-debug.h:271
BoyerMooreCtxToNocase
void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len)
Given a BmCtx structure, recreate the pre/suffixes for nocase.
Definition: util-spm-bm.c:58
SpmBmCtx_::nocase
int nocase
Definition: util-spm-bm.c:364
BmCtx_::bmBc
uint16_t bmBc[ALPHABET_SIZE]
Definition: util-spm-bm.h:34
SpmTableElmt_::InitGlobalThreadCtx
SpmGlobalThreadCtx *(* InitGlobalThreadCtx)(void)
Definition: util-spm.h:61
SCReturn
#define SCReturn
Definition: util-debug.h:273
SpmBmCtx_::needle
uint8_t * needle
Definition: util-spm-bm.c:362
SpmGlobalThreadCtx_::matcher
uint8_t matcher
Definition: util-spm.h:48
SpmCtx_
Definition: util-spm.h:40
suricata-common.h
SpmTableElmt_::DestroyCtx
void(* DestroyCtx)(SpmCtx *)
Definition: util-spm.h:67
util-spm.h
util-spm-bm.h
FatalError
#define FatalError(...)
Definition: util-debug.h:502
SpmCtx_::ctx
void * ctx
Definition: util-spm.h:42
util-validate.h
SpmGlobalThreadCtx_
Definition: util-spm.h:47
SCMalloc
#define SCMalloc(sz)
Definition: util-mem.h:47
SCFree
#define SCFree(p)
Definition: util-mem.h:61
spm_table
SpmTableElmt spm_table[SPM_TABLE_SIZE]
Definition: util-spm.c:62
suricata.h
SpmThreadCtx_::matcher
uint8_t matcher
Definition: util-spm.h:55
SCCalloc
#define SCCalloc(nm, sz)
Definition: util-mem.h:53
BoyerMooreNocaseCtxInit
BmCtx * BoyerMooreNocaseCtxInit(uint8_t *needle, uint16_t needle_len)
Setup a Boyer Moore context for nocase search.
Definition: util-spm-bm.c:105
DEBUG_VALIDATE_BUG_ON
#define DEBUG_VALIDATE_BUG_ON(exp)
Definition: util-validate.h:103
SpmTableElmt_::DestroyThreadCtx
void(* DestroyThreadCtx)(SpmThreadCtx *thread_ctx)
Definition: util-spm.h:64
BoyerMooreCtxDeInit
void BoyerMooreCtxDeInit(BmCtx *bmctx)
Free the memory allocated to Boyer Moore context.
Definition: util-spm-bm.c:119
SpmTableElmt_::DestroyGlobalThreadCtx
void(* DestroyGlobalThreadCtx)(SpmGlobalThreadCtx *g_thread_ctx)
Definition: util-spm.h:62
SpmThreadCtx_
Definition: util-spm.h:54