suricata
util-spm-bs2bm.c
Go to the documentation of this file.
1 /* Copyright (C) 2007-2010 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
22  *
23  * Bs2Bm use a simple context array to determine the characters
24  * that are not present on the pattern. This way on partial matches
25  * broken by a char not present, we can skip to the next character
26  * making less checks
27  */
28 
29 #include "suricata-common.h"
30 #include "suricata.h"
31 
32 #include "util-spm-bs2bm.h"
33 
34 /**
35  * \brief Array setup function for Bs2Bm of bad characters index (not found at the needle)
36  *
37  * \param needle pointer to the pattern we ar searching for
38  * \param needle_len length limit of the needle
39  * \param badchars pointer to an empty array of bachars. The array prepared contains
40  * characters that can't be inside the needle_len. So the skips can be
41  * faster
42  */
43 void Bs2BmBadchars(const uint8_t *needle, uint16_t needle_len, uint8_t *badchars)
44 {
45  uint32_t i;
46  for (i = 0; i < ALPHABET_SIZE; i++)
47  badchars[i] = 1;
48 
49  /* set to 0 the values where index as ascii is present
50  * because they are not badchars
51  */
52  for (i = 0; i < needle_len; i++)
53  badchars[needle[i]] = 0;
54 }
55 
56 /**
57  * \brief Basic search with a bad characters array. The array badchars contains
58  * flags at character's ascii index that can't be inside the needle. So the skips can be
59  * faster
60  *
61  * \param haystack pointer to the buffer to search in
62  * \param haystack_len length limit of the buffer
63  * \param needle pointer to the pattern we ar searching for
64  * \param needle_len length limit of the needle
65  * \param badchars pointer to an array of bachars prepared by Bs2BmBadchars()
66  *
67  * \retval ptr to start of the match; NULL if no match
68  */
69 uint8_t *Bs2Bm(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle,
70  uint16_t needle_len, const uint8_t badchars[])
71 {
72  const uint8_t *h, *n;
73  const uint8_t *hmax = haystack + haystack_len;
74  const uint8_t *nmax = needle + needle_len;
75 
76  if (needle_len == 0 || needle_len > haystack_len)
77  return NULL;
78 
79  for (n = needle; nmax - n <= hmax - haystack; haystack++) {
80  if (*haystack != *n) {
81  continue;
82  }
83  /* one byte needles */
84  if (needle_len == 1)
85  return (uint8_t *)haystack;
86 
87  for (h = haystack+1, n++; nmax - n <= hmax - haystack; h++, n++) {
88  if (*h != *n) {
89  if (badchars[*h] == 1) {
90  /* skip it! */
91  haystack = h;
92  }
93  break;
94  }
95  /* if we run out of needle we fully matched */
96  if (n == nmax - 1 ) {
97  return (uint8_t *)haystack;
98  }
99  }
100  n = needle;
101  }
102 
103  return NULL;
104 }
105 
106 /**
107  * \brief Basic search case less with a bad characters array. The array badchars contains
108  * flags at character's ascii index that can't be inside the needle. So the skips can be
109  * faster
110  *
111  * \param haystack pointer to the buffer to search in
112  * \param haystack_len length limit of the buffer
113  * \param needle pointer to the pattern we ar searching for
114  * \param needle_len length limit of the needle
115  * \param badchars pointer to an array of bachars prepared by Bs2BmBadchars()
116  *
117  * \retval ptr to start of the match; NULL if no match
118  */
119 uint8_t *Bs2BmNocase(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle,
120  uint16_t needle_len, const uint8_t badchars[])
121 {
122  const uint8_t *h, *n;
123  const uint8_t *hmax = haystack + haystack_len;
124  const uint8_t *nmax = needle + needle_len;
125 
126  if (needle_len == 0 || needle_len > haystack_len)
127  return NULL;
128 
129  for (n = needle; nmax - n <= hmax - haystack; haystack++) {
130  if (u8_tolower(*haystack) != u8_tolower(*n)) {
131  continue;
132  }
133  /* one byte needles */
134  if (needle_len == 1)
135  return (uint8_t *)haystack;
136 
137  for (h = haystack+1, n++; nmax - n <= hmax - haystack; h++, n++) {
138  if (u8_tolower(*h) != u8_tolower(*n)) {
139  if (badchars[u8_tolower(*h)] == 1) {
140  /* skip it! */
141  haystack = h;
142  }
143  break;
144  }
145  /* if we run out of needle we fully matched */
146  if (n == nmax - 1) {
147  return (uint8_t *)haystack;
148  }
149  }
150  n = needle;
151  }
152 
153  return NULL;
154 }
Bs2BmBadchars
void Bs2BmBadchars(const uint8_t *needle, uint16_t needle_len, uint8_t *badchars)
Array setup function for Bs2Bm of bad characters index (not found at the needle)
Definition: util-spm-bs2bm.c:43
u8_tolower
#define u8_tolower(c)
Definition: suricata-common.h:436
ALPHABET_SIZE
#define ALPHABET_SIZE
Definition: util-spm-bm.h:30
Bs2Bm
uint8_t * Bs2Bm(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint16_t needle_len, const uint8_t badchars[])
Basic search with a bad characters array. The array badchars contains flags at character's ascii inde...
Definition: util-spm-bs2bm.c:69
suricata-common.h
Bs2BmNocase
uint8_t * Bs2BmNocase(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint16_t needle_len, const uint8_t badchars[])
Basic search case less with a bad characters array. The array badchars contains flags at character's ...
Definition: util-spm-bs2bm.c:119
util-spm-bs2bm.h
suricata.h