suricata
util-memcmp.h
Go to the documentation of this file.
1 /* Copyright (C) 2007-2013 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Victor Julien <victor@inliniac.net>
22  *
23  * Memcmp implementations for SSE3, SSE4.1, SSE4.2 and TILE-Gx SIMD.
24  *
25  * Both SCMemcmp and SCMemcmpLowercase return 0 on a exact match,
26  * 1 on a failed match.
27  */
28 
29 #ifndef __UTIL_MEMCMP_H__
30 #define __UTIL_MEMCMP_H__
31 
32 #include "util-optimize.h"
33 
34 /** \brief compare two patterns, converting the 2nd to lowercase
35  * \warning *ONLY* the 2nd pattern is converted to lowercase
36  */
37 static inline int SCMemcmpLowercase(const void *, const void *, size_t);
38 
39 void MemcmpRegisterTests(void);
40 
41 static inline int
42 MemcmpLowercase(const void *s1, const void *s2, size_t n)
43 {
44  ssize_t i;
45 
46  /* check backwards because we already tested the first
47  * 2 to 4 chars. This way we are more likely to detect
48  * a miss and thus speed up a little... */
49  for (i = n - 1; i >= 0; i--) {
50  if (((uint8_t *)s1)[i] != u8_tolower(*(((uint8_t *)s2)+i)))
51  return 1;
52  }
53 
54  return 0;
55 }
56 
57 #if defined(__SSE4_2__)
58 
59 #include <nmmintrin.h>
60 
61 /* No SIMD support, fall back to plain memcmp and a home grown lowercase one */
62 
63 static inline int SCMemcmp(const void *s1, const void *s2, size_t n)
64 {
65  __m128i b1, b2;
66 
67  int r;
68  /* counter for how far we already matched in the buffer */
69  size_t m = 0;
70 
71  do {
72  /* apparently we can't just read 16 bytes even though
73  * it almost always works fine :) */
74  if (likely(n - m < 16)) {
75  return memcmp(s1, s2, n - m) ? 1 : 0;
76  }
77 
78  /* load the buffers into the 128bit vars */
79  b1 = _mm_loadu_si128((const __m128i *) s1);
80  b2 = _mm_loadu_si128((const __m128i *) s2);
81 
82  /* do the actual compare */
83  m += (r = _mm_cmpestri(b1, n - m, b2, 16,
84  _SIDD_CMP_EQUAL_EACH | _SIDD_MASKED_NEGATIVE_POLARITY));
85 
86  s1 += 16;
87  s2 += 16;
88  } while (r == 16);
89 
90  return ((m == n) ? 0 : 1);
91 }
92 
93 /* Range of values of uppercase characters. We only use the first 2 bytes. */
94 static char scmemcmp_uppercase[16] __attribute__((aligned(16))) = {
95  'A', 'Z', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };
96 
97 /** \brief compare two buffers in a case insensitive way
98  * \param s1 buffer already in lowercase
99  * \param s2 buffer with mixed upper and lowercase
100  */
101 static inline int SCMemcmpLowercase(const void *s1, const void *s2, size_t n)
102 {
103  __m128i b1, b2, mask;
104 
105  int r;
106  /* counter for how far we already matched in the buffer */
107  size_t m = 0;
108 
109  __m128i ucase = _mm_load_si128((const __m128i *) scmemcmp_uppercase);
110  __m128i nulls = _mm_setzero_si128();
111  __m128i uplow = _mm_set1_epi8(0x20);
112 
113  do {
114  /* apparently we can't just read 16 bytes even though
115  * it almost always works fine :) */
116  if (likely(n - m < 16)) {
117  return MemcmpLowercase(s1, s2, n - m);
118  }
119 
120  b1 = _mm_loadu_si128((const __m128i *) s1);
121  b2 = _mm_loadu_si128((const __m128i *) s2);
122  size_t len = n - m;
123 
124  /* The first step is creating a mask that is FF for all uppercase
125  * characters, 00 for all others */
126  mask = _mm_cmpestrm(ucase, 2, b2, len, _SIDD_CMP_RANGES | _SIDD_UNIT_MASK);
127  /* Next we use that mask to create a new: this one has 0x20 for
128  * the uppercase chars, 00 for all other. */
129  mask = _mm_blendv_epi8(nulls, uplow, mask);
130  /* finally, merge the mask and the buffer converting the
131  * uppercase to lowercase */
132  b2 = _mm_add_epi8(b2, mask);
133 
134  /* search using our converted buffer */
135  m += (r = _mm_cmpestri(b1, len, b2, 16,
136  _SIDD_CMP_EQUAL_EACH | _SIDD_MASKED_NEGATIVE_POLARITY));
137 
138  s1 += 16;
139  s2 += 16;
140  } while (r == 16);
141 
142  return ((m == n) ? 0 : 1);
143 }
144 
145 #elif defined(__SSE4_1__)
146 
147 #include <smmintrin.h>
148 
149 #define SCMEMCMP_BYTES 16
150 
151 static inline int SCMemcmp(const void *s1, const void *s2, size_t len)
152 {
153  size_t offset = 0;
154  __m128i b1, b2, c;
155 
156  do {
157  /* apparently we can't just read 16 bytes even though
158  * it almost always works fine :) */
159  if (likely(len - offset < 16)) {
160  return memcmp(s1, s2, len - offset) ? 1 : 0;
161  }
162 
163  /* do unaligned loads using _mm_loadu_si128. On my Core2 E6600 using
164  * _mm_lddqu_si128 was about 2% slower even though it's supposed to
165  * be faster. */
166  b1 = _mm_loadu_si128((const __m128i *) s1);
167  b2 = _mm_loadu_si128((const __m128i *) s2);
168  c = _mm_cmpeq_epi8(b1, b2);
169 
170  int diff = len - offset;
171  if (diff < 16) {
172  int rmask = ~(0xFFFFFFFF << diff);
173 
174  if ((_mm_movemask_epi8(c) & rmask) != rmask) {
175  return 1;
176  }
177  } else {
178  if (_mm_movemask_epi8(c) != 0x0000FFFF) {
179  return 1;
180  }
181  }
182 
183  offset += SCMEMCMP_BYTES;
184  s1 += SCMEMCMP_BYTES;
185  s2 += SCMEMCMP_BYTES;
186  } while (len > offset);
187 
188  return 0;
189 }
190 
191 #define UPPER_LOW 0x40 /* "A" - 1 */
192 #define UPPER_HIGH 0x5B /* "Z" + 1 */
193 
194 static inline int SCMemcmpLowercase(const void *s1, const void *s2, size_t len)
195 {
196  size_t offset = 0;
197  __m128i b1, b2, mask1, mask2, upper1, upper2, nulls, uplow;
198 
199  /* setup registers for upper to lower conversion */
200  upper1 = _mm_set1_epi8(UPPER_LOW);
201  upper2 = _mm_set1_epi8(UPPER_HIGH);
202  nulls = _mm_setzero_si128();
203  uplow = _mm_set1_epi8(0x20);
204 
205  do {
206  /* apparently we can't just read 16 bytes even though
207  * it almost always works fine :) */
208  if (likely(len - offset < 16)) {
209  return MemcmpLowercase(s1, s2, len - offset);
210  }
211 
212  /* unaligned loading of the bytes to compare */
213  b1 = _mm_loadu_si128((const __m128i *) s1);
214  b2 = _mm_loadu_si128((const __m128i *) s2);
215 
216  /* mark all chars bigger than upper1 */
217  mask1 = _mm_cmpgt_epi8(b2, upper1);
218  /* mark all chars lower than upper2 */
219  mask2 = _mm_cmplt_epi8(b2, upper2);
220  /* merge the two, leaving only those that are true in both */
221  mask1 = _mm_cmpeq_epi8(mask1, mask2);
222  /* Next we use that mask to create a new: this one has 0x20 for
223  * the uppercase chars, 00 for all other. */
224  mask1 = _mm_blendv_epi8(nulls, uplow, mask1);
225 
226  /* add to b2, converting uppercase to lowercase */
227  b2 = _mm_add_epi8(b2, mask1);
228 
229  /* now all is lowercase, let's do the actual compare (reuse mask1 reg) */
230  mask1 = _mm_cmpeq_epi8(b1, b2);
231 
232  int diff = len - offset;
233  if (diff < 16) {
234  int rmask = ~(0xFFFFFFFF << diff);
235 
236  if ((_mm_movemask_epi8(mask1) & rmask) != rmask) {
237  return 1;
238  }
239  } else {
240  if (_mm_movemask_epi8(mask1) != 0x0000FFFF) {
241  return 1;
242  }
243  }
244 
245  offset += SCMEMCMP_BYTES;
246  s1 += SCMEMCMP_BYTES;
247  s2 += SCMEMCMP_BYTES;
248  } while (len > offset);
249 
250  return 0;
251 }
252 
253 
254 
255 #elif defined(__SSE3__)
256 
257 #include <pmmintrin.h> /* for SSE3 */
258 
259 #define SCMEMCMP_BYTES 16
260 
261 static inline int SCMemcmp(const void *s1, const void *s2, size_t len)
262 {
263  size_t offset = 0;
264  __m128i b1, b2, c;
265 
266  do {
267  /* apparently we can't just read 16 bytes even though
268  * it almost always works fine :) */
269  if (likely(len - offset < 16)) {
270  return memcmp(s1, s2, len - offset) ? 1 : 0;
271  }
272 
273  /* do unaligned loads using _mm_loadu_si128. On my Core2 E6600 using
274  * _mm_lddqu_si128 was about 2% slower even though it's supposed to
275  * be faster. */
276  b1 = _mm_loadu_si128((const __m128i *) s1);
277  b2 = _mm_loadu_si128((const __m128i *) s2);
278  c = _mm_cmpeq_epi8(b1, b2);
279 
280  int diff = len - offset;
281  if (diff < 16) {
282  int rmask = ~(0xFFFFFFFF << diff);
283 
284  if ((_mm_movemask_epi8(c) & rmask) != rmask) {
285  return 1;
286  }
287  } else {
288  if (_mm_movemask_epi8(c) != 0x0000FFFF) {
289  return 1;
290  }
291  }
292 
293  offset += SCMEMCMP_BYTES;
294  s1 += SCMEMCMP_BYTES;
295  s2 += SCMEMCMP_BYTES;
296  } while (len > offset);
297 
298  return 0;
299 }
300 
301 #define UPPER_LOW 0x40 /* "A" - 1 */
302 #define UPPER_HIGH 0x5B /* "Z" + 1 */
303 #define UPPER_DELTA 0xDF /* 0xFF - 0x20 */
304 
305 static inline int SCMemcmpLowercase(const void *s1, const void *s2, size_t len)
306 {
307  size_t offset = 0;
308  __m128i b1, b2, mask1, mask2, upper1, upper2, delta;
309 
310  /* setup registers for upper to lower conversion */
311  upper1 = _mm_set1_epi8(UPPER_LOW);
312  upper2 = _mm_set1_epi8(UPPER_HIGH);
313  delta = _mm_set1_epi8(UPPER_DELTA);
314 
315  do {
316  /* apparently we can't just read 16 bytes even though
317  * it almost always works fine :) */
318  if (likely(len - offset < 16)) {
319  return MemcmpLowercase(s1, s2, len - offset);
320  }
321 
322  /* unaligned loading of the bytes to compare */
323  b1 = _mm_loadu_si128((const __m128i *) s1);
324  b2 = _mm_loadu_si128((const __m128i *) s2);
325 
326  /* mark all chars bigger than upper1 */
327  mask1 = _mm_cmpgt_epi8(b2, upper1);
328  /* mark all chars lower than upper2 */
329  mask2 = _mm_cmplt_epi8(b2, upper2);
330  /* merge the two, leaving only those that are true in both */
331  mask1 = _mm_cmpeq_epi8(mask1, mask2);
332 
333  /* sub delta leaves 0x20 only for uppercase positions, the
334  rest is 0x00 due to the saturation (reuse mask1 reg)*/
335  mask1 = _mm_subs_epu8(mask1, delta);
336 
337  /* add to b2, converting uppercase to lowercase */
338  b2 = _mm_add_epi8(b2, mask1);
339 
340  /* now all is lowercase, let's do the actual compare (reuse mask1 reg) */
341  mask1 = _mm_cmpeq_epi8(b1, b2);
342 
343  int diff = len - offset;
344  if (diff < 16) {
345  int rmask = ~(0xFFFFFFFF << diff);
346 
347  if ((_mm_movemask_epi8(mask1) & rmask) != rmask) {
348  return 1;
349  }
350  } else {
351  if (_mm_movemask_epi8(mask1) != 0x0000FFFF) {
352  return 1;
353  }
354  }
355 
356  offset += SCMEMCMP_BYTES;
357  s1 += SCMEMCMP_BYTES;
358  s2 += SCMEMCMP_BYTES;
359  } while (len > offset);
360 
361  return 0;
362 }
363 
364 #else
365 
366 /* No SIMD support, fall back to plain memcmp and a home grown lowercase one */
367 
368 /* wrapper around memcmp to match the retvals of the SIMD implementations */
369 #define SCMemcmp(a,b,c) ({ \
370  memcmp((a), (b), (c)) ? 1 : 0; \
371 })
372 
373 static inline int SCMemcmpLowercase(const void *s1, const void *s2, size_t len)
374 {
375  return MemcmpLowercase(s1, s2, len);
376 }
377 
378 #endif /* SIMD */
379 
380 #endif /* __UTIL_MEMCMP_H__ */
381 
#define SCMemcmp(a, b, c)
Definition: util-memcmp.h:369
void MemcmpRegisterTests(void)
Definition: util-memcmp.c:384
uint64_t offset
enum @34 __attribute__
DNP3 application header.
#define u8_tolower(c)
Definition: suricata.h:183
SCMutex m
Definition: flow-hash.h:105
uint8_t len
#define likely(expr)
Definition: util-optimize.h:32