29 #ifndef __UTIL_MEMCMP_H__ 30 #define __UTIL_MEMCMP_H__ 37 static inline int SCMemcmpLowercase(
const void *,
const void *,
size_t);
42 MemcmpLowercase(
const void *s1,
const void *s2,
size_t n)
49 for (i = n - 1; i >= 0; i--) {
50 if (((uint8_t *)s1)[i] !=
u8_tolower(*(((uint8_t *)s2)+i)))
57 #if defined(__SSE4_2__) 59 #include <nmmintrin.h> 63 static inline int SCMemcmp(
const void *s1,
const void *s2,
size_t n)
75 return memcmp(s1, s2, n - m) ? 1 : 0;
79 b1 = _mm_loadu_si128((
const __m128i *) s1);
80 b2 = _mm_loadu_si128((
const __m128i *) s2);
83 m += (r = _mm_cmpestri(b1, n - m, b2, 16,
84 _SIDD_CMP_EQUAL_EACH | _SIDD_MASKED_NEGATIVE_POLARITY));
90 return ((m == n) ? 0 : 1);
94 static char scmemcmp_uppercase[16]
__attribute__((aligned(16))) = {
95 'A',
'Z', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };
101 static inline int SCMemcmpLowercase(
const void *s1,
const void *s2,
size_t n)
103 __m128i b1, b2, mask;
109 __m128i ucase = _mm_load_si128((
const __m128i *) scmemcmp_uppercase);
110 __m128i nulls = _mm_setzero_si128();
111 __m128i uplow = _mm_set1_epi8(0x20);
117 return MemcmpLowercase(s1, s2, n - m);
120 b1 = _mm_loadu_si128((
const __m128i *) s1);
121 b2 = _mm_loadu_si128((
const __m128i *) s2);
126 mask = _mm_cmpestrm(ucase, 2, b2, len, _SIDD_CMP_RANGES | _SIDD_UNIT_MASK);
129 mask = _mm_blendv_epi8(nulls, uplow, mask);
132 b2 = _mm_add_epi8(b2, mask);
135 m += (r = _mm_cmpestri(b1, len, b2, 16,
136 _SIDD_CMP_EQUAL_EACH | _SIDD_MASKED_NEGATIVE_POLARITY));
142 return ((m == n) ? 0 : 1);
145 #elif defined(__SSE4_1__) 147 #include <smmintrin.h> 149 #define SCMEMCMP_BYTES 16 151 static inline int SCMemcmp(
const void *s1,
const void *s2,
size_t len)
159 if (
likely(len - offset < 16)) {
160 return memcmp(s1, s2, len - offset) ? 1 : 0;
166 b1 = _mm_loadu_si128((
const __m128i *) s1);
167 b2 = _mm_loadu_si128((
const __m128i *) s2);
168 c = _mm_cmpeq_epi8(b1, b2);
172 int rmask = ~(0xFFFFFFFF << diff);
174 if ((_mm_movemask_epi8(c) & rmask) != rmask) {
178 if (_mm_movemask_epi8(c) != 0x0000FFFF) {
183 offset += SCMEMCMP_BYTES;
184 s1 += SCMEMCMP_BYTES;
185 s2 += SCMEMCMP_BYTES;
186 }
while (len > offset);
191 #define UPPER_LOW 0x40 192 #define UPPER_HIGH 0x5B 194 static inline int SCMemcmpLowercase(
const void *s1,
const void *s2,
size_t len)
197 __m128i b1, b2, mask1, mask2, upper1, upper2, nulls, uplow;
200 upper1 = _mm_set1_epi8(UPPER_LOW);
201 upper2 = _mm_set1_epi8(UPPER_HIGH);
202 nulls = _mm_setzero_si128();
203 uplow = _mm_set1_epi8(0x20);
208 if (
likely(len - offset < 16)) {
209 return MemcmpLowercase(s1, s2, len - offset);
213 b1 = _mm_loadu_si128((
const __m128i *) s1);
214 b2 = _mm_loadu_si128((
const __m128i *) s2);
217 mask1 = _mm_cmpgt_epi8(b2, upper1);
219 mask2 = _mm_cmplt_epi8(b2, upper2);
221 mask1 = _mm_cmpeq_epi8(mask1, mask2);
224 mask1 = _mm_blendv_epi8(nulls, uplow, mask1);
227 b2 = _mm_add_epi8(b2, mask1);
230 mask1 = _mm_cmpeq_epi8(b1, b2);
234 int rmask = ~(0xFFFFFFFF << diff);
236 if ((_mm_movemask_epi8(mask1) & rmask) != rmask) {
240 if (_mm_movemask_epi8(mask1) != 0x0000FFFF) {
245 offset += SCMEMCMP_BYTES;
246 s1 += SCMEMCMP_BYTES;
247 s2 += SCMEMCMP_BYTES;
248 }
while (len > offset);
255 #elif defined(__SSE3__) 257 #include <pmmintrin.h> 259 #define SCMEMCMP_BYTES 16 261 static inline int SCMemcmp(
const void *s1,
const void *s2,
size_t len)
269 if (
likely(len - offset < 16)) {
270 return memcmp(s1, s2, len - offset) ? 1 : 0;
276 b1 = _mm_loadu_si128((
const __m128i *) s1);
277 b2 = _mm_loadu_si128((
const __m128i *) s2);
278 c = _mm_cmpeq_epi8(b1, b2);
282 int rmask = ~(0xFFFFFFFF << diff);
284 if ((_mm_movemask_epi8(c) & rmask) != rmask) {
288 if (_mm_movemask_epi8(c) != 0x0000FFFF) {
293 offset += SCMEMCMP_BYTES;
294 s1 += SCMEMCMP_BYTES;
295 s2 += SCMEMCMP_BYTES;
296 }
while (len > offset);
301 #define UPPER_LOW 0x40 302 #define UPPER_HIGH 0x5B 303 #define UPPER_DELTA 0xDF 305 static inline int SCMemcmpLowercase(
const void *s1,
const void *s2,
size_t len)
308 __m128i b1, b2, mask1, mask2, upper1, upper2, delta;
311 upper1 = _mm_set1_epi8(UPPER_LOW);
312 upper2 = _mm_set1_epi8(UPPER_HIGH);
313 delta = _mm_set1_epi8(UPPER_DELTA);
318 if (
likely(len - offset < 16)) {
319 return MemcmpLowercase(s1, s2, len - offset);
323 b1 = _mm_loadu_si128((
const __m128i *) s1);
324 b2 = _mm_loadu_si128((
const __m128i *) s2);
327 mask1 = _mm_cmpgt_epi8(b2, upper1);
329 mask2 = _mm_cmplt_epi8(b2, upper2);
331 mask1 = _mm_cmpeq_epi8(mask1, mask2);
335 mask1 = _mm_subs_epu8(mask1, delta);
338 b2 = _mm_add_epi8(b2, mask1);
341 mask1 = _mm_cmpeq_epi8(b1, b2);
345 int rmask = ~(0xFFFFFFFF << diff);
347 if ((_mm_movemask_epi8(mask1) & rmask) != rmask) {
351 if (_mm_movemask_epi8(mask1) != 0x0000FFFF) {
356 offset += SCMEMCMP_BYTES;
357 s1 += SCMEMCMP_BYTES;
358 s2 += SCMEMCMP_BYTES;
359 }
while (len > offset);
369 #define SCMemcmp(a,b,c) ({ \ 370 memcmp((a), (b), (c)) ? 1 : 0; \ 373 static inline int SCMemcmpLowercase(
const void *s1,
const void *s2,
size_t len)
375 return MemcmpLowercase(s1, s2, len);
#define SCMemcmp(a, b, c)
void MemcmpRegisterTests(void)
enum @34 __attribute__
DNP3 application header.