suricata
util-hash-lookup3.c
Go to the documentation of this file.
1 /*
2 -------------------------------------------------------------------------------
3 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
4 
5 These are functions for producing 32-bit hashes for hash table lookup.
6 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
7 are externally useful functions. Routines to test the hash are included
8 if SELF_TEST is defined. You can use this free for any purpose. It's in
9 the public domain. It has no warranty.
10 
11 You probably want to use hashlittle(). hashlittle() and hashbig()
12 hash byte arrays. hashlittle() is is faster than hashbig() on
13 little-endian machines. Intel and AMD are little-endian machines.
14 On second thought, you probably want hashlittle2(), which is identical to
15 hashlittle() except it returns two 32-bit hashes for the price of one.
16 You could implement hashbig2() if you wanted but I haven't bothered here.
17 
18 If you want to find a hash of, say, exactly 7 integers, do
19  a = i1; b = i2; c = i3;
20  mix(a,b,c);
21  a += i4; b += i5; c += i6;
22  mix(a,b,c);
23  a += i7;
24  final(a,b,c);
25 then use c as the hash value. If you have a variable length array of
26 4-byte integers to hash, use hashword(). If you have a byte array (like
27 a character string), use hashlittle(). If you have several byte arrays, or
28 a mix of things, see the comments above hashlittle().
29 
30 Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
31 then mix those integers. This is fast (you can do a lot more thorough
32 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
33 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
34 -------------------------------------------------------------------------------
35 */
36 //#define SELF_TEST 1
37 
38 #include <stdio.h> /* defines printf for tests */
39 #include <time.h> /* defines time_t for timings in the test */
40 #include <stdint.h> /* defines uint32_t etc */
41 #include <sys/param.h> /* attempt to define endianness */
42 #ifdef linux
43 # include <endian.h> /* attempt to define endianness */
44 #endif
45 #include "util-hash-lookup3.h"
46 
47 /*
48  * My best guess at if you are big-endian or little-endian. This may
49  * need adjustment.
50  */
51 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
52  __BYTE_ORDER == __LITTLE_ENDIAN) || \
53  (defined(i386) || defined(__i386__) || defined(__i486__) || \
54  defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
55 # define HASH_LITTLE_ENDIAN 1
56 # define HASH_BIG_ENDIAN 0
57 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
58  __BYTE_ORDER == __BIG_ENDIAN) || \
59  (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
60 # define HASH_LITTLE_ENDIAN 0
61 # define HASH_BIG_ENDIAN 1
62 #else
63 # define HASH_LITTLE_ENDIAN 0
64 # define HASH_BIG_ENDIAN 0
65 #endif
66 
67 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
68 
69 /*
70 -------------------------------------------------------------------------------
71 mix -- mix 3 32-bit values reversibly.
72 
73 This is reversible, so any information in (a,b,c) before mix() is
74 still in (a,b,c) after mix().
75 
76 If four pairs of (a,b,c) inputs are run through mix(), or through
77 mix() in reverse, there are at least 32 bits of the output that
78 are sometimes the same for one pair and different for another pair.
79 This was tested for:
80 * pairs that differed by one bit, by two bits, in any combination
81  of top bits of (a,b,c), or in any combination of bottom bits of
82  (a,b,c).
83 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
84  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
85  is commonly produced by subtraction) look like a single 1-bit
86  difference.
87 * the base values were pseudorandom, all zero but one bit set, or
88  all zero plus a counter that starts at zero.
89 
90 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
91 satisfy this are
92  4 6 8 16 19 4
93  9 15 3 18 27 15
94  14 9 3 7 17 3
95 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
96 for "differ" defined as + with a one-bit base and a two-bit delta. I
97 used http://burtleburtle.net/bob/hash/avalanche.html to choose
98 the operations, constants, and arrangements of the variables.
99 
100 This does not achieve avalanche. There are input bits of (a,b,c)
101 that fail to affect some output bits of (a,b,c), especially of a. The
102 most thoroughly mixed value is c, but it doesn't really even achieve
103 avalanche in c.
104 
105 This allows some parallelism. Read-after-writes are good at doubling
106 the number of bits affected, so the goal of mixing pulls in the opposite
107 direction as the goal of parallelism. I did what I could. Rotates
108 seem to cost as much as shifts on every machine I could lay my hands
109 on, and rotates are much kinder to the top and bottom bits, so I used
110 rotates.
111 -------------------------------------------------------------------------------
112 */
113 #define mix(a,b,c) \
114 { \
115  a -= c; a ^= rot(c, 4); c += b; \
116  b -= a; b ^= rot(a, 6); a += c; \
117  c -= b; c ^= rot(b, 8); b += a; \
118  a -= c; a ^= rot(c,16); c += b; \
119  b -= a; b ^= rot(a,19); a += c; \
120  c -= b; c ^= rot(b, 4); b += a; \
121 }
122 
123 /*
124 -------------------------------------------------------------------------------
125 final -- final mixing of 3 32-bit values (a,b,c) into c
126 
127 Pairs of (a,b,c) values differing in only a few bits will usually
128 produce values of c that look totally different. This was tested for
129 * pairs that differed by one bit, by two bits, in any combination
130  of top bits of (a,b,c), or in any combination of bottom bits of
131  (a,b,c).
132 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
133  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
134  is commonly produced by subtraction) look like a single 1-bit
135  difference.
136 * the base values were pseudorandom, all zero but one bit set, or
137  all zero plus a counter that starts at zero.
138 
139 These constants passed:
140  14 11 25 16 4 14 24
141  12 14 25 16 4 14 24
142 and these came close:
143  4 8 15 26 3 22 24
144  10 8 15 26 3 22 24
145  11 8 15 26 3 22 24
146 -------------------------------------------------------------------------------
147 */
148 #define final(a,b,c) \
149 { \
150  c ^= b; c -= rot(b,14); \
151  a ^= c; a -= rot(c,11); \
152  b ^= a; b -= rot(a,25); \
153  c ^= b; c -= rot(b,16); \
154  a ^= c; a -= rot(c,4); \
155  b ^= a; b -= rot(a,14); \
156  c ^= b; c -= rot(b,24); \
157 }
158 
159 /*
160 --------------------------------------------------------------------
161  This works on all machines. To be useful, it requires
162  -- that the key be an array of uint32_t's, and
163  -- that the length be the number of uint32_t's in the key
164 
165  The function hashword() is identical to hashlittle() on little-endian
166  machines, and identical to hashbig() on big-endian machines,
167  except that the length has to be measured in uint32_ts rather than in
168  bytes. hashlittle() is more complicated than hashword() only because
169  hashlittle() has to dance around fitting the key bytes into registers.
170 --------------------------------------------------------------------
171 */
172 uint32_t hashword(
173 const uint32_t *k, /* the key, an array of uint32_t values */
174 size_t length, /* the length of the key, in uint32_ts */
175 uint32_t initval) /* the previous hash, or an arbitrary value */
176 {
177  uint32_t a,b,c;
178 
179  /* Set up the internal state */
180  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
181 
182  /*------------------------------------------------- handle most of the key */
183  while (length > 3)
184  {
185  a += k[0];
186  b += k[1];
187  c += k[2];
188  mix(a,b,c);
189  length -= 3;
190  k += 3;
191  }
192 
193  /*------------------------------------------- handle the last 3 uint32_t's */
194  switch(length) /* all the case statements fall through */
195  {
196  case 3 : c+=k[2]; /* fall through */
197  case 2 : b+=k[1]; /* fall through */
198  case 1 : a+=k[0];
199  final(a,b,c); /* fall through */
200  case 0: /* case 0: nothing left to add */
201  break;
202  }
203  /*------------------------------------------------------ report the result */
204  return c;
205 }
206 
207 
208 /*
209 --------------------------------------------------------------------
210 hashword2() -- same as hashword(), but take two seeds and return two
211 32-bit values. pc and pb must both be nonnull, and *pc and *pb must
212 both be initialized with seeds. If you pass in (*pb)==0, the output
213 (*pc) will be the same as the return value from hashword().
214 --------------------------------------------------------------------
215 */
216 void hashword2 (
217 const uint32_t *k, /* the key, an array of uint32_t values */
218 size_t length, /* the length of the key, in uint32_ts */
219 uint32_t *pc, /* IN: seed OUT: primary hash value */
220 uint32_t *pb) /* IN: more seed OUT: secondary hash value */
221 {
222  uint32_t a,b,c;
223 
224  /* Set up the internal state */
225  a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
226  c += *pb;
227 
228  /*------------------------------------------------- handle most of the key */
229  while (length > 3)
230  {
231  a += k[0];
232  b += k[1];
233  c += k[2];
234  mix(a,b,c);
235  length -= 3;
236  k += 3;
237  }
238 
239  /*------------------------------------------- handle the last 3 uint32_t's */
240  switch(length) /* all the case statements fall through */
241  {
242  case 3 : c+=k[2]; /* fall through */
243  case 2 : b+=k[1]; /* fall through */
244  case 1 : a+=k[0];
245  final(a,b,c); /* fall through */
246  case 0: /* case 0: nothing left to add */
247  break;
248  }
249  /*------------------------------------------------------ report the result */
250  *pc=c; *pb=b;
251 }
252 
253 
254 /*
255 -------------------------------------------------------------------------------
256 hashlittle() -- hash a variable-length key into a 32-bit value
257  k : the key (the unaligned variable-length array of bytes)
258  length : the length of the key, counting by bytes
259  initval : can be any 4-byte value
260 Returns a 32-bit value. Every bit of the key affects every bit of
261 the return value. Two keys differing by one or two bits will have
262 totally different hash values.
263 
264 The best hash table sizes are powers of 2. There is no need to do
265 mod a prime (mod is sooo slow!). If you need less than 32 bits,
266 use a bitmask. For example, if you need only 10 bits, do
267  h = (h & hashmask(10));
268 In which case, the hash table should have hashsize(10) elements.
269 
270 If you are hashing n strings (uint8_t **)k, do it like this:
271  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
272 
273 By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
274 code any way you wish, private, educational, or commercial. It's free.
275 
276 Use for hash table lookup, or anything where one collision in 2^^32 is
277 acceptable. Do NOT use for cryptographic purposes.
278 -------------------------------------------------------------------------------
279 */
280 
281 uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
282 {
283  uint32_t a,b,c; /* internal state */
284  union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
285 
286  /* Set up the internal state */
287  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
288 
289  u.ptr = key;
290  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
291  const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
292 
293  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
294  while (length > 12)
295  {
296  a += k[0];
297  b += k[1];
298  c += k[2];
299  mix(a,b,c);
300  length -= 12;
301  k += 3;
302  }
303 
304  /*----------------------------- handle the last (probably partial) block */
305  /*
306  * "k[2]&0xffffff" actually reads beyond the end of the string, but
307  * then masks off the part it's not allowed to read. Because the
308  * string is aligned, the masked-off tail is in the same word as the
309  * rest of the string. Every machine with memory protection I've seen
310  * does it on word boundaries, so is OK with this. But VALGRIND will
311  * still catch it and complain. The masking trick does make the hash
312  * noticeably faster for short strings (like English words).
313  */
314 #ifndef VALGRIND
315 
316  switch(length)
317  {
318  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
319  case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
320  case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
321  case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
322  case 8 : b+=k[1]; a+=k[0]; break;
323  case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
324  case 6 : b+=k[1]&0xffff; a+=k[0]; break;
325  case 5 : b+=k[1]&0xff; a+=k[0]; break;
326  case 4 : a+=k[0]; break;
327  case 3 : a+=k[0]&0xffffff; break;
328  case 2 : a+=k[0]&0xffff; break;
329  case 1 : a+=k[0]&0xff; break;
330  case 0 : return c; /* zero length strings require no mixing */
331  }
332 
333 #else /* make valgrind happy */
334 
335  const uint8_t *k8 = (const uint8_t *)k;
336 
337  switch(length)
338  {
339  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
340  case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
341  case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
342  case 9 : c+=k8[8]; /* fall through */
343  case 8 : b+=k[1]; a+=k[0]; break;
344  case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
345  case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
346  case 5 : b+=k8[4]; /* fall through */
347  case 4 : a+=k[0]; break;
348  case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
349  case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
350  case 1 : a+=k8[0]; break;
351  case 0 : return c;
352  }
353 
354 #endif /* !valgrind */
355 
356  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
357  const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
358  const uint8_t *k8;
359 
360  /*--------------- all but last block: aligned reads and different mixing */
361  while (length > 12)
362  {
363  a += k[0] + (((uint32_t)k[1])<<16);
364  b += k[2] + (((uint32_t)k[3])<<16);
365  c += k[4] + (((uint32_t)k[5])<<16);
366  mix(a,b,c);
367  length -= 12;
368  k += 6;
369  }
370 
371  /*----------------------------- handle the last (probably partial) block */
372  k8 = (const uint8_t *)k;
373  switch(length)
374  {
375  case 12: c+=k[4]+(((uint32_t)k[5])<<16);
376  b+=k[2]+(((uint32_t)k[3])<<16);
377  a+=k[0]+(((uint32_t)k[1])<<16);
378  break;
379  case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
380  case 10: c+=k[4];
381  b+=k[2]+(((uint32_t)k[3])<<16);
382  a+=k[0]+(((uint32_t)k[1])<<16);
383  break;
384  case 9 : c+=k8[8]; /* fall through */
385  case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
386  a+=k[0]+(((uint32_t)k[1])<<16);
387  break;
388  case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
389  case 6 : b+=k[2];
390  a+=k[0]+(((uint32_t)k[1])<<16);
391  break;
392  case 5 : b+=k8[4]; /* fall through */
393  case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
394  break;
395  case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
396  case 2 : a+=k[0];
397  break;
398  case 1 : a+=k8[0];
399  break;
400  case 0 : return c; /* zero length requires no mixing */
401  }
402 
403  } else { /* need to read the key one byte at a time */
404  const uint8_t *k = (const uint8_t *)key;
405 
406  /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
407  while (length > 12)
408  {
409  a += k[0];
410  a += ((uint32_t)k[1])<<8;
411  a += ((uint32_t)k[2])<<16;
412  a += ((uint32_t)k[3])<<24;
413  b += k[4];
414  b += ((uint32_t)k[5])<<8;
415  b += ((uint32_t)k[6])<<16;
416  b += ((uint32_t)k[7])<<24;
417  c += k[8];
418  c += ((uint32_t)k[9])<<8;
419  c += ((uint32_t)k[10])<<16;
420  c += ((uint32_t)k[11])<<24;
421  mix(a,b,c);
422  length -= 12;
423  k += 12;
424  }
425 
426  /*-------------------------------- last block: affect all 32 bits of (c) */
427  switch(length) /* all the case statements fall through */
428  {
429  case 12: c+=((uint32_t)k[11])<<24; /* fall through */
430  case 11: c+=((uint32_t)k[10])<<16; /* fall through */
431  case 10: c+=((uint32_t)k[9])<<8; /* fall through */
432  case 9 : c+=k[8]; /* fall through */
433  case 8 : b+=((uint32_t)k[7])<<24; /* fall through */
434  case 7 : b+=((uint32_t)k[6])<<16; /* fall through */
435  case 6 : b+=((uint32_t)k[5])<<8; /* fall through */
436  case 5 : b+=k[4]; /* fall through */
437  case 4 : a+=((uint32_t)k[3])<<24; /* fall through */
438  case 3 : a+=((uint32_t)k[2])<<16; /* fall through */
439  case 2 : a+=((uint32_t)k[1])<<8; /* fall through */
440  case 1 : a+=k[0];
441  break;
442  case 0 : return c;
443  }
444  }
445 
446  final(a,b,c);
447  return c;
448 }
449 
450 
451 /*
452 -------------------------------------------------------------------------------
453 hashlittle_safe() -- hash a variable-length key into a 32-bit value
454  k : the key (the unaligned variable-length array of bytes)
455  length : the length of the key, counting by bytes
456  initval : can be any 4-byte value
457 Returns a 32-bit value. Every bit of the key affects every bit of
458 the return value. Two keys differing by one or two bits will have
459 totally different hash values.
460 
461 The best hash table sizes are powers of 2. There is no need to do
462 mod a prime (mod is sooo slow!). If you need less than 32 bits,
463 use a bitmask. For example, if you need only 10 bits, do
464  h = (h & hashmask(10));
465 In which case, the hash table should have hashsize(10) elements.
466 
467 If you are hashing n strings (uint8_t **)k, do it like this:
468  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
469 
470 By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
471 code any way you wish, private, educational, or commercial. It's free.
472 
473 This version has been modified from hashlittle() above to avoid accesses beyond
474 the last byte of the key, which causes warnings from Valgrind and Address
475 Sanitizer.
476 
477 Use for hash table lookup, or anything where one collision in 2^^32 is
478 acceptable. Do NOT use for cryptographic purposes.
479 -------------------------------------------------------------------------------
480 */
481 
482 uint32_t hashlittle_safe(const void *key, size_t length, uint32_t initval)
483 {
484  uint32_t a,b,c; /* internal state */
485  union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
486 
487  /* Set up the internal state */
488  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
489 
490  u.ptr = key;
491  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
492  const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
493 
494  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
495  while (length > 12)
496  {
497  a += k[0];
498  b += k[1];
499  c += k[2];
500  mix(a,b,c);
501  length -= 12;
502  k += 3;
503  }
504 
505  /*----------------------------- handle the last (probably partial) block */
506  /*
507  * Note that unlike hashlittle() above, we use the "safe" version of this
508  * block that is #ifdef VALGRIND above, in order to avoid warnings from
509  * Valgrind or Address Sanitizer.
510  */
511 
512  const uint8_t *k8 = (const uint8_t *)k;
513 
514  switch(length)
515  {
516  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
517  case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
518  case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
519  case 9 : c+=k8[8]; /* fall through */
520  case 8 : b+=k[1]; a+=k[0]; break;
521  case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
522  case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
523  case 5 : b+=k8[4]; /* fall through */
524  case 4 : a+=k[0]; break;
525  case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
526  case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
527  case 1 : a+=k8[0]; break;
528  case 0 : return c;
529  }
530  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
531  const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
532  const uint8_t *k8;
533 
534  /*--------------- all but last block: aligned reads and different mixing */
535  while (length > 12)
536  {
537  a += k[0] + (((uint32_t)k[1])<<16);
538  b += k[2] + (((uint32_t)k[3])<<16);
539  c += k[4] + (((uint32_t)k[5])<<16);
540  mix(a,b,c);
541  length -= 12;
542  k += 6;
543  }
544 
545  /*----------------------------- handle the last (probably partial) block */
546  k8 = (const uint8_t *)k;
547  switch(length)
548  {
549  case 12: c+=k[4]+(((uint32_t)k[5])<<16);
550  b+=k[2]+(((uint32_t)k[3])<<16);
551  a+=k[0]+(((uint32_t)k[1])<<16);
552  break;
553  case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
554  case 10: c+=k[4];
555  b+=k[2]+(((uint32_t)k[3])<<16);
556  a+=k[0]+(((uint32_t)k[1])<<16);
557  break;
558  case 9 : c+=k8[8]; /* fall through */
559  case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
560  a+=k[0]+(((uint32_t)k[1])<<16);
561  break;
562  case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
563  case 6 : b+=k[2];
564  a+=k[0]+(((uint32_t)k[1])<<16);
565  break;
566  case 5 : b+=k8[4]; /* fall through */
567  case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
568  break;
569  case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
570  case 2 : a+=k[0];
571  break;
572  case 1 : a+=k8[0];
573  break;
574  case 0 : return c; /* zero length requires no mixing */
575  }
576 
577  } else { /* need to read the key one byte at a time */
578  const uint8_t *k = (const uint8_t *)key;
579 
580  /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
581  while (length > 12)
582  {
583  a += k[0];
584  a += ((uint32_t)k[1])<<8;
585  a += ((uint32_t)k[2])<<16;
586  a += ((uint32_t)k[3])<<24;
587  b += k[4];
588  b += ((uint32_t)k[5])<<8;
589  b += ((uint32_t)k[6])<<16;
590  b += ((uint32_t)k[7])<<24;
591  c += k[8];
592  c += ((uint32_t)k[9])<<8;
593  c += ((uint32_t)k[10])<<16;
594  c += ((uint32_t)k[11])<<24;
595  mix(a,b,c);
596  length -= 12;
597  k += 12;
598  }
599 
600  /*-------------------------------- last block: affect all 32 bits of (c) */
601  switch(length) /* all the case statements fall through */
602  {
603  case 12: c+=((uint32_t)k[11])<<24; /* fall through */
604  case 11: c+=((uint32_t)k[10])<<16; /* fall through */
605  case 10: c+=((uint32_t)k[9])<<8; /* fall through */
606  case 9 : c+=k[8]; /* fall through */
607  case 8 : b+=((uint32_t)k[7])<<24; /* fall through */
608  case 7 : b+=((uint32_t)k[6])<<16; /* fall through */
609  case 6 : b+=((uint32_t)k[5])<<8; /* fall through */
610  case 5 : b+=k[4]; /* fall through */
611  case 4 : a+=((uint32_t)k[3])<<24; /* fall through */
612  case 3 : a+=((uint32_t)k[2])<<16; /* fall through */
613  case 2 : a+=((uint32_t)k[1])<<8; /* fall through */
614  case 1 : a+=k[0];
615  break;
616  case 0 : return c;
617  }
618  }
619 
620  final(a,b,c);
621  return c;
622 }
623 
624 
625 /*
626  * hashlittle2: return 2 32-bit hash values
627  *
628  * This is identical to hashlittle(), except it returns two 32-bit hash
629  * values instead of just one. This is good enough for hash table
630  * lookup with 2^^64 buckets, or if you want a second hash if you're not
631  * happy with the first, or if you want a probably-unique 64-bit ID for
632  * the key. *pc is better mixed than *pb, so use *pc first. If you want
633  * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
634  */
636  const void *key, /* the key to hash */
637  size_t length, /* length of the key */
638  uint32_t *pc, /* IN: primary initval, OUT: primary hash */
639  uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
640 {
641  uint32_t a,b,c; /* internal state */
642  union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
643 
644  /* Set up the internal state */
645  a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
646  c += *pb;
647 
648  u.ptr = key;
649  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
650  const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
651 
652  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
653  while (length > 12)
654  {
655  a += k[0];
656  b += k[1];
657  c += k[2];
658  mix(a,b,c);
659  length -= 12;
660  k += 3;
661  }
662 
663  /*----------------------------- handle the last (probably partial) block */
664  /*
665  * "k[2]&0xffffff" actually reads beyond the end of the string, but
666  * then masks off the part it's not allowed to read. Because the
667  * string is aligned, the masked-off tail is in the same word as the
668  * rest of the string. Every machine with memory protection I've seen
669  * does it on word boundaries, so is OK with this. But VALGRIND will
670  * still catch it and complain. The masking trick does make the hash
671  * noticeably faster for short strings (like English words).
672  */
673 #ifndef VALGRIND
674 
675  switch(length)
676  {
677  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
678  case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
679  case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
680  case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
681  case 8 : b+=k[1]; a+=k[0]; break;
682  case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
683  case 6 : b+=k[1]&0xffff; a+=k[0]; break;
684  case 5 : b+=k[1]&0xff; a+=k[0]; break;
685  case 4 : a+=k[0]; break;
686  case 3 : a+=k[0]&0xffffff; break;
687  case 2 : a+=k[0]&0xffff; break;
688  case 1 : a+=k[0]&0xff; break;
689  case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
690  }
691 
692 #else /* make valgrind happy */
693 
694  const uint8_t *k8 = (const uint8_t *)k;
695  switch(length)
696  {
697  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
698  case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
699  case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
700  case 9 : c+=k8[8]; /* fall through */
701  case 8 : b+=k[1]; a+=k[0]; break;
702  case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
703  case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
704  case 5 : b+=k8[4]; /* fall through */
705  case 4 : a+=k[0]; break;
706  case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
707  case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
708  case 1 : a+=k8[0]; break;
709  case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
710  }
711 
712 #endif /* !valgrind */
713 
714  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
715  const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
716  const uint8_t *k8;
717 
718  /*--------------- all but last block: aligned reads and different mixing */
719  while (length > 12)
720  {
721  a += k[0] + (((uint32_t)k[1])<<16);
722  b += k[2] + (((uint32_t)k[3])<<16);
723  c += k[4] + (((uint32_t)k[5])<<16);
724  mix(a,b,c);
725  length -= 12;
726  k += 6;
727  }
728 
729  /*----------------------------- handle the last (probably partial) block */
730  k8 = (const uint8_t *)k;
731  switch(length)
732  {
733  case 12: c+=k[4]+(((uint32_t)k[5])<<16);
734  b+=k[2]+(((uint32_t)k[3])<<16);
735  a+=k[0]+(((uint32_t)k[1])<<16);
736  break;
737  case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
738  case 10: c+=k[4];
739  b+=k[2]+(((uint32_t)k[3])<<16);
740  a+=k[0]+(((uint32_t)k[1])<<16);
741  break;
742  case 9 : c+=k8[8]; /* fall through */
743  case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
744  a+=k[0]+(((uint32_t)k[1])<<16);
745  break;
746  case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
747  case 6 : b+=k[2];
748  a+=k[0]+(((uint32_t)k[1])<<16);
749  break;
750  case 5 : b+=k8[4]; /* fall through */
751  case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
752  break;
753  case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
754  case 2 : a+=k[0];
755  break;
756  case 1 : a+=k8[0];
757  break;
758  case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
759  }
760 
761  } else { /* need to read the key one byte at a time */
762  const uint8_t *k = (const uint8_t *)key;
763 
764  /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
765  while (length > 12)
766  {
767  a += k[0];
768  a += ((uint32_t)k[1])<<8;
769  a += ((uint32_t)k[2])<<16;
770  a += ((uint32_t)k[3])<<24;
771  b += k[4];
772  b += ((uint32_t)k[5])<<8;
773  b += ((uint32_t)k[6])<<16;
774  b += ((uint32_t)k[7])<<24;
775  c += k[8];
776  c += ((uint32_t)k[9])<<8;
777  c += ((uint32_t)k[10])<<16;
778  c += ((uint32_t)k[11])<<24;
779  mix(a,b,c);
780  length -= 12;
781  k += 12;
782  }
783 
784  /*-------------------------------- last block: affect all 32 bits of (c) */
785  switch(length) /* all the case statements fall through */
786  {
787  case 12: c+=((uint32_t)k[11])<<24; /* fall through */
788  case 11: c+=((uint32_t)k[10])<<16; /* fall through */
789  case 10: c+=((uint32_t)k[9])<<8; /* fall through */
790  case 9 : c+=k[8]; /* fall through */
791  case 8 : b+=((uint32_t)k[7])<<24; /* fall through */
792  case 7 : b+=((uint32_t)k[6])<<16; /* fall through */
793  case 6 : b+=((uint32_t)k[5])<<8; /* fall through */
794  case 5 : b+=k[4]; /* fall through */
795  case 4 : a+=((uint32_t)k[3])<<24; /* fall through */
796  case 3 : a+=((uint32_t)k[2])<<16; /* fall through */
797  case 2 : a+=((uint32_t)k[1])<<8; /* fall through */
798  case 1 : a+=k[0];
799  break;
800  case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
801  }
802  }
803 
804  final(a,b,c);
805  *pc=c; *pb=b;
806 }
807 
808 /*
809  * hashlittle2: return 2 32-bit hash values
810  *
811  * This is identical to hashlittle(), except it returns two 32-bit hash
812  * values instead of just one. This is good enough for hash table
813  * lookup with 2^^64 buckets, or if you want a second hash if you're not
814  * happy with the first, or if you want a probably-unique 64-bit ID for
815  * the key. *pc is better mixed than *pb, so use *pc first. If you want
816  * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
817  */
818 void hashlittle2_safe(const void *key, /* the key to hash */
819  size_t length, /* length of the key */
820  uint32_t *pc, /* IN: primary initval, OUT: primary hash */
821  uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
822 {
823  uint32_t a, b, c; /* internal state */
824  union {
825  const void *ptr;
826  size_t i;
827  } u; /* needed for Mac Powerbook G4 */
828 
829  /* Set up the internal state */
830  a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
831  c += *pb;
832 
833  u.ptr = key;
834  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
835  const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
836 
837  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
838  while (length > 12) {
839  a += k[0];
840  b += k[1];
841  c += k[2];
842  mix(a, b, c);
843  length -= 12;
844  k += 3;
845  }
846 
847  /*----------------------------- handle the last (probably partial) block */
848  /*
849  * Note that unlike hashlittle() above, we use the "safe" version of this
850  * block that is #ifdef VALGRIND above, in order to avoid warnings from
851  * Valgrind or Address Sanitizer.
852  */
853  const uint8_t *k8 = (const uint8_t *)k;
854  switch (length) {
855  case 12:
856  c += k[2];
857  b += k[1];
858  a += k[0];
859  break;
860  case 11:
861  c += ((uint32_t)k8[10]) << 16; /* fall through */
862  case 10:
863  c += ((uint32_t)k8[9]) << 8; /* fall through */
864  case 9:
865  c += k8[8]; /* fall through */
866  case 8:
867  b += k[1];
868  a += k[0];
869  break;
870  case 7:
871  b += ((uint32_t)k8[6]) << 16; /* fall through */
872  case 6:
873  b += ((uint32_t)k8[5]) << 8; /* fall through */
874  case 5:
875  b += k8[4]; /* fall through */
876  case 4:
877  a += k[0];
878  break;
879  case 3:
880  a += ((uint32_t)k8[2]) << 16; /* fall through */
881  case 2:
882  a += ((uint32_t)k8[1]) << 8; /* fall through */
883  case 1:
884  a += k8[0];
885  break;
886  case 0:
887  *pc = c;
888  *pb = b;
889  return; /* zero length strings require no mixing */
890  }
891 
892  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
893  const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
894  const uint8_t *k8;
895 
896  /*--------------- all but last block: aligned reads and different mixing */
897  while (length > 12) {
898  a += k[0] + (((uint32_t)k[1]) << 16);
899  b += k[2] + (((uint32_t)k[3]) << 16);
900  c += k[4] + (((uint32_t)k[5]) << 16);
901  mix(a, b, c);
902  length -= 12;
903  k += 6;
904  }
905 
906  /*----------------------------- handle the last (probably partial) block */
907  k8 = (const uint8_t *)k;
908  switch (length) {
909  case 12:
910  c += k[4] + (((uint32_t)k[5]) << 16);
911  b += k[2] + (((uint32_t)k[3]) << 16);
912  a += k[0] + (((uint32_t)k[1]) << 16);
913  break;
914  case 11:
915  c += ((uint32_t)k8[10]) << 16; /* fall through */
916  case 10:
917  c += k[4];
918  b += k[2] + (((uint32_t)k[3]) << 16);
919  a += k[0] + (((uint32_t)k[1]) << 16);
920  break;
921  case 9:
922  c += k8[8]; /* fall through */
923  case 8:
924  b += k[2] + (((uint32_t)k[3]) << 16);
925  a += k[0] + (((uint32_t)k[1]) << 16);
926  break;
927  case 7:
928  b += ((uint32_t)k8[6]) << 16; /* fall through */
929  case 6:
930  b += k[2];
931  a += k[0] + (((uint32_t)k[1]) << 16);
932  break;
933  case 5:
934  b += k8[4]; /* fall through */
935  case 4:
936  a += k[0] + (((uint32_t)k[1]) << 16);
937  break;
938  case 3:
939  a += ((uint32_t)k8[2]) << 16; /* fall through */
940  case 2:
941  a += k[0];
942  break;
943  case 1:
944  a += k8[0];
945  break;
946  case 0:
947  *pc = c;
948  *pb = b;
949  return; /* zero length strings require no mixing */
950  }
951 
952  } else { /* need to read the key one byte at a time */
953  const uint8_t *k = (const uint8_t *)key;
954 
955  /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
956  while (length > 12) {
957  a += k[0];
958  a += ((uint32_t)k[1]) << 8;
959  a += ((uint32_t)k[2]) << 16;
960  a += ((uint32_t)k[3]) << 24;
961  b += k[4];
962  b += ((uint32_t)k[5]) << 8;
963  b += ((uint32_t)k[6]) << 16;
964  b += ((uint32_t)k[7]) << 24;
965  c += k[8];
966  c += ((uint32_t)k[9]) << 8;
967  c += ((uint32_t)k[10]) << 16;
968  c += ((uint32_t)k[11]) << 24;
969  mix(a, b, c);
970  length -= 12;
971  k += 12;
972  }
973 
974  /*-------------------------------- last block: affect all 32 bits of (c) */
975  switch (length) /* all the case statements fall through */
976  {
977  case 12:
978  c += ((uint32_t)k[11]) << 24; /* fall through */
979  case 11:
980  c += ((uint32_t)k[10]) << 16; /* fall through */
981  case 10:
982  c += ((uint32_t)k[9]) << 8; /* fall through */
983  case 9:
984  c += k[8]; /* fall through */
985  case 8:
986  b += ((uint32_t)k[7]) << 24; /* fall through */
987  case 7:
988  b += ((uint32_t)k[6]) << 16; /* fall through */
989  case 6:
990  b += ((uint32_t)k[5]) << 8; /* fall through */
991  case 5:
992  b += k[4]; /* fall through */
993  case 4:
994  a += ((uint32_t)k[3]) << 24; /* fall through */
995  case 3:
996  a += ((uint32_t)k[2]) << 16; /* fall through */
997  case 2:
998  a += ((uint32_t)k[1]) << 8; /* fall through */
999  case 1:
1000  a += k[0];
1001  break;
1002  case 0:
1003  *pc = c;
1004  *pb = b;
1005  return; /* zero length strings require no mixing */
1006  }
1007  }
1008 
1009  final(a, b, c);
1010  *pc = c;
1011  *pb = b;
1012 }
1013 
1014 /*
1015  * hashbig():
1016  * This is the same as hashword() on big-endian machines. It is different
1017  * from hashlittle() on all machines. hashbig() takes advantage of
1018  * big-endian byte ordering.
1019  */
1020 uint32_t hashbig( const void *key, size_t length, uint32_t initval)
1021 {
1022  uint32_t a,b,c;
1023  union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
1024 
1025  /* Set up the internal state */
1026  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
1027 
1028  u.ptr = key;
1029  if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
1030  const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
1031 
1032  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
1033  while (length > 12)
1034  {
1035  a += k[0];
1036  b += k[1];
1037  c += k[2];
1038  mix(a,b,c);
1039  length -= 12;
1040  k += 3;
1041  }
1042 
1043  /*----------------------------- handle the last (probably partial) block */
1044  /*
1045  * "k[2]<<8" actually reads beyond the end of the string, but
1046  * then shifts out the part it's not allowed to read. Because the
1047  * string is aligned, the illegal read is in the same word as the
1048  * rest of the string. Every machine with memory protection I've seen
1049  * does it on word boundaries, so is OK with this. But VALGRIND will
1050  * still catch it and complain. The masking trick does make the hash
1051  * noticeably faster for short strings (like English words).
1052  */
1053 #ifndef VALGRIND
1054 
1055  switch(length)
1056  {
1057  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
1058  case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
1059  case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
1060  case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
1061  case 8 : b+=k[1]; a+=k[0]; break;
1062  case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
1063  case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
1064  case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
1065  case 4 : a+=k[0]; break;
1066  case 3 : a+=k[0]&0xffffff00; break;
1067  case 2 : a+=k[0]&0xffff0000; break;
1068  case 1 : a+=k[0]&0xff000000; break;
1069  case 0 : return c; /* zero length strings require no mixing */
1070  }
1071 
1072 #else /* make valgrind happy */
1073 
1074  const uint8_t *k8 = (const uint8_t *)k;
1075  switch(length) /* all the case statements fall through */
1076  {
1077  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
1078  case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
1079  case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
1080  case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
1081  case 8 : b+=k[1]; a+=k[0]; break;
1082  case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
1083  case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
1084  case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
1085  case 4 : a+=k[0]; break;
1086  case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
1087  case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
1088  case 1 : a+=((uint32_t)k8[0])<<24; break;
1089  case 0 : return c;
1090  }
1091 
1092 #endif /* !VALGRIND */
1093 
1094  } else { /* need to read the key one byte at a time */
1095  const uint8_t *k = (const uint8_t *)key;
1096 
1097  /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
1098  while (length > 12)
1099  {
1100  a += ((uint32_t)k[0])<<24;
1101  a += ((uint32_t)k[1])<<16;
1102  a += ((uint32_t)k[2])<<8;
1103  a += ((uint32_t)k[3]);
1104  b += ((uint32_t)k[4])<<24;
1105  b += ((uint32_t)k[5])<<16;
1106  b += ((uint32_t)k[6])<<8;
1107  b += ((uint32_t)k[7]);
1108  c += ((uint32_t)k[8])<<24;
1109  c += ((uint32_t)k[9])<<16;
1110  c += ((uint32_t)k[10])<<8;
1111  c += ((uint32_t)k[11]);
1112  mix(a,b,c);
1113  length -= 12;
1114  k += 12;
1115  }
1116 
1117  /*-------------------------------- last block: affect all 32 bits of (c) */
1118  switch(length) /* all the case statements fall through */
1119  {
1120  case 12: c+=k[11]; /* fall through */
1121  case 11: c+=((uint32_t)k[10])<<8; /* fall through */
1122  case 10: c+=((uint32_t)k[9])<<16; /* fall through */
1123  case 9 : c+=((uint32_t)k[8])<<24; /* fall through */
1124  case 8 : b+=k[7]; /* fall through */
1125  case 7 : b+=((uint32_t)k[6])<<8; /* fall through */
1126  case 6 : b+=((uint32_t)k[5])<<16; /* fall through */
1127  case 5 : b+=((uint32_t)k[4])<<24; /* fall through */
1128  case 4 : a+=k[3]; /* fall through */
1129  case 3 : a+=((uint32_t)k[2])<<8; /* fall through */
1130  case 2 : a+=((uint32_t)k[1])<<16; /* fall through */
1131  case 1 : a+=((uint32_t)k[0])<<24;
1132  break;
1133  case 0 : return c;
1134  }
1135  }
1136 
1137  final(a,b,c);
1138  return c;
1139 }
1140 
1141 
1142 #ifdef SELF_TEST
1143 
1144 /* used for timings */
1145 void driver1(void)
1146 {
1147  uint8_t buf[256];
1148  uint32_t i;
1149  uint32_t h=0;
1150  time_t a,z;
1151 
1152  time(&a);
1153  for (i=0; i<256; ++i) buf[i] = 'x';
1154  for (i=0; i<1; ++i)
1155  {
1156  h = hashlittle(&buf[0],1,h);
1157  }
1158  time(&z);
1159  if (z-a > 0) printf("time %d %.8x\n", z-a, h);
1160 }
1161 
1162 /* check that every input bit changes every output bit half the time */
1163 #define HASHSTATE 1
1164 #define HASHLEN 1
1165 #define MAXPAIR 60
1166 #define MAXLEN 70
1167 void driver2(void)
1168 {
1169  uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
1170  uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
1171  uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
1172  uint32_t x[HASHSTATE],y[HASHSTATE];
1173  uint32_t hlen;
1174 
1175  printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
1176  for (hlen=0; hlen < MAXLEN; ++hlen)
1177  {
1178  z=0;
1179  for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */
1180  {
1181  for (j=0; j<8; ++j) /*------------------------ for each input bit, */
1182  {
1183  for (m = 1; m < 8; ++m) /*------------ for several possible initvals, */
1184  {
1185  for (l = 0; l < HASHSTATE; ++l)
1186  e[l] = f[l] = g[l] = h[l] = x[l] = y[l] = ~((uint32_t)0);
1187 
1188  /*---- check that every output bit is affected by that input bit */
1189  for (k = 0; k < MAXPAIR; k += 2) {
1190  uint32_t finished = 1;
1191  /* keys have one bit different */
1192  for (l = 0; l < hlen + 1; ++l) {
1193  a[l] = b[l] = (uint8_t)0;
1194  }
1195  /* have a and b be two keys differing in only one bit */
1196  a[i] ^= (k << j);
1197  a[i] ^= (k >> (8 - j));
1198  c[0] = hashlittle(a, hlen, m);
1199  b[i] ^= ((k + 1) << j);
1200  b[i] ^= ((k + 1) >> (8 - j));
1201  d[0] = hashlittle(b, hlen, m);
1202  /* check every bit is 1, 0, set, and not set at least once */
1203  for (l = 0; l < HASHSTATE; ++l) {
1204  e[l] &= (c[l] ^ d[l]);
1205  f[l] &= ~(c[l] ^ d[l]);
1206  g[l] &= c[l];
1207  h[l] &= ~c[l];
1208  x[l] &= d[l];
1209  y[l] &= ~d[l];
1210  if (e[l] | f[l] | g[l] | h[l] | x[l] | y[l])
1211  finished = 0;
1212  }
1213  if (finished)
1214  break;
1215  }
1216  if (k > z)
1217  z = k;
1218  if (k == MAXPAIR) {
1219  printf("Some bit didn't change: ");
1220  printf("%.8x %.8x %.8x %.8x %.8x %.8x ", e[0], f[0], g[0], h[0], x[0], y[0]);
1221  printf("i %d j %d m %d len %d\n", i, j, m, hlen);
1222  }
1223  if (z == MAXPAIR)
1224  goto done;
1225  }
1226  }
1227  }
1228  done:
1229  if (z < MAXPAIR)
1230  {
1231  printf("Mix success %2d bytes %2d initvals ",i,m);
1232  printf("required %d trials\n", z/2);
1233  }
1234  }
1235  printf("\n");
1236 }
1237 
1238 /* Check for reading beyond the end of the buffer and alignment problems */
1239 void driver3(void)
1240 {
1241  uint8_t buf[MAXLEN+20], *b;
1242  uint32_t len;
1243  uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
1244  uint32_t h;
1245  uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
1246  uint32_t i;
1247  uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
1248  uint32_t j;
1249  uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
1250  uint32_t ref,x,y;
1251  uint8_t *p;
1252 
1253  printf("Endianness. These lines should all be the same (for values filled in):\n");
1254  printf("%.8x %.8x %.8x\n",
1255  hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
1256  hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
1257  hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
1258  p = q;
1259  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
1260  hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
1261  hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
1262  hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
1263  hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
1264  hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
1265  hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
1266  p = &qq[1];
1267  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
1268  hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
1269  hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
1270  hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
1271  hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
1272  hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
1273  hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
1274  p = &qqq[2];
1275  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
1276  hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
1277  hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
1278  hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
1279  hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
1280  hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
1281  hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
1282  p = &qqqq[3];
1283  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
1284  hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
1285  hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
1286  hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
1287  hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
1288  hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
1289  hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
1290  printf("\n");
1291 
1292  /* check that hashlittle2 and hashlittle produce the same results */
1293  i=47; j=0;
1294  hashlittle2(q, sizeof(q), &i, &j);
1295  if (hashlittle(q, sizeof(q), 47) != i)
1296  printf("hashlittle2 and hashlittle mismatch\n");
1297 
1298  /* check that hashword2 and hashword produce the same results */
1299  len = 0xdeadbeef;
1300  i=47, j=0;
1301  hashword2(&len, 1, &i, &j);
1302  if (hashword(&len, 1, 47) != i)
1303  printf("hashword2 and hashword mismatch %x %x\n",
1304  i, hashword(&len, 1, 47));
1305 
1306  /* check hashlittle doesn't read before or after the ends of the string */
1307  for (h=0, b=buf+1; h<8; ++h, ++b)
1308  {
1309  for (i=0; i<MAXLEN; ++i)
1310  {
1311  len = i;
1312  for (j=0; j<i; ++j) *(b+j)=0;
1313 
1314  /* these should all be equal */
1315  ref = hashlittle(b, len, (uint32_t)1);
1316  *(b+i)=(uint8_t)~0;
1317  *(b-1)=(uint8_t)~0;
1318  x = hashlittle(b, len, (uint32_t)1);
1319  y = hashlittle(b, len, (uint32_t)1);
1320  if ((ref != x) || (ref != y))
1321  {
1322  printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
1323  h, i);
1324  }
1325  }
1326  }
1327 }
1328 
1329 /* check for problems with nulls */
1330 void driver4(void)
1331 {
1332  uint8_t buf[1];
1333  uint32_t h,i,state[HASHSTATE];
1334 
1335 
1336  buf[0] = ~0;
1337  for (i=0; i<HASHSTATE; ++i) state[i] = 1;
1338  printf("These should all be different\n");
1339  for (i=0, h=0; i<8; ++i)
1340  {
1341  h = hashlittle(buf, 0, h);
1342  printf("%2ld 0-byte strings, hash is %.8x\n", i, h);
1343  }
1344 }
1345 
1346 void driver5(void)
1347 {
1348  uint32_t b,c;
1349  b=0, c=0, hashlittle2("", 0, &c, &b);
1350  printf("hash is %.8lx %.8lx\n", c, b); /* deadbeef deadbeef */
1351  b=0xdeadbeef, c=0, hashlittle2("", 0, &c, &b);
1352  printf("hash is %.8lx %.8lx\n", c, b); /* bd5b7dde deadbeef */
1353  b=0xdeadbeef, c=0xdeadbeef, hashlittle2("", 0, &c, &b);
1354  printf("hash is %.8lx %.8lx\n", c, b); /* 9c093ccd bd5b7dde */
1355  b=0, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
1356  printf("hash is %.8lx %.8lx\n", c, b); /* 17770551 ce7226e6 */
1357  b=1, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
1358  printf("hash is %.8lx %.8lx\n", c, b); /* e3607cae bd371de4 */
1359  b=0, c=1, hashlittle2("Four score and seven years ago", 30, &c, &b);
1360  printf("hash is %.8lx %.8lx\n", c, b); /* cd628161 6cbea4b3 */
1361  c = hashlittle("Four score and seven years ago", 30, 0);
1362  printf("hash is %.8lx\n", c); /* 17770551 */
1363  c = hashlittle("Four score and seven years ago", 30, 1);
1364  printf("hash is %.8lx\n", c); /* cd628161 */
1365 }
1366 
1367 int main(void)
1368 {
1369  driver1(); /* test that the key is hashed: used for timings */
1370  driver2(); /* test that whole key is hashed thoroughly */
1371  driver3(); /* test that nothing but the key is hashed */
1372  driver4(); /* test hashing multiple buffers (all buffers are null) */
1373  driver5(); /* test the hash against known vectors */
1374  return 1;
1375 }
1376 
1377 #endif /* SELF_TEST */
len
uint8_t len
Definition: app-layer-dnp3.h:2
hashword
uint32_t hashword(const uint32_t *k, size_t length, uint32_t initval)
Definition: util-hash-lookup3.c:172
HASH_BIG_ENDIAN
#define HASH_BIG_ENDIAN
Definition: util-hash-lookup3.c:64
hashbig
uint32_t hashbig(const void *key, size_t length, uint32_t initval)
Definition: util-hash-lookup3.c:1020
hashlittle2_safe
void hashlittle2_safe(const void *key, size_t length, uint32_t *pc, uint32_t *pb)
Definition: util-hash-lookup3.c:818
m
SCMutex m
Definition: flow-hash.h:6
hashlittle
uint32_t hashlittle(const void *key, size_t length, uint32_t initval)
Definition: util-hash-lookup3.c:281
mix
#define mix(a, b, c)
Definition: util-hash-lookup3.c:113
main
int main(int argc, char **argv)
Definition: main.c:20
hashlittle_safe
uint32_t hashlittle_safe(const void *key, size_t length, uint32_t initval)
Definition: util-hash-lookup3.c:482
hashlittle2
void hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *pb)
Definition: util-hash-lookup3.c:635
util-hash-lookup3.h
HASH_LITTLE_ENDIAN
#define HASH_LITTLE_ENDIAN
Definition: util-hash-lookup3.c:63
hashword2
void hashword2(const uint32_t *k, size_t length, uint32_t *pc, uint32_t *pb)
Definition: util-hash-lookup3.c:216