suricata
datasets.c
Go to the documentation of this file.
1 /* Copyright (C) 2017-2020 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Victor Julien <victor@inliniac.net>
22  */
23 
24 #include "suricata-common.h"
25 #include "suricata.h"
26 #include "conf.h"
27 #include "datasets.h"
28 #include "datasets-string.h"
29 #include "datasets-ipv4.h"
30 #include "datasets-ipv6.h"
31 #include "datasets-md5.h"
32 #include "datasets-sha256.h"
33 #include "datasets-reputation.h"
34 #include "util-conf.h"
35 #include "util-thash.h"
36 #include "util-print.h"
37 #include "util-base64.h" // decode base64
38 #include "util-byte.h"
39 #include "util-misc.h"
40 #include "util-path.h"
41 #include "util-debug.h"
42 
/* Head of the singly linked list of datasets; entries are chained through
 * their ->next member and new sets are pushed at the front by DatasetGet(). */
44 static Dataset *sets = NULL;
/* Next dataset id to hand out; DatasetAlloc() stores it in the new set and
 * post-increments it. */
45 static uint32_t set_ids = 0;
46 
/* Forward declaration: the file loaders below call this before it is defined. */
47 static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
48  DataRepType *rep);
49 
50 static inline void DatasetUnlockData(THashData *d)
51 {
52  (void) THashDecrUsecnt(d);
53  THashDataUnlock(d);
54 }
/* Forward declarations for helpers that are defined further down but used
 * earlier in the file. */
55 static bool DatasetIsStatic(const char *save, const char *load);
56 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize);
57 
58 enum DatasetTypes DatasetGetTypeFromString(const char *s)
59 {
60  if (strcasecmp("md5", s) == 0)
61  return DATASET_TYPE_MD5;
62  if (strcasecmp("sha256", s) == 0)
63  return DATASET_TYPE_SHA256;
64  if (strcasecmp("string", s) == 0)
65  return DATASET_TYPE_STRING;
66  if (strcasecmp("ipv4", s) == 0)
67  return DATASET_TYPE_IPV4;
68  if (strcasecmp("ip", s) == 0)
69  return DATASET_TYPE_IPV6;
70  return DATASET_TYPE_NOTSET;
71 }
72 
73 static Dataset *DatasetAlloc(const char *name)
74 {
75  Dataset *set = SCCalloc(1, sizeof(*set));
76  if (set) {
77  set->id = set_ids++;
78  }
79  return set;
80 }
81 
82 static Dataset *DatasetSearchByName(const char *name)
83 {
84  Dataset *set = sets;
85  while (set) {
86  if (strcasecmp(name, set->name) == 0 && set->hidden == false) {
87  return set;
88  }
89  set = set->next;
90  }
91  return NULL;
92 }
93 
/**
 * \internal
 * \brief Value of a single ASCII hex digit.
 *
 * \retval value 0-15, or -1 if \a c is not in [0-9a-fA-F]
 */
static int HexDigitValue(uint8_t c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}

/**
 * \internal
 * \brief Convert an ASCII hex string into raw bytes.
 *
 * Every input character must be a hex digit. Characters such as whitespace,
 * signs or an "0x" prefix are rejected instead of being silently converted.
 *
 * \param in   hex characters (does not need to be NUL-terminated)
 * \param ins  number of characters in \a in; must be even and at least 2
 * \param out  destination buffer
 * \param outs size of \a out; must be exactly ins / 2
 *
 * \retval 0 on success
 * \retval -1 on bad sizes or a non-hex character; \a out is not modified
 */
static int HexToRaw(const uint8_t *in, size_t ins, uint8_t *out, size_t outs)
{
    if (ins < 2 || ins % 2 != 0 || outs != ins / 2)
        return -1;

    /* validate everything up front so `out` stays untouched on failure */
    for (size_t i = 0; i < ins; i++) {
        if (HexDigitValue(in[i]) < 0) {
            SCLogError("invalid hex character at offset %" PRIuMAX, (uintmax_t)i);
            return -1;
        }
    }

    for (size_t i = 0, x = 0; i < ins; i += 2, x++) {
        out[x] = (uint8_t)((HexDigitValue(in[i]) << 4) | HexDigitValue(in[i + 1]));
    }
    return 0;
}
123 
124 static int ParseRepLine(const char *in, size_t ins, DataRepType *rep_out)
125 {
126  SCLogDebug("in '%s'", in);
127  char raw[ins + 1];
128  memcpy(raw, in, ins);
129  raw[ins] = '\0';
130  char *line = raw;
131 
132  char *ptrs[1] = {NULL};
133  int idx = 0;
134 
135  size_t i = 0;
136  while (i < ins + 1) {
137  if (line[i] == ',' || line[i] == '\n' || line[i] == '\0') {
138  line[i] = '\0';
139  SCLogDebug("line '%s'", line);
140 
141  ptrs[idx] = line;
142  idx++;
143 
144  if (idx == 1)
145  break;
146  } else {
147  i++;
148  }
149  }
150 
151  if (idx != 1) {
152  SCLogDebug("idx %d", idx);
153  return -1;
154  }
155 
156  uint16_t v = 0;
157  int r = StringParseU16RangeCheck(&v, 10, strlen(ptrs[0]), ptrs[0], 0, USHRT_MAX);
158  if (r != (int)strlen(ptrs[0])) {
159  SCLogError("'%s' is not a valid reputation value (0-65535)", ptrs[0]);
160  return -1;
161  }
162  SCLogDebug("v %"PRIu16" raw %s", v, ptrs[0]);
163 
164  rep_out->value = v;
165  return 0;
166 }
167 
168 static int DatasetLoadIPv4(Dataset *set)
169 {
170  if (strlen(set->load) == 0)
171  return 0;
172 
173  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
174  const char *fopen_mode = "r";
175  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
176  fopen_mode = "a+";
177  }
178 
179  FILE *fp = fopen(set->load, fopen_mode);
180  if (fp == NULL) {
181  SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
182  return -1;
183  }
184 
185  uint32_t cnt = 0;
186  char line[1024];
187  while (fgets(line, (int)sizeof(line), fp) != NULL) {
188  char *r = strchr(line, ',');
189  if (r == NULL) {
190  line[strlen(line) - 1] = '\0';
191  SCLogDebug("line: '%s'", line);
192 
193  struct in_addr in;
194  if (inet_pton(AF_INET, line, &in) != 1) {
195  FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
196  continue;
197  }
198 
199  if (DatasetAdd(set, (const uint8_t *)&in.s_addr, 4) < 0) {
200  FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
201  continue;
202  }
203  cnt++;
204 
205  /* list with rep data */
206  } else {
207  line[strlen(line) - 1] = '\0';
208  SCLogDebug("IPv4 with REP line: '%s'", line);
209 
210  *r = '\0';
211 
212  struct in_addr in;
213  if (inet_pton(AF_INET, line, &in) != 1) {
214  FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
215  continue;
216  }
217 
218  r++;
219 
220  DataRepType rep = { .value = 0 };
221  if (ParseRepLine(r, strlen(r), &rep) < 0) {
222  FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load);
223  continue;
224  }
225 
226  SCLogDebug("rep v:%u", rep.value);
227  if (DatasetAddwRep(set, (const uint8_t *)&in.s_addr, 4, &rep) < 0) {
228  FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
229  continue;
230  }
231 
232  cnt++;
233  }
234  }
236 
237  fclose(fp);
238  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
239  return 0;
240 }
241 
242 static int ParseIpv6String(Dataset *set, char *line, struct in6_addr *in6)
243 {
244  /* Checking IPv6 case */
245  char *got_colon = strchr(line, ':');
246  if (got_colon) {
247  uint32_t ip6addr[4];
248  if (inet_pton(AF_INET6, line, in6) != 1) {
249  FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
250  return -1;
251  }
252  memcpy(&ip6addr, in6->s6_addr, sizeof(ip6addr));
253  /* IPv4 in IPv6 notation needs transformation to internal Suricata storage */
254  if (ip6addr[0] == 0 && ip6addr[1] == 0 && ip6addr[2] == 0xFFFF0000) {
255  ip6addr[0] = ip6addr[3];
256  ip6addr[2] = 0;
257  ip6addr[3] = 0;
258  memcpy(in6, ip6addr, sizeof(struct in6_addr));
259  }
260  } else {
261  /* IPv4 case */
262  struct in_addr in;
263  if (inet_pton(AF_INET, line, &in) != 1) {
264  FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
265  return -1;
266  }
267  memset(in6, 0, sizeof(struct in6_addr));
268  memcpy(in6, &in, sizeof(struct in_addr));
269  }
270  return 0;
271 }
272 
273 static int DatasetLoadIPv6(Dataset *set)
274 {
275  if (strlen(set->load) == 0)
276  return 0;
277 
278  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
279  const char *fopen_mode = "r";
280  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
281  fopen_mode = "a+";
282  }
283 
284  FILE *fp = fopen(set->load, fopen_mode);
285  if (fp == NULL) {
286  SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
287  return -1;
288  }
289 
290  uint32_t cnt = 0;
291  char line[1024];
292  while (fgets(line, (int)sizeof(line), fp) != NULL) {
293  char *r = strchr(line, ',');
294  if (r == NULL) {
295  line[strlen(line) - 1] = '\0';
296  SCLogDebug("line: '%s'", line);
297 
298  struct in6_addr in6;
299  int ret = ParseIpv6String(set, line, &in6);
300  if (ret < 0) {
301  FatalErrorOnInit("unable to parse IP address");
302  continue;
303  }
304 
305  if (DatasetAdd(set, (const uint8_t *)&in6.s6_addr, 16) < 0) {
306  FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
307  continue;
308  }
309  cnt++;
310 
311  /* list with rep data */
312  } else {
313  line[strlen(line) - 1] = '\0';
314  SCLogDebug("IPv6 with REP line: '%s'", line);
315 
316  *r = '\0';
317 
318  struct in6_addr in6;
319  int ret = ParseIpv6String(set, line, &in6);
320  if (ret < 0) {
321  FatalErrorOnInit("unable to parse IP address");
322  continue;
323  }
324 
325  r++;
326 
327  DataRepType rep = { .value = 0 };
328  if (ParseRepLine(r, strlen(r), &rep) < 0) {
329  FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load);
330  continue;
331  }
332 
333  SCLogDebug("rep v:%u", rep.value);
334  if (DatasetAddwRep(set, (const uint8_t *)&in6.s6_addr, 16, &rep) < 0) {
335  FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
336  continue;
337  }
338 
339  cnt++;
340  }
341  }
343 
344  fclose(fp);
345  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
346  return 0;
347 }
348 
349 static int DatasetLoadMd5(Dataset *set)
350 {
351  if (strlen(set->load) == 0)
352  return 0;
353 
354  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
355  const char *fopen_mode = "r";
356  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
357  fopen_mode = "a+";
358  }
359 
360  FILE *fp = fopen(set->load, fopen_mode);
361  if (fp == NULL) {
362  SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
363  return -1;
364  }
365 
366  uint32_t cnt = 0;
367  char line[1024];
368  while (fgets(line, (int)sizeof(line), fp) != NULL) {
369  /* straight black/white list */
370  if (strlen(line) == 33) {
371  line[strlen(line) - 1] = '\0';
372  SCLogDebug("line: '%s'", line);
373 
374  uint8_t hash[16];
375  if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0) {
376  FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
377  continue;
378  }
379 
380  if (DatasetAdd(set, (const uint8_t *)hash, 16) < 0) {
381  FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
382  continue;
383  }
384  cnt++;
385 
386  /* list with rep data */
387  } else if (strlen(line) > 33 && line[32] == ',') {
388  line[strlen(line) - 1] = '\0';
389  SCLogDebug("MD5 with REP line: '%s'", line);
390 
391  uint8_t hash[16];
392  if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0) {
393  FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
394  continue;
395  }
396 
397  DataRepType rep = { .value = 0};
398  if (ParseRepLine(line + 33, strlen(line) - 33, &rep) < 0) {
399  FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load);
400  continue;
401  }
402 
403  SCLogDebug("rep v:%u", rep.value);
404  if (DatasetAddwRep(set, hash, 16, &rep) < 0) {
405  FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
406  continue;
407  }
408 
409  cnt++;
410  }
411  else {
412  FatalErrorOnInit("MD5 bad line len %u: '%s'", (uint32_t)strlen(line), line);
413  continue;
414  }
415  }
417 
418  fclose(fp);
419  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
420  return 0;
421 }
422 
423 static int DatasetLoadSha256(Dataset *set)
424 {
425  if (strlen(set->load) == 0)
426  return 0;
427 
428  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
429  const char *fopen_mode = "r";
430  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
431  fopen_mode = "a+";
432  }
433 
434  FILE *fp = fopen(set->load, fopen_mode);
435  if (fp == NULL) {
436  SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
437  return -1;
438  }
439 
440  uint32_t cnt = 0;
441  char line[1024];
442  while (fgets(line, (int)sizeof(line), fp) != NULL) {
443  /* straight black/white list */
444  if (strlen(line) == 65) {
445  line[strlen(line) - 1] = '\0';
446  SCLogDebug("line: '%s'", line);
447 
448  uint8_t hash[32];
449  if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0) {
450  FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
451  continue;
452  }
453 
454  if (DatasetAdd(set, (const uint8_t *)hash, (uint32_t)32) < 0) {
455  FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
456  continue;
457  }
458  cnt++;
459 
460  /* list with rep data */
461  } else if (strlen(line) > 65 && line[64] == ',') {
462  line[strlen(line) - 1] = '\0';
463  SCLogDebug("SHA-256 with REP line: '%s'", line);
464 
465  uint8_t hash[32];
466  if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0) {
467  FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
468  continue;
469  }
470 
471  DataRepType rep = { .value = 0 };
472  if (ParseRepLine(line + 65, strlen(line) - 65, &rep) < 0) {
473  FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load);
474  continue;
475  }
476 
477  SCLogDebug("rep %u", rep.value);
478 
479  if (DatasetAddwRep(set, hash, 32, &rep) < 0) {
480  FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
481  continue;
482  }
483  cnt++;
484  }
485  }
487 
488  fclose(fp);
489  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
490  return 0;
491 }
492 
493 static int DatasetLoadString(Dataset *set)
494 {
495  if (strlen(set->load) == 0)
496  return 0;
497 
498  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
499  const char *fopen_mode = "r";
500  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
501  fopen_mode = "a+";
502  }
503 
504  FILE *fp = fopen(set->load, fopen_mode);
505  if (fp == NULL) {
506  SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
507  return -1;
508  }
509 
510  uint32_t cnt = 0;
511  char line[1024];
512  while (fgets(line, (int)sizeof(line), fp) != NULL) {
513  if (strlen(line) <= 1)
514  continue;
515 
516  char *r = strchr(line, ',');
517  if (r == NULL) {
518  line[strlen(line) - 1] = '\0';
519  SCLogDebug("line: '%s'", line);
520 
521  // coverity[alloc_strlen : FALSE]
522  uint8_t decoded[strlen(line)];
523  uint32_t consumed = 0, num_decoded = 0;
524  Base64Ecode code = DecodeBase64(decoded, strlen(line), (const uint8_t *)line,
525  strlen(line), &consumed, &num_decoded, BASE64_MODE_STRICT);
526  if (code == BASE64_ECODE_ERR) {
527  FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load);
528  continue;
529  }
530 
531  if (DatasetAdd(set, (const uint8_t *)decoded, num_decoded) < 0) {
532  FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
533  continue;
534  }
535  cnt++;
536  } else {
537  line[strlen(line) - 1] = '\0';
538  SCLogDebug("line: '%s'", line);
539 
540  *r = '\0';
541 
542  // coverity[alloc_strlen : FALSE]
543  uint8_t decoded[strlen(line)];
544  uint32_t consumed = 0, num_decoded = 0;
545  Base64Ecode code = DecodeBase64(decoded, strlen(line), (const uint8_t *)line,
546  strlen(line), &consumed, &num_decoded, BASE64_MODE_STRICT);
547  if (code == BASE64_ECODE_ERR) {
548  FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load);
549  continue;
550  }
551 
552  r++;
553  SCLogDebug("r '%s'", r);
554 
555  DataRepType rep = { .value = 0 };
556  if (ParseRepLine(r, strlen(r), &rep) < 0) {
557  FatalErrorOnInit("die: bad rep");
558  continue;
559  }
560  SCLogDebug("rep %u", rep.value);
561 
562  if (DatasetAddwRep(set, (const uint8_t *)decoded, num_decoded, &rep) < 0) {
563  FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
564  continue;
565  }
566  cnt++;
567 
568  SCLogDebug("line with rep %s, %s", line, r);
569  }
570  }
572 
573  fclose(fp);
574  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
575  return 0;
576 }
577 
/* Defined outside this file. DatasetGetPath() reads it: when it is false, a
 * load path that does not exist under the data directory falls back to the
 * path as given. */
578 extern bool g_system;
579 
583 };
584 
585 static void DatasetGetPath(const char *in_path,
586  char *out_path, size_t out_size, enum DatasetGetPathType type)
587 {
588  char path[PATH_MAX];
589  struct stat st;
590 
591  if (PathIsAbsolute(in_path)) {
592  strlcpy(path, in_path, sizeof(path));
593  strlcpy(out_path, path, out_size);
594  return;
595  }
596 
597  const char *data_dir = ConfigGetDataDirectory();
598  if (stat(data_dir, &st) != 0) {
599  SCLogDebug("data-dir '%s': %s", data_dir, strerror(errno));
600  return;
601  }
602 
603  snprintf(path, sizeof(path), "%s/%s", data_dir, in_path); // TODO WINDOWS
604 
605  if (type == TYPE_LOAD) {
606  if (stat(path, &st) != 0) {
607  SCLogDebug("path %s: %s", path, strerror(errno));
608  if (!g_system) {
609  snprintf(path, sizeof(path), "%s", in_path);
610  }
611  }
612  }
613  strlcpy(out_path, path, out_size);
614  SCLogDebug("in_path \'%s\' => \'%s\'", in_path, out_path);
615 }
616 
617 /** \brief look for set by name without creating it */
618 Dataset *DatasetFind(const char *name, enum DatasetTypes type)
619 {
621  Dataset *set = DatasetSearchByName(name);
622  if (set) {
623  if (set->type != type) {
625  return NULL;
626  }
627  }
629  return set;
630 }
631 
632 Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load,
633  uint64_t memcap, uint32_t hashsize)
634 {
635  uint64_t default_memcap = 0;
636  uint32_t default_hashsize = 0;
637  if (strlen(name) > DATASET_NAME_MAX_LEN) {
638  return NULL;
639  }
640 
642  Dataset *set = DatasetSearchByName(name);
643  if (set) {
644  if (type != DATASET_TYPE_NOTSET && set->type != type) {
645  SCLogError("dataset %s already "
646  "exists and is of type %u",
647  set->name, set->type);
649  return NULL;
650  }
651 
652  if ((save == NULL || strlen(save) == 0) &&
653  (load == NULL || strlen(load) == 0)) {
654  // OK, rule keyword doesn't have to set state/load,
655  // even when yaml set has set it.
656  } else {
657  if ((save == NULL && strlen(set->save) > 0) ||
658  (save != NULL && strcmp(set->save, save) != 0)) {
659  SCLogError("dataset %s save mismatch: %s != %s", set->name, set->save, save);
661  return NULL;
662  }
663  if ((load == NULL && strlen(set->load) > 0) ||
664  (load != NULL && strcmp(set->load, load) != 0)) {
665  SCLogError("dataset %s load mismatch: %s != %s", set->name, set->load, load);
667  return NULL;
668  }
669  }
670 
672  return set;
673  } else {
674  if (type == DATASET_TYPE_NOTSET) {
675  SCLogError("dataset %s not defined", name);
676  goto out_err;
677  }
678  }
679 
680  set = DatasetAlloc(name);
681  if (set == NULL) {
682  goto out_err;
683  }
684 
685  strlcpy(set->name, name, sizeof(set->name));
686  set->type = type;
687  if (save && strlen(save)) {
688  strlcpy(set->save, save, sizeof(set->save));
689  SCLogDebug("name %s save '%s'", name, set->save);
690  }
691  if (load && strlen(load)) {
692  strlcpy(set->load, load, sizeof(set->load));
693  SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load);
694  }
695 
696  char cnf_name[128];
697  snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
698 
699  GetDefaultMemcap(&default_memcap, &default_hashsize);
700  switch (type) {
701  case DATASET_TYPE_MD5:
702  set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet, Md5StrFree, Md5StrHash,
703  Md5StrCompare, load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
704  hashsize > 0 ? hashsize : default_hashsize);
705  if (set->hash == NULL)
706  goto out_err;
707  if (DatasetLoadMd5(set) < 0)
708  goto out_err;
709  break;
710  case DATASET_TYPE_STRING:
711  set->hash = THashInit(cnf_name, sizeof(StringType), StringSet, StringFree, StringHash,
712  StringCompare, load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
713  hashsize > 0 ? hashsize : default_hashsize);
714  if (set->hash == NULL)
715  goto out_err;
716  if (DatasetLoadString(set) < 0)
717  goto out_err;
718  break;
719  case DATASET_TYPE_SHA256:
720  set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet, Sha256StrFree,
721  Sha256StrHash, Sha256StrCompare, load != NULL ? 1 : 0,
722  memcap > 0 ? memcap : default_memcap,
723  hashsize > 0 ? hashsize : default_hashsize);
724  if (set->hash == NULL)
725  goto out_err;
726  if (DatasetLoadSha256(set) < 0)
727  goto out_err;
728  break;
729  case DATASET_TYPE_IPV4:
730  set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4Set, IPv4Free, IPv4Hash,
731  IPv4Compare, load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
732  hashsize > 0 ? hashsize : default_hashsize);
733  if (set->hash == NULL)
734  goto out_err;
735  if (DatasetLoadIPv4(set) < 0)
736  goto out_err;
737  break;
738  case DATASET_TYPE_IPV6:
739  set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6Set, IPv6Free, IPv6Hash,
740  IPv6Compare, load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
741  hashsize > 0 ? hashsize : default_hashsize);
742  if (set->hash == NULL)
743  goto out_err;
744  if (DatasetLoadIPv6(set) < 0)
745  goto out_err;
746  break;
747  }
748 
749  SCLogDebug("set %p/%s type %u save %s load %s",
750  set, set->name, set->type, set->save, set->load);
751 
752  set->next = sets;
753  sets = set;
754 
756  return set;
757 out_err:
758  if (set) {
759  if (set->hash) {
760  THashShutdown(set->hash);
761  }
762  SCFree(set);
763  }
765  return NULL;
766 }
767 
/**
 * \internal
 * \brief Check if a set is static.
 *
 * A set is static if it has a load path but no save path, i.e. it has no
 * dynamic properties.
 *
 * \param save save path, may be NULL or empty
 * \param load load path, may be NULL or empty
 *
 * \retval true if \a load is non-empty and \a save is NULL or empty
 */
static bool DatasetIsStatic(const char *save, const char *load)
{
    const bool has_load = (load != NULL) && (load[0] != '\0');
    const bool has_save = (save != NULL) && (save[0] != '\0');
    return has_load && !has_save;
}
779 
780 void DatasetReload(void)
781 {
782  /* In order to reload the datasets, just mark the current sets as hidden
783  * and clean them up later.
784  * New datasets shall be created with the rule reload and do not require
785  * any intervention.
786  * */
788  Dataset *set = sets;
789  while (set) {
790  if (!DatasetIsStatic(set->save, set->load) || set->from_yaml == true) {
791  SCLogDebug("Not a static set, skipping %s", set->name);
792  set = set->next;
793  continue;
794  }
795  set->hidden = true;
796  SCLogDebug("Set %s at %p hidden successfully", set->name, set);
797  set = set->next;
798  }
800 }
801 
803 {
804  SCLogDebug("Post Reload Cleanup starting.. Hidden sets will be removed");
806  Dataset *cur = sets;
807  Dataset *prev = NULL;
808  while (cur) {
809  Dataset *next = cur->next;
810  if (cur->hidden == false) {
811  prev = cur;
812  cur = next;
813  continue;
814  }
815  // Delete the set in case it was hidden
816  if (prev != NULL) {
817  prev->next = next;
818  } else {
819  sets = next;
820  }
821  THashShutdown(cur->hash);
822  SCFree(cur);
823  cur = next;
824  }
826 }
827 
/**
 * \internal
 * \brief Read the default memcap and hash size from the configuration.
 *
 * Reads "datasets.defaults.memcap" and "datasets.defaults.hashsize". An
 * output is only written if its option is present: with the parsed value,
 * or with 0 (after a warning) if the value cannot be parsed.
 *
 * \param memcap   receives the default memcap
 * \param hashsize receives the default hash size
 */
static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize)
{
    const char *conf_val = NULL;

    if (ConfGet("datasets.defaults.memcap", &conf_val) == 1 &&
            ParseSizeStringU64(conf_val, memcap) < 0) {
        SCLogWarning("memcap value cannot be deduced: %s,"
                     " resetting to default",
                conf_val);
        *memcap = 0;
    }

    if (ConfGet("datasets.defaults.hashsize", &conf_val) == 1 &&
            ParseSizeStringU32(conf_val, hashsize) < 0) {
        SCLogWarning("hashsize value cannot be deduced: %s,"
                     " resetting to default",
                conf_val);
        *hashsize = 0;
    }
}
848 
/**
 * \brief Create the datasets that are defined under the "datasets" config node.
 *
 * For every named child node a set is created through DatasetGet() and
 * flagged as coming from the yaml. Only the types "md5", "sha256" and
 * "string" are handled here; a node with any other type value creates no
 * set. A failure to create a set is reported through FatalErrorOnInit.
 *
 * \retval 0 always
 */
849 int DatasetsInit(void)
850 {
851  SCLogDebug("datasets start");
852  ConfNode *datasets = ConfGetNode("datasets");
853  uint64_t default_memcap = 0;
854  uint32_t default_hashsize = 0;
855  GetDefaultMemcap(&default_memcap, &default_hashsize);
856  if (datasets != NULL) {
857  int list_pos = 0;
858  ConfNode *iter = NULL;
859  TAILQ_FOREACH(iter, &datasets->head, next) {
860  if (iter->name == NULL) {
861  list_pos++;
862  continue;
863  }
864 
865  char save[PATH_MAX] = "";
866  char load[PATH_MAX] = "";
867  uint64_t memcap = 0;
868  uint32_t hashsize = 0;
869 
870  const char *set_name = iter->name;
871  if (strlen(set_name) > DATASET_NAME_MAX_LEN) {
/* NOTE(review): the opening of the call this argument list belongs to is not
 * present in this listing (the embedded numbering skips a line here). */
873  "set name '%s' too long, max %d chars", set_name, DATASET_NAME_MAX_LEN);
874  continue;
875  }
876 
/* a node without a "type" child is skipped */
877  ConfNode *set_type =
878  ConfNodeLookupChild(iter, "type");
879  if (set_type == NULL) {
880  list_pos++;
881  continue;
882  }
883 
/* "state" names one file that is used for both saving and loading;
 * "load" is only looked at when there is no "state" */
884  ConfNode *set_save =
885  ConfNodeLookupChild(iter, "state");
886  if (set_save) {
887  DatasetGetPath(set_save->val, save, sizeof(save), TYPE_STATE);
888  strlcpy(load, save, sizeof(load));
889  } else {
890  ConfNode *set_load =
891  ConfNodeLookupChild(iter, "load");
892  if (set_load) {
893  DatasetGetPath(set_load->val, load, sizeof(load), TYPE_LOAD);
894  }
895  }
896 
/* per-set memcap/hashsize; 0 (unset or unparsable) selects the defaults
 * read above when the set is created */
897  ConfNode *set_memcap = ConfNodeLookupChild(iter, "memcap");
898  if (set_memcap) {
899  if (ParseSizeStringU64(set_memcap->val, &memcap) < 0) {
900  SCLogWarning("memcap value cannot be"
901  " deduced: %s, resetting to default",
902  set_memcap->val);
903  memcap = 0;
904  }
905  }
906  ConfNode *set_hashsize = ConfNodeLookupChild(iter, "hashsize");
907  if (set_hashsize) {
908  if (ParseSizeStringU32(set_hashsize->val, &hashsize) < 0) {
909  SCLogWarning("hashsize value cannot be"
910  " deduced: %s, resetting to default",
911  set_hashsize->val);
912  hashsize = 0;
913  }
914  }
/* conf_str is only used by the debug message below */
915  char conf_str[1024];
916  snprintf(conf_str, sizeof(conf_str), "datasets.%d.%s", list_pos, set_name);
917 
918  SCLogDebug("set %s type %s. Conf %s", set_name, set_type->val, conf_str);
919 
/* the type is matched case-sensitively; the three branches differ only in
 * the dataset type passed to DatasetGet() */
920  if (strcmp(set_type->val, "md5") == 0) {
921  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_MD5, save, load,
922  memcap > 0 ? memcap : default_memcap,
923  hashsize > 0 ? hashsize : default_hashsize);
924  if (dset == NULL) {
925  FatalErrorOnInit("failed to setup dataset for %s", set_name);
926  continue;
927  }
928  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
929  dset->from_yaml = true;
930 
931  } else if (strcmp(set_type->val, "sha256") == 0) {
932  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_SHA256, save, load,
933  memcap > 0 ? memcap : default_memcap,
934  hashsize > 0 ? hashsize : default_hashsize);
935  if (dset == NULL) {
936  FatalErrorOnInit("failed to setup dataset for %s", set_name);
937  continue;
938  }
939  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
940  dset->from_yaml = true;
941 
942  } else if (strcmp(set_type->val, "string") == 0) {
943  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_STRING, save, load,
944  memcap > 0 ? memcap : default_memcap,
945  hashsize > 0 ? hashsize : default_hashsize);
946  if (dset == NULL) {
947  FatalErrorOnInit("failed to setup dataset for %s", set_name);
948  continue;
949  }
950  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
951  dset->from_yaml = true;
952  }
953 
/* note: the `continue` paths for a too long name and for a failed
 * DatasetGet() skip this increment */
954  list_pos++;
955  }
956  }
957  SCLogDebug("datasets done: %p", datasets);
958  return 0;
959 }
960 
961 void DatasetsDestroy(void)
962 {
963  SCLogDebug("destroying datasets: %p", sets);
965  Dataset *set = sets;
966  while (set) {
967  SCLogDebug("destroying set %s", set->name);
968  Dataset *next = set->next;
969  THashShutdown(set->hash);
970  SCFree(set);
971  set = next;
972  }
973  sets = NULL;
975  SCLogDebug("destroying datasets done: %p", sets);
976 }
977 
/**
 * \internal
 * \brief Output callback that writes one formatted record to a file.
 *
 * \param ctx      FILE pointer to write to, may be NULL
 * \param data     record to write
 * \param data_len length of \a data in bytes
 *
 * \retval 0 if \a ctx is NULL, otherwise the return value of fwrite() for a
 *         single item of \a data_len bytes (1 if it was written in full)
 */
static int SaveCallback(void *ctx, const uint8_t *data, const uint32_t data_len)
{
    FILE *out = ctx;
    if (out == NULL) {
        return 0;
    }
    return (int)fwrite(data, data_len, 1, out);
}
987 
988 static int Md5AsAscii(const void *s, char *out, size_t out_size)
989 {
990  const Md5Type *md5 = s;
991  char str[256];
992  PrintHexString(str, sizeof(str), (uint8_t *)md5->md5, sizeof(md5->md5));
993  strlcat(out, str, out_size);
994  strlcat(out, "\n", out_size);
995  return strlen(out);
996 }
997 
998 static int Sha256AsAscii(const void *s, char *out, size_t out_size)
999 {
1000  const Sha256Type *sha = s;
1001  char str[256];
1002  PrintHexString(str, sizeof(str), (uint8_t *)sha->sha256, sizeof(sha->sha256));
1003  strlcat(out, str, out_size);
1004  strlcat(out, "\n", out_size);
1005  return strlen(out);
1006 }
1007 
1008 static int IPv4AsAscii(const void *s, char *out, size_t out_size)
1009 {
1010  const IPv4Type *ip4 = s;
1011  char str[256];
1012  PrintInet(AF_INET, ip4->ipv4, str, sizeof(str));
1013  strlcat(out, str, out_size);
1014  strlcat(out, "\n", out_size);
1015  return strlen(out);
1016 }
1017 
1018 static int IPv6AsAscii(const void *s, char *out, size_t out_size)
1019 {
1020  const IPv6Type *ip6 = s;
1021  char str[256];
1022  bool is_ipv4 = true;
1023  for (int i = 4; i <= 15; i++) {
1024  if (ip6->ipv6[i] != 0) {
1025  is_ipv4 = false;
1026  break;
1027  }
1028  }
1029  if (is_ipv4) {
1030  PrintInet(AF_INET, ip6->ipv6, str, sizeof(str));
1031  } else {
1032  PrintInet(AF_INET6, ip6->ipv6, str, sizeof(str));
1033  }
1034  strlcat(out, str, out_size);
1035  strlcat(out, "\n", out_size);
1036  return strlen(out);
1037 }
1038 
1039 void DatasetsSave(void)
1040 {
1041  SCLogDebug("saving datasets: %p", sets);
1043  Dataset *set = sets;
1044  while (set) {
1045  if (strlen(set->save) == 0)
1046  goto next;
1047 
1048  FILE *fp = fopen(set->save, "w");
1049  if (fp == NULL)
1050  goto next;
1051 
1052  SCLogDebug("dumping %s to %s", set->name, set->save);
1053 
1054  switch (set->type) {
1055  case DATASET_TYPE_STRING:
1056  THashWalk(set->hash, StringAsBase64, SaveCallback, fp);
1057  break;
1058  case DATASET_TYPE_MD5:
1059  THashWalk(set->hash, Md5AsAscii, SaveCallback, fp);
1060  break;
1061  case DATASET_TYPE_SHA256:
1062  THashWalk(set->hash, Sha256AsAscii, SaveCallback, fp);
1063  break;
1064  case DATASET_TYPE_IPV4:
1065  THashWalk(set->hash, IPv4AsAscii, SaveCallback, fp);
1066  break;
1067  case DATASET_TYPE_IPV6:
1068  THashWalk(set->hash, IPv6AsAscii, SaveCallback, fp);
1069  break;
1070  }
1071 
1072  fclose(fp);
1073 
1074  next:
1075  set = set->next;
1076  }
1078 }
1079 
1080 static int DatasetLookupString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1081 {
1082  if (set == NULL)
1083  return -1;
1084 
1085  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep.value = 0 };
1086  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1087  if (rdata) {
1088  DatasetUnlockData(rdata);
1089  return 1;
1090  }
1091  return 0;
1092 }
1093 
1094 static DataRepResultType DatasetLookupStringwRep(Dataset *set,
1095  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1096 {
1097  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
1098 
1099  if (set == NULL)
1100  return rrep;
1101 
1102  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = *rep };
1103  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1104  if (rdata) {
1105  StringType *found = rdata->data;
1106  rrep.found = true;
1107  rrep.rep = found->rep;
1108  DatasetUnlockData(rdata);
1109  return rrep;
1110  }
1111  return rrep;
1112 }
1113 
1114 static int DatasetLookupIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
1115 {
1116  if (set == NULL)
1117  return -1;
1118 
1119  if (data_len != 4)
1120  return -1;
1121 
1122  IPv4Type lookup = { .rep.value = 0 };
1123  memcpy(lookup.ipv4, data, 4);
1124  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1125  if (rdata) {
1126  DatasetUnlockData(rdata);
1127  return 1;
1128  }
1129  return 0;
1130 }
1131 
1132 static DataRepResultType DatasetLookupIPv4wRep(
1133  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1134 {
1135  DataRepResultType rrep = { .found = false, .rep = { .value = 0 } };
1136 
1137  if (set == NULL)
1138  return rrep;
1139 
1140  if (data_len != 4)
1141  return rrep;
1142 
1143  IPv4Type lookup = { .rep.value = 0 };
1144  memcpy(lookup.ipv4, data, data_len);
1145  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1146  if (rdata) {
1147  IPv4Type *found = rdata->data;
1148  rrep.found = true;
1149  rrep.rep = found->rep;
1150  DatasetUnlockData(rdata);
1151  return rrep;
1152  }
1153  return rrep;
1154 }
1155 
1156 static int DatasetLookupIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
1157 {
1158  if (set == NULL)
1159  return -1;
1160 
1161  if (data_len != 16 && data_len != 4)
1162  return -1;
1163 
1164  IPv6Type lookup = { .rep.value = 0 };
1165  memcpy(lookup.ipv6, data, data_len);
1166  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1167  if (rdata) {
1168  DatasetUnlockData(rdata);
1169  return 1;
1170  }
1171  return 0;
1172 }
1173 
1174 static DataRepResultType DatasetLookupIPv6wRep(
1175  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1176 {
1177  DataRepResultType rrep = { .found = false, .rep = { .value = 0 } };
1178 
1179  if (set == NULL)
1180  return rrep;
1181 
1182  if (data_len != 16 && data_len != 4)
1183  return rrep;
1184 
1185  IPv6Type lookup = { .rep.value = 0 };
1186  memcpy(lookup.ipv6, data, data_len);
1187  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1188  if (rdata) {
1189  IPv6Type *found = rdata->data;
1190  rrep.found = true;
1191  rrep.rep = found->rep;
1192  DatasetUnlockData(rdata);
1193  return rrep;
1194  }
1195  return rrep;
1196 }
1197 
1198 static int DatasetLookupMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1199 {
1200  if (set == NULL)
1201  return -1;
1202 
1203  if (data_len != 16)
1204  return -1;
1205 
1206  Md5Type lookup = { .rep.value = 0 };
1207  memcpy(lookup.md5, data, data_len);
1208  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1209  if (rdata) {
1210  DatasetUnlockData(rdata);
1211  return 1;
1212  }
1213  return 0;
1214 }
1215 
1216 static DataRepResultType DatasetLookupMd5wRep(Dataset *set,
1217  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1218 {
1219  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
1220 
1221  if (set == NULL)
1222  return rrep;
1223 
1224  if (data_len != 16)
1225  return rrep;
1226 
1227  Md5Type lookup = { .rep.value = 0};
1228  memcpy(lookup.md5, data, data_len);
1229  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1230  if (rdata) {
1231  Md5Type *found = rdata->data;
1232  rrep.found = true;
1233  rrep.rep = found->rep;
1234  DatasetUnlockData(rdata);
1235  return rrep;
1236  }
1237  return rrep;
1238 }
1239 
1240 static int DatasetLookupSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1241 {
1242  if (set == NULL)
1243  return -1;
1244 
1245  if (data_len != 32)
1246  return -1;
1247 
1248  Sha256Type lookup = { .rep.value = 0 };
1249  memcpy(lookup.sha256, data, data_len);
1250  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1251  if (rdata) {
1252  DatasetUnlockData(rdata);
1253  return 1;
1254  }
1255  return 0;
1256 }
1257 
1258 static DataRepResultType DatasetLookupSha256wRep(Dataset *set,
1259  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1260 {
1261  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
1262 
1263  if (set == NULL)
1264  return rrep;
1265 
1266  if (data_len != 32)
1267  return rrep;
1268 
1269  Sha256Type lookup = { .rep.value = 0 };
1270  memcpy(lookup.sha256, data, data_len);
1271  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1272  if (rdata) {
1273  Sha256Type *found = rdata->data;
1274  rrep.found = true;
1275  rrep.rep = found->rep;
1276  DatasetUnlockData(rdata);
1277  return rrep;
1278  }
1279  return rrep;
1280 }
1281 
1282 /**
1283  * \brief see if \a data is part of the set
1284  * \param set dataset
1285  * \param data data to look up
1286  * \param data_len length in bytes of \a data
1287  * \retval -1 error
1288  * \retval 0 not found
1289  * \retval 1 found
1290  */
1291 int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
1292 {
1293  if (set == NULL)
1294  return -1;
1295 
1296  switch (set->type) {
1297  case DATASET_TYPE_STRING:
1298  return DatasetLookupString(set, data, data_len);
1299  case DATASET_TYPE_MD5:
1300  return DatasetLookupMd5(set, data, data_len);
1301  case DATASET_TYPE_SHA256:
1302  return DatasetLookupSha256(set, data, data_len);
1303  case DATASET_TYPE_IPV4:
1304  return DatasetLookupIPv4(set, data, data_len);
1305  case DATASET_TYPE_IPV6:
1306  return DatasetLookupIPv6(set, data, data_len);
1307  }
1308  return -1;
1309 }
1310 
1311 DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
1312  const DataRepType *rep)
1313 {
1314  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
1315  if (set == NULL)
1316  return rrep;
1317 
1318  switch (set->type) {
1319  case DATASET_TYPE_STRING:
1320  return DatasetLookupStringwRep(set, data, data_len, rep);
1321  case DATASET_TYPE_MD5:
1322  return DatasetLookupMd5wRep(set, data, data_len, rep);
1323  case DATASET_TYPE_SHA256:
1324  return DatasetLookupSha256wRep(set, data, data_len, rep);
1325  case DATASET_TYPE_IPV4:
1326  return DatasetLookupIPv4wRep(set, data, data_len, rep);
1327  case DATASET_TYPE_IPV6:
1328  return DatasetLookupIPv6wRep(set, data, data_len, rep);
1329  }
1330  return rrep;
1331 }
1332 
1333 /**
1334  * \retval 1 data was added to the hash
1335  * \retval 0 data was not added to the hash as it is already there
1336  * \retval -1 failed to add data to the hash
1337  */
1338 static int DatasetAddString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1339 {
1340  if (set == NULL)
1341  return -1;
1342 
1343  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1344  .rep.value = 0 };
1345  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1346  if (res.data) {
1347  DatasetUnlockData(res.data);
1348  return res.is_new ? 1 : 0;
1349  }
1350  return -1;
1351 }
1352 
1353 /**
1354  * \retval 1 data was added to the hash
1355  * \retval 0 data was not added to the hash as it is already there
1356  * \retval -1 failed to add data to the hash
1357  */
1358 static int DatasetAddStringwRep(
1359  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1360 {
1361  if (set == NULL)
1362  return -1;
1363 
1364  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1365  .rep = *rep };
1366  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1367  if (res.data) {
1368  DatasetUnlockData(res.data);
1369  return res.is_new ? 1 : 0;
1370  }
1371  return -1;
1372 }
1373 
1374 static int DatasetAddIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
1375 {
1376  if (set == NULL) {
1377  return -1;
1378  }
1379 
1380  if (data_len < 4) {
1381  return -2;
1382  }
1383 
1384  IPv4Type lookup = { .rep.value = 0 };
1385  memcpy(lookup.ipv4, data, 4);
1386  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1387  if (res.data) {
1388  DatasetUnlockData(res.data);
1389  return res.is_new ? 1 : 0;
1390  }
1391  return -1;
1392 }
1393 
1394 static int DatasetAddIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
1395 {
1396  if (set == NULL) {
1397  return -1;
1398  }
1399 
1400  if (data_len != 16) {
1401  return -2;
1402  }
1403 
1404  IPv6Type lookup = { .rep.value = 0 };
1405  memcpy(lookup.ipv6, data, 16);
1406  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1407  if (res.data) {
1408  DatasetUnlockData(res.data);
1409  return res.is_new ? 1 : 0;
1410  }
1411  return -1;
1412 }
1413 
1414 static int DatasetAddIPv4wRep(
1415  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1416 {
1417  if (set == NULL)
1418  return -1;
1419 
1420  if (data_len < 4)
1421  return -2;
1422 
1423  IPv4Type lookup = { .rep = *rep };
1424  memcpy(lookup.ipv4, data, 4);
1425  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1426  if (res.data) {
1427  DatasetUnlockData(res.data);
1428  return res.is_new ? 1 : 0;
1429  }
1430  return -1;
1431 }
1432 
1433 static int DatasetAddIPv6wRep(
1434  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1435 {
1436  if (set == NULL)
1437  return -1;
1438 
1439  if (data_len != 16)
1440  return -2;
1441 
1442  IPv6Type lookup = { .rep = *rep };
1443  memcpy(lookup.ipv6, data, 16);
1444  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1445  if (res.data) {
1446  DatasetUnlockData(res.data);
1447  return res.is_new ? 1 : 0;
1448  }
1449  return -1;
1450 }
1451 
1452 static int DatasetAddMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1453 {
1454  if (set == NULL)
1455  return -1;
1456 
1457  if (data_len != 16)
1458  return -2;
1459 
1460  Md5Type lookup = { .rep.value = 0 };
1461  memcpy(lookup.md5, data, 16);
1462  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1463  if (res.data) {
1464  DatasetUnlockData(res.data);
1465  return res.is_new ? 1 : 0;
1466  }
1467  return -1;
1468 }
1469 
1470 static int DatasetAddMd5wRep(
1471  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1472 {
1473  if (set == NULL)
1474  return -1;
1475 
1476  if (data_len != 16)
1477  return -2;
1478 
1479  Md5Type lookup = { .rep = *rep };
1480  memcpy(lookup.md5, data, 16);
1481  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1482  if (res.data) {
1483  DatasetUnlockData(res.data);
1484  return res.is_new ? 1 : 0;
1485  }
1486  return -1;
1487 }
1488 
1489 static int DatasetAddSha256wRep(
1490  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1491 {
1492  if (set == NULL)
1493  return -1;
1494 
1495  if (data_len != 32)
1496  return -2;
1497 
1498  Sha256Type lookup = { .rep = *rep };
1499  memcpy(lookup.sha256, data, 32);
1500  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1501  if (res.data) {
1502  DatasetUnlockData(res.data);
1503  return res.is_new ? 1 : 0;
1504  }
1505  return -1;
1506 }
1507 
1508 static int DatasetAddSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1509 {
1510  if (set == NULL)
1511  return -1;
1512 
1513  if (data_len != 32)
1514  return -2;
1515 
1516  Sha256Type lookup = { .rep.value = 0 };
1517  memcpy(lookup.sha256, data, 32);
1518  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1519  if (res.data) {
1520  DatasetUnlockData(res.data);
1521  return res.is_new ? 1 : 0;
1522  }
1523  return -1;
1524 }
1525 
1526 int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
1527 {
1528  if (set == NULL)
1529  return -1;
1530 
1531  switch (set->type) {
1532  case DATASET_TYPE_STRING:
1533  return DatasetAddString(set, data, data_len);
1534  case DATASET_TYPE_MD5:
1535  return DatasetAddMd5(set, data, data_len);
1536  case DATASET_TYPE_SHA256:
1537  return DatasetAddSha256(set, data, data_len);
1538  case DATASET_TYPE_IPV4:
1539  return DatasetAddIPv4(set, data, data_len);
1540  case DATASET_TYPE_IPV6:
1541  return DatasetAddIPv6(set, data, data_len);
1542  }
1543  return -1;
1544 }
1545 
1546 static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
1547  DataRepType *rep)
1548 {
1549  if (set == NULL)
1550  return -1;
1551 
1552  switch (set->type) {
1553  case DATASET_TYPE_STRING:
1554  return DatasetAddStringwRep(set, data, data_len, rep);
1555  case DATASET_TYPE_MD5:
1556  return DatasetAddMd5wRep(set, data, data_len, rep);
1557  case DATASET_TYPE_SHA256:
1558  return DatasetAddSha256wRep(set, data, data_len, rep);
1559  case DATASET_TYPE_IPV4:
1560  return DatasetAddIPv4wRep(set, data, data_len, rep);
1561  case DATASET_TYPE_IPV6:
1562  return DatasetAddIPv6wRep(set, data, data_len, rep);
1563  }
1564  return -1;
1565 }
1566 
1567 typedef int (*DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len);
1568 
1569 static int DatasetOpSerialized(Dataset *set, const char *string, DatasetOpFunc DatasetOpString,
1570  DatasetOpFunc DatasetOpMd5, DatasetOpFunc DatasetOpSha256, DatasetOpFunc DatasetOpIPv4,
1571  DatasetOpFunc DatasetOpIPv6)
1572 {
1573  if (set == NULL)
1574  return -1;
1575 
1576  switch (set->type) {
1577  case DATASET_TYPE_STRING: {
1578  // coverity[alloc_strlen : FALSE]
1579  uint8_t decoded[strlen(string)];
1580  uint32_t consumed = 0, num_decoded = 0;
1581  Base64Ecode code = DecodeBase64(decoded, strlen(string), (const uint8_t *)string,
1582  strlen(string), &consumed, &num_decoded, BASE64_MODE_STRICT);
1583  if (code == BASE64_ECODE_ERR) {
1584  return -2;
1585  }
1586 
1587  return DatasetOpString(set, decoded, num_decoded);
1588  }
1589  case DATASET_TYPE_MD5: {
1590  if (strlen(string) != 32)
1591  return -2;
1592  uint8_t hash[16];
1593  if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0)
1594  return -2;
1595  return DatasetOpMd5(set, hash, 16);
1596  }
1597  case DATASET_TYPE_SHA256: {
1598  if (strlen(string) != 64)
1599  return -2;
1600  uint8_t hash[32];
1601  if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0)
1602  return -2;
1603  return DatasetOpSha256(set, hash, 32);
1604  }
1605  case DATASET_TYPE_IPV4: {
1606  struct in_addr in;
1607  if (inet_pton(AF_INET, string, &in) != 1)
1608  return -2;
1609  return DatasetOpIPv4(set, (uint8_t *)&in.s_addr, 4);
1610  }
1611  case DATASET_TYPE_IPV6: {
1612  struct in_addr in;
1613  if (inet_pton(AF_INET6, string, &in) != 1)
1614  return -2;
1615  return DatasetOpIPv6(set, (uint8_t *)&in.s_addr, 16);
1616  }
1617  }
1618  return -1;
1619 }
1620 
1621 /** \brief add serialized data to set
1622  * \retval int 1 added
1623  * \retval int 0 already in hash
1624  * \retval int -1 API error (not added)
1625  * \retval int -2 DATA error
1626  */
1627 int DatasetAddSerialized(Dataset *set, const char *string)
1628 {
1629  return DatasetOpSerialized(set, string, DatasetAddString, DatasetAddMd5, DatasetAddSha256,
1630  DatasetAddIPv4, DatasetAddIPv6);
1631 }
1632 
1633 /** \brief add serialized data to set
1634  * \retval int 1 added
1635  * \retval int 0 already in hash
1636  * \retval int -1 API error (not added)
1637  * \retval int -2 DATA error
1638  */
1639 int DatasetLookupSerialized(Dataset *set, const char *string)
1640 {
1641  return DatasetOpSerialized(set, string, DatasetLookupString, DatasetLookupMd5,
1642  DatasetLookupSha256, DatasetLookupIPv4, DatasetLookupIPv6);
1643 }
1644 
1645 /**
1646  * \retval 1 data was removed from the hash
1647  * \retval 0 data not removed (busy)
1648  * \retval -1 data not found
1649  */
1650 static int DatasetRemoveString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1651 {
1652  if (set == NULL)
1653  return -1;
1654 
1655  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1656  .rep.value = 0 };
1657  return THashRemoveFromHash(set->hash, &lookup);
1658 }
1659 
1660 static int DatasetRemoveIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
1661 {
1662  if (set == NULL)
1663  return -1;
1664 
1665  if (data_len != 4)
1666  return -2;
1667 
1668  IPv4Type lookup = { .rep.value = 0 };
1669  memcpy(lookup.ipv4, data, 4);
1670  return THashRemoveFromHash(set->hash, &lookup);
1671 }
1672 
1673 static int DatasetRemoveIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
1674 {
1675  if (set == NULL)
1676  return -1;
1677 
1678  if (data_len != 16)
1679  return -2;
1680 
1681  IPv6Type lookup = { .rep.value = 0 };
1682  memcpy(lookup.ipv6, data, 16);
1683  return THashRemoveFromHash(set->hash, &lookup);
1684 }
1685 
1686 static int DatasetRemoveMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1687 {
1688  if (set == NULL)
1689  return -1;
1690 
1691  if (data_len != 16)
1692  return -2;
1693 
1694  Md5Type lookup = { .rep.value = 0 };
1695  memcpy(lookup.md5, data, 16);
1696  return THashRemoveFromHash(set->hash, &lookup);
1697 }
1698 
1699 static int DatasetRemoveSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1700 {
1701  if (set == NULL)
1702  return -1;
1703 
1704  if (data_len != 32)
1705  return -2;
1706 
1707  Sha256Type lookup = { .rep.value = 0 };
1708  memcpy(lookup.sha256, data, 32);
1709  return THashRemoveFromHash(set->hash, &lookup);
1710 }
1711 
1712 /** \brief remove serialized data from set
1713  * \retval int 1 removed
1714  * \retval int 0 found but busy (not removed)
1715  * \retval int -1 API error (not removed)
1716  * \retval int -2 DATA error */
1717 int DatasetRemoveSerialized(Dataset *set, const char *string)
1718 {
1719  return DatasetOpSerialized(set, string, DatasetRemoveString, DatasetRemoveMd5,
1720  DatasetRemoveSha256, DatasetRemoveIPv4, DatasetRemoveIPv6);
1721 }
util-byte.h
sets_lock
SCMutex sets_lock
Definition: datasets.c:43
StringType::rep
DataRepType rep
Definition: datasets-string.h:31
len
uint8_t len
Definition: app-layer-dnp3.h:2
datasets-string.h
DataRepResultType::rep
DataRepType rep
Definition: datasets-reputation.h:33
THashDataGetResult::data
THashData * data
Definition: util-thash.h:205
datasets-md5.h
Dataset::name
char name[DATASET_NAME_MAX_LEN+1]
Definition: datasets.h:41
ConfNode_::val
char * val
Definition: conf.h:34
BASE64_ECODE_ERR
@ BASE64_ECODE_ERR
Definition: util-base64.h:73
Dataset::id
uint32_t id
Definition: datasets.h:43
Dataset::save
char save[PATH_MAX]
Definition: datasets.h:49
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:269
ParseSizeStringU64
int ParseSizeStringU64(const char *size, uint64_t *res)
Definition: util-misc.c:198
next
struct HtpBodyChunk_ * next
Definition: app-layer-htp.h:0
datasets-sha256.h
IPv6Compare
bool IPv6Compare(void *a, void *b)
Definition: datasets-ipv6.c:40
THashRemoveFromHash
int THashRemoveFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:775
TYPE_STATE
@ TYPE_STATE
Definition: datasets.c:581
Md5Type
Definition: datasets-md5.h:29
Dataset::hash
THashTableContext * hash
Definition: datasets.h:46
ConfGetNode
ConfNode * ConfGetNode(const char *name)
Get a ConfNode by name.
Definition: conf.c:181
IPv6Hash
uint32_t IPv6Hash(void *s)
Definition: datasets-ipv6.c:48
Sha256Type::sha256
uint8_t sha256[32]
Definition: datasets-sha256.h:30
Md5Type::rep
DataRepType rep
Definition: datasets-md5.h:31
DataRepResultType::found
bool found
Definition: datasets-reputation.h:32
PrintHexString
void PrintHexString(char *str, size_t size, uint8_t *buf, size_t buf_len)
Definition: util-print.c:298
Dataset::type
enum DatasetTypes type
Definition: datasets.h:42
TAILQ_FOREACH
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:252
THashConsolidateMemcap
void THashConsolidateMemcap(THashTableContext *ctx)
Definition: util-thash.c:339
SCMutexLock
#define SCMutexLock(mut)
Definition: threads-debug.h:117
util-base64.h
DATASET_TYPE_SHA256
@ DATASET_TYPE_SHA256
Definition: datasets.h:34
Sha256Type::rep
DataRepType rep
Definition: datasets-sha256.h:31
SCMUTEX_INITIALIZER
#define SCMUTEX_INITIALIZER
Definition: threads-debug.h:121
datasets-reputation.h
ConfigGetDataDirectory
const char * ConfigGetDataDirectory(void)
Definition: util-conf.c:84
Md5Type::md5
uint8_t md5[16]
Definition: datasets-md5.h:30
DATASET_TYPE_IPV6
@ DATASET_TYPE_IPV6
Definition: datasets.h:36
hashsize
#define hashsize(n)
Definition: util-hash-lookup3.c:67
Md5StrCompare
bool Md5StrCompare(void *a, void *b)
Definition: datasets-md5.c:41
DatasetLookupSerialized
int DatasetLookupSerialized(Dataset *set, const char *string)
look up serialized data in set
Definition: datasets.c:1639
strlcpy
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: util-strlcpyu.c:43
DataRepResultType
Definition: datasets-reputation.h:31
Md5StrHash
uint32_t Md5StrHash(void *s)
Definition: datasets-md5.c:49
DatasetGet
Dataset * DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load, uint64_t memcap, uint32_t hashsize)
Definition: datasets.c:632
ConfGet
int ConfGet(const char *name, const char **vptr)
Retrieve the value of a configuration node.
Definition: conf.c:335
StringSet
int StringSet(void *dst, void *src)
Definition: datasets-string.c:61
datasets.h
IPv6Set
int IPv6Set(void *dst, void *src)
Definition: datasets-ipv6.c:31
util-debug.h
TYPE_LOAD
@ TYPE_LOAD
Definition: datasets.c:582
type
uint8_t type
Definition: decode-icmpv4.h:0
DatasetAdd
int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1526
strlcat
size_t strlcat(char *, const char *src, size_t siz)
Definition: util-strlcatu.c:45
StringAsBase64
int StringAsBase64(const void *s, char *out, size_t out_size)
Definition: datasets-string.c:46
SCMutexUnlock
#define SCMutexUnlock(mut)
Definition: threads-debug.h:119
datasets-ipv6.h
IPv6Type::ipv6
uint8_t ipv6[16]
Definition: datasets-ipv6.h:30
DATASET_TYPE_NOTSET
#define DATASET_TYPE_NOTSET
Definition: datasets.h:31
IPv6Type::rep
DataRepType rep
Definition: datasets-ipv6.h:31
util-print.h
DatasetPostReloadCleanup
void DatasetPostReloadCleanup(void)
Definition: datasets.c:802
PrintInet
const char * PrintInet(int af, const void *src, char *dst, socklen_t size)
Definition: util-print.c:274
StringParseU16RangeCheck
int StringParseU16RangeCheck(uint16_t *res, int base, size_t len, const char *str, uint16_t min, uint16_t max)
Definition: util-byte.c:433
DatasetOpFunc
int(* DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1567
datasets-ipv4.h
SCLogWarning
#define SCLogWarning(...)
Macro used to log WARNING messages.
Definition: util-debug.h:249
THashInit
THashTableContext * THashInit(const char *cnf_prefix, size_t data_size, int(*DataSet)(void *, void *), void(*DataFree)(void *), uint32_t(*DataHash)(void *), bool(*DataCompare)(void *, void *), bool reset_memcap, uint64_t memcap, uint32_t hashsize)
Definition: util-thash.c:295
Sha256StrSet
int Sha256StrSet(void *dst, void *src)
Definition: datasets-sha256.c:32
DatasetsDestroy
void DatasetsDestroy(void)
Definition: datasets.c:961
THashDataGetResult
Definition: util-thash.h:204
StringType
Definition: datasets-string.h:29
IPv4Set
int IPv4Set(void *dst, void *src)
Definition: datasets-ipv4.c:31
DatasetsSave
void DatasetsSave(void)
Definition: datasets.c:1039
conf.h
IPv6Type
Definition: datasets-ipv6.h:29
BASE64_MODE_STRICT
@ BASE64_MODE_STRICT
Definition: util-base64.h:52
DatasetLookup
int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
see if data is part of the set
Definition: datasets.c:1291
DATASET_TYPE_IPV4
@ DATASET_TYPE_IPV4
Definition: datasets.h:35
StringType::ptr
uint8_t * ptr
Definition: datasets-string.h:32
DatasetRemoveSerialized
int DatasetRemoveSerialized(Dataset *set, const char *string)
remove serialized data from set
Definition: datasets.c:1717
Sha256StrHash
uint32_t Sha256StrHash(void *s)
Definition: datasets-sha256.c:49
g_system
bool g_system
Definition: suricata.c:186
ConfNodeLookupChild
ConfNode * ConfNodeLookupChild(const ConfNode *node, const char *name)
Lookup a child configuration node by name.
Definition: conf.c:780
THashShutdown
void THashShutdown(THashTableContext *ctx)
shutdown the flow engine
Definition: util-thash.c:347
DatasetTypes
DatasetTypes
Definition: datasets.h:30
Dataset::next
struct Dataset * next
Definition: datasets.h:51
THashData_::data
void * data
Definition: util-thash.h:92
util-conf.h
Sha256Type
Definition: datasets-sha256.h:29
Sha256StrFree
void Sha256StrFree(void *s)
Definition: datasets-sha256.c:61
THashData_
Definition: util-thash.h:85
IPv4Free
void IPv4Free(void *s)
Definition: datasets-ipv4.c:60
IPv4Hash
uint32_t IPv4Hash(void *s)
Definition: datasets-ipv4.c:48
suricata-common.h
util-path.h
StringHash
uint32_t StringHash(void *s)
Definition: datasets-string.c:88
FatalErrorOnInit
#define FatalErrorOnInit(...)
Fatal error IF we're starting up, and configured to consider errors to be fatal errors.
Definition: util-debug.h:511
DATASET_NAME_MAX_LEN
#define DATASET_NAME_MAX_LEN
Definition: datasets.h:39
ConfNode_::name
char * name
Definition: conf.h:33
PathIsAbsolute
int PathIsAbsolute(const char *path)
Check if a path is absolute.
Definition: util-path.c:44
Md5StrSet
int Md5StrSet(void *dst, void *src)
Definition: datasets-md5.c:32
StringCompare
bool StringCompare(void *a, void *b)
Definition: datasets-string.c:77
THashGetFromHash
struct THashDataGetResult THashGetFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:530
THashLookupFromHash
THashData * THashLookupFromHash(THashTableContext *ctx, void *data)
look up data in the hash
Definition: util-thash.c:640
IPv4Type
Definition: datasets-ipv4.h:29
DataRepType::value
uint16_t value
Definition: datasets-reputation.h:28
ParseSizeStringU32
int ParseSizeStringU32(const char *size, uint32_t *res)
Definition: util-misc.c:181
THashDecrUsecnt
#define THashDecrUsecnt(h)
Definition: util-thash.h:170
IPv4Compare
bool IPv4Compare(void *a, void *b)
Definition: datasets-ipv4.c:40
DatasetFind
Dataset * DatasetFind(const char *name, enum DatasetTypes type)
look for set by name without creating it
Definition: datasets.c:618
SCLogConfig
struct SCLogConfig_ SCLogConfig
Holds the config state used by the logging api.
DatasetsInit
int DatasetsInit(void)
Definition: datasets.c:849
str
#define str(s)
Definition: suricata-common.h:280
DatasetGetTypeFromString
enum DatasetTypes DatasetGetTypeFromString(const char *s)
Definition: datasets.c:58
SCLogError
#define SCLogError(...)
Macro used to log ERROR messages.
Definition: util-debug.h:261
THashWalk
int THashWalk(THashTableContext *ctx, THashFormatFunc FormatterFunc, THashOutputFunc OutputterFunc, void *output_ctx)
Walk the hash.
Definition: util-thash.c:381
SCFree
#define SCFree(p)
Definition: util-mem.h:61
ConfNode_
Definition: conf.h:32
Dataset::hidden
bool hidden
Definition: datasets.h:45
DatasetReload
void DatasetReload(void)
Definition: datasets.c:780
DatasetGetPathType
DatasetGetPathType
Definition: datasets.c:580
Sha256StrCompare
bool Sha256StrCompare(void *a, void *b)
Definition: datasets-sha256.c:41
DATASET_TYPE_MD5
@ DATASET_TYPE_MD5
Definition: datasets.h:33
DATASET_TYPE_STRING
@ DATASET_TYPE_STRING
Definition: datasets.h:32
THashDataGetResult::is_new
bool is_new
Definition: util-thash.h:206
IPv6Free
void IPv6Free(void *s)
Definition: datasets-ipv6.c:60
suricata.h
IPv4Type::ipv4
uint8_t ipv4[4]
Definition: datasets-ipv4.h:30
DatasetAddSerialized
int DatasetAddSerialized(Dataset *set, const char *string)
add serialized data to set
Definition: datasets.c:1627
Dataset
Definition: datasets.h:40
Dataset::from_yaml
bool from_yaml
Definition: datasets.h:44
IPv4Type::rep
DataRepType rep
Definition: datasets-ipv4.h:31
util-misc.h
util-thash.h
Dataset::load
char load[PATH_MAX]
Definition: datasets.h:48
SCCalloc
#define SCCalloc(nm, sz)
Definition: util-mem.h:53
DataRepType
Definition: datasets-reputation.h:27
code
uint8_t code
Definition: decode-icmpv4.h:1
Md5StrFree
void Md5StrFree(void *s)
Definition: datasets-md5.c:61
SCMutex
#define SCMutex
Definition: threads-debug.h:114
StringFree
void StringFree(void *s)
Definition: datasets-string.c:102
Base64Ecode
Base64Ecode
Definition: util-base64.h:72
DecodeBase64
Base64Ecode DecodeBase64(uint8_t *dest, uint32_t dest_size, const uint8_t *src, uint32_t len, uint32_t *consumed_bytes, uint32_t *decoded_bytes, Base64Mode mode)
Decodes a base64-encoded string buffer into an ascii-encoded byte buffer.
Definition: util-base64.c:94
DatasetLookupwRep
DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
Definition: datasets.c:1311