suricata
datasets.c
Go to the documentation of this file.
1 /* Copyright (C) 2017-2024 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Victor Julien <victor@inliniac.net>
22  */
23 
24 #include "suricata-common.h"
25 #include "suricata.h"
26 #include "rust.h"
27 #include "conf.h"
28 #include "datasets.h"
29 #include "datasets-string.h"
30 #include "datasets-ipv4.h"
31 #include "datasets-ipv6.h"
32 #include "datasets-md5.h"
33 #include "datasets-sha256.h"
34 #include "datasets-reputation.h"
35 #include "util-conf.h"
36 #include "util-thash.h"
37 #include "util-print.h"
38 #include "util-byte.h"
39 #include "util-misc.h"
40 #include "util-path.h"
41 #include "util-debug.h"
42 #include "util-validate.h"
43 
45 static Dataset *sets = NULL;
46 static uint32_t set_ids = 0;
47 
48 uint32_t dataset_max_one_hashsize = 65536;
49 uint32_t dataset_max_total_hashsize = 16777216;
50 uint32_t dataset_used_hashsize = 0;
51 
52 int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep);
53 
54 static inline void DatasetUnlockData(THashData *d)
55 {
56  (void) THashDecrUsecnt(d);
57  THashDataUnlock(d);
58 }
59 static bool DatasetIsStatic(const char *save, const char *load);
60 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize);
61 
62 enum DatasetTypes DatasetGetTypeFromString(const char *s)
63 {
64  if (strcasecmp("md5", s) == 0)
65  return DATASET_TYPE_MD5;
66  if (strcasecmp("sha256", s) == 0)
67  return DATASET_TYPE_SHA256;
68  if (strcasecmp("string", s) == 0)
69  return DATASET_TYPE_STRING;
70  if (strcasecmp("ipv4", s) == 0)
71  return DATASET_TYPE_IPV4;
72  if (strcasecmp("ip", s) == 0)
73  return DATASET_TYPE_IPV6;
74  return DATASET_TYPE_NOTSET;
75 }
76 
77 static Dataset *DatasetAlloc(const char *name)
78 {
79  Dataset *set = SCCalloc(1, sizeof(*set));
80  if (set) {
81  set->id = set_ids++;
82  }
83  return set;
84 }
85 
86 static Dataset *DatasetSearchByName(const char *name)
87 {
88  Dataset *set = sets;
89  while (set) {
90  if (strcasecmp(name, set->name) == 0 && !set->hidden) {
91  return set;
92  }
93  set = set->next;
94  }
95  return NULL;
96 }
97 
/**
 * \brief decode a single ASCII hex digit
 * \retval 0..15 value of the digit
 * \retval -1 \a c is not one of 0-9, a-f, A-F
 */
static int DatasetHexNibble(uint8_t c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}

/**
 * \brief convert a hex string into raw bytes
 *
 * Every input character has to be a hex digit. Whitespace, signs, a "0x"
 * prefix or any other character make the conversion fail instead of being
 * decoded as zero.
 *
 * \param in   hex characters, not necessarily NUL terminated
 * \param ins  number of characters in \a in; must be even and at least 2
 * \param out  receives the decoded bytes; left untouched on failure
 * \param outs size of \a out; must be exactly \a ins / 2
 *
 * \retval 0 success
 * \retval -1 bad length or a non-hex character in \a in (no message is
 *            logged here; reporting is left to the caller)
 */
static int HexToRaw(const uint8_t *in, size_t ins, uint8_t *out, size_t outs)
{
    if (ins < 2 || ins % 2 != 0 || outs != ins / 2)
        return -1;

    /* validate everything up front so `out` is never partially written */
    for (size_t i = 0; i < ins; i++) {
        if (DatasetHexNibble(in[i]) < 0)
            return -1;
    }

    for (size_t x = 0; x < outs; x++) {
        const int hi = DatasetHexNibble(in[2 * x]);
        const int lo = DatasetHexNibble(in[2 * x + 1]);
        out[x] = (uint8_t)((hi << 4) | lo);
    }
    return 0;
}
127 
128 static int DatasetLoadIPv4(Dataset *set)
129 {
130  if (strlen(set->load) == 0)
131  return 0;
132 
133  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
134  const char *fopen_mode = "r";
135  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
136  fopen_mode = "a+";
137  }
138 
139  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv4);
140  if (retval == -2) {
141  FatalErrorOnInit("dataset %s could not be processed", set->name);
142  } else if (retval == -1) {
143  return -1;
144  }
145 
147 
148  return 0;
149 }
150 
151 static int ParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6)
152 {
153  /* Checking IPv6 case */
154  char *got_colon = strchr(line, ':');
155  if (got_colon) {
156  uint32_t ip6addr[4];
157  if (inet_pton(AF_INET6, line, in6) != 1) {
158  FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
159  return -1;
160  }
161  memcpy(&ip6addr, in6->s6_addr, sizeof(ip6addr));
162  /* IPv4 in IPv6 notation needs transformation to internal Suricata storage */
163  if (ip6addr[0] == 0 && ip6addr[1] == 0 && ip6addr[2] == 0xFFFF0000) {
164  ip6addr[0] = ip6addr[3];
165  ip6addr[2] = 0;
166  ip6addr[3] = 0;
167  memcpy(in6, ip6addr, sizeof(struct in6_addr));
168  }
169  } else {
170  /* IPv4 case */
171  struct in_addr in;
172  if (inet_pton(AF_INET, line, &in) != 1) {
173  FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
174  return -1;
175  }
176  memset(in6, 0, sizeof(struct in6_addr));
177  memcpy(in6, &in, sizeof(struct in_addr));
178  }
179  return 0;
180 }
181 
182 static int DatasetLoadIPv6(Dataset *set)
183 {
184  if (strlen(set->load) == 0)
185  return 0;
186 
187  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
188  const char *fopen_mode = "r";
189  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
190  fopen_mode = "a+";
191  }
192 
193  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv6);
194  if (retval == -2) {
195  FatalErrorOnInit("dataset %s could not be processed", set->name);
196  } else if (retval == -1) {
197  return -1;
198  }
199 
201 
202  return 0;
203 }
204 
205 static int DatasetLoadMd5(Dataset *set)
206 {
207  if (strlen(set->load) == 0)
208  return 0;
209 
210  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
211  const char *fopen_mode = "r";
212  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
213  fopen_mode = "a+";
214  }
215 
216  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSMd5);
217  if (retval == -2) {
218  FatalErrorOnInit("dataset %s could not be processed", set->name);
219  } else if (retval == -1) {
220  return -1;
221  }
222 
224 
225  return 0;
226 }
227 
228 static int DatasetLoadSha256(Dataset *set)
229 {
230  if (strlen(set->load) == 0)
231  return 0;
232 
233  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
234  const char *fopen_mode = "r";
235  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
236  fopen_mode = "a+";
237  }
238 
239  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSSha256);
240  if (retval == -2) {
241  FatalErrorOnInit("dataset %s could not be processed", set->name);
242  } else if (retval == -1) {
243  return -1;
244  }
245 
247 
248  return 0;
249 }
250 
251 static int DatasetLoadString(Dataset *set)
252 {
253  if (strlen(set->load) == 0)
254  return 0;
255 
256  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
257 
258  const char *fopen_mode = "r";
259  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
260  fopen_mode = "a+";
261  }
262 
263  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSString);
264  if (retval == -2) {
265  FatalErrorOnInit("dataset %s could not be processed", set->name);
266  } else if (retval == -1) {
267  return -1;
268  }
269 
271 
272  return 0;
273 }
274 
275 extern bool g_system;
276 
280 };
281 
282 static void DatasetGetPath(const char *in_path,
283  char *out_path, size_t out_size, enum DatasetGetPathType type)
284 {
285  char path[PATH_MAX];
286  struct stat st;
287 
288  if (PathIsAbsolute(in_path)) {
289  strlcpy(path, in_path, sizeof(path));
290  strlcpy(out_path, path, out_size);
291  return;
292  }
293 
294  const char *data_dir = ConfigGetDataDirectory();
295  if (stat(data_dir, &st) != 0) {
296  SCLogDebug("data-dir '%s': %s", data_dir, strerror(errno));
297  return;
298  }
299 
300  snprintf(path, sizeof(path), "%s/%s", data_dir, in_path); // TODO WINDOWS
301 
302  if (type == TYPE_LOAD) {
303  if (stat(path, &st) != 0) {
304  SCLogDebug("path %s: %s", path, strerror(errno));
305  if (!g_system) {
306  snprintf(path, sizeof(path), "%s", in_path);
307  }
308  }
309  }
310  strlcpy(out_path, path, out_size);
311  SCLogDebug("in_path \'%s\' => \'%s\'", in_path, out_path);
312 }
313 
314 /** \brief look for set by name without creating it
 *
 *  The search only considers sets that are not hidden. A set that carries
 *  the requested name but a different type is reported as not found.
 *
 *  NOTE(review): the line holding this function's signature is not part of
 *  this listing. The body reads `name` and `type`, presumably its two
 *  parameters -- restore the signature from the upstream file. The listing
 *  also skips further line numbers inside the body; confirm that no
 *  statements were lost.
 *
 *  \retval NULL no visible set with that name, or the type differs
 */
316 {
318  Dataset *set = DatasetSearchByName(name);
319  if (set) {
 /* right name, wrong type: treated the same as "does not exist" */
320  if (set->type != type) {
322  return NULL;
323  }
324  }
326  return set;
327 }
328 
/**
 * \brief check a requested hash size against the configured limits
 *
 * A limit whose value is 0 is not enforced.
 *
 * NOTE(review): in this listing the second half of the 'total-hashsizes'
 * condition and the arguments of its SCLogError() call are missing (the
 * statement ends in a dangling `&&`). Restore both from the upstream file
 * before building.
 *
 * \param name      dataset name, used in the error messages only
 * \param hash_size hash size requested for that dataset
 *
 * \retval true  the size is acceptable
 * \retval false a limit would be exceeded; an error has been logged
 */
329 static bool DatasetCheckHashsize(const char *name, uint32_t hash_size)
330 {
 /* per-dataset limit */
331  if (dataset_max_one_hashsize > 0 && hash_size > dataset_max_one_hashsize) {
332  SCLogError("hashsize %u in dataset '%s' exceeds configured 'single-hashsize' limit (%u)",
333  hash_size, name, dataset_max_one_hashsize);
334  return false;
335  }
336  // we cannot underflow as we know from conf loading that
337  // dataset_max_total_hashsize >= dataset_max_one_hashsize if dataset_max_total_hashsize > 0
 /* limit across all datasets */
338  if (dataset_max_total_hashsize > 0 &&
340  SCLogError("hashsize %u in dataset '%s' exceeds configured 'total-hashsizes' limit (%u, in "
341  "use %u)",
343  return false;
344  }
345 
346  return true;
347 }
348 
349 static void DatasetUpdateHashsize(const char *name, uint32_t hash_size)
350 {
351  if (dataset_max_total_hashsize > 0) {
352  dataset_used_hashsize += hash_size;
353  SCLogDebug("set %s adding with hash_size %u", name, hash_size);
354  }
355 }
356 
/**
 * \brief get the dataset called \a name, creating and loading it if needed
 *
 * If a visible set with that name exists it is returned, provided its type
 * matches \a type (unless \a type is DATASET_TYPE_NOTSET) and, when the
 * caller passes a non-empty \a save or \a load, both match the stored
 * values. Otherwise a new set is allocated, its hash is created for the
 * requested type, the load file is read and the set is put at the head of
 * the global list.
 *
 * NOTE(review): this listing skips several source line numbers inside this
 * function (e.g. 366, 373, 385, 391, 396, 496, 499, 508). Confirm against
 * the upstream file that no statements were lost before relying on it.
 *
 * \param name     set name, at most DATASET_NAME_MAX_LEN characters
 * \param type     requested type; DATASET_TYPE_NOTSET only matches an
 *                 existing set
 * \param save     save path, may be NULL or empty
 * \param load     load path, may be NULL or empty
 * \param memcap   memcap for a new set; 0 selects the configured default
 * \param hashsize hash size for a new set; 0 selects the configured default
 *
 * \retval NULL name too long, definition mismatch, limit exceeded,
 *              allocation/hash init/load failure, or memcap reached
 */
357 Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load,
358  uint64_t memcap, uint32_t hashsize)
359 {
360  uint64_t default_memcap = 0;
361  uint32_t default_hashsize = 0;
362  if (strlen(name) > DATASET_NAME_MAX_LEN) {
363  return NULL;
364  }
365 
 /* reuse an existing visible set if its definition is compatible */
367  Dataset *set = DatasetSearchByName(name);
368  if (set) {
369  if (type != DATASET_TYPE_NOTSET && set->type != type) {
370  SCLogError("dataset %s already "
371  "exists and is of type %u",
372  set->name, set->type);
374  return NULL;
375  }
376 
377  if ((save == NULL || strlen(save) == 0) &&
378  (load == NULL || strlen(load) == 0)) {
379  // OK, rule keyword doesn't have to set state/load,
380  // even when yaml set has set it.
381  } else {
 /* NOTE(review): in the two mismatch branches below `save` / `load`
  * can be NULL when handed to the "%s" conversion -- confirm the
  * logger tolerates that. */
382  if ((save == NULL && strlen(set->save) > 0) ||
383  (save != NULL && strcmp(set->save, save) != 0)) {
384  SCLogError("dataset %s save mismatch: %s != %s", set->name, set->save, save);
386  return NULL;
387  }
388  if ((load == NULL && strlen(set->load) > 0) ||
389  (load != NULL && strcmp(set->load, load) != 0)) {
390  SCLogError("dataset %s load mismatch: %s != %s", set->name, set->load, load);
392  return NULL;
393  }
394  }
395 
397  return set;
398  } else {
 /* without a type there is nothing to create */
399  if (type == DATASET_TYPE_NOTSET) {
400  SCLogError("dataset %s not defined", name);
401  goto out_err;
402  }
403  }
404 
 /* from here on a new set is created */
405  GetDefaultMemcap(&default_memcap, &default_hashsize);
406  if (hashsize == 0) {
407  hashsize = default_hashsize;
408  }
409 
410  if (!DatasetCheckHashsize(name, hashsize)) {
411  goto out_err;
412  }
413 
414  set = DatasetAlloc(name);
415  if (set == NULL) {
416  goto out_err;
417  }
418 
419  strlcpy(set->name, name, sizeof(set->name));
420  set->type = type;
421  if (save && strlen(save)) {
422  strlcpy(set->save, save, sizeof(set->save));
423  SCLogDebug("name %s save '%s'", name, set->save);
424  }
425  if (load && strlen(load)) {
426  strlcpy(set->load, load, sizeof(set->load));
427  SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load);
428  }
429 
430  char cnf_name[128];
431  snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
432 
 /* create the type specific hash, then load the file into it.
  * NOTE(review): the flag derived from `load != NULL` is 1 for an empty
  * but non-NULL string as well -- confirm that is intended. */
433  switch (type) {
434  case DATASET_TYPE_MD5:
435  set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet, Md5StrFree, Md5StrHash,
436  Md5StrCompare, NULL, NULL, load != NULL ? 1 : 0,
437  memcap > 0 ? memcap : default_memcap, hashsize);
438  if (set->hash == NULL)
439  goto out_err;
440  if (DatasetLoadMd5(set) < 0)
441  goto out_err;
442  break;
443  case DATASET_TYPE_STRING:
444  set->hash = THashInit(cnf_name, sizeof(StringType), StringSet, StringFree, StringHash,
445  StringCompare, NULL, StringGetLength, load != NULL ? 1 : 0,
446  memcap > 0 ? memcap : default_memcap, hashsize);
447  if (set->hash == NULL)
448  goto out_err;
449  if (DatasetLoadString(set) < 0)
450  goto out_err;
451  break;
452  case DATASET_TYPE_SHA256:
453  set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet, Sha256StrFree,
454  Sha256StrHash, Sha256StrCompare, NULL, NULL, load != NULL ? 1 : 0,
455  memcap > 0 ? memcap : default_memcap, hashsize);
456  if (set->hash == NULL)
457  goto out_err;
458  if (DatasetLoadSha256(set) < 0)
459  goto out_err;
460  break;
461  case DATASET_TYPE_IPV4:
462  set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4Set, IPv4Free, IPv4Hash,
463  IPv4Compare, NULL, NULL, load != NULL ? 1 : 0,
464  memcap > 0 ? memcap : default_memcap, hashsize);
465  if (set->hash == NULL)
466  goto out_err;
467  if (DatasetLoadIPv4(set) < 0)
468  goto out_err;
469  break;
470  case DATASET_TYPE_IPV6:
471  set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6Set, IPv6Free, IPv6Hash,
472  IPv6Compare, NULL, NULL, load != NULL ? 1 : 0,
473  memcap > 0 ? memcap : default_memcap, hashsize);
474  if (set->hash == NULL)
475  goto out_err;
476  if (DatasetLoadIPv6(set) < 0)
477  goto out_err;
478  break;
479  }
 /* also catches a type value that none of the cases above handled */
480  if (set->hash == NULL) {
481  goto out_err;
482  }
483 
484  if (SC_ATOMIC_GET(set->hash->memcap_reached)) {
485  SCLogError("dataset too large for set memcap");
486  goto out_err;
487  }
488 
489  SCLogDebug("set %p/%s type %u save %s load %s",
490  set, set->name, set->type, set->save, set->load);
491 
 /* publish: the new set becomes the head of the global list */
492  set->next = sets;
493  sets = set;
494 
495  /* hash size accounting */
497  DatasetUpdateHashsize(set->name, set->hash->config.hash_size);
498 
500  return set;
 /* common failure exit: release whatever was set up so far */
501 out_err:
502  if (set) {
503  if (set->hash) {
504  THashShutdown(set->hash);
505  }
506  SCFree(set);
507  }
509  return NULL;
510 }
511 
/**
 * \brief tell whether a set definition describes a static set
 *
 * A set is static when it has a non-empty load path and no (NULL or empty)
 * save path.
 *
 * \param save save path, may be NULL
 * \param load load path, may be NULL
 */
static bool DatasetIsStatic(const char *save, const char *load)
{
    const bool has_load = (load != NULL) && (load[0] != '\0');
    const bool has_save = (save != NULL) && (save[0] != '\0');

    return has_load && !has_save;
}
523 
/**
 * \brief mark static, rule-defined datasets as hidden ahead of a reload
 *
 * Sets that are not static (see DatasetIsStatic()) or that have from_yaml
 * set are left untouched. Hidden sets are no longer found by
 * DatasetSearchByName().
 *
 * NOTE(review): in this listing the body of the
 * `if (dataset_max_total_hashsize > 0)` block is missing and further line
 * numbers are skipped at the start and end of the function. Restore the
 * missing statements from the upstream file.
 */
524 void DatasetReload(void)
525 {
526  /* In order to reload the datasets, just mark the current sets as hidden
527  * and clean them up later.
528  * New datasets shall be created with the rule reload and do not require
529  * any intervention.
530  * */
532  Dataset *set = sets;
533  while (set) {
 /* only static sets that did not come from the yaml are hidden */
534  if (!DatasetIsStatic(set->save, set->load) || set->from_yaml) {
535  SCLogDebug("Not a static set, skipping %s", set->name);
536  set = set->next;
537  continue;
538  }
539  set->hidden = true;
 /* NOTE(review): empty in this listing -- see the function header */
540  if (dataset_max_total_hashsize > 0) {
543  }
544  SCLogDebug("Set %s at %p hidden successfully", set->name, set);
545  set = set->next;
546  }
548 }
549 
/**
 * Removes every dataset whose `hidden` flag is set from the global `sets`
 * list, shutting down its hash and freeing it. Visible sets stay in place
 * and keep their relative order.
 *
 * NOTE(review): the line holding this function's signature is not part of
 * this listing, and further line numbers are skipped at the start and end
 * of the body. Restore the signature and confirm against the upstream file
 * that no statements were lost.
 */
551 {
552  SCLogDebug("Post Reload Cleanup starting.. Hidden sets will be removed");
554  Dataset *cur = sets;
555  Dataset *prev = NULL;
556  while (cur) {
 /* remember the successor now: cur may be freed below */
557  Dataset *next = cur->next;
558  if (!cur->hidden) {
559  prev = cur;
560  cur = next;
561  continue;
562  }
563  // Delete the set in case it was hidden
 /* unlink: either bypass cur via prev, or move the list head */
564  if (prev != NULL) {
565  prev->next = next;
566  } else {
567  sets = next;
568  }
569  THashShutdown(cur->hash);
570  SCFree(cur);
571  cur = next;
572  }
574 }
575 
576 /* Value reflects THASH_DEFAULT_HASHSIZE which is what the default was earlier,
577  * despite 2048 commented out in the default yaml. */
578 #define DATASETS_HASHSIZE_DEFAULT 4096
579 
580 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize)
581 {
582  const char *str = NULL;
583  if (ConfGet("datasets.defaults.memcap", &str) == 1) {
584  if (ParseSizeStringU64(str, memcap) < 0) {
585  SCLogWarning("memcap value cannot be deduced: %s,"
586  " resetting to default",
587  str);
588  *memcap = 0;
589  }
590  }
591 
592  *hashsize = (uint32_t)DATASETS_HASHSIZE_DEFAULT;
593  if (ConfGet("datasets.defaults.hashsize", &str) == 1) {
594  if (ParseSizeStringU32(str, hashsize) < 0) {
595  *hashsize = (uint32_t)DATASETS_HASHSIZE_DEFAULT;
596  SCLogWarning("hashsize value cannot be deduced: %s,"
597  " resetting to default: %u",
598  str, *hashsize);
599  }
600  }
601 }
602 
/**
 * \brief set up the datasets defined under the "datasets" config node
 *
 * Reads the hash size limits, then walks the children of the node and
 * creates one dataset per entry that has a name and a "type" child.
 * Entries with a "state" child use that path for both saving and loading;
 * otherwise an optional "load" child gives the load path. Per-set "memcap"
 * and "hashsize" children override the defaults.
 *
 * NOTE(review): this listing is missing several lines of this function
 * (the parse calls guarding the two FatalError() calls for the limits, the
 * second half of the total/single comparison and its arguments, and the
 * start of the "set name too long" log call). As shown the braces do not
 * balance -- restore the missing lines from the upstream file.
 *
 * \retval 0 the only return statement in this function yields 0
 */
603 int DatasetsInit(void)
604 {
605  SCLogDebug("datasets start");
606  ConfNode *datasets = ConfGetNode("datasets");
607  uint64_t default_memcap = 0;
608  uint32_t default_hashsize = 0;
609  GetDefaultMemcap(&default_memcap, &default_hashsize);
610  if (datasets != NULL) {
611  const char *str = NULL;
 /* global limits: datasets.limits.* */
612  if (ConfGet("datasets.limits.total-hashsizes", &str) == 1) {
614  FatalError("failed to parse datasets.limits.total-hashsizes value: %s", str);
615  }
616  }
617  if (ConfGet("datasets.limits.single-hashsize", &str) == 1) {
619  FatalError("failed to parse datasets.limits.single-hashsize value: %s", str);
620  }
621  }
622  if (dataset_max_total_hashsize > 0 &&
624  FatalError("total-hashsizes (%u) cannot be smaller than single-hashsize (%u)",
626  }
628  // the total limit also applies for single limit
630  }
631 
 /* list_pos is only used to build conf_str further down.
  * NOTE(review): the `continue` taken for an over-long name and the ones
  * after a failed DatasetGet() do not advance list_pos -- confirm that
  * this is intended. */
632  int list_pos = 0;
633  ConfNode *iter = NULL;
634  TAILQ_FOREACH(iter, &datasets->head, next) {
635  if (iter->name == NULL) {
636  list_pos++;
637  continue;
638  }
639 
640  char save[PATH_MAX] = "";
641  char load[PATH_MAX] = "";
642  uint64_t memcap = 0;
643  uint32_t hashsize = 0;
644 
645  const char *set_name = iter->name;
646  if (strlen(set_name) > DATASET_NAME_MAX_LEN) {
648  "set name '%s' too long, max %d chars", set_name, DATASET_NAME_MAX_LEN);
649  continue;
650  }
651 
 /* entries without a type are skipped silently */
652  ConfNode *set_type =
653  ConfNodeLookupChild(iter, "type");
654  if (set_type == NULL) {
655  list_pos++;
656  continue;
657  }
658 
 /* "state" means: load from and save to the same file; it takes
  * precedence over "load" */
659  ConfNode *set_save =
660  ConfNodeLookupChild(iter, "state");
661  if (set_save) {
662  DatasetGetPath(set_save->val, save, sizeof(save), TYPE_STATE);
663  strlcpy(load, save, sizeof(load));
664  } else {
665  ConfNode *set_load =
666  ConfNodeLookupChild(iter, "load");
667  if (set_load) {
668  DatasetGetPath(set_load->val, load, sizeof(load), TYPE_LOAD);
669  }
670  }
671 
 /* optional per-set overrides; 0 means "use the default" below */
672  ConfNode *set_memcap = ConfNodeLookupChild(iter, "memcap");
673  if (set_memcap) {
674  if (ParseSizeStringU64(set_memcap->val, &memcap) < 0) {
675  SCLogWarning("memcap value cannot be"
676  " deduced: %s, resetting to default",
677  set_memcap->val);
678  memcap = 0;
679  }
680  }
681  ConfNode *set_hashsize = ConfNodeLookupChild(iter, "hashsize");
682  if (set_hashsize) {
683  if (ParseSizeStringU32(set_hashsize->val, &hashsize) < 0) {
684  SCLogWarning("hashsize value cannot be"
685  " deduced: %s, resetting to default",
686  set_hashsize->val);
687  hashsize = 0;
688  }
689  }
690  char conf_str[1024];
691  snprintf(conf_str, sizeof(conf_str), "datasets.%d.%s", list_pos, set_name);
692 
693  SCLogDebug("set %s type %s. Conf %s", set_name, set_type->val, conf_str);
694 
 /* one branch per supported type keyword; the keyword is compared
  * case-sensitively here, and an unknown keyword creates no set.
  * Every set created here is flagged from_yaml. */
695  if (strcmp(set_type->val, "md5") == 0) {
696  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_MD5, save, load,
697  memcap > 0 ? memcap : default_memcap,
698  hashsize > 0 ? hashsize : default_hashsize);
699  if (dset == NULL) {
700  FatalErrorOnInit("failed to setup dataset for %s", set_name);
701  continue;
702  }
703  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
704  dset->from_yaml = true;
705 
706  } else if (strcmp(set_type->val, "sha256") == 0) {
707  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_SHA256, save, load,
708  memcap > 0 ? memcap : default_memcap,
709  hashsize > 0 ? hashsize : default_hashsize);
710  if (dset == NULL) {
711  FatalErrorOnInit("failed to setup dataset for %s", set_name);
712  continue;
713  }
714  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
715  dset->from_yaml = true;
716 
717  } else if (strcmp(set_type->val, "string") == 0) {
718  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_STRING, save, load,
719  memcap > 0 ? memcap : default_memcap,
720  hashsize > 0 ? hashsize : default_hashsize);
721  if (dset == NULL) {
722  FatalErrorOnInit("failed to setup dataset for %s", set_name);
723  continue;
724  }
725  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
726  dset->from_yaml = true;
727 
728  } else if (strcmp(set_type->val, "ipv4") == 0) {
729  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_IPV4, save, load,
730  memcap > 0 ? memcap : default_memcap,
731  hashsize > 0 ? hashsize : default_hashsize);
732  if (dset == NULL) {
733  FatalErrorOnInit("failed to setup dataset for %s", set_name);
734  continue;
735  }
736  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
737  dset->from_yaml = true;
738 
739  } else if (strcmp(set_type->val, "ip") == 0) {
740  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_IPV6, save, load,
741  memcap > 0 ? memcap : default_memcap,
742  hashsize > 0 ? hashsize : default_hashsize);
743  if (dset == NULL) {
744  FatalErrorOnInit("failed to setup dataset for %s", set_name);
745  continue;
746  }
747  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
748  dset->from_yaml = true;
749  }
750 
751  list_pos++;
752  }
753  }
754  SCLogDebug("datasets done: %p", datasets);
755  return 0;
756 }
757 
758 void DatasetsDestroy(void)
759 {
760  SCLogDebug("destroying datasets: %p", sets);
762  Dataset *set = sets;
763  while (set) {
764  SCLogDebug("destroying set %s", set->name);
765  Dataset *next = set->next;
766  THashShutdown(set->hash);
767  SCFree(set);
768  set = next;
769  }
770  sets = NULL;
772  SCLogDebug("destroying datasets done: %p", sets);
773 }
774 
/**
 * \brief output callback writing one serialized record to a FILE
 *
 * \param ctx      the FILE pointer to write to, may be NULL
 * \param data     bytes to write
 * \param data_len number of bytes in \a data
 *
 * \retval 0 \a ctx is NULL, or fwrite() wrote no complete item
 * \retval 1 the record was written as one item
 */
static int SaveCallback(void *ctx, const uint8_t *data, const uint32_t data_len)
{
    FILE *stream = ctx;
    if (stream == NULL)
        return 0;

    const size_t items = fwrite(data, data_len, 1, stream);
    return (int)items;
}
784 
785 static int Md5AsAscii(const void *s, char *out, size_t out_size)
786 {
787  const Md5Type *md5 = s;
788  char str[256];
789  PrintHexString(str, sizeof(str), (uint8_t *)md5->md5, sizeof(md5->md5));
790  strlcat(out, str, out_size);
791  strlcat(out, "\n", out_size);
792  return (int)strlen(out);
793 }
794 
795 static int Sha256AsAscii(const void *s, char *out, size_t out_size)
796 {
797  const Sha256Type *sha = s;
798  char str[256];
799  PrintHexString(str, sizeof(str), (uint8_t *)sha->sha256, sizeof(sha->sha256));
800  strlcat(out, str, out_size);
801  strlcat(out, "\n", out_size);
802  return (int)strlen(out);
803 }
804 
805 static int IPv4AsAscii(const void *s, char *out, size_t out_size)
806 {
807  const IPv4Type *ip4 = s;
808  char str[256];
809  PrintInet(AF_INET, ip4->ipv4, str, sizeof(str));
810  strlcat(out, str, out_size);
811  strlcat(out, "\n", out_size);
812  return (int)strlen(out);
813 }
814 
815 static int IPv6AsAscii(const void *s, char *out, size_t out_size)
816 {
817  const IPv6Type *ip6 = s;
818  char str[256];
819  bool is_ipv4 = true;
820  for (int i = 4; i <= 15; i++) {
821  if (ip6->ipv6[i] != 0) {
822  is_ipv4 = false;
823  break;
824  }
825  }
826  if (is_ipv4) {
827  PrintInet(AF_INET, ip6->ipv6, str, sizeof(str));
828  } else {
829  PrintInet(AF_INET6, ip6->ipv6, str, sizeof(str));
830  }
831  strlcat(out, str, out_size);
832  strlcat(out, "\n", out_size);
833  return (int)strlen(out);
834 }
835 
836 void DatasetsSave(void)
837 {
838  SCLogDebug("saving datasets: %p", sets);
840  Dataset *set = sets;
841  while (set) {
842  if (strlen(set->save) == 0)
843  goto next;
844 
845  FILE *fp = fopen(set->save, "w");
846  if (fp == NULL)
847  goto next;
848 
849  SCLogDebug("dumping %s to %s", set->name, set->save);
850 
851  switch (set->type) {
852  case DATASET_TYPE_STRING:
853  THashWalk(set->hash, StringAsBase64, SaveCallback, fp);
854  break;
855  case DATASET_TYPE_MD5:
856  THashWalk(set->hash, Md5AsAscii, SaveCallback, fp);
857  break;
858  case DATASET_TYPE_SHA256:
859  THashWalk(set->hash, Sha256AsAscii, SaveCallback, fp);
860  break;
861  case DATASET_TYPE_IPV4:
862  THashWalk(set->hash, IPv4AsAscii, SaveCallback, fp);
863  break;
864  case DATASET_TYPE_IPV6:
865  THashWalk(set->hash, IPv6AsAscii, SaveCallback, fp);
866  break;
867  }
868 
869  fclose(fp);
870 
871  next:
872  set = set->next;
873  }
875 }
876 
877 static int DatasetLookupString(Dataset *set, const uint8_t *data, const uint32_t data_len)
878 {
879  if (set == NULL)
880  return -1;
881 
882  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep.value = 0 };
883  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
884  if (rdata) {
885  DatasetUnlockData(rdata);
886  return 1;
887  }
888  return 0;
889 }
890 
891 static DataRepResultType DatasetLookupStringwRep(Dataset *set,
892  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
893 {
894  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
895 
896  if (set == NULL)
897  return rrep;
898 
899  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = *rep };
900  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
901  if (rdata) {
902  StringType *found = rdata->data;
903  rrep.found = true;
904  rrep.rep = found->rep;
905  DatasetUnlockData(rdata);
906  return rrep;
907  }
908  return rrep;
909 }
910 
911 static int DatasetLookupIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
912 {
913  if (set == NULL)
914  return -1;
915 
916  if (data_len != 4)
917  return -1;
918 
919  IPv4Type lookup = { .rep.value = 0 };
920  memcpy(lookup.ipv4, data, 4);
921  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
922  if (rdata) {
923  DatasetUnlockData(rdata);
924  return 1;
925  }
926  return 0;
927 }
928 
929 static DataRepResultType DatasetLookupIPv4wRep(
930  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
931 {
932  DataRepResultType rrep = { .found = false, .rep = { .value = 0 } };
933 
934  if (set == NULL)
935  return rrep;
936 
937  if (data_len != 4)
938  return rrep;
939 
940  IPv4Type lookup = { .rep.value = 0 };
941  memcpy(lookup.ipv4, data, data_len);
942  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
943  if (rdata) {
944  IPv4Type *found = rdata->data;
945  rrep.found = true;
946  rrep.rep = found->rep;
947  DatasetUnlockData(rdata);
948  return rrep;
949  }
950  return rrep;
951 }
952 
953 static int DatasetLookupIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
954 {
955  if (set == NULL)
956  return -1;
957 
958  if (data_len != 16 && data_len != 4)
959  return -1;
960 
961  IPv6Type lookup = { .rep.value = 0 };
962  memcpy(lookup.ipv6, data, data_len);
963  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
964  if (rdata) {
965  DatasetUnlockData(rdata);
966  return 1;
967  }
968  return 0;
969 }
970 
971 static DataRepResultType DatasetLookupIPv6wRep(
972  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
973 {
974  DataRepResultType rrep = { .found = false, .rep = { .value = 0 } };
975 
976  if (set == NULL)
977  return rrep;
978 
979  if (data_len != 16 && data_len != 4)
980  return rrep;
981 
982  IPv6Type lookup = { .rep.value = 0 };
983  memcpy(lookup.ipv6, data, data_len);
984  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
985  if (rdata) {
986  IPv6Type *found = rdata->data;
987  rrep.found = true;
988  rrep.rep = found->rep;
989  DatasetUnlockData(rdata);
990  return rrep;
991  }
992  return rrep;
993 }
994 
995 static int DatasetLookupMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
996 {
997  if (set == NULL)
998  return -1;
999 
1000  if (data_len != 16)
1001  return -1;
1002 
1003  Md5Type lookup = { .rep.value = 0 };
1004  memcpy(lookup.md5, data, data_len);
1005  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1006  if (rdata) {
1007  DatasetUnlockData(rdata);
1008  return 1;
1009  }
1010  return 0;
1011 }
1012 
1013 static DataRepResultType DatasetLookupMd5wRep(Dataset *set,
1014  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1015 {
1016  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
1017 
1018  if (set == NULL)
1019  return rrep;
1020 
1021  if (data_len != 16)
1022  return rrep;
1023 
1024  Md5Type lookup = { .rep.value = 0};
1025  memcpy(lookup.md5, data, data_len);
1026  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1027  if (rdata) {
1028  Md5Type *found = rdata->data;
1029  rrep.found = true;
1030  rrep.rep = found->rep;
1031  DatasetUnlockData(rdata);
1032  return rrep;
1033  }
1034  return rrep;
1035 }
1036 
1037 static int DatasetLookupSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1038 {
1039  if (set == NULL)
1040  return -1;
1041 
1042  if (data_len != 32)
1043  return -1;
1044 
1045  Sha256Type lookup = { .rep.value = 0 };
1046  memcpy(lookup.sha256, data, data_len);
1047  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1048  if (rdata) {
1049  DatasetUnlockData(rdata);
1050  return 1;
1051  }
1052  return 0;
1053 }
1054 
1055 static DataRepResultType DatasetLookupSha256wRep(Dataset *set,
1056  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1057 {
1058  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
1059 
1060  if (set == NULL)
1061  return rrep;
1062 
1063  if (data_len != 32)
1064  return rrep;
1065 
1066  Sha256Type lookup = { .rep.value = 0 };
1067  memcpy(lookup.sha256, data, data_len);
1068  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1069  if (rdata) {
1070  Sha256Type *found = rdata->data;
1071  rrep.found = true;
1072  rrep.rep = found->rep;
1073  DatasetUnlockData(rdata);
1074  return rrep;
1075  }
1076  return rrep;
1077 }
1078 
1079 /**
1080  * \brief see if \a data is part of the set
1081  * \param set dataset
1082  * \param data data to look up
1083  * \param data_len length in bytes of \a data
1084  * \retval -1 error
1085  * \retval 0 not found
1086  * \retval 1 found
1087  */
1088 int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
1089 {
1090  if (set == NULL)
1091  return -1;
1092 
1093  switch (set->type) {
1094  case DATASET_TYPE_STRING:
1095  return DatasetLookupString(set, data, data_len);
1096  case DATASET_TYPE_MD5:
1097  return DatasetLookupMd5(set, data, data_len);
1098  case DATASET_TYPE_SHA256:
1099  return DatasetLookupSha256(set, data, data_len);
1100  case DATASET_TYPE_IPV4:
1101  return DatasetLookupIPv4(set, data, data_len);
1102  case DATASET_TYPE_IPV6:
1103  return DatasetLookupIPv6(set, data, data_len);
1104  }
1105  return -1;
1106 }
1107 
1108 DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
1109  const DataRepType *rep)
1110 {
1111  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
1112  if (set == NULL)
1113  return rrep;
1114 
1115  switch (set->type) {
1116  case DATASET_TYPE_STRING:
1117  return DatasetLookupStringwRep(set, data, data_len, rep);
1118  case DATASET_TYPE_MD5:
1119  return DatasetLookupMd5wRep(set, data, data_len, rep);
1120  case DATASET_TYPE_SHA256:
1121  return DatasetLookupSha256wRep(set, data, data_len, rep);
1122  case DATASET_TYPE_IPV4:
1123  return DatasetLookupIPv4wRep(set, data, data_len, rep);
1124  case DATASET_TYPE_IPV6:
1125  return DatasetLookupIPv6wRep(set, data, data_len, rep);
1126  }
1127  return rrep;
1128 }
1129 
1130 /**
1131  * \retval 1 data was added to the hash
1132  * \retval 0 data was not added to the hash as it is already there
1133  * \retval -1 failed to add data to the hash
1134  */
1135 static int DatasetAddString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1136 {
1137  if (set == NULL)
1138  return -1;
1139 
1140  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1141  .rep.value = 0 };
1142  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1143  if (res.data) {
1144  DatasetUnlockData(res.data);
1145  return res.is_new ? 1 : 0;
1146  }
1147  return -1;
1148 }
1149 
1150 /**
1151  * \retval 1 data was added to the hash
1152  * \retval 0 data was not added to the hash as it is already there
1153  * \retval -1 failed to add data to the hash
1154  */
1155 static int DatasetAddStringwRep(
1156  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1157 {
1158  if (set == NULL)
1159  return -1;
1160 
1161  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1162  .rep = *rep };
1163  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1164  if (res.data) {
1165  DatasetUnlockData(res.data);
1166  return res.is_new ? 1 : 0;
1167  }
1168  return -1;
1169 }
1170 
1171 static int DatasetAddIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
1172 {
1173  if (set == NULL) {
1174  return -1;
1175  }
1176 
1177  if (data_len < 4) {
1178  return -2;
1179  }
1180 
1181  IPv4Type lookup = { .rep.value = 0 };
1182  memcpy(lookup.ipv4, data, 4);
1183  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1184  if (res.data) {
1185  DatasetUnlockData(res.data);
1186  return res.is_new ? 1 : 0;
1187  }
1188  return -1;
1189 }
1190 
1191 static int DatasetAddIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
1192 {
1193  if (set == NULL) {
1194  return -1;
1195  }
1196 
1197  if (data_len != 16) {
1198  return -2;
1199  }
1200 
1201  IPv6Type lookup = { .rep.value = 0 };
1202  memcpy(lookup.ipv6, data, 16);
1203  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1204  if (res.data) {
1205  DatasetUnlockData(res.data);
1206  return res.is_new ? 1 : 0;
1207  }
1208  return -1;
1209 }
1210 
1211 static int DatasetAddIPv4wRep(
1212  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1213 {
1214  if (set == NULL)
1215  return -1;
1216 
1217  if (data_len < 4)
1218  return -2;
1219 
1220  IPv4Type lookup = { .rep = *rep };
1221  memcpy(lookup.ipv4, data, 4);
1222  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1223  if (res.data) {
1224  DatasetUnlockData(res.data);
1225  return res.is_new ? 1 : 0;
1226  }
1227  return -1;
1228 }
1229 
1230 static int DatasetAddIPv6wRep(
1231  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1232 {
1233  if (set == NULL)
1234  return -1;
1235 
1236  if (data_len != 16)
1237  return -2;
1238 
1239  IPv6Type lookup = { .rep = *rep };
1240  memcpy(lookup.ipv6, data, 16);
1241  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1242  if (res.data) {
1243  DatasetUnlockData(res.data);
1244  return res.is_new ? 1 : 0;
1245  }
1246  return -1;
1247 }
1248 
1249 static int DatasetAddMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1250 {
1251  if (set == NULL)
1252  return -1;
1253 
1254  if (data_len != 16)
1255  return -2;
1256 
1257  Md5Type lookup = { .rep.value = 0 };
1258  memcpy(lookup.md5, data, 16);
1259  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1260  if (res.data) {
1261  DatasetUnlockData(res.data);
1262  return res.is_new ? 1 : 0;
1263  }
1264  return -1;
1265 }
1266 
1267 static int DatasetAddMd5wRep(
1268  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1269 {
1270  if (set == NULL)
1271  return -1;
1272 
1273  if (data_len != 16)
1274  return -2;
1275 
1276  Md5Type lookup = { .rep = *rep };
1277  memcpy(lookup.md5, data, 16);
1278  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1279  if (res.data) {
1280  DatasetUnlockData(res.data);
1281  return res.is_new ? 1 : 0;
1282  }
1283  return -1;
1284 }
1285 
1286 static int DatasetAddSha256wRep(
1287  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1288 {
1289  if (set == NULL)
1290  return -1;
1291 
1292  if (data_len != 32)
1293  return -2;
1294 
1295  Sha256Type lookup = { .rep = *rep };
1296  memcpy(lookup.sha256, data, 32);
1297  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1298  if (res.data) {
1299  DatasetUnlockData(res.data);
1300  return res.is_new ? 1 : 0;
1301  }
1302  return -1;
1303 }
1304 
1305 static int DatasetAddSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1306 {
1307  if (set == NULL)
1308  return -1;
1309 
1310  if (data_len != 32)
1311  return -2;
1312 
1313  Sha256Type lookup = { .rep.value = 0 };
1314  memcpy(lookup.sha256, data, 32);
1315  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1316  if (res.data) {
1317  DatasetUnlockData(res.data);
1318  return res.is_new ? 1 : 0;
1319  }
1320  return -1;
1321 }
1322 
1323 int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
1324 {
1325  if (set == NULL)
1326  return -1;
1327 
1328  switch (set->type) {
1329  case DATASET_TYPE_STRING:
1330  return DatasetAddString(set, data, data_len);
1331  case DATASET_TYPE_MD5:
1332  return DatasetAddMd5(set, data, data_len);
1333  case DATASET_TYPE_SHA256:
1334  return DatasetAddSha256(set, data, data_len);
1335  case DATASET_TYPE_IPV4:
1336  return DatasetAddIPv4(set, data, data_len);
1337  case DATASET_TYPE_IPV6:
1338  return DatasetAddIPv6(set, data, data_len);
1339  }
1340  return -1;
1341 }
1342 
1343 int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep)
1344 {
1345  if (set == NULL)
1346  return -1;
1347 
1348  switch (set->type) {
1349  case DATASET_TYPE_STRING:
1350  return DatasetAddStringwRep(set, data, data_len, rep);
1351  case DATASET_TYPE_MD5:
1352  return DatasetAddMd5wRep(set, data, data_len, rep);
1353  case DATASET_TYPE_SHA256:
1354  return DatasetAddSha256wRep(set, data, data_len, rep);
1355  case DATASET_TYPE_IPV4:
1356  return DatasetAddIPv4wRep(set, data, data_len, rep);
1357  case DATASET_TYPE_IPV6:
1358  return DatasetAddIPv6wRep(set, data, data_len, rep);
1359  }
1360  return -1;
1361 }
1362 
1363 typedef int (*DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len);
1364 
1365 static int DatasetOpSerialized(Dataset *set, const char *string, DatasetOpFunc DatasetOpString,
1366  DatasetOpFunc DatasetOpMd5, DatasetOpFunc DatasetOpSha256, DatasetOpFunc DatasetOpIPv4,
1367  DatasetOpFunc DatasetOpIPv6)
1368 {
1369  if (set == NULL)
1370  return -1;
1371  if (strlen(string) == 0)
1372  return -1;
1373 
1374  switch (set->type) {
1375  case DATASET_TYPE_STRING: {
1376  uint32_t decoded_size = SCBase64DecodeBufferSize(strlen(string));
1377  uint8_t decoded[decoded_size];
1378  uint32_t num_decoded = SCBase64Decode(
1379  (const uint8_t *)string, strlen(string), SCBase64ModeStrict, decoded);
1380  if (num_decoded == 0) {
1381  return -2;
1382  }
1383 
1384  return DatasetOpString(set, decoded, num_decoded);
1385  }
1386  case DATASET_TYPE_MD5: {
1387  if (strlen(string) != 32)
1388  return -2;
1389  uint8_t hash[16];
1390  if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0)
1391  return -2;
1392  return DatasetOpMd5(set, hash, 16);
1393  }
1394  case DATASET_TYPE_SHA256: {
1395  if (strlen(string) != 64)
1396  return -2;
1397  uint8_t hash[32];
1398  if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0)
1399  return -2;
1400  return DatasetOpSha256(set, hash, 32);
1401  }
1402  case DATASET_TYPE_IPV4: {
1403  struct in_addr in;
1404  if (inet_pton(AF_INET, string, &in) != 1)
1405  return -2;
1406  return DatasetOpIPv4(set, (uint8_t *)&in.s_addr, 4);
1407  }
1408  case DATASET_TYPE_IPV6: {
1409  struct in6_addr in6;
1410  if (ParseIpv6String(set, string, &in6) != 0) {
1411  SCLogError("Dataset failed to import %s as IPv6", string);
1412  return -2;
1413  }
1414  return DatasetOpIPv6(set, (uint8_t *)&in6.s6_addr, 16);
1415  }
1416  }
1417  return -1;
1418 }
1419 
1420 /** \brief add serialized data to set
1421  * \retval int 1 added
1422  * \retval int 0 already in hash
1423  * \retval int -1 API error (not added)
1424  * \retval int -2 DATA error
1425  */
1426 int DatasetAddSerialized(Dataset *set, const char *string)
1427 {
1428  return DatasetOpSerialized(set, string, DatasetAddString, DatasetAddMd5, DatasetAddSha256,
1429  DatasetAddIPv4, DatasetAddIPv6);
1430 }
1431 
1432 /** \brief add serialized data to set
1433  * \retval int 1 added
1434  * \retval int 0 already in hash
1435  * \retval int -1 API error (not added)
1436  * \retval int -2 DATA error
1437  */
1438 int DatasetLookupSerialized(Dataset *set, const char *string)
1439 {
1440  return DatasetOpSerialized(set, string, DatasetLookupString, DatasetLookupMd5,
1441  DatasetLookupSha256, DatasetLookupIPv4, DatasetLookupIPv6);
1442 }
1443 
1444 /**
1445  * \retval 1 data was removed from the hash
1446  * \retval 0 data not removed (busy)
1447  * \retval -1 data not found
1448  */
1449 static int DatasetRemoveString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1450 {
1451  if (set == NULL)
1452  return -1;
1453 
1454  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1455  .rep.value = 0 };
1456  return THashRemoveFromHash(set->hash, &lookup);
1457 }
1458 
1459 static int DatasetRemoveIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
1460 {
1461  if (set == NULL)
1462  return -1;
1463 
1464  if (data_len != 4)
1465  return -2;
1466 
1467  IPv4Type lookup = { .rep.value = 0 };
1468  memcpy(lookup.ipv4, data, 4);
1469  return THashRemoveFromHash(set->hash, &lookup);
1470 }
1471 
1472 static int DatasetRemoveIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
1473 {
1474  if (set == NULL)
1475  return -1;
1476 
1477  if (data_len != 16)
1478  return -2;
1479 
1480  IPv6Type lookup = { .rep.value = 0 };
1481  memcpy(lookup.ipv6, data, 16);
1482  return THashRemoveFromHash(set->hash, &lookup);
1483 }
1484 
1485 static int DatasetRemoveMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1486 {
1487  if (set == NULL)
1488  return -1;
1489 
1490  if (data_len != 16)
1491  return -2;
1492 
1493  Md5Type lookup = { .rep.value = 0 };
1494  memcpy(lookup.md5, data, 16);
1495  return THashRemoveFromHash(set->hash, &lookup);
1496 }
1497 
1498 static int DatasetRemoveSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1499 {
1500  if (set == NULL)
1501  return -1;
1502 
1503  if (data_len != 32)
1504  return -2;
1505 
1506  Sha256Type lookup = { .rep.value = 0 };
1507  memcpy(lookup.sha256, data, 32);
1508  return THashRemoveFromHash(set->hash, &lookup);
1509 }
1510 
1511 /** \brief remove serialized data from set
1512  * \retval int 1 removed
1513  * \retval int 0 found but busy (not removed)
1514  * \retval int -1 API error (not removed)
1515  * \retval int -2 DATA error */
1516 int DatasetRemoveSerialized(Dataset *set, const char *string)
1517 {
1518  return DatasetOpSerialized(set, string, DatasetRemoveString, DatasetRemoveMd5,
1519  DatasetRemoveSha256, DatasetRemoveIPv4, DatasetRemoveIPv6);
1520 }
1521 
1522 int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len)
1523 {
1524  if (set == NULL)
1525  return -1;
1526 
1527  switch (set->type) {
1528  case DATASET_TYPE_STRING:
1529  return DatasetRemoveString(set, data, data_len);
1530  case DATASET_TYPE_MD5:
1531  return DatasetRemoveMd5(set, data, data_len);
1532  case DATASET_TYPE_SHA256:
1533  return DatasetRemoveSha256(set, data, data_len);
1534  case DATASET_TYPE_IPV4:
1535  return DatasetRemoveIPv4(set, data, data_len);
1536  case DATASET_TYPE_IPV6:
1537  return DatasetRemoveIPv6(set, data, data_len);
1538  }
1539  return -1;
1540 }
dataset_used_hashsize
uint32_t dataset_used_hashsize
Definition: datasets.c:50
util-byte.h
sets_lock
SCMutex sets_lock
Definition: datasets.c:44
StringType::rep
DataRepType rep
Definition: datasets-string.h:31
len
uint8_t len
Definition: app-layer-dnp3.h:2
datasets-string.h
DataRepResultType::rep
DataRepType rep
Definition: datasets-reputation.h:31
THashDataGetResult::data
THashData * data
Definition: util-thash.h:192
datasets-md5.h
Dataset::name
char name[DATASET_NAME_MAX_LEN+1]
Definition: datasets.h:42
ConfNode_::val
char * val
Definition: conf.h:34
Dataset::id
uint32_t id
Definition: datasets.h:44
Dataset::save
char save[PATH_MAX]
Definition: datasets.h:50
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:269
ParseSizeStringU64
int ParseSizeStringU64(const char *size, uint64_t *res)
Definition: util-misc.c:190
next
struct HtpBodyChunk_ * next
Definition: app-layer-htp.h:0
datasets-sha256.h
IPv6Compare
bool IPv6Compare(void *a, void *b)
Definition: datasets-ipv6.c:41
THashRemoveFromHash
int THashRemoveFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:870
TYPE_STATE
@ TYPE_STATE
Definition: datasets.c:278
Md5Type
Definition: datasets-md5.h:29
Dataset::hash
THashTableContext * hash
Definition: datasets.h:47
ConfGetNode
ConfNode * ConfGetNode(const char *name)
Get a ConfNode by name.
Definition: conf.c:181
ctx
struct Thresholds ctx
IPv4Hash
uint32_t IPv4Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv4.c:49
Sha256Type::sha256
uint8_t sha256[32]
Definition: datasets-sha256.h:30
Md5Type::rep
DataRepType rep
Definition: datasets-md5.h:31
DataRepResultType::found
bool found
Definition: datasets-reputation.h:30
PrintHexString
void PrintHexString(char *str, size_t size, uint8_t *buf, size_t buf_len)
Definition: util-print.c:255
Dataset::type
enum DatasetTypes type
Definition: datasets.h:43
TAILQ_FOREACH
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:252
THashConsolidateMemcap
void THashConsolidateMemcap(THashTableContext *ctx)
Definition: util-thash.c:348
SCMutexLock
#define SCMutexLock(mut)
Definition: threads-debug.h:117
rust.h
DATASET_TYPE_SHA256
@ DATASET_TYPE_SHA256
Definition: datasets.h:35
Sha256Type::rep
DataRepType rep
Definition: datasets-sha256.h:31
SCMUTEX_INITIALIZER
#define SCMUTEX_INITIALIZER
Definition: threads-debug.h:121
datasets-reputation.h
DatasetAddwRep
int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep)
Definition: datasets.c:1343
ConfigGetDataDirectory
const char * ConfigGetDataDirectory(void)
Definition: util-conf.c:80
Md5Type::md5
uint8_t md5[16]
Definition: datasets-md5.h:30
DATASET_TYPE_IPV6
@ DATASET_TYPE_IPV6
Definition: datasets.h:37
Md5StrCompare
bool Md5StrCompare(void *a, void *b)
Definition: datasets-md5.c:42
DatasetLookupSerialized
int DatasetLookupSerialized(Dataset *set, const char *string)
add serialized data to set
Definition: datasets.c:1438
strlcpy
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: util-strlcpyu.c:43
DataRepResultType
Definition: datasets-reputation.h:29
DatasetGet
Dataset * DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load, uint64_t memcap, uint32_t hashsize)
Definition: datasets.c:357
dataset_max_one_hashsize
uint32_t dataset_max_one_hashsize
Definition: datasets.c:48
ConfGet
int ConfGet(const char *name, const char **vptr)
Retrieve the value of a configuration node.
Definition: conf.c:335
StringSet
int StringSet(void *dst, void *src)
Definition: datasets-string.c:60
DatasetRemove
int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1522
THashDataConfig_::hash_size
uint32_t hash_size
Definition: util-thash.h:127
datasets.h
IPv6Set
int IPv6Set(void *dst, void *src)
Definition: datasets-ipv6.c:32
util-debug.h
TYPE_LOAD
@ TYPE_LOAD
Definition: datasets.c:279
DatasetAdd
int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1323
strlcat
size_t strlcat(char *, const char *src, size_t siz)
Definition: util-strlcatu.c:45
DATASETS_HASHSIZE_DEFAULT
#define DATASETS_HASHSIZE_DEFAULT
Definition: datasets.c:578
StringAsBase64
int StringAsBase64(const void *s, char *out, size_t out_size)
Definition: datasets-string.c:46
SCMutexUnlock
#define SCMutexUnlock(mut)
Definition: threads-debug.h:119
datasets-ipv6.h
IPv6Type::ipv6
uint8_t ipv6[16]
Definition: datasets-ipv6.h:30
DATASET_TYPE_NOTSET
#define DATASET_TYPE_NOTSET
Definition: datasets.h:32
IPv6Type::rep
DataRepType rep
Definition: datasets-ipv6.h:31
util-print.h
DatasetPostReloadCleanup
void DatasetPostReloadCleanup(void)
Definition: datasets.c:550
PrintInet
const char * PrintInet(int af, const void *src, char *dst, socklen_t size)
Definition: util-print.c:231
DatasetOpFunc
int(* DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1363
datasets-ipv4.h
SCLogWarning
#define SCLogWarning(...)
Macro used to log WARNING messages.
Definition: util-debug.h:249
StringGetLength
uint32_t StringGetLength(void *s)
Definition: datasets-string.c:93
Sha256StrSet
int Sha256StrSet(void *dst, void *src)
Definition: datasets-sha256.c:31
DatasetsDestroy
void DatasetsDestroy(void)
Definition: datasets.c:758
Md5StrHash
uint32_t Md5StrHash(uint32_t hash_seed, void *s)
Definition: datasets-md5.c:50
THashDataGetResult
Definition: util-thash.h:191
StringType
Definition: datasets-string.h:29
IPv4Set
int IPv4Set(void *dst, void *src)
Definition: datasets-ipv4.c:32
type
uint16_t type
Definition: decode-vlan.c:106
DatasetsSave
void DatasetsSave(void)
Definition: datasets.c:836
conf.h
IPv6Type
Definition: datasets-ipv6.h:29
name
const char * name
Definition: tm-threads.c:2081
DatasetLookup
int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
see if data is part of the set
Definition: datasets.c:1088
DATASET_TYPE_IPV4
@ DATASET_TYPE_IPV4
Definition: datasets.h:36
StringType::ptr
uint8_t * ptr
Definition: datasets-string.h:32
DatasetRemoveSerialized
int DatasetRemoveSerialized(Dataset *set, const char *string)
remove serialized data from set
Definition: datasets.c:1516
dataset_max_total_hashsize
uint32_t dataset_max_total_hashsize
Definition: datasets.c:49
g_system
bool g_system
Definition: suricata.c:188
ConfNodeLookupChild
ConfNode * ConfNodeLookupChild(const ConfNode *node, const char *name)
Lookup a child configuration node by name.
Definition: conf.c:781
THashShutdown
void THashShutdown(THashTableContext *ctx)
shutdown the flow engine
Definition: util-thash.c:357
DatasetTypes
DatasetTypes
Definition: datasets.h:31
Dataset::next
struct Dataset * next
Definition: datasets.h:52
THashData_::data
void * data
Definition: util-thash.h:92
util-conf.h
Sha256Type
Definition: datasets-sha256.h:29
Sha256StrFree
void Sha256StrFree(void *s)
Definition: datasets-sha256.c:55
THashData_
Definition: util-thash.h:85
IPv4Free
void IPv4Free(void *s)
Definition: datasets-ipv4.c:56
suricata-common.h
util-path.h
FatalErrorOnInit
#define FatalErrorOnInit(...)
Fatal error IF we're starting up, and configured to consider errors to be fatal errors.
Definition: util-debug.h:511
DATASET_NAME_MAX_LEN
#define DATASET_NAME_MAX_LEN
Definition: datasets.h:40
ConfNode_::name
char * name
Definition: conf.h:33
PathIsAbsolute
int PathIsAbsolute(const char *path)
Check if a path is absolute.
Definition: util-path.c:44
Md5StrSet
int Md5StrSet(void *dst, void *src)
Definition: datasets-md5.c:33
StringCompare
bool StringCompare(void *a, void *b)
Definition: datasets-string.c:76
THashGetFromHash
struct THashDataGetResult THashGetFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:617
FatalError
#define FatalError(...)
Definition: util-debug.h:502
hashsize
#define hashsize(n)
Definition: util-hash-lookup3.h:40
THashLookupFromHash
THashData * THashLookupFromHash(THashTableContext *ctx, void *data)
look up data in the hash
Definition: util-thash.c:727
IPv4Type
Definition: datasets-ipv4.h:29
ParseSizeStringU32
int ParseSizeStringU32(const char *size, uint32_t *res)
Definition: util-misc.c:173
THashDecrUsecnt
#define THashDecrUsecnt(h)
Definition: util-thash.h:170
IPv4Compare
bool IPv4Compare(void *a, void *b)
Definition: datasets-ipv4.c:41
DatasetFind
Dataset * DatasetFind(const char *name, enum DatasetTypes type)
look for set by name without creating it
Definition: datasets.c:315
util-validate.h
SCLogConfig
struct SCLogConfig_ SCLogConfig
Holds the config state used by the logging api.
IPv6Hash
uint32_t IPv6Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv6.c:49
DatasetsInit
int DatasetsInit(void)
Definition: datasets.c:603
str
#define str(s)
Definition: suricata-common.h:300
DatasetGetTypeFromString
enum DatasetTypes DatasetGetTypeFromString(const char *s)
Definition: datasets.c:62
SCLogError
#define SCLogError(...)
Macro used to log ERROR messages.
Definition: util-debug.h:261
THashWalk
int THashWalk(THashTableContext *ctx, THashFormatFunc FormatterFunc, THashOutputFunc OutputterFunc, void *output_ctx)
Walk the hash.
Definition: util-thash.c:391
SCFree
#define SCFree(p)
Definition: util-mem.h:61
ConfNode_
Definition: conf.h:32
Dataset::hidden
bool hidden
Definition: datasets.h:46
DatasetReload
void DatasetReload(void)
Definition: datasets.c:524
DatasetGetPathType
DatasetGetPathType
Definition: datasets.c:277
Sha256StrCompare
bool Sha256StrCompare(void *a, void *b)
Definition: datasets-sha256.c:40
StringHash
uint32_t StringHash(uint32_t hash_seed, void *s)
Definition: datasets-string.c:87
DATASET_TYPE_MD5
@ DATASET_TYPE_MD5
Definition: datasets.h:34
DATASET_TYPE_STRING
@ DATASET_TYPE_STRING
Definition: datasets.h:33
THashDataGetResult::is_new
bool is_new
Definition: util-thash.h:193
IPv6Free
void IPv6Free(void *s)
Definition: datasets-ipv6.c:56
suricata.h
THashInit
THashTableContext * THashInit(const char *cnf_prefix, uint32_t data_size, int(*DataSet)(void *, void *), void(*DataFree)(void *), uint32_t(*DataHash)(uint32_t, void *), bool(*DataCompare)(void *, void *), bool(*DataExpired)(void *, SCTime_t), uint32_t(*DataSize)(void *), bool reset_memcap, uint64_t memcap, uint32_t hashsize)
Definition: util-thash.c:305
IPv4Type::ipv4
uint8_t ipv4[4]
Definition: datasets-ipv4.h:30
DatasetAddSerialized
int DatasetAddSerialized(Dataset *set, const char *string)
add serialized data to set
Definition: datasets.c:1426
Dataset
Definition: datasets.h:41
Dataset::from_yaml
bool from_yaml
Definition: datasets.h:45
IPv4Type::rep
DataRepType rep
Definition: datasets-ipv4.h:31
SC_ATOMIC_GET
#define SC_ATOMIC_GET(name)
Get the value from the atomic variable.
Definition: util-atomic.h:375
util-misc.h
util-thash.h
Dataset::load
char load[PATH_MAX]
Definition: datasets.h:49
SCCalloc
#define SCCalloc(nm, sz)
Definition: util-mem.h:53
Sha256StrHash
uint32_t Sha256StrHash(uint32_t hash_seed, void *s)
Definition: datasets-sha256.c:48
Md5StrFree
void Md5StrFree(void *s)
Definition: datasets-md5.c:57
SCMutex
#define SCMutex
Definition: threads-debug.h:114
DEBUG_VALIDATE_BUG_ON
#define DEBUG_VALIDATE_BUG_ON(exp)
Definition: util-validate.h:102
StringFree
void StringFree(void *s)
Definition: datasets-string.c:100
DatasetLookupwRep
DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
Definition: datasets.c:1108
THashTableContext_::config
THashConfig config
Definition: util-thash.h:151