suricata
datasets.c
Go to the documentation of this file.
1 /* Copyright (C) 2017-2024 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Victor Julien <victor@inliniac.net>
22  */
23 
24 #include "suricata-common.h"
25 #include "suricata.h"
26 #include "rust.h"
27 #include "conf.h"
28 #include "datasets.h"
29 #include "datasets-string.h"
30 #include "datasets-ipv4.h"
31 #include "datasets-ipv6.h"
32 #include "datasets-md5.h"
33 #include "datasets-sha256.h"
34 #include "datasets-reputation.h"
35 #include "datasets-context-json.h"
36 #include "util-conf.h"
37 #include "util-mem.h"
38 #include "util-thash.h"
39 #include "util-print.h"
40 #include "util-byte.h"
41 #include "util-misc.h"
42 #include "util-path.h"
43 #include "util-debug.h"
44 #include "util-validate.h"
45 
/* Head of the singly linked list of registered datasets; new sets are pushed
 * onto the front through their `next` pointer. */
47 static Dataset *sets = NULL;
/* Next id to hand out; post-incremented by DatasetAlloc() for each new set. */
48 static uint32_t set_ids = 0;
49 
/* Per-set hash size limit checked in DatasetCheckHashsize(); 0 disables it. */
50 uint32_t dataset_max_one_hashsize = 65536;
/* Limit reported as 'total-hashsizes'; when 0, DatasetUpdateHashsize() does
 * no accounting. */
51 uint32_t dataset_max_total_hashsize = 16777216;
/* Running sum of hash sizes maintained by DatasetUpdateHashsize(). */
52 uint32_t dataset_used_hashsize = 0;
53 
/* Forward declarations for functions defined later in the file. */
54 int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep);
55 static void DatasetUpdateHashsize(const char *name, uint32_t hash_size);
56 
57 static inline void DatasetUnlockData(THashData *d)
58 {
59  (void) THashDecrUsecnt(d);
60  THashDataUnlock(d);
61 }
62 static bool DatasetIsStatic(const char *save, const char *load);
63 
64 enum DatasetTypes DatasetGetTypeFromString(const char *s)
65 {
66  if (strcasecmp("md5", s) == 0)
67  return DATASET_TYPE_MD5;
68  if (strcasecmp("sha256", s) == 0)
69  return DATASET_TYPE_SHA256;
70  if (strcasecmp("string", s) == 0)
71  return DATASET_TYPE_STRING;
72  if (strcasecmp("ipv4", s) == 0)
73  return DATASET_TYPE_IPV4;
74  if (strcasecmp("ip", s) == 0)
75  return DATASET_TYPE_IPV6;
76  return DATASET_TYPE_NOTSET;
77 }
78 
80 {
/* NOTE(review): the signature line is missing from this listing; this body is
 * presumably DatasetAppendSet(), which DatasetGet() calls once a new set has
 * been loaded -- confirm against the original file.
 *
 * Validates `set` and pushes it onto the head of the global `sets` list.
 * Returns -1 when the set has no hash table or when the table's
 * memcap_reached flag is set; returns 0 after linking the set in and adding
 * its hash size to the global accounting. */
81 
82  if (set->hash == NULL) {
83  return -1;
84  }
85 
86  if (SC_ATOMIC_GET(set->hash->memcap_reached)) {
87  SCLogError("dataset too large for set memcap");
88  return -1;
89  }
90 
91  SCLogDebug(
92  "set %p/%s type %u save %s load %s", set, set->name, set->type, set->save, set->load);
93 
/* insert at the front of the list */
94  set->next = sets;
95  sets = set;
96 
97  /* hash size accounting */
98  DatasetUpdateHashsize(set->name, set->hash->config.hash_size);
99  return 0;
100 }
101 
/** \brief take the lock protecting the global dataset list
 *
 *  Callers in this file pair it with DatasetUnlock().
 */
102 void DatasetLock(void)
103 {
/* NOTE(review): no statement is visible here. Embedded line 104 is missing
 * from this listing, so the actual lock call was presumably dropped during
 * extraction -- restore it from the original file. */
105 }
106 
/** \brief release the lock taken by DatasetLock() */
107 void DatasetUnlock(void)
108 {
/* NOTE(review): no statement is visible here. Embedded line 109 is missing
 * from this listing, so the actual unlock call was presumably dropped during
 * extraction -- restore it from the original file. */
110 }
111 
112 Dataset *DatasetAlloc(const char *name)
113 {
114  Dataset *set = SCCalloc(1, sizeof(*set));
115  if (set) {
116  set->id = set_ids++;
117  }
118  return set;
119 }
120 
122 {
/* NOTE(review): the signature line is missing from this listing; the body
 * uses a string parameter called `name` -- confirm the function name and
 * prototype against the original file.
 *
 * Walks the global `sets` list and returns the first set whose name equals
 * `name` (case-insensitive) and that is not marked hidden; NULL if none. */
123  Dataset *set = sets;
124  while (set) {
125  if (strcasecmp(name, set->name) == 0 && !set->hidden) {
126  return set;
127  }
128  set = set->next;
129  }
130  return NULL;
131 }
132 
/** \brief load the file named by set->load into an IPv4 dataset
 *
 *  Does nothing when no load path is set. The file is parsed through
 *  ParseDatasets() with the DSIpv4 selector.
 *
 *  \retval 0 nothing to load, or ParseDatasets() returned neither -1 nor -2
 *            (also reached after -2 if FatalErrorOnInit() returns)
 *  \retval -1 ParseDatasets() returned -1
 */
133 static int DatasetLoadIPv4(Dataset *set)
134 {
135  if (strlen(set->load) == 0)
136  return 0;
137 
138  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
/* open for append+read instead of read-only when save and load are the
 * same path */
139  const char *fopen_mode = "r";
140  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
141  fopen_mode = "a+";
142  }
143 
144  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv4);
145  if (retval == -2) {
146  FatalErrorOnInit("dataset %s could not be processed", set->name);
147  } else if (retval == -1) {
148  return -1;
149  }
150 
/* NOTE(review): embedded line 151 is missing from this listing; a statement
 * may have been dropped here -- confirm against the original file. */
152 
153  return 0;
154 }
155 
156 int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6)
157 {
158  /* Checking IPv6 case */
159  char *got_colon = strchr(line, ':');
160  if (got_colon) {
161  uint32_t ip6addr[4];
162  if (inet_pton(AF_INET6, line, in6) != 1) {
163  FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
164  return -1;
165  }
166  memcpy(&ip6addr, in6->s6_addr, sizeof(ip6addr));
167  /* IPv4 in IPv6 notation needs transformation to internal Suricata storage */
168  if (ip6addr[0] == 0 && ip6addr[1] == 0 && ip6addr[2] == 0xFFFF0000) {
169  ip6addr[0] = ip6addr[3];
170  ip6addr[2] = 0;
171  ip6addr[3] = 0;
172  memcpy(in6, ip6addr, sizeof(struct in6_addr));
173  }
174  } else {
175  /* IPv4 case */
176  struct in_addr in;
177  if (inet_pton(AF_INET, line, &in) != 1) {
178  FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
179  return -1;
180  }
181  memset(in6, 0, sizeof(struct in6_addr));
182  memcpy(in6, &in, sizeof(struct in_addr));
183  }
184  return 0;
185 }
186 
/** \brief load the file named by set->load into an IPv6 ("ip") dataset
 *
 *  Does nothing when no load path is set. The file is parsed through
 *  ParseDatasets() with the DSIpv6 selector.
 *
 *  \retval 0 nothing to load, or ParseDatasets() returned neither -1 nor -2
 *            (also reached after -2 if FatalErrorOnInit() returns)
 *  \retval -1 ParseDatasets() returned -1
 */
187 static int DatasetLoadIPv6(Dataset *set)
188 {
189  if (strlen(set->load) == 0)
190  return 0;
191 
192  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
/* open for append+read instead of read-only when save and load are the
 * same path */
193  const char *fopen_mode = "r";
194  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
195  fopen_mode = "a+";
196  }
197 
198  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv6);
199  if (retval == -2) {
200  FatalErrorOnInit("dataset %s could not be processed", set->name);
201  } else if (retval == -1) {
202  return -1;
203  }
204 
/* NOTE(review): embedded line 205 is missing from this listing; a statement
 * may have been dropped here -- confirm against the original file. */
206 
207  return 0;
208 }
209 
/** \brief load the file named by set->load into an MD5 dataset
 *
 *  Does nothing when no load path is set. The file is parsed through
 *  ParseDatasets() with the DSMd5 selector.
 *
 *  \retval 0 nothing to load, or ParseDatasets() returned neither -1 nor -2
 *            (also reached after -2 if FatalErrorOnInit() returns)
 *  \retval -1 ParseDatasets() returned -1
 */
210 static int DatasetLoadMd5(Dataset *set)
211 {
212  if (strlen(set->load) == 0)
213  return 0;
214 
215  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
/* open for append+read instead of read-only when save and load are the
 * same path */
216  const char *fopen_mode = "r";
217  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
218  fopen_mode = "a+";
219  }
220 
221  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSMd5);
222  if (retval == -2) {
223  FatalErrorOnInit("dataset %s could not be processed", set->name);
224  } else if (retval == -1) {
225  return -1;
226  }
227 
/* NOTE(review): embedded line 228 is missing from this listing; a statement
 * may have been dropped here -- confirm against the original file. */
229 
230  return 0;
231 }
232 
/** \brief load the file named by set->load into a SHA256 dataset
 *
 *  Does nothing when no load path is set. The file is parsed through
 *  ParseDatasets() with the DSSha256 selector.
 *
 *  \retval 0 nothing to load, or ParseDatasets() returned neither -1 nor -2
 *            (also reached after -2 if FatalErrorOnInit() returns)
 *  \retval -1 ParseDatasets() returned -1
 */
233 static int DatasetLoadSha256(Dataset *set)
234 {
235  if (strlen(set->load) == 0)
236  return 0;
237 
238  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
/* open for append+read instead of read-only when save and load are the
 * same path */
239  const char *fopen_mode = "r";
240  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
241  fopen_mode = "a+";
242  }
243 
244  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSSha256);
245  if (retval == -2) {
246  FatalErrorOnInit("dataset %s could not be processed", set->name);
247  } else if (retval == -1) {
248  return -1;
249  }
250 
/* NOTE(review): embedded line 251 is missing from this listing; a statement
 * may have been dropped here -- confirm against the original file. */
252 
253  return 0;
254 }
255 
/** \brief load the file named by set->load into a string dataset
 *
 *  Does nothing when no load path is set. The file is parsed through
 *  ParseDatasets() with the DSString selector.
 *
 *  \retval 0 nothing to load, or ParseDatasets() returned neither -1 nor -2
 *            (also reached after -2 if FatalErrorOnInit() returns)
 *  \retval -1 ParseDatasets() returned -1
 */
256 static int DatasetLoadString(Dataset *set)
257 {
258  if (strlen(set->load) == 0)
259  return 0;
260 
261  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
262 
/* open for append+read instead of read-only when save and load are the
 * same path */
263  const char *fopen_mode = "r";
264  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
265  fopen_mode = "a+";
266  }
267 
268  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSString);
269  if (retval == -2) {
270  FatalErrorOnInit("dataset %s could not be processed", set->name);
271  } else if (retval == -1) {
272  return -1;
273  }
274 
/* NOTE(review): embedded line 275 is missing from this listing; a statement
 * may have been dropped here -- confirm against the original file. */
276 
277  return 0;
278 }
279 
280 extern bool g_system;
281 
285 };
286 
287 static void DatasetGetPath(
288  const char *in_path, char *out_path, size_t out_size, enum DatasetGetPathType type)
289 {
290  char path[PATH_MAX];
291  struct stat st;
292 
293  if (PathIsAbsolute(in_path)) {
294  strlcpy(path, in_path, sizeof(path));
295  strlcpy(out_path, path, out_size);
296  return;
297  }
298 
299  const char *data_dir = ConfigGetDataDirectory();
300  if (stat(data_dir, &st) != 0) {
301  SCLogDebug("data-dir '%s': %s", data_dir, strerror(errno));
302  return;
303  }
304 
305  snprintf(path, sizeof(path), "%s/%s", data_dir, in_path); // TODO WINDOWS
306 
307  if (type == TYPE_LOAD) {
308  if (stat(path, &st) != 0) {
309  SCLogDebug("path %s: %s", path, strerror(errno));
310  if (!g_system) {
311  snprintf(path, sizeof(path), "%s", in_path);
312  }
313  }
314  }
315  strlcpy(out_path, path, out_size);
316  SCLogDebug("in_path \'%s\' => \'%s\'", in_path, out_path);
317 }
318 
319 /** \brief look for set by name without creating it */
/* NOTE(review): the signature (embedded line 320) and embedded lines 322-323,
 * 326 and 330 are missing from this listing, including the statement that
 * initializes `set`. From the visible code: a set whose type differs from
 * `type` yields NULL, otherwise `set` (possibly NULL) is returned. Restore
 * the missing lines from the original file. */
321 {
324  if (set) {
325  if (set->type != type) {
327  return NULL;
328  }
329  }
331  return set;
332 }
333 
/** \brief check a requested hash size against the configured limits
 *
 *  \param name      dataset name, used in the error messages
 *  \param hash_size requested hash size
 *  \retval false \a hash_size exceeds the 'single-hashsize' limit, or the
 *                'total-hashsizes' check fails
 *  \retval true  otherwise
 *
 *  NOTE(review): embedded lines 344 and 347 (the second half of the
 *  total-hashsizes condition and the SCLogError() arguments) are missing
 *  from this listing, so the block is not valid C as shown -- restore them
 *  from the original file.
 */
334 static bool DatasetCheckHashsize(const char *name, uint32_t hash_size)
335 {
/* a limit of 0 disables the respective check */
336  if (dataset_max_one_hashsize > 0 && hash_size > dataset_max_one_hashsize) {
337  SCLogError("hashsize %u in dataset '%s' exceeds configured 'single-hashsize' limit (%u)",
338  hash_size, name, dataset_max_one_hashsize);
339  return false;
340  }
341  // we cannot underflow as we know from conf loading that
342  // dataset_max_total_hashsize >= dataset_max_one_hashsize if dataset_max_total_hashsize > 0
343  if (dataset_max_total_hashsize > 0 &&
345  SCLogError("hashsize %u in dataset '%s' exceeds configured 'total-hashsizes' limit (%u, in "
346  "use %u)",
348  return false;
349  }
350 
351  return true;
352 }
353 
354 static void DatasetUpdateHashsize(const char *name, uint32_t hash_size)
355 {
356  if (dataset_max_total_hashsize > 0) {
357  dataset_used_hashsize += hash_size;
358  SCLogDebug("set %s adding with hash_size %u", name, hash_size);
359  }
360 }
361 
/* Look up a dataset by name or allocate a new, not yet loaded one.
 *
 * For an existing set the requested type and save/load paths are checked
 * against the stored ones; a call that passes neither save nor load is
 * accepted as-is. For a new set, zero values in *memcap / *hashsize are
 * replaced with the configured defaults before the hash size limits are
 * checked, so the caller sees the values to use. The new set is only
 * allocated and filled in here; it is not linked into the global list.
 *
 * On success *ret_set points to the existing or the new set. */
362 /**
363  * \return -1 on error
364  * \return 0 on successful creation
365  * \return 1 if the dataset already exists
366  *
367  * Calling function is responsible for locking via DatasetLock()
368  */
369 int DatasetGetOrCreate(const char *name, enum DatasetTypes type, const char *save, const char *load,
370  uint64_t *memcap, uint32_t *hashsize, Dataset **ret_set)
371 {
372  uint64_t default_memcap = 0;
373  uint32_t default_hashsize = 0;
374  if (strlen(name) > DATASET_NAME_MAX_LEN) {
375  return -1;
376  }
377 
/* NOTE(review): embedded line 378 is missing from this listing; it presumably
 * declares `set` and initializes it from a by-name lookup -- restore it from
 * the original file. */
379  if (set) {
380  if (type != DATASET_TYPE_NOTSET && set->type != type) {
381  SCLogError("dataset %s already "
382  "exists and is of type %u",
383  set->name, set->type);
384  return -1;
385  }
386 
387  if ((save == NULL || strlen(save) == 0) &&
388  (load == NULL || strlen(load) == 0)) {
389  // OK, rule keyword doesn't have to set state/load,
390  // even when yaml set has set it.
391  } else {
392  if ((save == NULL && strlen(set->save) > 0) ||
393  (save != NULL && strcmp(set->save, save) != 0)) {
394  SCLogError("dataset %s save mismatch: %s != %s", set->name, set->save, save);
/* NOTE(review): this is the only error path in the function that unlocks.
 * The header comment says the caller owns the lock, and DatasetGet() calls
 * DatasetUnlock() itself whenever this function returns < 0, so this looks
 * like a double unlock -- confirm and remove if so. */
395  DatasetUnlock();
396  return -1;
397  }
398  if ((load == NULL && strlen(set->load) > 0) ||
399  (load != NULL && strcmp(set->load, load) != 0)) {
400  SCLogError("dataset %s load mismatch: %s != %s", set->name, set->load, load);
401  return -1;
402  }
403  }
404 
405  *ret_set = set;
406  return 1;
407  }
408 
/* from here on `set` is NULL: a new set has to be created, which requires a
 * concrete type */
409  if (type == DATASET_TYPE_NOTSET) {
410  SCLogError("dataset %s not defined", name);
411  goto out_err;
412  }
413 
414  DatasetGetDefaultMemcap(&default_memcap, &default_hashsize);
415  if (*hashsize == 0) {
416  *hashsize = default_hashsize;
417  }
418  if (*memcap == 0) {
419  *memcap = default_memcap;
420  }
421 
422  if (!DatasetCheckHashsize(name, *hashsize)) {
423  goto out_err;
424  }
425 
426  set = DatasetAlloc(name);
427  if (set == NULL) {
428  goto out_err;
429  }
430 
431  strlcpy(set->name, name, sizeof(set->name));
432  set->type = type;
433  if (save && strlen(save)) {
434  strlcpy(set->save, save, sizeof(set->save));
435  SCLogDebug("name %s save '%s'", name, set->save);
436  }
437  if (load && strlen(load)) {
438  strlcpy(set->load, load, sizeof(set->load));
439  SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load);
440  }
441 
442  *ret_set = set;
443  return 0;
444 out_err:
445  if (set) {
446  SCFree(set);
447  }
448  return -1;
449 }
450 
451 Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load,
452  uint64_t memcap, uint32_t hashsize)
453 {
454  Dataset *set = NULL;
455 
456  DatasetLock();
457  int ret = DatasetGetOrCreate(name, type, save, load, &memcap, &hashsize, &set);
458  if (ret < 0) {
459  SCLogError("dataset %s creation failed", name);
460  DatasetUnlock();
461  return NULL;
462  }
463  if (ret == 1) {
464  SCLogDebug("dataset %s already exists", name);
465  DatasetUnlock();
466  return set;
467  }
468 
469  char cnf_name[128];
470  snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
471  switch (type) {
472  case DATASET_TYPE_MD5:
473  set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet, Md5StrFree, Md5StrHash,
474  Md5StrCompare, NULL, NULL, load != NULL ? 1 : 0, memcap, hashsize);
475  if (set->hash == NULL)
476  goto out_err;
477  if (DatasetLoadMd5(set) < 0)
478  goto out_err;
479  break;
480  case DATASET_TYPE_STRING:
481  set->hash = THashInit(cnf_name, sizeof(StringType), StringSet, StringFree, StringHash,
482  StringCompare, NULL, StringGetLength, load != NULL ? 1 : 0, memcap, hashsize);
483  if (set->hash == NULL)
484  goto out_err;
485  if (DatasetLoadString(set) < 0)
486  goto out_err;
487  break;
488  case DATASET_TYPE_SHA256:
489  set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet, Sha256StrFree,
490  Sha256StrHash, Sha256StrCompare, NULL, NULL, load != NULL ? 1 : 0, memcap,
491  hashsize);
492  if (set->hash == NULL)
493  goto out_err;
494  if (DatasetLoadSha256(set) < 0)
495  goto out_err;
496  break;
497  case DATASET_TYPE_IPV4:
498  set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4Set, IPv4Free, IPv4Hash,
499  IPv4Compare, NULL, NULL, load != NULL ? 1 : 0, memcap, hashsize);
500  if (set->hash == NULL)
501  goto out_err;
502  if (DatasetLoadIPv4(set) < 0)
503  goto out_err;
504  break;
505  case DATASET_TYPE_IPV6:
506  set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6Set, IPv6Free, IPv6Hash,
507  IPv6Compare, NULL, NULL, load != NULL ? 1 : 0, memcap, hashsize);
508  if (set->hash == NULL)
509  goto out_err;
510  if (DatasetLoadIPv6(set) < 0)
511  goto out_err;
512  break;
513  }
514 
515  if (DatasetAppendSet(set) < 0) {
516  SCLogError("dataset %s append failed", name);
517  goto out_err;
518  }
519 
520  DatasetUnlock();
521  return set;
522 out_err:
523  if (set->hash) {
524  THashShutdown(set->hash);
525  }
526  SCFree(set);
527  DatasetUnlock();
528  return NULL;
529 }
530 
/** \brief tell whether a set is static
 *
 *  A set is static when it has a non-empty load path and no (or an empty)
 *  save path.
 *
 *  \param save save path, may be NULL
 *  \param load load path, may be NULL
 *  \retval true  load is set and save is not
 *  \retval false otherwise
 */
static bool DatasetIsStatic(const char *save, const char *load)
{
    const bool has_load = (load != NULL) && (load[0] != '\0');
    const bool has_save = (save != NULL) && (save[0] != '\0');

    return has_load && !has_save;
}
542 
/** \brief mark static, rule-defined sets as hidden ahead of a reload
 *
 *  Sets that are not static (see DatasetIsStatic()) or that have from_yaml
 *  set are left alone.
 *
 *  NOTE(review): embedded lines 550, 560-561 and 566 are missing from this
 *  listing (presumably the lock/unlock calls and the body of the
 *  dataset_max_total_hashsize branch) -- restore them from the original file.
 */
543 void DatasetReload(void)
544 {
545  /* In order to reload the datasets, just mark the current sets as hidden
546  * and clean them up later.
547  * New datasets shall be created with the rule reload and do not require
548  * any intervention.
549  * */
551  Dataset *set = sets;
552  while (set) {
553  if (!DatasetIsStatic(set->save, set->load) || set->from_yaml) {
554  SCLogDebug("Not a static set, skipping %s", set->name);
555  set = set->next;
556  continue;
557  }
558  set->hidden = true;
559  if (dataset_max_total_hashsize > 0) {
562  }
563  SCLogDebug("Set %s at %p hidden successfully", set->name, set);
564  set = set->next;
565  }
567 }
568 
570 {
/* NOTE(review): the signature (embedded line 569) and embedded lines 572 and
 * 592 are missing from this listing -- restore them from the original file.
 *
 * Removes every set marked hidden from the global `sets` list, shutting down
 * its hash table and freeing it. `prev` tracks the last kept node so the
 * list can be relinked; when the head itself is removed, `sets` is advanced. */
571  SCLogDebug("Post Reload Cleanup starting.. Hidden sets will be removed");
573  Dataset *cur = sets;
574  Dataset *prev = NULL;
575  while (cur) {
/* read next before cur may be freed */
576  Dataset *next = cur->next;
577  if (!cur->hidden) {
578  prev = cur;
579  cur = next;
580  continue;
581  }
582  // Delete the set in case it was hidden
583  if (prev != NULL) {
584  prev->next = next;
585  } else {
586  sets = next;
587  }
588  THashShutdown(cur->hash);
589  SCFree(cur);
590  cur = next;
591  }
593 }
594 
595 /* Value reflects THASH_DEFAULT_HASHSIZE which is what the default was earlier,
596  * despite 2048 commented out in the default yaml. */
597 #define DATASETS_HASHSIZE_DEFAULT 4096
598 
599 void DatasetGetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize)
600 {
601  const char *str = NULL;
602  if (SCConfGet("datasets.defaults.memcap", &str) == 1) {
603  if (ParseSizeStringU64(str, memcap) < 0) {
604  SCLogWarning("memcap value cannot be deduced: %s,"
605  " resetting to default",
606  str);
607  *memcap = 0;
608  }
609  }
610 
611  *hashsize = (uint32_t)DATASETS_HASHSIZE_DEFAULT;
612  if (SCConfGet("datasets.defaults.hashsize", &str) == 1) {
613  if (ParseSizeStringU32(str, hashsize) < 0) {
614  *hashsize = (uint32_t)DATASETS_HASHSIZE_DEFAULT;
615  SCLogWarning("hashsize value cannot be deduced: %s,"
616  " resetting to default: %u",
617  str, *hashsize);
618  }
619  }
620 }
621 
/** \brief create the datasets declared under the "datasets" config node
 *
 *  Reads the hash size limits, then walks the children of the node. Each
 *  named child with a "type" entry is created through DatasetGet() using its
 *  optional "state"/"load", "memcap" and "hashsize" settings, and is marked
 *  from_yaml. Children with an unrecognized type are silently ignored.
 *
 *  \retval 0 always
 *
 *  NOTE(review): embedded lines 632, 637, 642, 644, 646, 648 and 666 are
 *  missing from this listing (parts of the limit parsing and one logging
 *  call), so the block is not valid C as shown -- restore them from the
 *  original file.
 */
622 int DatasetsInit(void)
623 {
624  SCLogDebug("datasets start");
625  SCConfNode *datasets = SCConfGetNode("datasets");
626  uint64_t default_memcap = 0;
627  uint32_t default_hashsize = 0;
628  DatasetGetDefaultMemcap(&default_memcap, &default_hashsize);
629  if (datasets != NULL) {
630  const char *str = NULL;
631  if (SCConfGet("datasets.limits.total-hashsizes", &str) == 1) {
633  FatalError("failed to parse datasets.limits.total-hashsizes value: %s", str);
634  }
635  }
636  if (SCConfGet("datasets.limits.single-hashsize", &str) == 1) {
638  FatalError("failed to parse datasets.limits.single-hashsize value: %s", str);
639  }
640  }
641  if (dataset_max_total_hashsize > 0 &&
643  FatalError("total-hashsizes (%u) cannot be smaller than single-hashsize (%u)",
645  }
647  // the total limit also applies for single limit
649  }
650 
651  int list_pos = 0;
652  SCConfNode *iter = NULL;
653  TAILQ_FOREACH(iter, &datasets->head, next) {
654  if (iter->name == NULL) {
655  list_pos++;
656  continue;
657  }
658 
659  char save[PATH_MAX] = "";
660  char load[PATH_MAX] = "";
661  uint64_t memcap = 0;
662  uint32_t hashsize = 0;
663 
664  const char *set_name = iter->name;
/* NOTE(review): unlike the other skip paths, this one does not advance
 * list_pos; list_pos only feeds conf_str, which is used for debug logging. */
665  if (strlen(set_name) > DATASET_NAME_MAX_LEN) {
667  "set name '%s' too long, max %d chars", set_name, DATASET_NAME_MAX_LEN);
668  continue;
669  }
670 
671  SCConfNode *set_type = SCConfNodeLookupChild(iter, "type");
672  if (set_type == NULL) {
673  list_pos++;
674  continue;
675  }
676 
/* "state" takes precedence over "load": with a state file the same path is
 * used for both saving and loading */
677  SCConfNode *set_save = SCConfNodeLookupChild(iter, "state");
678  if (set_save) {
679  DatasetGetPath(set_save->val, save, sizeof(save), TYPE_STATE);
680  strlcpy(load, save, sizeof(load));
681  } else {
682  SCConfNode *set_load = SCConfNodeLookupChild(iter, "load");
683  if (set_load) {
684  DatasetGetPath(set_load->val, load, sizeof(load), TYPE_LOAD);
685  }
686  }
687 
688  SCConfNode *set_memcap = SCConfNodeLookupChild(iter, "memcap");
689  if (set_memcap) {
690  if (ParseSizeStringU64(set_memcap->val, &memcap) < 0) {
691  SCLogWarning("memcap value cannot be"
692  " deduced: %s, resetting to default",
693  set_memcap->val);
694  memcap = 0;
695  }
696  }
697  SCConfNode *set_hashsize = SCConfNodeLookupChild(iter, "hashsize");
698  if (set_hashsize) {
699  if (ParseSizeStringU32(set_hashsize->val, &hashsize) < 0) {
700  SCLogWarning("hashsize value cannot be"
701  " deduced: %s, resetting to default",
702  set_hashsize->val);
703  hashsize = 0;
704  }
705  }
706  char conf_str[1024];
707  snprintf(conf_str, sizeof(conf_str), "datasets.%d.%s", list_pos, set_name);
708 
709  SCLogDebug("set %s type %s. Conf %s", set_name, set_type->val, conf_str);
710 
/* the branches below differ only in the DATASET_TYPE_* passed to
 * DatasetGet(); a per-set value of 0 selects the configured default */
711  if (strcmp(set_type->val, "md5") == 0) {
712  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_MD5, save, load,
713  memcap > 0 ? memcap : default_memcap,
714  hashsize > 0 ? hashsize : default_hashsize);
715  if (dset == NULL) {
716  FatalErrorOnInit("failed to setup dataset for %s", set_name);
717  continue;
718  }
719  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
720  dset->from_yaml = true;
721 
722  } else if (strcmp(set_type->val, "sha256") == 0) {
723  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_SHA256, save, load,
724  memcap > 0 ? memcap : default_memcap,
725  hashsize > 0 ? hashsize : default_hashsize);
726  if (dset == NULL) {
727  FatalErrorOnInit("failed to setup dataset for %s", set_name);
728  continue;
729  }
730  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
731  dset->from_yaml = true;
732 
733  } else if (strcmp(set_type->val, "string") == 0) {
734  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_STRING, save, load,
735  memcap > 0 ? memcap : default_memcap,
736  hashsize > 0 ? hashsize : default_hashsize);
737  if (dset == NULL) {
738  FatalErrorOnInit("failed to setup dataset for %s", set_name);
739  continue;
740  }
741  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
742  dset->from_yaml = true;
743 
744  } else if (strcmp(set_type->val, "ipv4") == 0) {
745  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_IPV4, save, load,
746  memcap > 0 ? memcap : default_memcap,
747  hashsize > 0 ? hashsize : default_hashsize);
748  if (dset == NULL) {
749  FatalErrorOnInit("failed to setup dataset for %s", set_name);
750  continue;
751  }
752  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
753  dset->from_yaml = true;
754 
755  } else if (strcmp(set_type->val, "ip") == 0) {
756  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_IPV6, save, load,
757  memcap > 0 ? memcap : default_memcap,
758  hashsize > 0 ? hashsize : default_hashsize);
759  if (dset == NULL) {
760  FatalErrorOnInit("failed to setup dataset for %s", set_name);
761  continue;
762  }
763  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
764  dset->from_yaml = true;
765  }
766 
767  list_pos++;
768  }
769  }
770  SCLogDebug("datasets done: %p", datasets);
771  return 0;
772 }
773 
/** \brief shut down and free every set in the global list
 *
 *  Leaves `sets` set to NULL.
 *
 *  NOTE(review): embedded lines 777 and 787 are missing from this listing
 *  (presumably the lock/unlock calls) -- restore them from the original file.
 */
774 void DatasetsDestroy(void)
775 {
776  SCLogDebug("destroying datasets: %p", sets);
778  Dataset *set = sets;
779  while (set) {
780  SCLogDebug("destroying set %s", set->name);
/* read next before the node is freed */
781  Dataset *next = set->next;
782  THashShutdown(set->hash);
783  SCFree(set);
784  set = next;
785  }
786  sets = NULL;
788  SCLogDebug("destroying datasets done: %p", sets);
789 }
790 
/** \brief output callback for THashWalk(): write one formatted entry to a file
 *
 *  \param ctx      FILE pointer to write to, may be NULL
 *  \param data     bytes to write
 *  \param data_len number of bytes in \a data
 *  \retval 0 \a ctx is NULL, or fwrite() wrote nothing
 *  \retval 1 the whole buffer was written as a single item
 */
static int SaveCallback(void *ctx, const uint8_t *data, const uint32_t data_len)
{
    FILE *out = ctx;
    if (out == NULL) {
        return 0;
    }
    return (int)fwrite(data, data_len, 1, out);
}
800 
801 static int Md5AsAscii(const void *s, char *out, size_t out_size)
802 {
803  const Md5Type *md5 = s;
804  char str[256];
805  PrintHexString(str, sizeof(str), (uint8_t *)md5->md5, sizeof(md5->md5));
806  strlcat(out, str, out_size);
807  strlcat(out, "\n", out_size);
808  return (int)strlen(out);
809 }
810 
811 static int Sha256AsAscii(const void *s, char *out, size_t out_size)
812 {
813  const Sha256Type *sha = s;
814  char str[256];
815  PrintHexString(str, sizeof(str), (uint8_t *)sha->sha256, sizeof(sha->sha256));
816  strlcat(out, str, out_size);
817  strlcat(out, "\n", out_size);
818  return (int)strlen(out);
819 }
820 
821 static int IPv4AsAscii(const void *s, char *out, size_t out_size)
822 {
823  const IPv4Type *ip4 = s;
824  char str[256];
825  PrintInet(AF_INET, ip4->ipv4, str, sizeof(str));
826  strlcat(out, str, out_size);
827  strlcat(out, "\n", out_size);
828  return (int)strlen(out);
829 }
830 
831 static int IPv6AsAscii(const void *s, char *out, size_t out_size)
832 {
833  const IPv6Type *ip6 = s;
834  char str[256];
835  bool is_ipv4 = true;
836  for (int i = 4; i <= 15; i++) {
837  if (ip6->ipv6[i] != 0) {
838  is_ipv4 = false;
839  break;
840  }
841  }
842  if (is_ipv4) {
843  PrintInet(AF_INET, ip6->ipv6, str, sizeof(str));
844  } else {
845  PrintInet(AF_INET6, ip6->ipv6, str, sizeof(str));
846  }
847  strlcat(out, str, out_size);
848  strlcat(out, "\n", out_size);
849  return (int)strlen(out);
850 }
851 
/** \brief write every set that has a save path to its file
 *
 *  Sets without a save path are skipped, as are sets whose file cannot be
 *  opened for writing (no error is reported for the latter). The file is
 *  opened with mode "w" and filled by walking the hash with the formatter
 *  matching the set type.
 *
 *  NOTE(review): embedded lines 855 and 890 are missing from this listing
 *  (presumably the lock/unlock calls) -- restore them from the original file.
 */
852 void DatasetsSave(void)
853 {
854  SCLogDebug("saving datasets: %p", sets);
856  Dataset *set = sets;
857  while (set) {
858  if (strlen(set->save) == 0)
859  goto next;
860 
861  FILE *fp = fopen(set->save, "w");
862  if (fp == NULL)
863  goto next;
864 
865  SCLogDebug("dumping %s to %s", set->name, set->save);
866 
867  switch (set->type) {
868  case DATASET_TYPE_STRING:
869  THashWalk(set->hash, StringAsBase64, SaveCallback, fp);
870  break;
871  case DATASET_TYPE_MD5:
872  THashWalk(set->hash, Md5AsAscii, SaveCallback, fp);
873  break;
874  case DATASET_TYPE_SHA256:
875  THashWalk(set->hash, Sha256AsAscii, SaveCallback, fp);
876  break;
877  case DATASET_TYPE_IPV4:
878  THashWalk(set->hash, IPv4AsAscii, SaveCallback, fp);
879  break;
880  case DATASET_TYPE_IPV6:
881  THashWalk(set->hash, IPv6AsAscii, SaveCallback, fp);
882  break;
883  }
884 
885  fclose(fp);
886 
887  next:
888  set = set->next;
889  }
891 }
892 
893 static int DatasetLookupString(Dataset *set, const uint8_t *data, const uint32_t data_len)
894 {
895  if (set == NULL)
896  return -1;
897 
898  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep.value = 0 };
899  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
900  if (rdata) {
901  DatasetUnlockData(rdata);
902  return 1;
903  }
904  return 0;
905 }
906 
907 static DataRepResultType DatasetLookupStringwRep(Dataset *set,
908  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
909 {
910  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
911 
912  if (set == NULL)
913  return rrep;
914 
915  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = *rep };
916  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
917  if (rdata) {
918  StringType *found = rdata->data;
919  rrep.found = true;
920  rrep.rep = found->rep;
921  DatasetUnlockData(rdata);
922  return rrep;
923  }
924  return rrep;
925 }
926 
927 static int DatasetLookupIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
928 {
929  if (set == NULL)
930  return -1;
931 
932  if (data_len != 4)
933  return -1;
934 
935  IPv4Type lookup = { .rep.value = 0 };
936  memcpy(lookup.ipv4, data, 4);
937  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
938  if (rdata) {
939  DatasetUnlockData(rdata);
940  return 1;
941  }
942  return 0;
943 }
944 
945 static DataRepResultType DatasetLookupIPv4wRep(
946  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
947 {
948  DataRepResultType rrep = { .found = false, .rep = { .value = 0 } };
949 
950  if (set == NULL)
951  return rrep;
952 
953  if (data_len != 4)
954  return rrep;
955 
956  IPv4Type lookup = { .rep.value = 0 };
957  memcpy(lookup.ipv4, data, data_len);
958  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
959  if (rdata) {
960  IPv4Type *found = rdata->data;
961  rrep.found = true;
962  rrep.rep = found->rep;
963  DatasetUnlockData(rdata);
964  return rrep;
965  }
966  return rrep;
967 }
968 
969 static int DatasetLookupIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
970 {
971  if (set == NULL)
972  return -1;
973 
974  if (data_len != 16 && data_len != 4)
975  return -1;
976 
977  IPv6Type lookup = { .rep.value = 0 };
978  memcpy(lookup.ipv6, data, data_len);
979  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
980  if (rdata) {
981  DatasetUnlockData(rdata);
982  return 1;
983  }
984  return 0;
985 }
986 
987 static DataRepResultType DatasetLookupIPv6wRep(
988  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
989 {
990  DataRepResultType rrep = { .found = false, .rep = { .value = 0 } };
991 
992  if (set == NULL)
993  return rrep;
994 
995  if (data_len != 16 && data_len != 4)
996  return rrep;
997 
998  IPv6Type lookup = { .rep.value = 0 };
999  memcpy(lookup.ipv6, data, data_len);
1000  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1001  if (rdata) {
1002  IPv6Type *found = rdata->data;
1003  rrep.found = true;
1004  rrep.rep = found->rep;
1005  DatasetUnlockData(rdata);
1006  return rrep;
1007  }
1008  return rrep;
1009 }
1010 
1011 static int DatasetLookupMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1012 {
1013  if (set == NULL)
1014  return -1;
1015 
1016  if (data_len != 16)
1017  return -1;
1018 
1019  Md5Type lookup = { .rep.value = 0 };
1020  memcpy(lookup.md5, data, data_len);
1021  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1022  if (rdata) {
1023  DatasetUnlockData(rdata);
1024  return 1;
1025  }
1026  return 0;
1027 }
1028 
1029 static DataRepResultType DatasetLookupMd5wRep(Dataset *set,
1030  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1031 {
1032  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
1033 
1034  if (set == NULL)
1035  return rrep;
1036 
1037  if (data_len != 16)
1038  return rrep;
1039 
1040  Md5Type lookup = { .rep.value = 0};
1041  memcpy(lookup.md5, data, data_len);
1042  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1043  if (rdata) {
1044  Md5Type *found = rdata->data;
1045  rrep.found = true;
1046  rrep.rep = found->rep;
1047  DatasetUnlockData(rdata);
1048  return rrep;
1049  }
1050  return rrep;
1051 }
1052 
1053 static int DatasetLookupSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1054 {
1055  if (set == NULL)
1056  return -1;
1057 
1058  if (data_len != 32)
1059  return -1;
1060 
1061  Sha256Type lookup = { .rep.value = 0 };
1062  memcpy(lookup.sha256, data, data_len);
1063  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1064  if (rdata) {
1065  DatasetUnlockData(rdata);
1066  return 1;
1067  }
1068  return 0;
1069 }
1070 
1071 static DataRepResultType DatasetLookupSha256wRep(Dataset *set,
1072  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1073 {
1074  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
1075 
1076  if (set == NULL)
1077  return rrep;
1078 
1079  if (data_len != 32)
1080  return rrep;
1081 
1082  Sha256Type lookup = { .rep.value = 0 };
1083  memcpy(lookup.sha256, data, data_len);
1084  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1085  if (rdata) {
1086  Sha256Type *found = rdata->data;
1087  rrep.found = true;
1088  rrep.rep = found->rep;
1089  DatasetUnlockData(rdata);
1090  return rrep;
1091  }
1092  return rrep;
1093 }
1094 
1095 /**
1096  * \brief see if \a data is part of the set
1097  * \param set dataset
1098  * \param data data to look up
1099  * \param data_len length in bytes of \a data
1100  * \retval -1 error
1101  * \retval 0 not found
1102  * \retval 1 found
1103  */
1104 int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
1105 {
1106  if (set == NULL)
1107  return -1;
1108 
1109  switch (set->type) {
1110  case DATASET_TYPE_STRING:
1111  return DatasetLookupString(set, data, data_len);
1112  case DATASET_TYPE_MD5:
1113  return DatasetLookupMd5(set, data, data_len);
1114  case DATASET_TYPE_SHA256:
1115  return DatasetLookupSha256(set, data, data_len);
1116  case DATASET_TYPE_IPV4:
1117  return DatasetLookupIPv4(set, data, data_len);
1118  case DATASET_TYPE_IPV6:
1119  return DatasetLookupIPv6(set, data, data_len);
1120  }
1121  return -1;
1122 }
1123 
1124 DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
1125  const DataRepType *rep)
1126 {
1127  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
1128  if (set == NULL)
1129  return rrep;
1130 
1131  switch (set->type) {
1132  case DATASET_TYPE_STRING:
1133  return DatasetLookupStringwRep(set, data, data_len, rep);
1134  case DATASET_TYPE_MD5:
1135  return DatasetLookupMd5wRep(set, data, data_len, rep);
1136  case DATASET_TYPE_SHA256:
1137  return DatasetLookupSha256wRep(set, data, data_len, rep);
1138  case DATASET_TYPE_IPV4:
1139  return DatasetLookupIPv4wRep(set, data, data_len, rep);
1140  case DATASET_TYPE_IPV6:
1141  return DatasetLookupIPv6wRep(set, data, data_len, rep);
1142  }
1143  return rrep;
1144 }
1145 
1146 /**
1147  * \retval 1 data was added to the hash
1148  * \retval 0 data was not added to the hash as it is already there
1149  * \retval -1 failed to add data to the hash
1150  */
1151 static int DatasetAddString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1152 {
1153  if (set == NULL)
1154  return -1;
1155 
1156  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1157  .rep.value = 0 };
1158  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1159  if (res.data) {
1160  DatasetUnlockData(res.data);
1161  return res.is_new ? 1 : 0;
1162  }
1163  return -1;
1164 }
1165 
1166 /**
1167  * \retval 1 data was added to the hash
1168  * \retval 0 data was not added to the hash as it is already there
1169  * \retval -1 failed to add data to the hash
1170  */
1171 static int DatasetAddStringwRep(
1172  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1173 {
1174  if (set == NULL)
1175  return -1;
1176 
1177  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1178  .rep = *rep };
1179  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1180  if (res.data) {
1181  DatasetUnlockData(res.data);
1182  return res.is_new ? 1 : 0;
1183  }
1184  return -1;
1185 }
1186 
1187 static int DatasetAddIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
1188 {
1189  if (set == NULL) {
1190  return -1;
1191  }
1192 
1193  if (data_len < 4) {
1194  return -2;
1195  }
1196 
1197  IPv4Type lookup = { .rep.value = 0 };
1198  memcpy(lookup.ipv4, data, 4);
1199  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1200  if (res.data) {
1201  DatasetUnlockData(res.data);
1202  return res.is_new ? 1 : 0;
1203  }
1204  return -1;
1205 }
1206 
1207 static int DatasetAddIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
1208 {
1209  if (set == NULL) {
1210  return -1;
1211  }
1212 
1213  if (data_len != 16 && data_len != 4) {
1214  return -2;
1215  }
1216 
1217  IPv6Type lookup = { .rep.value = 0 };
1218  memcpy(lookup.ipv6, data, data_len);
1219  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1220  if (res.data) {
1221  DatasetUnlockData(res.data);
1222  return res.is_new ? 1 : 0;
1223  }
1224  return -1;
1225 }
1226 
1227 static int DatasetAddIPv4wRep(
1228  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1229 {
1230  if (set == NULL)
1231  return -1;
1232 
1233  if (data_len < 4)
1234  return -2;
1235 
1236  IPv4Type lookup = { .rep = *rep };
1237  memcpy(lookup.ipv4, data, 4);
1238  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1239  if (res.data) {
1240  DatasetUnlockData(res.data);
1241  return res.is_new ? 1 : 0;
1242  }
1243  return -1;
1244 }
1245 
1246 static int DatasetAddIPv6wRep(
1247  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1248 {
1249  if (set == NULL)
1250  return -1;
1251 
1252  if (data_len != 16)
1253  return -2;
1254 
1255  IPv6Type lookup = { .rep = *rep };
1256  memcpy(lookup.ipv6, data, 16);
1257  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1258  if (res.data) {
1259  DatasetUnlockData(res.data);
1260  return res.is_new ? 1 : 0;
1261  }
1262  return -1;
1263 }
1264 
1265 static int DatasetAddMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1266 {
1267  if (set == NULL)
1268  return -1;
1269 
1270  if (data_len != 16)
1271  return -2;
1272 
1273  Md5Type lookup = { .rep.value = 0 };
1274  memcpy(lookup.md5, data, 16);
1275  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1276  if (res.data) {
1277  DatasetUnlockData(res.data);
1278  return res.is_new ? 1 : 0;
1279  }
1280  return -1;
1281 }
1282 
1283 static int DatasetAddMd5wRep(
1284  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1285 {
1286  if (set == NULL)
1287  return -1;
1288 
1289  if (data_len != 16)
1290  return -2;
1291 
1292  Md5Type lookup = { .rep = *rep };
1293  memcpy(lookup.md5, data, 16);
1294  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1295  if (res.data) {
1296  DatasetUnlockData(res.data);
1297  return res.is_new ? 1 : 0;
1298  }
1299  return -1;
1300 }
1301 
1302 static int DatasetAddSha256wRep(
1303  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1304 {
1305  if (set == NULL)
1306  return -1;
1307 
1308  if (data_len != 32)
1309  return -2;
1310 
1311  Sha256Type lookup = { .rep = *rep };
1312  memcpy(lookup.sha256, data, 32);
1313  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1314  if (res.data) {
1315  DatasetUnlockData(res.data);
1316  return res.is_new ? 1 : 0;
1317  }
1318  return -1;
1319 }
1320 
1321 static int DatasetAddSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1322 {
1323  if (set == NULL)
1324  return -1;
1325 
1326  if (data_len != 32)
1327  return -2;
1328 
1329  Sha256Type lookup = { .rep.value = 0 };
1330  memcpy(lookup.sha256, data, 32);
1331  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1332  if (res.data) {
1333  DatasetUnlockData(res.data);
1334  return res.is_new ? 1 : 0;
1335  }
1336  return -1;
1337 }
1338 
1339 int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
1340 {
1341  if (set == NULL)
1342  return -1;
1343 
1344  switch (set->type) {
1345  case DATASET_TYPE_STRING:
1346  return DatasetAddString(set, data, data_len);
1347  case DATASET_TYPE_MD5:
1348  return DatasetAddMd5(set, data, data_len);
1349  case DATASET_TYPE_SHA256:
1350  return DatasetAddSha256(set, data, data_len);
1351  case DATASET_TYPE_IPV4:
1352  return DatasetAddIPv4(set, data, data_len);
1353  case DATASET_TYPE_IPV6:
1354  return DatasetAddIPv6(set, data, data_len);
1355  }
1356  return -1;
1357 }
1358 
1359 int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep)
1360 {
1361  if (set == NULL)
1362  return -1;
1363 
1364  switch (set->type) {
1365  case DATASET_TYPE_STRING:
1366  return DatasetAddStringwRep(set, data, data_len, rep);
1367  case DATASET_TYPE_MD5:
1368  return DatasetAddMd5wRep(set, data, data_len, rep);
1369  case DATASET_TYPE_SHA256:
1370  return DatasetAddSha256wRep(set, data, data_len, rep);
1371  case DATASET_TYPE_IPV4:
1372  return DatasetAddIPv4wRep(set, data, data_len, rep);
1373  case DATASET_TYPE_IPV6:
1374  return DatasetAddIPv6wRep(set, data, data_len, rep);
1375  }
1376  return -1;
1377 }
1378 
1379 typedef int (*DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len);
1380 
1381 static int DatasetOpSerialized(Dataset *set, const char *string, DatasetOpFunc DatasetOpString,
1382  DatasetOpFunc DatasetOpMd5, DatasetOpFunc DatasetOpSha256, DatasetOpFunc DatasetOpIPv4,
1383  DatasetOpFunc DatasetOpIPv6)
1384 {
1385  if (set == NULL)
1386  return -1;
1387  if (strlen(string) == 0)
1388  return -1;
1389 
1390  switch (set->type) {
1391  case DATASET_TYPE_STRING: {
1392  if (strlen(string) > UINT16_MAX) {
1393  // size check before cast and stack allocation
1394  return -1;
1395  }
1396  uint32_t decoded_size = SCBase64DecodeBufferSize((uint32_t)strlen(string));
1397  uint8_t decoded[decoded_size];
1398  uint32_t num_decoded = SCBase64Decode(
1399  (const uint8_t *)string, strlen(string), SCBase64ModeStrict, decoded);
1400  if (num_decoded == 0) {
1401  return -2;
1402  }
1403 
1404  return DatasetOpString(set, decoded, num_decoded);
1405  }
1406  case DATASET_TYPE_MD5: {
1407  if (strlen(string) != 32)
1408  return -2;
1409  uint8_t hash[16];
1410  if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0)
1411  return -2;
1412  return DatasetOpMd5(set, hash, 16);
1413  }
1414  case DATASET_TYPE_SHA256: {
1415  if (strlen(string) != 64)
1416  return -2;
1417  uint8_t hash[32];
1418  if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0)
1419  return -2;
1420  return DatasetOpSha256(set, hash, 32);
1421  }
1422  case DATASET_TYPE_IPV4: {
1423  struct in_addr in;
1424  if (inet_pton(AF_INET, string, &in) != 1)
1425  return -2;
1426  return DatasetOpIPv4(set, (uint8_t *)&in.s_addr, 4);
1427  }
1428  case DATASET_TYPE_IPV6: {
1429  struct in6_addr in6;
1430  if (DatasetParseIpv6String(set, string, &in6) != 0) {
1431  SCLogError("Dataset failed to import %s as IPv6", string);
1432  return -2;
1433  }
1434  return DatasetOpIPv6(set, (uint8_t *)&in6.s6_addr, 16);
1435  }
1436  }
1437  return -1;
1438 }
1439 
1440 /** \brief add serialized data to set
1441  * \retval int 1 added
1442  * \retval int 0 already in hash
1443  * \retval int -1 API error (not added)
1444  * \retval int -2 DATA error
1445  */
1446 int DatasetAddSerialized(Dataset *set, const char *string)
1447 {
1448  return DatasetOpSerialized(set, string, DatasetAddString, DatasetAddMd5, DatasetAddSha256,
1449  DatasetAddIPv4, DatasetAddIPv6);
1450 }
1451 
1452 /** \brief add serialized data to set
1453  * \retval int 1 added
1454  * \retval int 0 already in hash
1455  * \retval int -1 API error (not added)
1456  * \retval int -2 DATA error
1457  */
1458 int DatasetLookupSerialized(Dataset *set, const char *string)
1459 {
1460  return DatasetOpSerialized(set, string, DatasetLookupString, DatasetLookupMd5,
1461  DatasetLookupSha256, DatasetLookupIPv4, DatasetLookupIPv6);
1462 }
1463 
1464 /**
1465  * \retval 1 data was removed from the hash
1466  * \retval 0 data not removed (busy)
1467  * \retval -1 data not found
1468  */
1469 static int DatasetRemoveString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1470 {
1471  if (set == NULL)
1472  return -1;
1473 
1474  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1475  .rep.value = 0 };
1476  return THashRemoveFromHash(set->hash, &lookup);
1477 }
1478 
1479 static int DatasetRemoveIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
1480 {
1481  if (set == NULL)
1482  return -1;
1483 
1484  if (data_len != 4)
1485  return -2;
1486 
1487  IPv4Type lookup = { .rep.value = 0 };
1488  memcpy(lookup.ipv4, data, 4);
1489  return THashRemoveFromHash(set->hash, &lookup);
1490 }
1491 
1492 static int DatasetRemoveIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
1493 {
1494  if (set == NULL)
1495  return -1;
1496 
1497  if (data_len != 16)
1498  return -2;
1499 
1500  IPv6Type lookup = { .rep.value = 0 };
1501  memcpy(lookup.ipv6, data, 16);
1502  return THashRemoveFromHash(set->hash, &lookup);
1503 }
1504 
1505 static int DatasetRemoveMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1506 {
1507  if (set == NULL)
1508  return -1;
1509 
1510  if (data_len != 16)
1511  return -2;
1512 
1513  Md5Type lookup = { .rep.value = 0 };
1514  memcpy(lookup.md5, data, 16);
1515  return THashRemoveFromHash(set->hash, &lookup);
1516 }
1517 
1518 static int DatasetRemoveSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1519 {
1520  if (set == NULL)
1521  return -1;
1522 
1523  if (data_len != 32)
1524  return -2;
1525 
1526  Sha256Type lookup = { .rep.value = 0 };
1527  memcpy(lookup.sha256, data, 32);
1528  return THashRemoveFromHash(set->hash, &lookup);
1529 }
1530 
1531 /** \brief remove serialized data from set
1532  * \retval int 1 removed
1533  * \retval int 0 found but busy (not removed)
1534  * \retval int -1 API error (not removed)
1535  * \retval int -2 DATA error */
1536 int DatasetRemoveSerialized(Dataset *set, const char *string)
1537 {
1538  return DatasetOpSerialized(set, string, DatasetRemoveString, DatasetRemoveMd5,
1539  DatasetRemoveSha256, DatasetRemoveIPv4, DatasetRemoveIPv6);
1540 }
1541 
1542 int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len)
1543 {
1544  if (set == NULL)
1545  return -1;
1546 
1547  switch (set->type) {
1548  case DATASET_TYPE_STRING:
1549  return DatasetRemoveString(set, data, data_len);
1550  case DATASET_TYPE_MD5:
1551  return DatasetRemoveMd5(set, data, data_len);
1552  case DATASET_TYPE_SHA256:
1553  return DatasetRemoveSha256(set, data, data_len);
1554  case DATASET_TYPE_IPV4:
1555  return DatasetRemoveIPv4(set, data, data_len);
1556  case DATASET_TYPE_IPV6:
1557  return DatasetRemoveIPv6(set, data, data_len);
1558  }
1559  return -1;
1560 }
dataset_used_hashsize
uint32_t dataset_used_hashsize
Definition: datasets.c:52
util-byte.h
sets_lock
SCMutex sets_lock
Definition: datasets.c:46
StringType::rep
DataRepType rep
Definition: datasets-string.h:33
len
uint8_t len
Definition: app-layer-dnp3.h:2
datasets-string.h
DataRepResultType::rep
DataRepType rep
Definition: datasets-reputation.h:31
THashDataGetResult::data
THashData * data
Definition: util-thash.h:192
datasets-md5.h
Dataset::name
char name[DATASET_NAME_MAX_LEN+1]
Definition: datasets.h:48
DatasetAlloc
Dataset * DatasetAlloc(const char *name)
Definition: datasets.c:112
Dataset::id
uint32_t id
Definition: datasets.h:50
Dataset::save
char save[PATH_MAX]
Definition: datasets.h:57
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:275
ParseSizeStringU64
int ParseSizeStringU64(const char *size, uint64_t *res)
Definition: util-misc.c:190
next
struct HtpBodyChunk_ * next
Definition: app-layer-htp.h:0
datasets-sha256.h
IPv6Compare
bool IPv6Compare(void *a, void *b)
Definition: datasets-ipv6.c:56
HexToRaw
int HexToRaw(const uint8_t *in, size_t ins, uint8_t *out, size_t outs)
Definition: util-byte.c:806
THashRemoveFromHash
int THashRemoveFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:867
TYPE_STATE
@ TYPE_STATE
Definition: datasets.c:283
Md5Type
Definition: datasets-md5.h:30
Dataset::hash
THashTableContext * hash
Definition: datasets.h:54
ctx
struct Thresholds ctx
IPv4Hash
uint32_t IPv4Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv4.c:63
Sha256Type::sha256
uint8_t sha256[32]
Definition: datasets-sha256.h:31
SCConfGet
int SCConfGet(const char *name, const char **vptr)
Retrieve the value of a configuration node.
Definition: conf.c:350
Md5Type::rep
DataRepType rep
Definition: datasets-md5.h:33
DataRepResultType::found
bool found
Definition: datasets-reputation.h:30
PrintHexString
void PrintHexString(char *str, size_t size, uint8_t *buf, size_t buf_len)
Definition: util-print.c:255
Dataset::type
enum DatasetTypes type
Definition: datasets.h:49
TAILQ_FOREACH
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:252
THashConsolidateMemcap
void THashConsolidateMemcap(THashTableContext *ctx)
Definition: util-thash.c:345
SCMutexLock
#define SCMutexLock(mut)
Definition: threads-debug.h:117
rust.h
DATASET_TYPE_SHA256
@ DATASET_TYPE_SHA256
Definition: datasets.h:41
Sha256Type::rep
DataRepType rep
Definition: datasets-sha256.h:33
SCMUTEX_INITIALIZER
#define SCMUTEX_INITIALIZER
Definition: threads-debug.h:121
datasets-reputation.h
DatasetAddwRep
int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep)
Definition: datasets.c:1359
ConfigGetDataDirectory
const char * ConfigGetDataDirectory(void)
Definition: util-conf.c:80
Md5Type::md5
uint8_t md5[16]
Definition: datasets-md5.h:31
DATASET_TYPE_IPV6
@ DATASET_TYPE_IPV6
Definition: datasets.h:43
Md5StrCompare
bool Md5StrCompare(void *a, void *b)
Definition: datasets-md5.c:57
DatasetLookupSerialized
int DatasetLookupSerialized(Dataset *set, const char *string)
look up serialized data in set
Definition: datasets.c:1458
strlcpy
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: util-strlcpyu.c:43
DataRepResultType
Definition: datasets-reputation.h:29
DatasetGet
Dataset * DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load, uint64_t memcap, uint32_t hashsize)
Definition: datasets.c:451
dataset_max_one_hashsize
uint32_t dataset_max_one_hashsize
Definition: datasets.c:50
StringSet
int StringSet(void *dst, void *src)
Definition: datasets-string.c:60
DatasetRemove
int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1542
DatasetGetDefaultMemcap
void DatasetGetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize)
Definition: datasets.c:599
THashDataConfig_::hash_size
uint32_t hash_size
Definition: util-thash.h:127
datasets.h
IPv6Set
int IPv6Set(void *dst, void *src)
Definition: datasets-ipv6.c:33
util-debug.h
TYPE_LOAD
@ TYPE_LOAD
Definition: datasets.c:284
DatasetAdd
int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1339
strlcat
size_t strlcat(char *, const char *src, size_t siz)
Definition: util-strlcatu.c:45
DatasetGetOrCreate
int DatasetGetOrCreate(const char *name, enum DatasetTypes type, const char *save, const char *load, uint64_t *memcap, uint32_t *hashsize, Dataset **ret_set)
Definition: datasets.c:369
DATASETS_HASHSIZE_DEFAULT
#define DATASETS_HASHSIZE_DEFAULT
Definition: datasets.c:597
StringAsBase64
int StringAsBase64(const void *s, char *out, size_t out_size)
Definition: datasets-string.c:46
SCMutexUnlock
#define SCMutexUnlock(mut)
Definition: threads-debug.h:119
datasets-ipv6.h
IPv6Type::ipv6
uint8_t ipv6[16]
Definition: datasets-ipv6.h:31
DATASET_TYPE_NOTSET
#define DATASET_TYPE_NOTSET
Definition: datasets.h:38
IPv6Type::rep
DataRepType rep
Definition: datasets-ipv6.h:33
util-print.h
DatasetPostReloadCleanup
void DatasetPostReloadCleanup(void)
Definition: datasets.c:569
PrintInet
const char * PrintInet(int af, const void *src, char *dst, socklen_t size)
Definition: util-print.c:231
DatasetOpFunc
int(* DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1379
datasets-ipv4.h
SCLogWarning
#define SCLogWarning(...)
Macro used to log WARNING messages.
Definition: util-debug.h:255
StringGetLength
uint32_t StringGetLength(void *s)
Definition: datasets-string.c:112
Sha256StrSet
int Sha256StrSet(void *dst, void *src)
Definition: datasets-sha256.c:32
DatasetsDestroy
void DatasetsDestroy(void)
Definition: datasets.c:774
Md5StrHash
uint32_t Md5StrHash(uint32_t hash_seed, void *s)
Definition: datasets-md5.c:65
THashDataGetResult
Definition: util-thash.h:191
StringType
Definition: datasets-string.h:30
IPv4Set
int IPv4Set(void *dst, void *src)
Definition: datasets-ipv4.c:32
type
uint16_t type
Definition: decode-vlan.c:106
DatasetsSave
void DatasetsSave(void)
Definition: datasets.c:852
conf.h
DatasetLock
void DatasetLock(void)
Definition: datasets.c:102
IPv6Type
Definition: datasets-ipv6.h:30
name
const char * name
Definition: tm-threads.c:2163
DatasetLookup
int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
see if data is part of the set
Definition: datasets.c:1104
DATASET_TYPE_IPV4
@ DATASET_TYPE_IPV4
Definition: datasets.h:42
StringType::ptr
uint8_t * ptr
Definition: datasets-string.h:36
DatasetRemoveSerialized
int DatasetRemoveSerialized(Dataset *set, const char *string)
remove serialized data from set
Definition: datasets.c:1536
dataset_max_total_hashsize
uint32_t dataset_max_total_hashsize
Definition: datasets.c:51
g_system
bool g_system
Definition: suricata.c:191
THashShutdown
void THashShutdown(THashTableContext *ctx)
shutdown the flow engine
Definition: util-thash.c:354
util-mem.h
SCConfNodeLookupChild
SCConfNode * SCConfNodeLookupChild(const SCConfNode *node, const char *name)
Lookup a child configuration node by name.
Definition: conf.c:796
DatasetTypes
DatasetTypes
Definition: datasets.h:37
Dataset::next
struct Dataset * next
Definition: datasets.h:59
THashData_::data
void * data
Definition: util-thash.h:92
util-conf.h
Sha256Type
Definition: datasets-sha256.h:30
Sha256StrFree
void Sha256StrFree(void *s)
Definition: datasets-sha256.c:70
THashData_
Definition: util-thash.h:85
IPv4Free
void IPv4Free(void *s)
Definition: datasets-ipv4.c:70
suricata-common.h
util-path.h
datasets-context-json.h
FatalErrorOnInit
#define FatalErrorOnInit(...)
Fatal error IF we're starting up, and configured to consider errors to be fatal errors.
Definition: util-debug.h:519
DATASET_NAME_MAX_LEN
#define DATASET_NAME_MAX_LEN
Definition: datasets.h:46
PathIsAbsolute
int PathIsAbsolute(const char *path)
Check if a path is absolute.
Definition: util-path.c:44
Md5StrSet
int Md5StrSet(void *dst, void *src)
Definition: datasets-md5.c:34
StringCompare
bool StringCompare(void *a, void *b)
Definition: datasets-string.c:95
THashGetFromHash
struct THashDataGetResult THashGetFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:614
FatalError
#define FatalError(...)
Definition: util-debug.h:510
hashsize
#define hashsize(n)
Definition: util-hash-lookup3.h:40
THashLookupFromHash
THashData * THashLookupFromHash(THashTableContext *ctx, void *data)
look up data in the hash
Definition: util-thash.c:724
IPv4Type
Definition: datasets-ipv4.h:30
ParseSizeStringU32
int ParseSizeStringU32(const char *size, uint32_t *res)
Definition: util-misc.c:173
THashDecrUsecnt
#define THashDecrUsecnt(h)
Definition: util-thash.h:170
IPv4Compare
bool IPv4Compare(void *a, void *b)
Definition: datasets-ipv4.c:55
DatasetFind
Dataset * DatasetFind(const char *name, enum DatasetTypes type)
look for set by name without creating it
Definition: datasets.c:320
util-validate.h
SCLogConfig
struct SCLogConfig_ SCLogConfig
Holds the config state used by the logging api.
IPv6Hash
uint32_t IPv6Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv6.c:64
DatasetsInit
int DatasetsInit(void)
Definition: datasets.c:622
str
#define str(s)
Definition: suricata-common.h:308
DatasetGetTypeFromString
enum DatasetTypes DatasetGetTypeFromString(const char *s)
Definition: datasets.c:64
SCConfGetNode
SCConfNode * SCConfGetNode(const char *name)
Get a SCConfNode by name.
Definition: conf.c:181
SCLogError
#define SCLogError(...)
Macro used to log ERROR messages.
Definition: util-debug.h:267
THashWalk
int THashWalk(THashTableContext *ctx, THashFormatFunc FormatterFunc, THashOutputFunc OutputterFunc, void *output_ctx)
Walk the hash.
Definition: util-thash.c:388
SCFree
#define SCFree(p)
Definition: util-mem.h:61
Dataset::hidden
bool hidden
Definition: datasets.h:52
DatasetReload
void DatasetReload(void)
Definition: datasets.c:543
DatasetGetPathType
DatasetGetPathType
Definition: datasets.c:282
Sha256StrCompare
bool Sha256StrCompare(void *a, void *b)
Definition: datasets-sha256.c:55
StringHash
uint32_t StringHash(uint32_t hash_seed, void *s)
Definition: datasets-string.c:106
DATASET_TYPE_MD5
@ DATASET_TYPE_MD5
Definition: datasets.h:40
DATASET_TYPE_STRING
@ DATASET_TYPE_STRING
Definition: datasets.h:39
DatasetUnlock
void DatasetUnlock(void)
Definition: datasets.c:107
THashDataGetResult::is_new
bool is_new
Definition: util-thash.h:193
IPv6Free
void IPv6Free(void *s)
Definition: datasets-ipv6.c:71
suricata.h
THashInit
THashTableContext * THashInit(const char *cnf_prefix, uint32_t data_size, int(*DataSet)(void *, void *), void(*DataFree)(void *), uint32_t(*DataHash)(uint32_t, void *), bool(*DataCompare)(void *, void *), bool(*DataExpired)(void *, SCTime_t), uint32_t(*DataSize)(void *), bool reset_memcap, uint64_t memcap, uint32_t hashsize)
Definition: util-thash.c:302
DatasetSearchByName
Dataset * DatasetSearchByName(const char *name)
Definition: datasets.c:121
SCConfNode_::name
char * name
Definition: conf.h:38
IPv4Type::ipv4
uint8_t ipv4[4]
Definition: datasets-ipv4.h:31
DatasetAddSerialized
int DatasetAddSerialized(Dataset *set, const char *string)
add serialized data to set
Definition: datasets.c:1446
Dataset
Definition: datasets.h:47
Dataset::from_yaml
bool from_yaml
Definition: datasets.h:51
IPv4Type::rep
DataRepType rep
Definition: datasets-ipv4.h:33
SC_ATOMIC_GET
#define SC_ATOMIC_GET(name)
Get the value from the atomic variable.
Definition: util-atomic.h:375
util-misc.h
util-thash.h
Dataset::load
char load[PATH_MAX]
Definition: datasets.h:56
SCCalloc
#define SCCalloc(nm, sz)
Definition: util-mem.h:53
SCConfNode_
Definition: conf.h:37
Sha256StrHash
uint32_t Sha256StrHash(uint32_t hash_seed, void *s)
Definition: datasets-sha256.c:63
SCConfNode_::val
char * val
Definition: conf.h:39
Md5StrFree
void Md5StrFree(void *s)
Definition: datasets-md5.c:72
SCMutex
#define SCMutex
Definition: threads-debug.h:114
DatasetAppendSet
int DatasetAppendSet(Dataset *set)
Definition: datasets.c:79
DEBUG_VALIDATE_BUG_ON
#define DEBUG_VALIDATE_BUG_ON(exp)
Definition: util-validate.h:102
StringFree
void StringFree(void *s)
Definition: datasets-string.c:119
DatasetParseIpv6String
int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6)
Definition: datasets.c:156
DatasetLookupwRep
DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
Definition: datasets.c:1124
THashTableContext_::config
THashConfig config
Definition: util-thash.h:151