suricata
datasets.c
Go to the documentation of this file.
1 /* Copyright (C) 2017-2024 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Victor Julien <victor@inliniac.net>
22  */
23 
24 #include "suricata-common.h"
25 #include "suricata.h"
26 #include "rust.h"
27 #include "conf.h"
28 #include "datasets.h"
29 #include "datasets-string.h"
30 #include "datasets-ipv4.h"
31 #include "datasets-ipv6.h"
32 #include "datasets-md5.h"
33 #include "datasets-sha256.h"
34 #include "datasets-reputation.h"
35 #include "datasets-context-json.h"
36 #include "util-conf.h"
37 #include "util-mem.h"
38 #include "util-thash.h"
39 #include "util-print.h"
40 #include "util-byte.h"
41 #include "util-misc.h"
42 #include "util-path.h"
43 #include "util-debug.h"
44 #include "util-validate.h"
45 
47 static Dataset *sets = NULL;
48 static uint32_t set_ids = 0;
49 
50 uint32_t dataset_max_one_hashsize = 65536;
51 uint32_t dataset_max_total_hashsize = 16777216;
52 uint32_t dataset_used_hashsize = 0;
53 
54 int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep);
55 static void DatasetUpdateHashsize(const char *name, uint32_t hash_size);
56 
57 static inline void DatasetUnlockData(THashData *d)
58 {
59  (void) THashDecrUsecnt(d);
60  THashDataUnlock(d);
61 }
62 static bool DatasetIsStatic(const char *save, const char *load);
63 
64 enum DatasetTypes DatasetGetTypeFromString(const char *s)
65 {
66  if (strcasecmp("md5", s) == 0)
67  return DATASET_TYPE_MD5;
68  if (strcasecmp("sha256", s) == 0)
69  return DATASET_TYPE_SHA256;
70  if (strcasecmp("string", s) == 0)
71  return DATASET_TYPE_STRING;
72  if (strcasecmp("ipv4", s) == 0)
73  return DATASET_TYPE_IPV4;
74  if (strcasecmp("ip", s) == 0)
75  return DATASET_TYPE_IPV6;
76  return DATASET_TYPE_NOTSET;
77 }
78 
80 {
81 
82  if (set->hash == NULL) {
83  return -1;
84  }
85 
86  if (SC_ATOMIC_GET(set->hash->memcap_reached)) {
87  SCLogError("dataset too large for set memcap");
88  return -1;
89  }
90 
91  SCLogDebug(
92  "set %p/%s type %u save %s load %s", set, set->name, set->type, set->save, set->load);
93 
94  set->next = sets;
95  sets = set;
96 
97  /* hash size accounting */
98  DatasetUpdateHashsize(set->name, set->hash->config.hash_size);
99  return 0;
100 }
101 
102 void DatasetLock(void)
103 {
105 }
106 
107 void DatasetUnlock(void)
108 {
110 }
111 
112 Dataset *DatasetAlloc(const char *name)
113 {
114  Dataset *set = SCCalloc(1, sizeof(*set));
115  if (set) {
116  set->id = set_ids++;
117  }
118  return set;
119 }
120 
122 {
123  Dataset *set = sets;
124  while (set) {
125  if (strcasecmp(name, set->name) == 0 && !set->hidden) {
126  return set;
127  }
128  set = set->next;
129  }
130  return NULL;
131 }
132 
133 static int DatasetLoadIPv4(Dataset *set)
134 {
135  if (strlen(set->load) == 0)
136  return 0;
137 
138  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
139  const char *fopen_mode = "r";
140  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
141  fopen_mode = "a+";
142  }
143 
144  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv4);
145  if (retval == -2) {
146  FatalErrorOnInit("dataset %s could not be processed", set->name);
147  } else if (retval == -1) {
148  return -1;
149  }
150 
152 
153  return 0;
154 }
155 
156 int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6)
157 {
158  /* Checking IPv6 case */
159  char *got_colon = strchr(line, ':');
160  if (got_colon) {
161  uint32_t ip6addr[4];
162  if (inet_pton(AF_INET6, line, in6) != 1) {
163  FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
164  return -1;
165  }
166  memcpy(&ip6addr, in6->s6_addr, sizeof(ip6addr));
167  /* IPv4 in IPv6 notation needs transformation to internal Suricata storage */
168  if (ip6addr[0] == 0 && ip6addr[1] == 0 && ip6addr[2] == 0xFFFF0000) {
169  ip6addr[0] = ip6addr[3];
170  ip6addr[2] = 0;
171  ip6addr[3] = 0;
172  memcpy(in6, ip6addr, sizeof(struct in6_addr));
173  }
174  } else {
175  /* IPv4 case */
176  struct in_addr in;
177  if (inet_pton(AF_INET, line, &in) != 1) {
178  FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
179  return -1;
180  }
181  memset(in6, 0, sizeof(struct in6_addr));
182  memcpy(in6, &in, sizeof(struct in_addr));
183  }
184  return 0;
185 }
186 
187 static int DatasetLoadIPv6(Dataset *set)
188 {
189  if (strlen(set->load) == 0)
190  return 0;
191 
192  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
193  const char *fopen_mode = "r";
194  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
195  fopen_mode = "a+";
196  }
197 
198  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv6);
199  if (retval == -2) {
200  FatalErrorOnInit("dataset %s could not be processed", set->name);
201  } else if (retval == -1) {
202  return -1;
203  }
204 
206 
207  return 0;
208 }
209 
210 static int DatasetLoadMd5(Dataset *set)
211 {
212  if (strlen(set->load) == 0)
213  return 0;
214 
215  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
216  const char *fopen_mode = "r";
217  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
218  fopen_mode = "a+";
219  }
220 
221  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSMd5);
222  if (retval == -2) {
223  FatalErrorOnInit("dataset %s could not be processed", set->name);
224  } else if (retval == -1) {
225  return -1;
226  }
227 
229 
230  return 0;
231 }
232 
233 static int DatasetLoadSha256(Dataset *set)
234 {
235  if (strlen(set->load) == 0)
236  return 0;
237 
238  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
239  const char *fopen_mode = "r";
240  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
241  fopen_mode = "a+";
242  }
243 
244  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSSha256);
245  if (retval == -2) {
246  FatalErrorOnInit("dataset %s could not be processed", set->name);
247  } else if (retval == -1) {
248  return -1;
249  }
250 
252 
253  return 0;
254 }
255 
256 static int DatasetLoadString(Dataset *set)
257 {
258  if (strlen(set->load) == 0)
259  return 0;
260 
261  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
262 
263  const char *fopen_mode = "r";
264  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
265  fopen_mode = "a+";
266  }
267 
268  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSString);
269  if (retval == -2) {
270  FatalErrorOnInit("dataset %s could not be processed", set->name);
271  } else if (retval == -1) {
272  return -1;
273  }
274 
276 
277  return 0;
278 }
279 
280 extern bool g_system;
281 
285 };
286 
287 static void DatasetGetPath(
288  const char *in_path, char *out_path, size_t out_size, enum DatasetGetPathType type)
289 {
290  char path[PATH_MAX];
291  struct stat st;
292 
293  if (PathIsAbsolute(in_path)) {
294  strlcpy(path, in_path, sizeof(path));
295  strlcpy(out_path, path, out_size);
296  return;
297  }
298 
299  const char *data_dir = ConfigGetDataDirectory();
300  if (stat(data_dir, &st) != 0) {
301  SCLogDebug("data-dir '%s': %s", data_dir, strerror(errno));
302  return;
303  }
304 
305  snprintf(path, sizeof(path), "%s/%s", data_dir, in_path); // TODO WINDOWS
306 
307  if (type == TYPE_LOAD) {
308  if (stat(path, &st) != 0) {
309  SCLogDebug("path %s: %s", path, strerror(errno));
310  if (!g_system) {
311  snprintf(path, sizeof(path), "%s", in_path);
312  }
313  }
314  }
315  strlcpy(out_path, path, out_size);
316  SCLogDebug("in_path \'%s\' => \'%s\'", in_path, out_path);
317 }
318 
319 /** \brief look for set by name without creating it */
321 {
322  DatasetLock();
324  if (set) {
325  if (set->type != type) {
326  DatasetUnlock();
327  return NULL;
328  }
329  }
330  DatasetUnlock();
331  return set;
332 }
333 
334 static bool DatasetCheckHashsize(const char *name, uint32_t hash_size)
335 {
336  if (dataset_max_one_hashsize > 0 && hash_size > dataset_max_one_hashsize) {
337  SCLogError("hashsize %u in dataset '%s' exceeds configured 'single-hashsize' limit (%u)",
338  hash_size, name, dataset_max_one_hashsize);
339  return false;
340  }
341  // we cannot underflow as we know from conf loading that
342  // dataset_max_total_hashsize >= dataset_max_one_hashsize if dataset_max_total_hashsize > 0
343  if (dataset_max_total_hashsize > 0 &&
345  SCLogError("hashsize %u in dataset '%s' exceeds configured 'total-hashsizes' limit (%u, in "
346  "use %u)",
348  return false;
349  }
350 
351  return true;
352 }
353 
354 static void DatasetUpdateHashsize(const char *name, uint32_t hash_size)
355 {
356  if (dataset_max_total_hashsize > 0) {
357  dataset_used_hashsize += hash_size;
358  SCLogDebug("set %s adding with hash_size %u", name, hash_size);
359  }
360 }
361 
362 /**
363  * \return -1 on error
364  * \return 0 on successful creation
365  * \return 1 if the dataset already exists
366  *
367  * Calling function is responsible for locking via DatasetLock()
368  */
369 int DatasetGetOrCreate(const char *name, enum DatasetTypes type, const char *save, const char *load,
370  uint64_t *memcap, uint32_t *hashsize, Dataset **ret_set)
371 {
372  uint64_t default_memcap = 0;
373  uint32_t default_hashsize = 0;
374  if (strlen(name) > DATASET_NAME_MAX_LEN) {
375  return -1;
376  }
377 
379  if (set) {
380  if (type != DATASET_TYPE_NOTSET && set->type != type) {
381  SCLogError("dataset %s already "
382  "exists and is of type %u",
383  set->name, set->type);
384  return -1;
385  }
386 
387  if ((save == NULL || strlen(save) == 0) &&
388  (load == NULL || strlen(load) == 0)) {
389  // OK, rule keyword doesn't have to set state/load,
390  // even when yaml set has set it.
391  } else {
392  if ((save == NULL && strlen(set->save) > 0) ||
393  (save != NULL && strcmp(set->save, save) != 0)) {
394  SCLogError("dataset %s save mismatch: %s != %s", set->name, set->save, save);
395  DatasetUnlock();
396  return -1;
397  }
398  if ((load == NULL && strlen(set->load) > 0) ||
399  (load != NULL && strcmp(set->load, load) != 0)) {
400  SCLogError("dataset %s load mismatch: %s != %s", set->name, set->load, load);
401  return -1;
402  }
403  }
404 
405  *ret_set = set;
406  return 1;
407  }
408 
409  if (type == DATASET_TYPE_NOTSET) {
410  SCLogError("dataset %s not defined", name);
411  goto out_err;
412  }
413 
414  DatasetGetDefaultMemcap(&default_memcap, &default_hashsize);
415  if (*hashsize == 0) {
416  *hashsize = default_hashsize;
417  }
418  if (*memcap == 0) {
419  *memcap = default_memcap;
420  }
421 
422  if (!DatasetCheckHashsize(name, *hashsize)) {
423  goto out_err;
424  }
425 
426  set = DatasetAlloc(name);
427  if (set == NULL) {
428  goto out_err;
429  }
430 
431  strlcpy(set->name, name, sizeof(set->name));
432  set->type = type;
433  if (save && strlen(save)) {
434  strlcpy(set->save, save, sizeof(set->save));
435  SCLogDebug("name %s save '%s'", name, set->save);
436  }
437  if (load && strlen(load)) {
438  strlcpy(set->load, load, sizeof(set->load));
439  SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load);
440  }
441 
442  *ret_set = set;
443  return 0;
444 out_err:
445  if (set) {
446  SCFree(set);
447  }
448  return -1;
449 }
450 
451 Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load,
452  uint64_t memcap, uint32_t hashsize)
453 {
454  Dataset *set = NULL;
455 
456  DatasetLock();
457  int ret = DatasetGetOrCreate(name, type, save, load, &memcap, &hashsize, &set);
458  if (ret < 0) {
459  SCLogError("dataset %s creation failed", name);
460  DatasetUnlock();
461  return NULL;
462  }
463  if (ret == 1) {
464  SCLogDebug("dataset %s already exists", name);
465  DatasetUnlock();
466  return set;
467  }
468 
469  char cnf_name[128];
470  snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
471  switch (type) {
472  case DATASET_TYPE_MD5:
473  set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet, Md5StrFree, Md5StrHash,
474  Md5StrCompare, NULL, NULL, load != NULL ? 1 : 0, memcap, hashsize);
475  if (set->hash == NULL)
476  goto out_err;
477  if (DatasetLoadMd5(set) < 0)
478  goto out_err;
479  break;
480  case DATASET_TYPE_STRING:
481  set->hash = THashInit(cnf_name, sizeof(StringType), StringSet, StringFree, StringHash,
482  StringCompare, NULL, StringGetLength, load != NULL ? 1 : 0, memcap, hashsize);
483  if (set->hash == NULL)
484  goto out_err;
485  if (DatasetLoadString(set) < 0)
486  goto out_err;
487  break;
488  case DATASET_TYPE_SHA256:
489  set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet, Sha256StrFree,
490  Sha256StrHash, Sha256StrCompare, NULL, NULL, load != NULL ? 1 : 0, memcap,
491  hashsize);
492  if (set->hash == NULL)
493  goto out_err;
494  if (DatasetLoadSha256(set) < 0)
495  goto out_err;
496  break;
497  case DATASET_TYPE_IPV4:
498  set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4Set, IPv4Free, IPv4Hash,
499  IPv4Compare, NULL, NULL, load != NULL ? 1 : 0, memcap, hashsize);
500  if (set->hash == NULL)
501  goto out_err;
502  if (DatasetLoadIPv4(set) < 0)
503  goto out_err;
504  break;
505  case DATASET_TYPE_IPV6:
506  set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6Set, IPv6Free, IPv6Hash,
507  IPv6Compare, NULL, NULL, load != NULL ? 1 : 0, memcap, hashsize);
508  if (set->hash == NULL)
509  goto out_err;
510  if (DatasetLoadIPv6(set) < 0)
511  goto out_err;
512  break;
513  }
514 
515  if (DatasetAppendSet(set) < 0) {
516  SCLogError("dataset %s append failed", name);
517  goto out_err;
518  }
519 
520  DatasetUnlock();
521  return set;
522 out_err:
523  if (set->hash) {
524  THashShutdown(set->hash);
525  }
526  SCFree(set);
527  DatasetUnlock();
528  return NULL;
529 }
530 
/**
 * \brief check whether a set is static
 *
 * A set is static when it has a non-empty load path and no (or an empty)
 * save path.
 *
 * \param save save/state path, may be NULL
 * \param load load path, may be NULL
 * \retval true if the set is static
 */
static bool DatasetIsStatic(const char *save, const char *load)
{
    const bool has_load = (load != NULL) && (strlen(load) > 0);
    if (!has_load) {
        return false;
    }

    const bool has_save = (save != NULL) && (strlen(save) > 0);
    return !has_save;
}
538 
539 void DatasetReload(void)
540 {
541  /* In order to reload the datasets, just mark the current sets as hidden
542  * and clean them up later.
543  * New datasets shall be created with the rule reload and do not require
544  * any intervention.
545  * */
546  DatasetLock();
547  Dataset *set = sets;
548  while (set) {
549  if (!DatasetIsStatic(set->save, set->load) || set->from_yaml) {
550  SCLogDebug("Not a static set, skipping %s", set->name);
551  set = set->next;
552  continue;
553  }
554  set->hidden = true;
555  if (dataset_max_total_hashsize > 0) {
558  }
559  SCLogDebug("Set %s at %p hidden successfully", set->name, set);
560  set = set->next;
561  }
562  DatasetUnlock();
563 }
564 
566 {
567  DatasetLock();
568  SCLogDebug("Post Reload Cleanup starting.. Hidden sets will be removed");
569  Dataset *cur = sets;
570  Dataset *prev = NULL;
571  while (cur) {
572  Dataset *next = cur->next;
573  if (!cur->hidden) {
574  prev = cur;
575  cur = next;
576  continue;
577  }
578  // Delete the set in case it was hidden
579  if (prev != NULL) {
580  prev->next = next;
581  } else {
582  sets = next;
583  }
584  THashShutdown(cur->hash);
585  SCFree(cur);
586  cur = next;
587  }
588  DatasetUnlock();
589 }
590 
591 /* Value reflects THASH_DEFAULT_HASHSIZE which is what the default was earlier,
592  * despite 2048 commented out in the default yaml. */
593 #define DATASETS_HASHSIZE_DEFAULT 4096
594 
595 void DatasetGetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize)
596 {
597  const char *str = NULL;
598  if (SCConfGet("datasets.defaults.memcap", &str) == 1) {
599  if (ParseSizeStringU64(str, memcap) < 0) {
600  SCLogWarning("memcap value cannot be deduced: %s,"
601  " resetting to default",
602  str);
603  *memcap = 0;
604  }
605  }
606 
607  *hashsize = (uint32_t)DATASETS_HASHSIZE_DEFAULT;
608  if (SCConfGet("datasets.defaults.hashsize", &str) == 1) {
609  if (ParseSizeStringU32(str, hashsize) < 0) {
610  *hashsize = (uint32_t)DATASETS_HASHSIZE_DEFAULT;
611  SCLogWarning("hashsize value cannot be deduced: %s,"
612  " resetting to default: %u",
613  str, *hashsize);
614  }
615  }
616 }
617 
618 int DatasetsInit(void)
619 {
620  SCLogDebug("datasets start");
621  SCConfNode *datasets = SCConfGetNode("datasets");
622  uint64_t default_memcap = 0;
623  uint32_t default_hashsize = 0;
624  DatasetGetDefaultMemcap(&default_memcap, &default_hashsize);
625  if (datasets != NULL) {
626  const char *str = NULL;
627  if (SCConfGet("datasets.limits.total-hashsizes", &str) == 1) {
629  FatalError("failed to parse datasets.limits.total-hashsizes value: %s", str);
630  }
631  }
632  if (SCConfGet("datasets.limits.single-hashsize", &str) == 1) {
634  FatalError("failed to parse datasets.limits.single-hashsize value: %s", str);
635  }
636  }
637  if (dataset_max_total_hashsize > 0 &&
639  FatalError("total-hashsizes (%u) cannot be smaller than single-hashsize (%u)",
641  }
643  // the total limit also applies for single limit
645  }
646 
647  int list_pos = 0;
648  SCConfNode *iter = NULL;
649  TAILQ_FOREACH(iter, &datasets->head, next) {
650  if (iter->name == NULL) {
651  list_pos++;
652  continue;
653  }
654 
655  char save[PATH_MAX] = "";
656  char load[PATH_MAX] = "";
657  uint64_t memcap = 0;
658  uint32_t hashsize = 0;
659 
660  const char *set_name = iter->name;
661  if (strlen(set_name) > DATASET_NAME_MAX_LEN) {
663  "set name '%s' too long, max %d chars", set_name, DATASET_NAME_MAX_LEN);
664  continue;
665  }
666 
667  SCConfNode *set_type = SCConfNodeLookupChild(iter, "type");
668  if (set_type == NULL) {
669  list_pos++;
670  continue;
671  }
672 
673  SCConfNode *set_save = SCConfNodeLookupChild(iter, "state");
674  if (set_save) {
675  DatasetGetPath(set_save->val, save, sizeof(save), TYPE_STATE);
676  strlcpy(load, save, sizeof(load));
677  } else {
678  SCConfNode *set_load = SCConfNodeLookupChild(iter, "load");
679  if (set_load) {
680  DatasetGetPath(set_load->val, load, sizeof(load), TYPE_LOAD);
681  }
682  }
683 
684  SCConfNode *set_memcap = SCConfNodeLookupChild(iter, "memcap");
685  if (set_memcap) {
686  if (ParseSizeStringU64(set_memcap->val, &memcap) < 0) {
687  SCLogWarning("memcap value cannot be"
688  " deduced: %s, resetting to default",
689  set_memcap->val);
690  memcap = 0;
691  }
692  }
693  SCConfNode *set_hashsize = SCConfNodeLookupChild(iter, "hashsize");
694  if (set_hashsize) {
695  if (ParseSizeStringU32(set_hashsize->val, &hashsize) < 0) {
696  SCLogWarning("hashsize value cannot be"
697  " deduced: %s, resetting to default",
698  set_hashsize->val);
699  hashsize = 0;
700  }
701  }
702  char conf_str[1024];
703  snprintf(conf_str, sizeof(conf_str), "datasets.%d.%s", list_pos, set_name);
704 
705  SCLogDebug("set %s type %s. Conf %s", set_name, set_type->val, conf_str);
706 
707  if (strcmp(set_type->val, "md5") == 0) {
708  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_MD5, save, load,
709  memcap > 0 ? memcap : default_memcap,
710  hashsize > 0 ? hashsize : default_hashsize);
711  if (dset == NULL) {
712  FatalErrorOnInit("failed to setup dataset for %s", set_name);
713  continue;
714  }
715  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
716  dset->from_yaml = true;
717 
718  } else if (strcmp(set_type->val, "sha256") == 0) {
719  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_SHA256, save, load,
720  memcap > 0 ? memcap : default_memcap,
721  hashsize > 0 ? hashsize : default_hashsize);
722  if (dset == NULL) {
723  FatalErrorOnInit("failed to setup dataset for %s", set_name);
724  continue;
725  }
726  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
727  dset->from_yaml = true;
728 
729  } else if (strcmp(set_type->val, "string") == 0) {
730  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_STRING, save, load,
731  memcap > 0 ? memcap : default_memcap,
732  hashsize > 0 ? hashsize : default_hashsize);
733  if (dset == NULL) {
734  FatalErrorOnInit("failed to setup dataset for %s", set_name);
735  continue;
736  }
737  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
738  dset->from_yaml = true;
739 
740  } else if (strcmp(set_type->val, "ipv4") == 0) {
741  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_IPV4, save, load,
742  memcap > 0 ? memcap : default_memcap,
743  hashsize > 0 ? hashsize : default_hashsize);
744  if (dset == NULL) {
745  FatalErrorOnInit("failed to setup dataset for %s", set_name);
746  continue;
747  }
748  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
749  dset->from_yaml = true;
750 
751  } else if (strcmp(set_type->val, "ip") == 0) {
752  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_IPV6, save, load,
753  memcap > 0 ? memcap : default_memcap,
754  hashsize > 0 ? hashsize : default_hashsize);
755  if (dset == NULL) {
756  FatalErrorOnInit("failed to setup dataset for %s", set_name);
757  continue;
758  }
759  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
760  dset->from_yaml = true;
761  }
762 
763  list_pos++;
764  }
765  }
766  SCLogDebug("datasets done: %p", datasets);
767  return 0;
768 }
769 
770 void DatasetsDestroy(void)
771 {
772  DatasetLock();
773  SCLogDebug("destroying datasets: %p", sets);
774  Dataset *set = sets;
775  while (set) {
776  SCLogDebug("destroying set %s", set->name);
777  Dataset *next = set->next;
778  THashShutdown(set->hash);
779  SCFree(set);
780  set = next;
781  }
782  sets = NULL;
783  DatasetUnlock();
784  SCLogDebug("destroying datasets done: %p", sets);
785 }
786 
/**
 * \brief output callback writing one formatted record to a FILE
 *
 * \param ctx FILE pointer to write to, may be NULL
 * \param data record bytes
 * \param data_len number of bytes in \a data
 * \retval fwrite()'s item count for a single item of \a data_len bytes
 *         (1 when the record was written), or 0 if \a ctx is NULL
 */
static int SaveCallback(void *ctx, const uint8_t *data, const uint32_t data_len)
{
    FILE *out = ctx;
    if (out == NULL) {
        return 0;
    }
    return (int)fwrite(data, data_len, 1, out);
}
796 
797 static int Md5AsAscii(const void *s, char *out, size_t out_size)
798 {
799  const Md5Type *md5 = s;
800  char str[256];
801  PrintHexString(str, sizeof(str), (uint8_t *)md5->md5, sizeof(md5->md5));
802  strlcat(out, str, out_size);
803  strlcat(out, "\n", out_size);
804  return (int)strlen(out);
805 }
806 
807 static int Sha256AsAscii(const void *s, char *out, size_t out_size)
808 {
809  const Sha256Type *sha = s;
810  char str[256];
811  PrintHexString(str, sizeof(str), (uint8_t *)sha->sha256, sizeof(sha->sha256));
812  strlcat(out, str, out_size);
813  strlcat(out, "\n", out_size);
814  return (int)strlen(out);
815 }
816 
817 static int IPv4AsAscii(const void *s, char *out, size_t out_size)
818 {
819  const IPv4Type *ip4 = s;
820  char str[256];
821  PrintInet(AF_INET, ip4->ipv4, str, sizeof(str));
822  strlcat(out, str, out_size);
823  strlcat(out, "\n", out_size);
824  return (int)strlen(out);
825 }
826 
827 static int IPv6AsAscii(const void *s, char *out, size_t out_size)
828 {
829  const IPv6Type *ip6 = s;
830  char str[256];
831  bool is_ipv4 = true;
832  for (int i = 4; i <= 15; i++) {
833  if (ip6->ipv6[i] != 0) {
834  is_ipv4 = false;
835  break;
836  }
837  }
838  if (is_ipv4) {
839  PrintInet(AF_INET, ip6->ipv6, str, sizeof(str));
840  } else {
841  PrintInet(AF_INET6, ip6->ipv6, str, sizeof(str));
842  }
843  strlcat(out, str, out_size);
844  strlcat(out, "\n", out_size);
845  return (int)strlen(out);
846 }
847 
848 void DatasetsSave(void)
849 {
850  DatasetLock();
851  SCLogDebug("saving datasets: %p", sets);
852  Dataset *set = sets;
853  while (set) {
854  if (strlen(set->save) == 0)
855  goto next;
856 
857  FILE *fp = fopen(set->save, "w");
858  if (fp == NULL)
859  goto next;
860 
861  SCLogDebug("dumping %s to %s", set->name, set->save);
862 
863  switch (set->type) {
864  case DATASET_TYPE_STRING:
865  THashWalk(set->hash, StringAsBase64, SaveCallback, fp);
866  break;
867  case DATASET_TYPE_MD5:
868  THashWalk(set->hash, Md5AsAscii, SaveCallback, fp);
869  break;
870  case DATASET_TYPE_SHA256:
871  THashWalk(set->hash, Sha256AsAscii, SaveCallback, fp);
872  break;
873  case DATASET_TYPE_IPV4:
874  THashWalk(set->hash, IPv4AsAscii, SaveCallback, fp);
875  break;
876  case DATASET_TYPE_IPV6:
877  THashWalk(set->hash, IPv6AsAscii, SaveCallback, fp);
878  break;
879  }
880 
881  fclose(fp);
882 
883  next:
884  set = set->next;
885  }
886  DatasetUnlock();
887 }
888 
889 static int DatasetLookupString(Dataset *set, const uint8_t *data, const uint32_t data_len)
890 {
891  if (set == NULL)
892  return -1;
893 
894  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = 0 };
895  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
896  if (rdata) {
897  DatasetUnlockData(rdata);
898  return 1;
899  }
900  return 0;
901 }
902 
903 static DataRepResultType DatasetLookupStringwRep(Dataset *set,
904  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
905 {
906  DataRepResultType rrep = { .found = false, .rep = 0 };
907 
908  if (set == NULL)
909  return rrep;
910 
911  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = *rep };
912  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
913  if (rdata) {
914  StringType *found = rdata->data;
915  rrep.found = true;
916  rrep.rep = found->rep;
917  DatasetUnlockData(rdata);
918  return rrep;
919  }
920  return rrep;
921 }
922 
923 static int DatasetLookupIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
924 {
925  if (set == NULL)
926  return -1;
927 
928  if (data_len != 4)
929  return -1;
930 
931  IPv4Type lookup = { .rep = 0 };
932  memcpy(lookup.ipv4, data, 4);
933  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
934  if (rdata) {
935  DatasetUnlockData(rdata);
936  return 1;
937  }
938  return 0;
939 }
940 
941 static DataRepResultType DatasetLookupIPv4wRep(
942  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
943 {
944  DataRepResultType rrep = { .found = false, .rep = 0 };
945 
946  if (set == NULL)
947  return rrep;
948 
949  if (data_len != 4)
950  return rrep;
951 
952  IPv4Type lookup = { .rep = 0 };
953  memcpy(lookup.ipv4, data, data_len);
954  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
955  if (rdata) {
956  IPv4Type *found = rdata->data;
957  rrep.found = true;
958  rrep.rep = found->rep;
959  DatasetUnlockData(rdata);
960  return rrep;
961  }
962  return rrep;
963 }
964 
965 static int DatasetLookupIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
966 {
967  if (set == NULL)
968  return -1;
969 
970  if (data_len != 16 && data_len != 4)
971  return -1;
972 
973  IPv6Type lookup = { .rep = 0 };
974  memcpy(lookup.ipv6, data, data_len);
975  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
976  if (rdata) {
977  DatasetUnlockData(rdata);
978  return 1;
979  }
980  return 0;
981 }
982 
983 static DataRepResultType DatasetLookupIPv6wRep(
984  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
985 {
986  DataRepResultType rrep = { .found = false, .rep = 0 };
987 
988  if (set == NULL)
989  return rrep;
990 
991  if (data_len != 16 && data_len != 4)
992  return rrep;
993 
994  IPv6Type lookup = { .rep = 0 };
995  memcpy(lookup.ipv6, data, data_len);
996  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
997  if (rdata) {
998  IPv6Type *found = rdata->data;
999  rrep.found = true;
1000  rrep.rep = found->rep;
1001  DatasetUnlockData(rdata);
1002  return rrep;
1003  }
1004  return rrep;
1005 }
1006 
1007 static int DatasetLookupMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1008 {
1009  if (set == NULL)
1010  return -1;
1011 
1012  if (data_len != 16)
1013  return -1;
1014 
1015  Md5Type lookup = { .rep = 0 };
1016  memcpy(lookup.md5, data, data_len);
1017  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1018  if (rdata) {
1019  DatasetUnlockData(rdata);
1020  return 1;
1021  }
1022  return 0;
1023 }
1024 
1025 static DataRepResultType DatasetLookupMd5wRep(Dataset *set,
1026  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1027 {
1028  DataRepResultType rrep = { .found = false, .rep = 0 };
1029 
1030  if (set == NULL)
1031  return rrep;
1032 
1033  if (data_len != 16)
1034  return rrep;
1035 
1036  Md5Type lookup = { .rep = 0 };
1037  memcpy(lookup.md5, data, data_len);
1038  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1039  if (rdata) {
1040  Md5Type *found = rdata->data;
1041  rrep.found = true;
1042  rrep.rep = found->rep;
1043  DatasetUnlockData(rdata);
1044  return rrep;
1045  }
1046  return rrep;
1047 }
1048 
1049 static int DatasetLookupSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1050 {
1051  if (set == NULL)
1052  return -1;
1053 
1054  if (data_len != 32)
1055  return -1;
1056 
1057  Sha256Type lookup = { .rep = 0 };
1058  memcpy(lookup.sha256, data, data_len);
1059  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1060  if (rdata) {
1061  DatasetUnlockData(rdata);
1062  return 1;
1063  }
1064  return 0;
1065 }
1066 
1067 static DataRepResultType DatasetLookupSha256wRep(Dataset *set,
1068  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1069 {
1070  DataRepResultType rrep = { .found = false, .rep = 0 };
1071 
1072  if (set == NULL)
1073  return rrep;
1074 
1075  if (data_len != 32)
1076  return rrep;
1077 
1078  Sha256Type lookup = { .rep = 0 };
1079  memcpy(lookup.sha256, data, data_len);
1080  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1081  if (rdata) {
1082  Sha256Type *found = rdata->data;
1083  rrep.found = true;
1084  rrep.rep = found->rep;
1085  DatasetUnlockData(rdata);
1086  return rrep;
1087  }
1088  return rrep;
1089 }
1090 
1091 /**
1092  * \brief see if \a data is part of the set
1093  * \param set dataset
1094  * \param data data to look up
1095  * \param data_len length in bytes of \a data
1096  * \retval -1 error
1097  * \retval 0 not found
1098  * \retval 1 found
1099  */
1100 int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
1101 {
1102  if (set == NULL)
1103  return -1;
1104 
1105  switch (set->type) {
1106  case DATASET_TYPE_STRING:
1107  return DatasetLookupString(set, data, data_len);
1108  case DATASET_TYPE_MD5:
1109  return DatasetLookupMd5(set, data, data_len);
1110  case DATASET_TYPE_SHA256:
1111  return DatasetLookupSha256(set, data, data_len);
1112  case DATASET_TYPE_IPV4:
1113  return DatasetLookupIPv4(set, data, data_len);
1114  case DATASET_TYPE_IPV6:
1115  return DatasetLookupIPv6(set, data, data_len);
1116  }
1117  return -1;
1118 }
1119 
1120 DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
1121  const DataRepType *rep)
1122 {
1123  DataRepResultType rrep = { .found = false, .rep = 0 };
1124  if (set == NULL)
1125  return rrep;
1126 
1127  switch (set->type) {
1128  case DATASET_TYPE_STRING:
1129  return DatasetLookupStringwRep(set, data, data_len, rep);
1130  case DATASET_TYPE_MD5:
1131  return DatasetLookupMd5wRep(set, data, data_len, rep);
1132  case DATASET_TYPE_SHA256:
1133  return DatasetLookupSha256wRep(set, data, data_len, rep);
1134  case DATASET_TYPE_IPV4:
1135  return DatasetLookupIPv4wRep(set, data, data_len, rep);
1136  case DATASET_TYPE_IPV6:
1137  return DatasetLookupIPv6wRep(set, data, data_len, rep);
1138  }
1139  return rrep;
1140 }
1141 
1142 /**
1143  * \retval 1 data was added to the hash
1144  * \retval 0 data was not added to the hash as it is already there
1145  * \retval -1 failed to add data to the hash
1146  */
1147 static int DatasetAddString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1148 {
1149  if (set == NULL)
1150  return -1;
1151 
1152  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = 0 };
1153  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1154  if (res.data) {
1155  DatasetUnlockData(res.data);
1156  return res.is_new ? 1 : 0;
1157  }
1158  return -1;
1159 }
1160 
1161 /**
1162  * \retval 1 data was added to the hash
1163  * \retval 0 data was not added to the hash as it is already there
1164  * \retval -1 failed to add data to the hash
1165  */
1166 static int DatasetAddStringwRep(
1167  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1168 {
1169  if (set == NULL)
1170  return -1;
1171 
1172  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1173  .rep = *rep };
1174  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1175  if (res.data) {
1176  DatasetUnlockData(res.data);
1177  return res.is_new ? 1 : 0;
1178  }
1179  return -1;
1180 }
1181 
1182 static int DatasetAddIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
1183 {
1184  if (set == NULL) {
1185  return -1;
1186  }
1187 
1188  if (data_len < 4) {
1189  return -2;
1190  }
1191 
1192  IPv4Type lookup = { .rep = 0 };
1193  memcpy(lookup.ipv4, data, 4);
1194  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1195  if (res.data) {
1196  DatasetUnlockData(res.data);
1197  return res.is_new ? 1 : 0;
1198  }
1199  return -1;
1200 }
1201 
1202 static int DatasetAddIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
1203 {
1204  if (set == NULL) {
1205  return -1;
1206  }
1207 
1208  if (data_len != 16 && data_len != 4) {
1209  return -2;
1210  }
1211 
1212  IPv6Type lookup = { .rep = 0 };
1213  memcpy(lookup.ipv6, data, data_len);
1214  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1215  if (res.data) {
1216  DatasetUnlockData(res.data);
1217  return res.is_new ? 1 : 0;
1218  }
1219  return -1;
1220 }
1221 
1222 static int DatasetAddIPv4wRep(
1223  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1224 {
1225  if (set == NULL)
1226  return -1;
1227 
1228  if (data_len < 4)
1229  return -2;
1230 
1231  IPv4Type lookup = { .rep = *rep };
1232  memcpy(lookup.ipv4, data, 4);
1233  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1234  if (res.data) {
1235  DatasetUnlockData(res.data);
1236  return res.is_new ? 1 : 0;
1237  }
1238  return -1;
1239 }
1240 
1241 static int DatasetAddIPv6wRep(
1242  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1243 {
1244  if (set == NULL)
1245  return -1;
1246 
1247  if (data_len != 16)
1248  return -2;
1249 
1250  IPv6Type lookup = { .rep = *rep };
1251  memcpy(lookup.ipv6, data, 16);
1252  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1253  if (res.data) {
1254  DatasetUnlockData(res.data);
1255  return res.is_new ? 1 : 0;
1256  }
1257  return -1;
1258 }
1259 
1260 static int DatasetAddMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1261 {
1262  if (set == NULL)
1263  return -1;
1264 
1265  if (data_len != 16)
1266  return -2;
1267 
1268  Md5Type lookup = { .rep = 0 };
1269  memcpy(lookup.md5, data, 16);
1270  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1271  if (res.data) {
1272  DatasetUnlockData(res.data);
1273  return res.is_new ? 1 : 0;
1274  }
1275  return -1;
1276 }
1277 
1278 static int DatasetAddMd5wRep(
1279  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1280 {
1281  if (set == NULL)
1282  return -1;
1283 
1284  if (data_len != 16)
1285  return -2;
1286 
1287  Md5Type lookup = { .rep = *rep };
1288  memcpy(lookup.md5, data, 16);
1289  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1290  if (res.data) {
1291  DatasetUnlockData(res.data);
1292  return res.is_new ? 1 : 0;
1293  }
1294  return -1;
1295 }
1296 
1297 static int DatasetAddSha256wRep(
1298  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1299 {
1300  if (set == NULL)
1301  return -1;
1302 
1303  if (data_len != 32)
1304  return -2;
1305 
1306  Sha256Type lookup = { .rep = *rep };
1307  memcpy(lookup.sha256, data, 32);
1308  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1309  if (res.data) {
1310  DatasetUnlockData(res.data);
1311  return res.is_new ? 1 : 0;
1312  }
1313  return -1;
1314 }
1315 
1316 static int DatasetAddSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1317 {
1318  if (set == NULL)
1319  return -1;
1320 
1321  if (data_len != 32)
1322  return -2;
1323 
1324  Sha256Type lookup = { .rep = 0 };
1325  memcpy(lookup.sha256, data, 32);
1326  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1327  if (res.data) {
1328  DatasetUnlockData(res.data);
1329  return res.is_new ? 1 : 0;
1330  }
1331  return -1;
1332 }
1333 
1334 int SCDatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
1335 {
1336  if (set == NULL)
1337  return -1;
1338 
1339  switch (set->type) {
1340  case DATASET_TYPE_STRING:
1341  return DatasetAddString(set, data, data_len);
1342  case DATASET_TYPE_MD5:
1343  return DatasetAddMd5(set, data, data_len);
1344  case DATASET_TYPE_SHA256:
1345  return DatasetAddSha256(set, data, data_len);
1346  case DATASET_TYPE_IPV4:
1347  return DatasetAddIPv4(set, data, data_len);
1348  case DATASET_TYPE_IPV6:
1349  return DatasetAddIPv6(set, data, data_len);
1350  }
1351  return -1;
1352 }
1353 
1355  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1356 {
1357  if (set == NULL)
1358  return -1;
1359 
1360  switch (set->type) {
1361  case DATASET_TYPE_STRING:
1362  return DatasetAddStringwRep(set, data, data_len, rep);
1363  case DATASET_TYPE_MD5:
1364  return DatasetAddMd5wRep(set, data, data_len, rep);
1365  case DATASET_TYPE_SHA256:
1366  return DatasetAddSha256wRep(set, data, data_len, rep);
1367  case DATASET_TYPE_IPV4:
1368  return DatasetAddIPv4wRep(set, data, data_len, rep);
1369  case DATASET_TYPE_IPV6:
1370  return DatasetAddIPv6wRep(set, data, data_len, rep);
1371  }
1372  return -1;
1373 }
1374 
1375 typedef int (*DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len);
1376 
1377 static int DatasetOpSerialized(Dataset *set, const char *string, DatasetOpFunc DatasetOpString,
1378  DatasetOpFunc DatasetOpMd5, DatasetOpFunc DatasetOpSha256, DatasetOpFunc DatasetOpIPv4,
1379  DatasetOpFunc DatasetOpIPv6)
1380 {
1381  if (set == NULL)
1382  return -1;
1383  if (strlen(string) == 0)
1384  return -1;
1385 
1386  switch (set->type) {
1387  case DATASET_TYPE_STRING: {
1388  if (strlen(string) > UINT16_MAX) {
1389  // size check before cast and stack allocation
1390  return -1;
1391  }
1392  uint32_t decoded_size = SCBase64DecodeBufferSize((uint32_t)strlen(string));
1393  uint8_t decoded[decoded_size];
1394  uint32_t num_decoded = SCBase64Decode(
1395  (const uint8_t *)string, strlen(string), SCBase64ModeStrict, decoded);
1396  if (num_decoded == 0) {
1397  return -2;
1398  }
1399 
1400  return DatasetOpString(set, decoded, num_decoded);
1401  }
1402  case DATASET_TYPE_MD5: {
1403  if (strlen(string) != 32)
1404  return -2;
1405  uint8_t hash[16];
1406  if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0)
1407  return -2;
1408  return DatasetOpMd5(set, hash, 16);
1409  }
1410  case DATASET_TYPE_SHA256: {
1411  if (strlen(string) != 64)
1412  return -2;
1413  uint8_t hash[32];
1414  if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0)
1415  return -2;
1416  return DatasetOpSha256(set, hash, 32);
1417  }
1418  case DATASET_TYPE_IPV4: {
1419  struct in_addr in;
1420  if (inet_pton(AF_INET, string, &in) != 1)
1421  return -2;
1422  return DatasetOpIPv4(set, (uint8_t *)&in.s_addr, 4);
1423  }
1424  case DATASET_TYPE_IPV6: {
1425  struct in6_addr in6;
1426  if (DatasetParseIpv6String(set, string, &in6) != 0) {
1427  SCLogError("Dataset failed to import %s as IPv6", string);
1428  return -2;
1429  }
1430  return DatasetOpIPv6(set, (uint8_t *)&in6.s6_addr, 16);
1431  }
1432  }
1433  return -1;
1434 }
1435 
1436 /** \brief add serialized data to set
1437  * \retval int 1 added
1438  * \retval int 0 already in hash
1439  * \retval int -1 API error (not added)
1440  * \retval int -2 DATA error
1441  */
1442 int DatasetAddSerialized(Dataset *set, const char *string)
1443 {
1444  return DatasetOpSerialized(set, string, DatasetAddString, DatasetAddMd5, DatasetAddSha256,
1445  DatasetAddIPv4, DatasetAddIPv6);
1446 }
1447 
1448 /** \brief add serialized data to set
1449  * \retval int 1 added
1450  * \retval int 0 already in hash
1451  * \retval int -1 API error (not added)
1452  * \retval int -2 DATA error
1453  */
1454 int DatasetLookupSerialized(Dataset *set, const char *string)
1455 {
1456  return DatasetOpSerialized(set, string, DatasetLookupString, DatasetLookupMd5,
1457  DatasetLookupSha256, DatasetLookupIPv4, DatasetLookupIPv6);
1458 }
1459 
1460 /**
1461  * \retval 1 data was removed from the hash
1462  * \retval 0 data not removed (busy)
1463  * \retval -1 data not found
1464  */
1465 static int DatasetRemoveString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1466 {
1467  if (set == NULL)
1468  return -1;
1469 
1470  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = 0 };
1471  return THashRemoveFromHash(set->hash, &lookup);
1472 }
1473 
1474 static int DatasetRemoveIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
1475 {
1476  if (set == NULL)
1477  return -1;
1478 
1479  if (data_len != 4)
1480  return -2;
1481 
1482  IPv4Type lookup = { .rep = 0 };
1483  memcpy(lookup.ipv4, data, 4);
1484  return THashRemoveFromHash(set->hash, &lookup);
1485 }
1486 
1487 static int DatasetRemoveIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
1488 {
1489  if (set == NULL)
1490  return -1;
1491 
1492  if (data_len != 16)
1493  return -2;
1494 
1495  IPv6Type lookup = { .rep = 0 };
1496  memcpy(lookup.ipv6, data, 16);
1497  return THashRemoveFromHash(set->hash, &lookup);
1498 }
1499 
1500 static int DatasetRemoveMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1501 {
1502  if (set == NULL)
1503  return -1;
1504 
1505  if (data_len != 16)
1506  return -2;
1507 
1508  Md5Type lookup = { .rep = 0 };
1509  memcpy(lookup.md5, data, 16);
1510  return THashRemoveFromHash(set->hash, &lookup);
1511 }
1512 
1513 static int DatasetRemoveSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1514 {
1515  if (set == NULL)
1516  return -1;
1517 
1518  if (data_len != 32)
1519  return -2;
1520 
1521  Sha256Type lookup = { .rep = 0 };
1522  memcpy(lookup.sha256, data, 32);
1523  return THashRemoveFromHash(set->hash, &lookup);
1524 }
1525 
1526 /** \brief remove serialized data from set
1527  * \retval int 1 removed
1528  * \retval int 0 found but busy (not removed)
1529  * \retval int -1 API error (not removed)
1530  * \retval int -2 DATA error */
1531 int DatasetRemoveSerialized(Dataset *set, const char *string)
1532 {
1533  return DatasetOpSerialized(set, string, DatasetRemoveString, DatasetRemoveMd5,
1534  DatasetRemoveSha256, DatasetRemoveIPv4, DatasetRemoveIPv6);
1535 }
1536 
1537 int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len)
1538 {
1539  if (set == NULL)
1540  return -1;
1541 
1542  switch (set->type) {
1543  case DATASET_TYPE_STRING:
1544  return DatasetRemoveString(set, data, data_len);
1545  case DATASET_TYPE_MD5:
1546  return DatasetRemoveMd5(set, data, data_len);
1547  case DATASET_TYPE_SHA256:
1548  return DatasetRemoveSha256(set, data, data_len);
1549  case DATASET_TYPE_IPV4:
1550  return DatasetRemoveIPv4(set, data, data_len);
1551  case DATASET_TYPE_IPV6:
1552  return DatasetRemoveIPv6(set, data, data_len);
1553  }
1554  return -1;
1555 }
dataset_used_hashsize
uint32_t dataset_used_hashsize
Definition: datasets.c:52
util-byte.h
sets_lock
SCMutex sets_lock
Definition: datasets.c:46
StringType::rep
DataRepType rep
Definition: datasets-string.h:33
len
uint8_t len
Definition: app-layer-dnp3.h:2
datasets-string.h
DataRepResultType::rep
DataRepType rep
Definition: datasets-reputation.h:31
THashDataGetResult::data
THashData * data
Definition: util-thash.h:192
datasets-md5.h
Dataset::name
char name[DATASET_NAME_MAX_LEN+1]
Definition: datasets.h:56
DatasetAlloc
Dataset * DatasetAlloc(const char *name)
Definition: datasets.c:112
Dataset::id
uint32_t id
Definition: datasets.h:58
Dataset::save
char save[PATH_MAX]
Definition: datasets.h:65
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:282
ParseSizeStringU64
int ParseSizeStringU64(const char *size, uint64_t *res)
Definition: util-misc.c:191
next
struct HtpBodyChunk_ * next
Definition: app-layer-htp.h:0
datasets-sha256.h
IPv6Compare
bool IPv6Compare(void *a, void *b)
Definition: datasets-ipv6.c:56
name
const char * name
Definition: detect-engine-proto.c:48
HexToRaw
int HexToRaw(const uint8_t *in, size_t ins, uint8_t *out, size_t outs)
Definition: util-byte.c:771
THashRemoveFromHash
int THashRemoveFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:890
TYPE_STATE
@ TYPE_STATE
Definition: datasets.c:283
Md5Type
Definition: datasets-md5.h:30
Dataset::hash
THashTableContext * hash
Definition: datasets.h:62
ctx
struct Thresholds ctx
IPv4Hash
uint32_t IPv4Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv4.c:63
Sha256Type::sha256
uint8_t sha256[32]
Definition: datasets-sha256.h:31
SCConfGet
int SCConfGet(const char *name, const char **vptr)
Retrieve the value of a configuration node.
Definition: conf.c:351
Md5Type::rep
DataRepType rep
Definition: datasets-md5.h:33
DataRepResultType::found
bool found
Definition: datasets-reputation.h:30
PrintHexString
void PrintHexString(char *str, size_t size, uint8_t *buf, size_t buf_len)
Definition: util-print.c:262
Dataset::type
enum DatasetTypes type
Definition: datasets.h:57
TAILQ_FOREACH
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:252
THashConsolidateMemcap
void THashConsolidateMemcap(THashTableContext *ctx)
Definition: util-thash.c:345
SCMutexLock
#define SCMutexLock(mut)
Definition: threads-debug.h:117
rust.h
DATASET_TYPE_SHA256
@ DATASET_TYPE_SHA256
Definition: datasets.h:49
Sha256Type::rep
DataRepType rep
Definition: datasets-sha256.h:33
SCMUTEX_INITIALIZER
#define SCMUTEX_INITIALIZER
Definition: threads-debug.h:122
datasets-reputation.h
DatasetAddwRep
int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep)
ConfigGetDataDirectory
const char * ConfigGetDataDirectory(void)
Definition: util-conf.c:85
Md5Type::md5
uint8_t md5[16]
Definition: datasets-md5.h:31
DATASET_TYPE_IPV6
@ DATASET_TYPE_IPV6
Definition: datasets.h:51
Md5StrCompare
bool Md5StrCompare(void *a, void *b)
Definition: datasets-md5.c:57
DatasetLookupSerialized
int DatasetLookupSerialized(Dataset *set, const char *string)
add serialized data to set
Definition: datasets.c:1454
strlcpy
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: util-strlcpyu.c:43
DataRepResultType
Definition: datasets-reputation.h:29
DatasetGet
Dataset * DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load, uint64_t memcap, uint32_t hashsize)
Definition: datasets.c:451
dataset_max_one_hashsize
uint32_t dataset_max_one_hashsize
Definition: datasets.c:50
StringSet
int StringSet(void *dst, void *src)
Definition: datasets-string.c:62
DatasetRemove
int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1537
DatasetGetDefaultMemcap
void DatasetGetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize)
Definition: datasets.c:595
THashDataConfig_::hash_size
uint32_t hash_size
Definition: util-thash.h:127
datasets.h
IPv6Set
int IPv6Set(void *dst, void *src)
Definition: datasets-ipv6.c:33
util-debug.h
TYPE_LOAD
@ TYPE_LOAD
Definition: datasets.c:284
strlcat
size_t strlcat(char *, const char *src, size_t siz)
Definition: util-strlcatu.c:45
DatasetGetOrCreate
int DatasetGetOrCreate(const char *name, enum DatasetTypes type, const char *save, const char *load, uint64_t *memcap, uint32_t *hashsize, Dataset **ret_set)
Definition: datasets.c:369
DATASETS_HASHSIZE_DEFAULT
#define DATASETS_HASHSIZE_DEFAULT
Definition: datasets.c:593
StringAsBase64
int StringAsBase64(const void *s, char *out, size_t out_size)
Definition: datasets-string.c:46
SCMutexUnlock
#define SCMutexUnlock(mut)
Definition: threads-debug.h:120
datasets-ipv6.h
IPv6Type::ipv6
uint8_t ipv6[16]
Definition: datasets-ipv6.h:31
DATASET_TYPE_NOTSET
#define DATASET_TYPE_NOTSET
Definition: datasets.h:46
IPv6Type::rep
DataRepType rep
Definition: datasets-ipv6.h:33
util-print.h
DatasetPostReloadCleanup
void DatasetPostReloadCleanup(void)
Definition: datasets.c:565
PrintInet
const char * PrintInet(int af, const void *src, char *dst, socklen_t size)
Definition: util-print.c:238
DatasetOpFunc
int(* DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1375
datasets-ipv4.h
SCLogWarning
#define SCLogWarning(...)
Macro used to log WARNING messages.
Definition: util-debug.h:262
StringGetLength
uint32_t StringGetLength(void *s)
Definition: datasets-string.c:114
Sha256StrSet
int Sha256StrSet(void *dst, void *src)
Definition: datasets-sha256.c:32
DatasetsDestroy
void DatasetsDestroy(void)
Definition: datasets.c:770
Md5StrHash
uint32_t Md5StrHash(uint32_t hash_seed, void *s)
Definition: datasets-md5.c:65
THashDataGetResult
Definition: util-thash.h:191
StringType
Definition: datasets-string.h:30
IPv4Set
int IPv4Set(void *dst, void *src)
Definition: datasets-ipv4.c:32
type
uint16_t type
Definition: decode-vlan.c:106
SCDatasetAdd
int SCDatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1334
DatasetsSave
void DatasetsSave(void)
Definition: datasets.c:848
conf.h
DatasetLock
void DatasetLock(void)
Definition: datasets.c:102
IPv6Type
Definition: datasets-ipv6.h:30
DatasetLookup
int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
see if data is part of the set
Definition: datasets.c:1100
DATASET_TYPE_IPV4
@ DATASET_TYPE_IPV4
Definition: datasets.h:50
StringType::ptr
uint8_t * ptr
Definition: datasets-string.h:36
DatasetRemoveSerialized
int DatasetRemoveSerialized(Dataset *set, const char *string)
remove serialized data from set
Definition: datasets.c:1531
dataset_max_total_hashsize
uint32_t dataset_max_total_hashsize
Definition: datasets.c:51
g_system
bool g_system
Definition: suricata.c:195
THashShutdown
void THashShutdown(THashTableContext *ctx)
shutdown the flow engine
Definition: util-thash.c:354
util-mem.h
SCConfNodeLookupChild
SCConfNode * SCConfNodeLookupChild(const SCConfNode *node, const char *name)
Lookup a child configuration node by name.
Definition: conf.c:824
DatasetTypes
DatasetTypes
Definition: datasets.h:45
Dataset::next
struct Dataset * next
Definition: datasets.h:67
THashData_::data
void * data
Definition: util-thash.h:92
util-conf.h
Sha256Type
Definition: datasets-sha256.h:30
Sha256StrFree
void Sha256StrFree(void *s)
Definition: datasets-sha256.c:70
THashData_
Definition: util-thash.h:85
IPv4Free
void IPv4Free(void *s)
Definition: datasets-ipv4.c:70
suricata-common.h
util-path.h
datasets-context-json.h
FatalErrorOnInit
#define FatalErrorOnInit(...)
Fatal error IF we're starting up, and configured to consider errors to be fatal errors.
Definition: util-debug.h:526
DATASET_NAME_MAX_LEN
#define DATASET_NAME_MAX_LEN
Definition: datasets.h:54
PathIsAbsolute
int PathIsAbsolute(const char *path)
Check if a path is absolute.
Definition: util-path.c:44
Md5StrSet
int Md5StrSet(void *dst, void *src)
Definition: datasets-md5.c:34
StringCompare
bool StringCompare(void *a, void *b)
Definition: datasets-string.c:97
THashGetFromHash
struct THashDataGetResult THashGetFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:637
FatalError
#define FatalError(...)
Definition: util-debug.h:517
hashsize
#define hashsize(n)
Definition: util-hash-lookup3.h:40
THashLookupFromHash
THashData * THashLookupFromHash(THashTableContext *ctx, void *data)
look up data in the hash
Definition: util-thash.c:747
IPv4Type
Definition: datasets-ipv4.h:30
ParseSizeStringU32
int ParseSizeStringU32(const char *size, uint32_t *res)
Definition: util-misc.c:174
THashDecrUsecnt
#define THashDecrUsecnt(h)
Definition: util-thash.h:170
IPv4Compare
bool IPv4Compare(void *a, void *b)
Definition: datasets-ipv4.c:55
DatasetFind
Dataset * DatasetFind(const char *name, enum DatasetTypes type)
look for set by name without creating it
Definition: datasets.c:320
util-validate.h
SCLogConfig
struct SCLogConfig_ SCLogConfig
Holds the config state used by the logging api.
IPv6Hash
uint32_t IPv6Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv6.c:64
DatasetsInit
int DatasetsInit(void)
Definition: datasets.c:618
str
#define str(s)
Definition: suricata-common.h:308
DatasetGetTypeFromString
enum DatasetTypes DatasetGetTypeFromString(const char *s)
Definition: datasets.c:64
SCConfGetNode
SCConfNode * SCConfGetNode(const char *name)
Get a SCConfNode by name.
Definition: conf.c:182
SCLogError
#define SCLogError(...)
Macro used to log ERROR messages.
Definition: util-debug.h:274
THashWalk
int THashWalk(THashTableContext *ctx, THashFormatFunc FormatterFunc, THashOutputFunc OutputterFunc, void *output_ctx)
Walk the hash.
Definition: util-thash.c:388
SCFree
#define SCFree(p)
Definition: util-mem.h:61
Dataset::hidden
bool hidden
Definition: datasets.h:60
DatasetReload
void DatasetReload(void)
Definition: datasets.c:539
DatasetGetPathType
DatasetGetPathType
Definition: datasets.c:282
Sha256StrCompare
bool Sha256StrCompare(void *a, void *b)
Definition: datasets-sha256.c:55
StringHash
uint32_t StringHash(uint32_t hash_seed, void *s)
Definition: datasets-string.c:108
DataRepType
uint16_t DataRepType
Definition: datasets.h:22
DATASET_TYPE_MD5
@ DATASET_TYPE_MD5
Definition: datasets.h:48
DATASET_TYPE_STRING
@ DATASET_TYPE_STRING
Definition: datasets.h:47
DatasetUnlock
void DatasetUnlock(void)
Definition: datasets.c:107
THashDataGetResult::is_new
bool is_new
Definition: util-thash.h:193
IPv6Free
void IPv6Free(void *s)
Definition: datasets-ipv6.c:71
suricata.h
THashInit
THashTableContext * THashInit(const char *cnf_prefix, uint32_t data_size, int(*DataSet)(void *, void *), void(*DataFree)(void *), uint32_t(*DataHash)(uint32_t, void *), bool(*DataCompare)(void *, void *), bool(*DataExpired)(void *, SCTime_t), uint32_t(*DataSize)(void *), bool reset_memcap, uint64_t memcap, uint32_t hashsize)
Definition: util-thash.c:302
DatasetSearchByName
Dataset * DatasetSearchByName(const char *name)
Definition: datasets.c:121
SCConfNode_::name
char * name
Definition: conf.h:38
IPv4Type::ipv4
uint8_t ipv4[4]
Definition: datasets-ipv4.h:31
DatasetAddSerialized
int DatasetAddSerialized(Dataset *set, const char *string)
add serialized data to set
Definition: datasets.c:1442
Dataset
Definition: datasets.h:55
Dataset::from_yaml
bool from_yaml
Definition: datasets.h:59
IPv4Type::rep
DataRepType rep
Definition: datasets-ipv4.h:33
SC_ATOMIC_GET
#define SC_ATOMIC_GET(name)
Get the value from the atomic variable.
Definition: util-atomic.h:375
util-misc.h
util-thash.h
Dataset::load
char load[PATH_MAX]
Definition: datasets.h:64
SCCalloc
#define SCCalloc(nm, sz)
Definition: util-mem.h:53
SCConfNode_
Definition: conf.h:37
Sha256StrHash
uint32_t Sha256StrHash(uint32_t hash_seed, void *s)
Definition: datasets-sha256.c:63
SCConfNode_::val
char * val
Definition: conf.h:39
Md5StrFree
void Md5StrFree(void *s)
Definition: datasets-md5.c:72
SCMutex
#define SCMutex
Definition: threads-debug.h:114
DatasetAppendSet
int DatasetAppendSet(Dataset *set)
Definition: datasets.c:79
DEBUG_VALIDATE_BUG_ON
#define DEBUG_VALIDATE_BUG_ON(exp)
Definition: util-validate.h:102
StringFree
void StringFree(void *s)
Definition: datasets-string.c:121
SCDatasetAddwRep
int SCDatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
Definition: datasets.c:1354
DatasetParseIpv6String
int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6)
Definition: datasets.c:156
DatasetLookupwRep
DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
Definition: datasets.c:1120
THashTableContext_::config
THashConfig config
Definition: util-thash.h:151