suricata
datasets.c
Go to the documentation of this file.
1 /* Copyright (C) 2017-2024 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Victor Julien <victor@inliniac.net>
22  */
23 
24 #include "suricata-common.h"
25 #include "suricata.h"
26 #include "rust.h"
27 #include "conf.h"
28 #include "datasets.h"
29 #include "datasets-string.h"
30 #include "datasets-ipv4.h"
31 #include "datasets-ipv6.h"
32 #include "datasets-md5.h"
33 #include "datasets-sha256.h"
34 #include "datasets-reputation.h"
35 #include "util-conf.h"
36 #include "util-thash.h"
37 #include "util-print.h"
38 #include "util-byte.h"
39 #include "util-misc.h"
40 #include "util-path.h"
41 #include "util-debug.h"
42 #include "util-validate.h"
43 
45 static Dataset *sets = NULL;
46 static uint32_t set_ids = 0;
47 
48 uint32_t dataset_max_one_hashsize = 65536;
49 uint32_t dataset_max_total_hashsize = 16777216;
50 uint32_t dataset_used_hashsize = 0;
51 
52 int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep);
53 
54 static inline void DatasetUnlockData(THashData *d)
55 {
56  (void) THashDecrUsecnt(d);
57  THashDataUnlock(d);
58 }
59 static bool DatasetIsStatic(const char *save, const char *load);
60 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize);
61 
62 enum DatasetTypes DatasetGetTypeFromString(const char *s)
63 {
64  if (strcasecmp("md5", s) == 0)
65  return DATASET_TYPE_MD5;
66  if (strcasecmp("sha256", s) == 0)
67  return DATASET_TYPE_SHA256;
68  if (strcasecmp("string", s) == 0)
69  return DATASET_TYPE_STRING;
70  if (strcasecmp("ipv4", s) == 0)
71  return DATASET_TYPE_IPV4;
72  if (strcasecmp("ip", s) == 0)
73  return DATASET_TYPE_IPV6;
74  return DATASET_TYPE_NOTSET;
75 }
76 
77 static Dataset *DatasetAlloc(const char *name)
78 {
79  Dataset *set = SCCalloc(1, sizeof(*set));
80  if (set) {
81  set->id = set_ids++;
82  }
83  return set;
84 }
85 
86 static Dataset *DatasetSearchByName(const char *name)
87 {
88  Dataset *set = sets;
89  while (set) {
90  if (strcasecmp(name, set->name) == 0 && !set->hidden) {
91  return set;
92  }
93  set = set->next;
94  }
95  return NULL;
96 }
97 
/**
 * \brief Value of a single hexadecimal digit.
 *
 * \retval 0-15 value of the digit
 * \retval -1 \a c is not a hexadecimal digit
 */
static inline int HexNibbleValue(const uint8_t c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}

/**
 * \brief Decode an ASCII hex string into raw bytes.
 *
 * Every input character has to be a hexadecimal digit (either case).
 * Whitespace, signs and other characters are rejected instead of being
 * silently turned into a value. \a out is written only when the complete
 * input is valid.
 *
 * \param in hex characters; does not have to be NUL terminated
 * \param ins number of characters in \a in; must be even and at least 2
 * \param out destination for the decoded bytes
 * \param outs size of \a out in bytes; must be exactly \a ins / 2
 *
 * \retval 0 input decoded into \a out
 * \retval -1 invalid length combination or a non-hex character in the input
 */
static int HexToRaw(const uint8_t *in, size_t ins, uint8_t *out, size_t outs)
{
    if (ins < 2)
        return -1;
    if (ins % 2 != 0)
        return -1;
    if (outs != ins / 2)
        return -1;

    /* validate the whole input first so that 'out' is left untouched when
     * the input turns out to be malformed */
    for (size_t i = 0; i < ins; i++) {
        if (HexNibbleValue(in[i]) < 0) {
            SCLogError("invalid hex character 0x%02x in hash", in[i]);
            return -1;
        }
    }

    for (size_t i = 0, x = 0; i < ins; i += 2, x++) {
        const int hi = HexNibbleValue(in[i]);
        const int lo = HexNibbleValue(in[i + 1]);
        out[x] = (uint8_t)((hi << 4) | lo);
    }
    return 0;
}
127 
128 static int DatasetLoadIPv4(Dataset *set)
129 {
130  if (strlen(set->load) == 0)
131  return 0;
132 
133  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
134  const char *fopen_mode = "r";
135  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
136  fopen_mode = "a+";
137  }
138 
139  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv4);
140  if (retval == -2) {
141  FatalErrorOnInit("dataset %s could not be processed", set->name);
142  } else if (retval == -1) {
143  return -1;
144  }
145 
147 
148  return 0;
149 }
150 
151 static int ParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6)
152 {
153  /* Checking IPv6 case */
154  char *got_colon = strchr(line, ':');
155  if (got_colon) {
156  uint32_t ip6addr[4];
157  if (inet_pton(AF_INET6, line, in6) != 1) {
158  FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
159  return -1;
160  }
161  memcpy(&ip6addr, in6->s6_addr, sizeof(ip6addr));
162  /* IPv4 in IPv6 notation needs transformation to internal Suricata storage */
163  if (ip6addr[0] == 0 && ip6addr[1] == 0 && ip6addr[2] == 0xFFFF0000) {
164  ip6addr[0] = ip6addr[3];
165  ip6addr[2] = 0;
166  ip6addr[3] = 0;
167  memcpy(in6, ip6addr, sizeof(struct in6_addr));
168  }
169  } else {
170  /* IPv4 case */
171  struct in_addr in;
172  if (inet_pton(AF_INET, line, &in) != 1) {
173  FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
174  return -1;
175  }
176  memset(in6, 0, sizeof(struct in6_addr));
177  memcpy(in6, &in, sizeof(struct in_addr));
178  }
179  return 0;
180 }
181 
182 static int DatasetLoadIPv6(Dataset *set)
183 {
184  if (strlen(set->load) == 0)
185  return 0;
186 
187  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
188  const char *fopen_mode = "r";
189  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
190  fopen_mode = "a+";
191  }
192 
193  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv6);
194  if (retval == -2) {
195  FatalErrorOnInit("dataset %s could not be processed", set->name);
196  } else if (retval == -1) {
197  return -1;
198  }
199 
201 
202  return 0;
203 }
204 
205 static int DatasetLoadMd5(Dataset *set)
206 {
207  if (strlen(set->load) == 0)
208  return 0;
209 
210  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
211  const char *fopen_mode = "r";
212  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
213  fopen_mode = "a+";
214  }
215 
216  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSMd5);
217  if (retval == -2) {
218  FatalErrorOnInit("dataset %s could not be processed", set->name);
219  } else if (retval == -1) {
220  return -1;
221  }
222 
224 
225  return 0;
226 }
227 
228 static int DatasetLoadSha256(Dataset *set)
229 {
230  if (strlen(set->load) == 0)
231  return 0;
232 
233  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
234  const char *fopen_mode = "r";
235  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
236  fopen_mode = "a+";
237  }
238 
239  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSSha256);
240  if (retval == -2) {
241  FatalErrorOnInit("dataset %s could not be processed", set->name);
242  } else if (retval == -1) {
243  return -1;
244  }
245 
247 
248  return 0;
249 }
250 
251 static int DatasetLoadString(Dataset *set)
252 {
253  if (strlen(set->load) == 0)
254  return 0;
255 
256  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
257 
258  const char *fopen_mode = "r";
259  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
260  fopen_mode = "a+";
261  }
262 
263  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSString);
264  if (retval == -2) {
265  FatalErrorOnInit("dataset %s could not be processed", set->name);
266  } else if (retval == -1) {
267  return -1;
268  }
269 
271 
272  return 0;
273 }
274 
275 extern bool g_system;
276 
280 };
281 
282 static void DatasetGetPath(const char *in_path,
283  char *out_path, size_t out_size, enum DatasetGetPathType type)
284 {
285  char path[PATH_MAX];
286  struct stat st;
287 
288  if (PathIsAbsolute(in_path)) {
289  strlcpy(path, in_path, sizeof(path));
290  strlcpy(out_path, path, out_size);
291  return;
292  }
293 
294  const char *data_dir = ConfigGetDataDirectory();
295  if (stat(data_dir, &st) != 0) {
296  SCLogDebug("data-dir '%s': %s", data_dir, strerror(errno));
297  return;
298  }
299 
300  snprintf(path, sizeof(path), "%s/%s", data_dir, in_path); // TODO WINDOWS
301 
302  if (type == TYPE_LOAD) {
303  if (stat(path, &st) != 0) {
304  SCLogDebug("path %s: %s", path, strerror(errno));
305  if (!g_system) {
306  snprintf(path, sizeof(path), "%s", in_path);
307  }
308  }
309  }
310  strlcpy(out_path, path, out_size);
311  SCLogDebug("in_path \'%s\' => \'%s\'", in_path, out_path);
312 }
313 
314 /** \brief look for set by name without creating it */
316 {
318  Dataset *set = DatasetSearchByName(name);
319  if (set) {
320  if (set->type != type) {
322  return NULL;
323  }
324  }
326  return set;
327 }
328 
329 static bool DatasetCheckHashsize(const char *name, uint32_t hash_size)
330 {
331  if (dataset_max_one_hashsize > 0 && hash_size > dataset_max_one_hashsize) {
332  SCLogError("hashsize %u in dataset '%s' exceeds configured 'single-hashsize' limit (%u)",
333  hash_size, name, dataset_max_one_hashsize);
334  return false;
335  }
336  // we cannot underflow as we know from conf loading that
337  // dataset_max_total_hashsize >= dataset_max_one_hashsize if dataset_max_total_hashsize > 0
338  if (dataset_max_total_hashsize > 0 &&
340  SCLogError("hashsize %u in dataset '%s' exceeds configured 'total-hashsizes' limit (%u, in "
341  "use %u)",
343  return false;
344  }
345 
346  return true;
347 }
348 
349 static void DatasetUpdateHashsize(const char *name, uint32_t hash_size)
350 {
351  if (dataset_max_total_hashsize > 0) {
352  dataset_used_hashsize += hash_size;
353  SCLogDebug("set %s adding with hash_size %u", name, hash_size);
354  }
355 }
356 
357 Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load,
358  uint64_t memcap, uint32_t hashsize)
359 {
360  uint64_t default_memcap = 0;
361  uint32_t default_hashsize = 0;
362  if (strlen(name) > DATASET_NAME_MAX_LEN) {
363  return NULL;
364  }
365 
367  Dataset *set = DatasetSearchByName(name);
368  if (set) {
369  if (type != DATASET_TYPE_NOTSET && set->type != type) {
370  SCLogError("dataset %s already "
371  "exists and is of type %u",
372  set->name, set->type);
374  return NULL;
375  }
376 
377  if ((save == NULL || strlen(save) == 0) &&
378  (load == NULL || strlen(load) == 0)) {
379  // OK, rule keyword doesn't have to set state/load,
380  // even when yaml set has set it.
381  } else {
382  if ((save == NULL && strlen(set->save) > 0) ||
383  (save != NULL && strcmp(set->save, save) != 0)) {
384  SCLogError("dataset %s save mismatch: %s != %s", set->name, set->save, save);
386  return NULL;
387  }
388  if ((load == NULL && strlen(set->load) > 0) ||
389  (load != NULL && strcmp(set->load, load) != 0)) {
390  SCLogError("dataset %s load mismatch: %s != %s", set->name, set->load, load);
392  return NULL;
393  }
394  }
395 
397  return set;
398  } else {
399  if (type == DATASET_TYPE_NOTSET) {
400  SCLogError("dataset %s not defined", name);
401  goto out_err;
402  }
403  }
404 
405  GetDefaultMemcap(&default_memcap, &default_hashsize);
406  if (hashsize == 0) {
407  hashsize = default_hashsize;
408  }
409 
410  if (!DatasetCheckHashsize(name, hashsize)) {
411  goto out_err;
412  }
413 
414  set = DatasetAlloc(name);
415  if (set == NULL) {
416  goto out_err;
417  }
418 
419  strlcpy(set->name, name, sizeof(set->name));
420  set->type = type;
421  if (save && strlen(save)) {
422  strlcpy(set->save, save, sizeof(set->save));
423  SCLogDebug("name %s save '%s'", name, set->save);
424  }
425  if (load && strlen(load)) {
426  strlcpy(set->load, load, sizeof(set->load));
427  SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load);
428  }
429 
430  char cnf_name[128];
431  snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
432 
433  switch (type) {
434  case DATASET_TYPE_MD5:
435  set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet, Md5StrFree, Md5StrHash,
436  Md5StrCompare, NULL, NULL, load != NULL ? 1 : 0,
437  memcap > 0 ? memcap : default_memcap, hashsize);
438  if (set->hash == NULL)
439  goto out_err;
440  if (DatasetLoadMd5(set) < 0)
441  goto out_err;
442  break;
443  case DATASET_TYPE_STRING:
444  set->hash = THashInit(cnf_name, sizeof(StringType), StringSet, StringFree, StringHash,
445  StringCompare, NULL, StringGetLength, load != NULL ? 1 : 0,
446  memcap > 0 ? memcap : default_memcap, hashsize);
447  if (set->hash == NULL)
448  goto out_err;
449  if (DatasetLoadString(set) < 0)
450  goto out_err;
451  break;
452  case DATASET_TYPE_SHA256:
453  set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet, Sha256StrFree,
454  Sha256StrHash, Sha256StrCompare, NULL, NULL, load != NULL ? 1 : 0,
455  memcap > 0 ? memcap : default_memcap, hashsize);
456  if (set->hash == NULL)
457  goto out_err;
458  if (DatasetLoadSha256(set) < 0)
459  goto out_err;
460  break;
461  case DATASET_TYPE_IPV4:
462  set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4Set, IPv4Free, IPv4Hash,
463  IPv4Compare, NULL, NULL, load != NULL ? 1 : 0,
464  memcap > 0 ? memcap : default_memcap, hashsize);
465  if (set->hash == NULL)
466  goto out_err;
467  if (DatasetLoadIPv4(set) < 0)
468  goto out_err;
469  break;
470  case DATASET_TYPE_IPV6:
471  set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6Set, IPv6Free, IPv6Hash,
472  IPv6Compare, NULL, NULL, load != NULL ? 1 : 0,
473  memcap > 0 ? memcap : default_memcap, hashsize);
474  if (set->hash == NULL)
475  goto out_err;
476  if (DatasetLoadIPv6(set) < 0)
477  goto out_err;
478  break;
479  }
480  if (set->hash == NULL) {
481  goto out_err;
482  }
483 
484  if (SC_ATOMIC_GET(set->hash->memcap_reached)) {
485  SCLogError("dataset too large for set memcap");
486  goto out_err;
487  }
488 
489  SCLogDebug("set %p/%s type %u save %s load %s",
490  set, set->name, set->type, set->save, set->load);
491 
492  set->next = sets;
493  sets = set;
494 
495  /* hash size accounting */
497  DatasetUpdateHashsize(set->name, set->hash->config.hash_size);
498 
500  return set;
501 out_err:
502  if (set) {
503  if (set->hash) {
504  THashShutdown(set->hash);
505  }
506  SCFree(set);
507  }
509  return NULL;
510 }
511 
/**
 * \brief Check whether a set is static.
 *
 * A set counts as static when it has a non-empty load path and no
 * (or an empty) save path.
 *
 * \param save save path, may be NULL
 * \param load load path, may be NULL
 *
 * \retval true load is set and save is not
 * \retval false otherwise
 */
static bool DatasetIsStatic(const char *save, const char *load)
{
    const bool has_load = (load != NULL) && (load[0] != '\0');
    const bool has_save = (save != NULL) && (save[0] != '\0');

    return has_load && !has_save;
}
523 
524 void DatasetReload(void)
525 {
526  /* In order to reload the datasets, just mark the current sets as hidden
527  * and clean them up later.
528  * New datasets shall be created with the rule reload and do not require
529  * any intervention.
530  * */
532  Dataset *set = sets;
533  while (set) {
534  if (!DatasetIsStatic(set->save, set->load) || set->from_yaml) {
535  SCLogDebug("Not a static set, skipping %s", set->name);
536  set = set->next;
537  continue;
538  }
539  set->hidden = true;
540  if (dataset_max_total_hashsize > 0) {
543  }
544  SCLogDebug("Set %s at %p hidden successfully", set->name, set);
545  set = set->next;
546  }
548 }
549 
551 {
552  SCLogDebug("Post Reload Cleanup starting.. Hidden sets will be removed");
554  Dataset *cur = sets;
555  Dataset *prev = NULL;
556  while (cur) {
557  Dataset *next = cur->next;
558  if (!cur->hidden) {
559  prev = cur;
560  cur = next;
561  continue;
562  }
563  // Delete the set in case it was hidden
564  if (prev != NULL) {
565  prev->next = next;
566  } else {
567  sets = next;
568  }
569  THashShutdown(cur->hash);
570  SCFree(cur);
571  cur = next;
572  }
574 }
575 
576 /* Value reflects THASH_DEFAULT_HASHSIZE which is what the default was earlier,
577  * despite 2048 commented out in the default yaml. */
578 #define DATASETS_HASHSIZE_DEFAULT 4096
579 
580 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize)
581 {
582  const char *str = NULL;
583  if (SCConfGet("datasets.defaults.memcap", &str) == 1) {
584  if (ParseSizeStringU64(str, memcap) < 0) {
585  SCLogWarning("memcap value cannot be deduced: %s,"
586  " resetting to default",
587  str);
588  *memcap = 0;
589  }
590  }
591 
592  *hashsize = (uint32_t)DATASETS_HASHSIZE_DEFAULT;
593  if (SCConfGet("datasets.defaults.hashsize", &str) == 1) {
594  if (ParseSizeStringU32(str, hashsize) < 0) {
595  *hashsize = (uint32_t)DATASETS_HASHSIZE_DEFAULT;
596  SCLogWarning("hashsize value cannot be deduced: %s,"
597  " resetting to default: %u",
598  str, *hashsize);
599  }
600  }
601 }
602 
603 int DatasetsInit(void)
604 {
605  SCLogDebug("datasets start");
606  SCConfNode *datasets = SCConfGetNode("datasets");
607  uint64_t default_memcap = 0;
608  uint32_t default_hashsize = 0;
609  GetDefaultMemcap(&default_memcap, &default_hashsize);
610  if (datasets != NULL) {
611  const char *str = NULL;
612  if (SCConfGet("datasets.limits.total-hashsizes", &str) == 1) {
614  FatalError("failed to parse datasets.limits.total-hashsizes value: %s", str);
615  }
616  }
617  if (SCConfGet("datasets.limits.single-hashsize", &str) == 1) {
619  FatalError("failed to parse datasets.limits.single-hashsize value: %s", str);
620  }
621  }
622  if (dataset_max_total_hashsize > 0 &&
624  FatalError("total-hashsizes (%u) cannot be smaller than single-hashsize (%u)",
626  }
628  // the total limit also applies for single limit
630  }
631 
632  int list_pos = 0;
633  SCConfNode *iter = NULL;
634  TAILQ_FOREACH(iter, &datasets->head, next) {
635  if (iter->name == NULL) {
636  list_pos++;
637  continue;
638  }
639 
640  char save[PATH_MAX] = "";
641  char load[PATH_MAX] = "";
642  uint64_t memcap = 0;
643  uint32_t hashsize = 0;
644 
645  const char *set_name = iter->name;
646  if (strlen(set_name) > DATASET_NAME_MAX_LEN) {
648  "set name '%s' too long, max %d chars", set_name, DATASET_NAME_MAX_LEN);
649  continue;
650  }
651 
652  SCConfNode *set_type = SCConfNodeLookupChild(iter, "type");
653  if (set_type == NULL) {
654  list_pos++;
655  continue;
656  }
657 
658  SCConfNode *set_save = SCConfNodeLookupChild(iter, "state");
659  if (set_save) {
660  DatasetGetPath(set_save->val, save, sizeof(save), TYPE_STATE);
661  strlcpy(load, save, sizeof(load));
662  } else {
663  SCConfNode *set_load = SCConfNodeLookupChild(iter, "load");
664  if (set_load) {
665  DatasetGetPath(set_load->val, load, sizeof(load), TYPE_LOAD);
666  }
667  }
668 
669  SCConfNode *set_memcap = SCConfNodeLookupChild(iter, "memcap");
670  if (set_memcap) {
671  if (ParseSizeStringU64(set_memcap->val, &memcap) < 0) {
672  SCLogWarning("memcap value cannot be"
673  " deduced: %s, resetting to default",
674  set_memcap->val);
675  memcap = 0;
676  }
677  }
678  SCConfNode *set_hashsize = SCConfNodeLookupChild(iter, "hashsize");
679  if (set_hashsize) {
680  if (ParseSizeStringU32(set_hashsize->val, &hashsize) < 0) {
681  SCLogWarning("hashsize value cannot be"
682  " deduced: %s, resetting to default",
683  set_hashsize->val);
684  hashsize = 0;
685  }
686  }
687  char conf_str[1024];
688  snprintf(conf_str, sizeof(conf_str), "datasets.%d.%s", list_pos, set_name);
689 
690  SCLogDebug("set %s type %s. Conf %s", set_name, set_type->val, conf_str);
691 
692  if (strcmp(set_type->val, "md5") == 0) {
693  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_MD5, save, load,
694  memcap > 0 ? memcap : default_memcap,
695  hashsize > 0 ? hashsize : default_hashsize);
696  if (dset == NULL) {
697  FatalErrorOnInit("failed to setup dataset for %s", set_name);
698  continue;
699  }
700  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
701  dset->from_yaml = true;
702 
703  } else if (strcmp(set_type->val, "sha256") == 0) {
704  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_SHA256, save, load,
705  memcap > 0 ? memcap : default_memcap,
706  hashsize > 0 ? hashsize : default_hashsize);
707  if (dset == NULL) {
708  FatalErrorOnInit("failed to setup dataset for %s", set_name);
709  continue;
710  }
711  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
712  dset->from_yaml = true;
713 
714  } else if (strcmp(set_type->val, "string") == 0) {
715  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_STRING, save, load,
716  memcap > 0 ? memcap : default_memcap,
717  hashsize > 0 ? hashsize : default_hashsize);
718  if (dset == NULL) {
719  FatalErrorOnInit("failed to setup dataset for %s", set_name);
720  continue;
721  }
722  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
723  dset->from_yaml = true;
724 
725  } else if (strcmp(set_type->val, "ipv4") == 0) {
726  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_IPV4, save, load,
727  memcap > 0 ? memcap : default_memcap,
728  hashsize > 0 ? hashsize : default_hashsize);
729  if (dset == NULL) {
730  FatalErrorOnInit("failed to setup dataset for %s", set_name);
731  continue;
732  }
733  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
734  dset->from_yaml = true;
735 
736  } else if (strcmp(set_type->val, "ip") == 0) {
737  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_IPV6, save, load,
738  memcap > 0 ? memcap : default_memcap,
739  hashsize > 0 ? hashsize : default_hashsize);
740  if (dset == NULL) {
741  FatalErrorOnInit("failed to setup dataset for %s", set_name);
742  continue;
743  }
744  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
745  dset->from_yaml = true;
746  }
747 
748  list_pos++;
749  }
750  }
751  SCLogDebug("datasets done: %p", datasets);
752  return 0;
753 }
754 
755 void DatasetsDestroy(void)
756 {
757  SCLogDebug("destroying datasets: %p", sets);
759  Dataset *set = sets;
760  while (set) {
761  SCLogDebug("destroying set %s", set->name);
762  Dataset *next = set->next;
763  THashShutdown(set->hash);
764  SCFree(set);
765  set = next;
766  }
767  sets = NULL;
769  SCLogDebug("destroying datasets done: %p", sets);
770 }
771 
/**
 * \brief Output callback writing one serialized record to a file.
 *
 * \param ctx FILE pointer to write to, may be NULL
 * \param data record to write
 * \param data_len length of \a data in bytes
 *
 * \retval 0 no file given, or nothing was written
 * \retval 1 the record was written as a single item
 */
static int SaveCallback(void *ctx, const uint8_t *data, const uint32_t data_len)
{
    FILE *out = ctx;
    if (out == NULL) {
        return 0;
    }
    /* the record is written as one item, so fwrite reports 0 or 1 */
    const size_t written = fwrite(data, data_len, 1, out);
    return (int)written;
}
781 
782 static int Md5AsAscii(const void *s, char *out, size_t out_size)
783 {
784  const Md5Type *md5 = s;
785  char str[256];
786  PrintHexString(str, sizeof(str), (uint8_t *)md5->md5, sizeof(md5->md5));
787  strlcat(out, str, out_size);
788  strlcat(out, "\n", out_size);
789  return (int)strlen(out);
790 }
791 
792 static int Sha256AsAscii(const void *s, char *out, size_t out_size)
793 {
794  const Sha256Type *sha = s;
795  char str[256];
796  PrintHexString(str, sizeof(str), (uint8_t *)sha->sha256, sizeof(sha->sha256));
797  strlcat(out, str, out_size);
798  strlcat(out, "\n", out_size);
799  return (int)strlen(out);
800 }
801 
802 static int IPv4AsAscii(const void *s, char *out, size_t out_size)
803 {
804  const IPv4Type *ip4 = s;
805  char str[256];
806  PrintInet(AF_INET, ip4->ipv4, str, sizeof(str));
807  strlcat(out, str, out_size);
808  strlcat(out, "\n", out_size);
809  return (int)strlen(out);
810 }
811 
812 static int IPv6AsAscii(const void *s, char *out, size_t out_size)
813 {
814  const IPv6Type *ip6 = s;
815  char str[256];
816  bool is_ipv4 = true;
817  for (int i = 4; i <= 15; i++) {
818  if (ip6->ipv6[i] != 0) {
819  is_ipv4 = false;
820  break;
821  }
822  }
823  if (is_ipv4) {
824  PrintInet(AF_INET, ip6->ipv6, str, sizeof(str));
825  } else {
826  PrintInet(AF_INET6, ip6->ipv6, str, sizeof(str));
827  }
828  strlcat(out, str, out_size);
829  strlcat(out, "\n", out_size);
830  return (int)strlen(out);
831 }
832 
833 void DatasetsSave(void)
834 {
835  SCLogDebug("saving datasets: %p", sets);
837  Dataset *set = sets;
838  while (set) {
839  if (strlen(set->save) == 0)
840  goto next;
841 
842  FILE *fp = fopen(set->save, "w");
843  if (fp == NULL)
844  goto next;
845 
846  SCLogDebug("dumping %s to %s", set->name, set->save);
847 
848  switch (set->type) {
849  case DATASET_TYPE_STRING:
850  THashWalk(set->hash, StringAsBase64, SaveCallback, fp);
851  break;
852  case DATASET_TYPE_MD5:
853  THashWalk(set->hash, Md5AsAscii, SaveCallback, fp);
854  break;
855  case DATASET_TYPE_SHA256:
856  THashWalk(set->hash, Sha256AsAscii, SaveCallback, fp);
857  break;
858  case DATASET_TYPE_IPV4:
859  THashWalk(set->hash, IPv4AsAscii, SaveCallback, fp);
860  break;
861  case DATASET_TYPE_IPV6:
862  THashWalk(set->hash, IPv6AsAscii, SaveCallback, fp);
863  break;
864  }
865 
866  fclose(fp);
867 
868  next:
869  set = set->next;
870  }
872 }
873 
874 static int DatasetLookupString(Dataset *set, const uint8_t *data, const uint32_t data_len)
875 {
876  if (set == NULL)
877  return -1;
878 
879  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep.value = 0 };
880  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
881  if (rdata) {
882  DatasetUnlockData(rdata);
883  return 1;
884  }
885  return 0;
886 }
887 
888 static DataRepResultType DatasetLookupStringwRep(Dataset *set,
889  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
890 {
891  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
892 
893  if (set == NULL)
894  return rrep;
895 
896  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = *rep };
897  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
898  if (rdata) {
899  StringType *found = rdata->data;
900  rrep.found = true;
901  rrep.rep = found->rep;
902  DatasetUnlockData(rdata);
903  return rrep;
904  }
905  return rrep;
906 }
907 
908 static int DatasetLookupIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
909 {
910  if (set == NULL)
911  return -1;
912 
913  if (data_len != 4)
914  return -1;
915 
916  IPv4Type lookup = { .rep.value = 0 };
917  memcpy(lookup.ipv4, data, 4);
918  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
919  if (rdata) {
920  DatasetUnlockData(rdata);
921  return 1;
922  }
923  return 0;
924 }
925 
926 static DataRepResultType DatasetLookupIPv4wRep(
927  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
928 {
929  DataRepResultType rrep = { .found = false, .rep = { .value = 0 } };
930 
931  if (set == NULL)
932  return rrep;
933 
934  if (data_len != 4)
935  return rrep;
936 
937  IPv4Type lookup = { .rep.value = 0 };
938  memcpy(lookup.ipv4, data, data_len);
939  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
940  if (rdata) {
941  IPv4Type *found = rdata->data;
942  rrep.found = true;
943  rrep.rep = found->rep;
944  DatasetUnlockData(rdata);
945  return rrep;
946  }
947  return rrep;
948 }
949 
950 static int DatasetLookupIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
951 {
952  if (set == NULL)
953  return -1;
954 
955  if (data_len != 16 && data_len != 4)
956  return -1;
957 
958  IPv6Type lookup = { .rep.value = 0 };
959  memcpy(lookup.ipv6, data, data_len);
960  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
961  if (rdata) {
962  DatasetUnlockData(rdata);
963  return 1;
964  }
965  return 0;
966 }
967 
968 static DataRepResultType DatasetLookupIPv6wRep(
969  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
970 {
971  DataRepResultType rrep = { .found = false, .rep = { .value = 0 } };
972 
973  if (set == NULL)
974  return rrep;
975 
976  if (data_len != 16 && data_len != 4)
977  return rrep;
978 
979  IPv6Type lookup = { .rep.value = 0 };
980  memcpy(lookup.ipv6, data, data_len);
981  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
982  if (rdata) {
983  IPv6Type *found = rdata->data;
984  rrep.found = true;
985  rrep.rep = found->rep;
986  DatasetUnlockData(rdata);
987  return rrep;
988  }
989  return rrep;
990 }
991 
992 static int DatasetLookupMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
993 {
994  if (set == NULL)
995  return -1;
996 
997  if (data_len != 16)
998  return -1;
999 
1000  Md5Type lookup = { .rep.value = 0 };
1001  memcpy(lookup.md5, data, data_len);
1002  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1003  if (rdata) {
1004  DatasetUnlockData(rdata);
1005  return 1;
1006  }
1007  return 0;
1008 }
1009 
1010 static DataRepResultType DatasetLookupMd5wRep(Dataset *set,
1011  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1012 {
1013  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
1014 
1015  if (set == NULL)
1016  return rrep;
1017 
1018  if (data_len != 16)
1019  return rrep;
1020 
1021  Md5Type lookup = { .rep.value = 0};
1022  memcpy(lookup.md5, data, data_len);
1023  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1024  if (rdata) {
1025  Md5Type *found = rdata->data;
1026  rrep.found = true;
1027  rrep.rep = found->rep;
1028  DatasetUnlockData(rdata);
1029  return rrep;
1030  }
1031  return rrep;
1032 }
1033 
1034 static int DatasetLookupSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1035 {
1036  if (set == NULL)
1037  return -1;
1038 
1039  if (data_len != 32)
1040  return -1;
1041 
1042  Sha256Type lookup = { .rep.value = 0 };
1043  memcpy(lookup.sha256, data, data_len);
1044  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1045  if (rdata) {
1046  DatasetUnlockData(rdata);
1047  return 1;
1048  }
1049  return 0;
1050 }
1051 
1052 static DataRepResultType DatasetLookupSha256wRep(Dataset *set,
1053  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1054 {
1055  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
1056 
1057  if (set == NULL)
1058  return rrep;
1059 
1060  if (data_len != 32)
1061  return rrep;
1062 
1063  Sha256Type lookup = { .rep.value = 0 };
1064  memcpy(lookup.sha256, data, data_len);
1065  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
1066  if (rdata) {
1067  Sha256Type *found = rdata->data;
1068  rrep.found = true;
1069  rrep.rep = found->rep;
1070  DatasetUnlockData(rdata);
1071  return rrep;
1072  }
1073  return rrep;
1074 }
1075 
1076 /**
1077  * \brief see if \a data is part of the set
1078  * \param set dataset
1079  * \param data data to look up
1080  * \param data_len length in bytes of \a data
1081  * \retval -1 error
1082  * \retval 0 not found
1083  * \retval 1 found
1084  */
1085 int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
1086 {
1087  if (set == NULL)
1088  return -1;
1089 
1090  switch (set->type) {
1091  case DATASET_TYPE_STRING:
1092  return DatasetLookupString(set, data, data_len);
1093  case DATASET_TYPE_MD5:
1094  return DatasetLookupMd5(set, data, data_len);
1095  case DATASET_TYPE_SHA256:
1096  return DatasetLookupSha256(set, data, data_len);
1097  case DATASET_TYPE_IPV4:
1098  return DatasetLookupIPv4(set, data, data_len);
1099  case DATASET_TYPE_IPV6:
1100  return DatasetLookupIPv6(set, data, data_len);
1101  }
1102  return -1;
1103 }
1104 
1105 DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
1106  const DataRepType *rep)
1107 {
1108  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
1109  if (set == NULL)
1110  return rrep;
1111 
1112  switch (set->type) {
1113  case DATASET_TYPE_STRING:
1114  return DatasetLookupStringwRep(set, data, data_len, rep);
1115  case DATASET_TYPE_MD5:
1116  return DatasetLookupMd5wRep(set, data, data_len, rep);
1117  case DATASET_TYPE_SHA256:
1118  return DatasetLookupSha256wRep(set, data, data_len, rep);
1119  case DATASET_TYPE_IPV4:
1120  return DatasetLookupIPv4wRep(set, data, data_len, rep);
1121  case DATASET_TYPE_IPV6:
1122  return DatasetLookupIPv6wRep(set, data, data_len, rep);
1123  }
1124  return rrep;
1125 }
1126 
1127 /**
1128  * \retval 1 data was added to the hash
1129  * \retval 0 data was not added to the hash as it is already there
1130  * \retval -1 failed to add data to the hash
1131  */
1132 static int DatasetAddString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1133 {
1134  if (set == NULL)
1135  return -1;
1136 
1137  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1138  .rep.value = 0 };
1139  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1140  if (res.data) {
1141  DatasetUnlockData(res.data);
1142  return res.is_new ? 1 : 0;
1143  }
1144  return -1;
1145 }
1146 
1147 /**
1148  * \retval 1 data was added to the hash
1149  * \retval 0 data was not added to the hash as it is already there
1150  * \retval -1 failed to add data to the hash
1151  */
1152 static int DatasetAddStringwRep(
1153  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1154 {
1155  if (set == NULL)
1156  return -1;
1157 
1158  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1159  .rep = *rep };
1160  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1161  if (res.data) {
1162  DatasetUnlockData(res.data);
1163  return res.is_new ? 1 : 0;
1164  }
1165  return -1;
1166 }
1167 
1168 static int DatasetAddIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
1169 {
1170  if (set == NULL) {
1171  return -1;
1172  }
1173 
1174  if (data_len < 4) {
1175  return -2;
1176  }
1177 
1178  IPv4Type lookup = { .rep.value = 0 };
1179  memcpy(lookup.ipv4, data, 4);
1180  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1181  if (res.data) {
1182  DatasetUnlockData(res.data);
1183  return res.is_new ? 1 : 0;
1184  }
1185  return -1;
1186 }
1187 
1188 static int DatasetAddIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
1189 {
1190  if (set == NULL) {
1191  return -1;
1192  }
1193 
1194  if (data_len != 16) {
1195  return -2;
1196  }
1197 
1198  IPv6Type lookup = { .rep.value = 0 };
1199  memcpy(lookup.ipv6, data, 16);
1200  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1201  if (res.data) {
1202  DatasetUnlockData(res.data);
1203  return res.is_new ? 1 : 0;
1204  }
1205  return -1;
1206 }
1207 
1208 static int DatasetAddIPv4wRep(
1209  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1210 {
1211  if (set == NULL)
1212  return -1;
1213 
1214  if (data_len < 4)
1215  return -2;
1216 
1217  IPv4Type lookup = { .rep = *rep };
1218  memcpy(lookup.ipv4, data, 4);
1219  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1220  if (res.data) {
1221  DatasetUnlockData(res.data);
1222  return res.is_new ? 1 : 0;
1223  }
1224  return -1;
1225 }
1226 
1227 static int DatasetAddIPv6wRep(
1228  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1229 {
1230  if (set == NULL)
1231  return -1;
1232 
1233  if (data_len != 16)
1234  return -2;
1235 
1236  IPv6Type lookup = { .rep = *rep };
1237  memcpy(lookup.ipv6, data, 16);
1238  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1239  if (res.data) {
1240  DatasetUnlockData(res.data);
1241  return res.is_new ? 1 : 0;
1242  }
1243  return -1;
1244 }
1245 
1246 static int DatasetAddMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1247 {
1248  if (set == NULL)
1249  return -1;
1250 
1251  if (data_len != 16)
1252  return -2;
1253 
1254  Md5Type lookup = { .rep.value = 0 };
1255  memcpy(lookup.md5, data, 16);
1256  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1257  if (res.data) {
1258  DatasetUnlockData(res.data);
1259  return res.is_new ? 1 : 0;
1260  }
1261  return -1;
1262 }
1263 
1264 static int DatasetAddMd5wRep(
1265  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1266 {
1267  if (set == NULL)
1268  return -1;
1269 
1270  if (data_len != 16)
1271  return -2;
1272 
1273  Md5Type lookup = { .rep = *rep };
1274  memcpy(lookup.md5, data, 16);
1275  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1276  if (res.data) {
1277  DatasetUnlockData(res.data);
1278  return res.is_new ? 1 : 0;
1279  }
1280  return -1;
1281 }
1282 
1283 static int DatasetAddSha256wRep(
1284  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1285 {
1286  if (set == NULL)
1287  return -1;
1288 
1289  if (data_len != 32)
1290  return -2;
1291 
1292  Sha256Type lookup = { .rep = *rep };
1293  memcpy(lookup.sha256, data, 32);
1294  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1295  if (res.data) {
1296  DatasetUnlockData(res.data);
1297  return res.is_new ? 1 : 0;
1298  }
1299  return -1;
1300 }
1301 
1302 static int DatasetAddSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1303 {
1304  if (set == NULL)
1305  return -1;
1306 
1307  if (data_len != 32)
1308  return -2;
1309 
1310  Sha256Type lookup = { .rep.value = 0 };
1311  memcpy(lookup.sha256, data, 32);
1312  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1313  if (res.data) {
1314  DatasetUnlockData(res.data);
1315  return res.is_new ? 1 : 0;
1316  }
1317  return -1;
1318 }
1319 
1320 int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
1321 {
1322  if (set == NULL)
1323  return -1;
1324 
1325  switch (set->type) {
1326  case DATASET_TYPE_STRING:
1327  return DatasetAddString(set, data, data_len);
1328  case DATASET_TYPE_MD5:
1329  return DatasetAddMd5(set, data, data_len);
1330  case DATASET_TYPE_SHA256:
1331  return DatasetAddSha256(set, data, data_len);
1332  case DATASET_TYPE_IPV4:
1333  return DatasetAddIPv4(set, data, data_len);
1334  case DATASET_TYPE_IPV6:
1335  return DatasetAddIPv6(set, data, data_len);
1336  }
1337  return -1;
1338 }
1339 
1340 int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep)
1341 {
1342  if (set == NULL)
1343  return -1;
1344 
1345  switch (set->type) {
1346  case DATASET_TYPE_STRING:
1347  return DatasetAddStringwRep(set, data, data_len, rep);
1348  case DATASET_TYPE_MD5:
1349  return DatasetAddMd5wRep(set, data, data_len, rep);
1350  case DATASET_TYPE_SHA256:
1351  return DatasetAddSha256wRep(set, data, data_len, rep);
1352  case DATASET_TYPE_IPV4:
1353  return DatasetAddIPv4wRep(set, data, data_len, rep);
1354  case DATASET_TYPE_IPV6:
1355  return DatasetAddIPv6wRep(set, data, data_len, rep);
1356  }
1357  return -1;
1358 }
1359 
1360 typedef int (*DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len);
1361 
1362 static int DatasetOpSerialized(Dataset *set, const char *string, DatasetOpFunc DatasetOpString,
1363  DatasetOpFunc DatasetOpMd5, DatasetOpFunc DatasetOpSha256, DatasetOpFunc DatasetOpIPv4,
1364  DatasetOpFunc DatasetOpIPv6)
1365 {
1366  if (set == NULL)
1367  return -1;
1368  if (strlen(string) == 0)
1369  return -1;
1370 
1371  switch (set->type) {
1372  case DATASET_TYPE_STRING: {
1373  uint32_t decoded_size = SCBase64DecodeBufferSize(strlen(string));
1374  uint8_t decoded[decoded_size];
1375  uint32_t num_decoded = SCBase64Decode(
1376  (const uint8_t *)string, strlen(string), SCBase64ModeStrict, decoded);
1377  if (num_decoded == 0) {
1378  return -2;
1379  }
1380 
1381  return DatasetOpString(set, decoded, num_decoded);
1382  }
1383  case DATASET_TYPE_MD5: {
1384  if (strlen(string) != 32)
1385  return -2;
1386  uint8_t hash[16];
1387  if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0)
1388  return -2;
1389  return DatasetOpMd5(set, hash, 16);
1390  }
1391  case DATASET_TYPE_SHA256: {
1392  if (strlen(string) != 64)
1393  return -2;
1394  uint8_t hash[32];
1395  if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0)
1396  return -2;
1397  return DatasetOpSha256(set, hash, 32);
1398  }
1399  case DATASET_TYPE_IPV4: {
1400  struct in_addr in;
1401  if (inet_pton(AF_INET, string, &in) != 1)
1402  return -2;
1403  return DatasetOpIPv4(set, (uint8_t *)&in.s_addr, 4);
1404  }
1405  case DATASET_TYPE_IPV6: {
1406  struct in6_addr in6;
1407  if (ParseIpv6String(set, string, &in6) != 0) {
1408  SCLogError("Dataset failed to import %s as IPv6", string);
1409  return -2;
1410  }
1411  return DatasetOpIPv6(set, (uint8_t *)&in6.s6_addr, 16);
1412  }
1413  }
1414  return -1;
1415 }
1416 
1417 /** \brief add serialized data to set
1418  * \retval int 1 added
1419  * \retval int 0 already in hash
1420  * \retval int -1 API error (not added)
1421  * \retval int -2 DATA error
1422  */
1423 int DatasetAddSerialized(Dataset *set, const char *string)
1424 {
1425  return DatasetOpSerialized(set, string, DatasetAddString, DatasetAddMd5, DatasetAddSha256,
1426  DatasetAddIPv4, DatasetAddIPv6);
1427 }
1428 
1429 /** \brief add serialized data to set
1430  * \retval int 1 added
1431  * \retval int 0 already in hash
1432  * \retval int -1 API error (not added)
1433  * \retval int -2 DATA error
1434  */
1435 int DatasetLookupSerialized(Dataset *set, const char *string)
1436 {
1437  return DatasetOpSerialized(set, string, DatasetLookupString, DatasetLookupMd5,
1438  DatasetLookupSha256, DatasetLookupIPv4, DatasetLookupIPv6);
1439 }
1440 
1441 /**
1442  * \retval 1 data was removed from the hash
1443  * \retval 0 data not removed (busy)
1444  * \retval -1 data not found
1445  */
1446 static int DatasetRemoveString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1447 {
1448  if (set == NULL)
1449  return -1;
1450 
1451  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1452  .rep.value = 0 };
1453  return THashRemoveFromHash(set->hash, &lookup);
1454 }
1455 
1456 static int DatasetRemoveIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
1457 {
1458  if (set == NULL)
1459  return -1;
1460 
1461  if (data_len != 4)
1462  return -2;
1463 
1464  IPv4Type lookup = { .rep.value = 0 };
1465  memcpy(lookup.ipv4, data, 4);
1466  return THashRemoveFromHash(set->hash, &lookup);
1467 }
1468 
1469 static int DatasetRemoveIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
1470 {
1471  if (set == NULL)
1472  return -1;
1473 
1474  if (data_len != 16)
1475  return -2;
1476 
1477  IPv6Type lookup = { .rep.value = 0 };
1478  memcpy(lookup.ipv6, data, 16);
1479  return THashRemoveFromHash(set->hash, &lookup);
1480 }
1481 
1482 static int DatasetRemoveMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1483 {
1484  if (set == NULL)
1485  return -1;
1486 
1487  if (data_len != 16)
1488  return -2;
1489 
1490  Md5Type lookup = { .rep.value = 0 };
1491  memcpy(lookup.md5, data, 16);
1492  return THashRemoveFromHash(set->hash, &lookup);
1493 }
1494 
1495 static int DatasetRemoveSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1496 {
1497  if (set == NULL)
1498  return -1;
1499 
1500  if (data_len != 32)
1501  return -2;
1502 
1503  Sha256Type lookup = { .rep.value = 0 };
1504  memcpy(lookup.sha256, data, 32);
1505  return THashRemoveFromHash(set->hash, &lookup);
1506 }
1507 
1508 /** \brief remove serialized data from set
1509  * \retval int 1 removed
1510  * \retval int 0 found but busy (not removed)
1511  * \retval int -1 API error (not removed)
1512  * \retval int -2 DATA error */
1513 int DatasetRemoveSerialized(Dataset *set, const char *string)
1514 {
1515  return DatasetOpSerialized(set, string, DatasetRemoveString, DatasetRemoveMd5,
1516  DatasetRemoveSha256, DatasetRemoveIPv4, DatasetRemoveIPv6);
1517 }
1518 
1519 int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len)
1520 {
1521  if (set == NULL)
1522  return -1;
1523 
1524  switch (set->type) {
1525  case DATASET_TYPE_STRING:
1526  return DatasetRemoveString(set, data, data_len);
1527  case DATASET_TYPE_MD5:
1528  return DatasetRemoveMd5(set, data, data_len);
1529  case DATASET_TYPE_SHA256:
1530  return DatasetRemoveSha256(set, data, data_len);
1531  case DATASET_TYPE_IPV4:
1532  return DatasetRemoveIPv4(set, data, data_len);
1533  case DATASET_TYPE_IPV6:
1534  return DatasetRemoveIPv6(set, data, data_len);
1535  }
1536  return -1;
1537 }
dataset_used_hashsize
uint32_t dataset_used_hashsize
Definition: datasets.c:50
util-byte.h
sets_lock
SCMutex sets_lock
Definition: datasets.c:44
StringType::rep
DataRepType rep
Definition: datasets-string.h:31
len
uint8_t len
Definition: app-layer-dnp3.h:2
datasets-string.h
DataRepResultType::rep
DataRepType rep
Definition: datasets-reputation.h:31
THashDataGetResult::data
THashData * data
Definition: util-thash.h:192
datasets-md5.h
Dataset::name
char name[DATASET_NAME_MAX_LEN+1]
Definition: datasets.h:42
Dataset::id
uint32_t id
Definition: datasets.h:44
Dataset::save
char save[PATH_MAX]
Definition: datasets.h:50
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:269
ParseSizeStringU64
int ParseSizeStringU64(const char *size, uint64_t *res)
Definition: util-misc.c:190
next
struct HtpBodyChunk_ * next
Definition: app-layer-htp.h:0
datasets-sha256.h
IPv6Compare
bool IPv6Compare(void *a, void *b)
Definition: datasets-ipv6.c:41
THashRemoveFromHash
int THashRemoveFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:867
TYPE_STATE
@ TYPE_STATE
Definition: datasets.c:278
Md5Type
Definition: datasets-md5.h:29
Dataset::hash
THashTableContext * hash
Definition: datasets.h:47
ctx
struct Thresholds ctx
IPv4Hash
uint32_t IPv4Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv4.c:49
Sha256Type::sha256
uint8_t sha256[32]
Definition: datasets-sha256.h:30
SCConfGet
int SCConfGet(const char *name, const char **vptr)
Retrieve the value of a configuration node.
Definition: conf.c:350
Md5Type::rep
DataRepType rep
Definition: datasets-md5.h:31
DataRepResultType::found
bool found
Definition: datasets-reputation.h:30
PrintHexString
void PrintHexString(char *str, size_t size, uint8_t *buf, size_t buf_len)
Definition: util-print.c:255
Dataset::type
enum DatasetTypes type
Definition: datasets.h:43
TAILQ_FOREACH
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:252
THashConsolidateMemcap
void THashConsolidateMemcap(THashTableContext *ctx)
Definition: util-thash.c:345
SCMutexLock
#define SCMutexLock(mut)
Definition: threads-debug.h:117
rust.h
DATASET_TYPE_SHA256
@ DATASET_TYPE_SHA256
Definition: datasets.h:35
Sha256Type::rep
DataRepType rep
Definition: datasets-sha256.h:31
SCMUTEX_INITIALIZER
#define SCMUTEX_INITIALIZER
Definition: threads-debug.h:121
datasets-reputation.h
DatasetAddwRep
int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep)
Definition: datasets.c:1340
ConfigGetDataDirectory
const char * ConfigGetDataDirectory(void)
Definition: util-conf.c:80
Md5Type::md5
uint8_t md5[16]
Definition: datasets-md5.h:30
DATASET_TYPE_IPV6
@ DATASET_TYPE_IPV6
Definition: datasets.h:37
Md5StrCompare
bool Md5StrCompare(void *a, void *b)
Definition: datasets-md5.c:42
DatasetLookupSerialized
int DatasetLookupSerialized(Dataset *set, const char *string)
look up serialized data in set
Definition: datasets.c:1435
strlcpy
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: util-strlcpyu.c:43
DataRepResultType
Definition: datasets-reputation.h:29
DatasetGet
Dataset * DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load, uint64_t memcap, uint32_t hashsize)
Definition: datasets.c:357
dataset_max_one_hashsize
uint32_t dataset_max_one_hashsize
Definition: datasets.c:48
StringSet
int StringSet(void *dst, void *src)
Definition: datasets-string.c:60
DatasetRemove
int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1519
THashDataConfig_::hash_size
uint32_t hash_size
Definition: util-thash.h:127
datasets.h
IPv6Set
int IPv6Set(void *dst, void *src)
Definition: datasets-ipv6.c:32
util-debug.h
TYPE_LOAD
@ TYPE_LOAD
Definition: datasets.c:279
DatasetAdd
int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1320
strlcat
size_t strlcat(char *, const char *src, size_t siz)
Definition: util-strlcatu.c:45
DATASETS_HASHSIZE_DEFAULT
#define DATASETS_HASHSIZE_DEFAULT
Definition: datasets.c:578
StringAsBase64
int StringAsBase64(const void *s, char *out, size_t out_size)
Definition: datasets-string.c:46
SCMutexUnlock
#define SCMutexUnlock(mut)
Definition: threads-debug.h:119
datasets-ipv6.h
IPv6Type::ipv6
uint8_t ipv6[16]
Definition: datasets-ipv6.h:30
DATASET_TYPE_NOTSET
#define DATASET_TYPE_NOTSET
Definition: datasets.h:32
IPv6Type::rep
DataRepType rep
Definition: datasets-ipv6.h:31
util-print.h
DatasetPostReloadCleanup
void DatasetPostReloadCleanup(void)
Definition: datasets.c:550
PrintInet
const char * PrintInet(int af, const void *src, char *dst, socklen_t size)
Definition: util-print.c:231
DatasetOpFunc
int(* DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1360
datasets-ipv4.h
SCLogWarning
#define SCLogWarning(...)
Macro used to log WARNING messages.
Definition: util-debug.h:249
StringGetLength
uint32_t StringGetLength(void *s)
Definition: datasets-string.c:93
Sha256StrSet
int Sha256StrSet(void *dst, void *src)
Definition: datasets-sha256.c:31
DatasetsDestroy
void DatasetsDestroy(void)
Definition: datasets.c:755
Md5StrHash
uint32_t Md5StrHash(uint32_t hash_seed, void *s)
Definition: datasets-md5.c:50
THashDataGetResult
Definition: util-thash.h:191
StringType
Definition: datasets-string.h:29
IPv4Set
int IPv4Set(void *dst, void *src)
Definition: datasets-ipv4.c:32
type
uint16_t type
Definition: decode-vlan.c:106
DatasetsSave
void DatasetsSave(void)
Definition: datasets.c:833
conf.h
IPv6Type
Definition: datasets-ipv6.h:29
name
const char * name
Definition: tm-threads.c:2135
DatasetLookup
int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
see if data is part of the set
Definition: datasets.c:1085
DATASET_TYPE_IPV4
@ DATASET_TYPE_IPV4
Definition: datasets.h:36
StringType::ptr
uint8_t * ptr
Definition: datasets-string.h:32
DatasetRemoveSerialized
int DatasetRemoveSerialized(Dataset *set, const char *string)
remove serialized data from set
Definition: datasets.c:1513
dataset_max_total_hashsize
uint32_t dataset_max_total_hashsize
Definition: datasets.c:49
g_system
bool g_system
Definition: suricata.c:190
THashShutdown
void THashShutdown(THashTableContext *ctx)
shutdown the flow engine
Definition: util-thash.c:354
SCConfNodeLookupChild
SCConfNode * SCConfNodeLookupChild(const SCConfNode *node, const char *name)
Lookup a child configuration node by name.
Definition: conf.c:796
DatasetTypes
DatasetTypes
Definition: datasets.h:31
Dataset::next
struct Dataset * next
Definition: datasets.h:52
THashData_::data
void * data
Definition: util-thash.h:92
util-conf.h
Sha256Type
Definition: datasets-sha256.h:29
Sha256StrFree
void Sha256StrFree(void *s)
Definition: datasets-sha256.c:55
THashData_
Definition: util-thash.h:85
IPv4Free
void IPv4Free(void *s)
Definition: datasets-ipv4.c:56
suricata-common.h
util-path.h
FatalErrorOnInit
#define FatalErrorOnInit(...)
Fatal error IF we're starting up, and configured to consider errors to be fatal errors.
Definition: util-debug.h:511
DATASET_NAME_MAX_LEN
#define DATASET_NAME_MAX_LEN
Definition: datasets.h:40
PathIsAbsolute
int PathIsAbsolute(const char *path)
Check if a path is absolute.
Definition: util-path.c:44
Md5StrSet
int Md5StrSet(void *dst, void *src)
Definition: datasets-md5.c:33
StringCompare
bool StringCompare(void *a, void *b)
Definition: datasets-string.c:76
THashGetFromHash
struct THashDataGetResult THashGetFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:614
FatalError
#define FatalError(...)
Definition: util-debug.h:502
hashsize
#define hashsize(n)
Definition: util-hash-lookup3.h:40
THashLookupFromHash
THashData * THashLookupFromHash(THashTableContext *ctx, void *data)
look up data in the hash
Definition: util-thash.c:724
IPv4Type
Definition: datasets-ipv4.h:29
ParseSizeStringU32
int ParseSizeStringU32(const char *size, uint32_t *res)
Definition: util-misc.c:173
THashDecrUsecnt
#define THashDecrUsecnt(h)
Definition: util-thash.h:170
IPv4Compare
bool IPv4Compare(void *a, void *b)
Definition: datasets-ipv4.c:41
DatasetFind
Dataset * DatasetFind(const char *name, enum DatasetTypes type)
look for set by name without creating it
Definition: datasets.c:315
util-validate.h
SCLogConfig
struct SCLogConfig_ SCLogConfig
Holds the config state used by the logging api.
IPv6Hash
uint32_t IPv6Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv6.c:49
DatasetsInit
int DatasetsInit(void)
Definition: datasets.c:603
str
#define str(s)
Definition: suricata-common.h:300
DatasetGetTypeFromString
enum DatasetTypes DatasetGetTypeFromString(const char *s)
Definition: datasets.c:62
SCConfGetNode
SCConfNode * SCConfGetNode(const char *name)
Get a SCConfNode by name.
Definition: conf.c:181
SCLogError
#define SCLogError(...)
Macro used to log ERROR messages.
Definition: util-debug.h:261
THashWalk
int THashWalk(THashTableContext *ctx, THashFormatFunc FormatterFunc, THashOutputFunc OutputterFunc, void *output_ctx)
Walk the hash.
Definition: util-thash.c:388
SCFree
#define SCFree(p)
Definition: util-mem.h:61
Dataset::hidden
bool hidden
Definition: datasets.h:46
DatasetReload
void DatasetReload(void)
Definition: datasets.c:524
DatasetGetPathType
DatasetGetPathType
Definition: datasets.c:277
Sha256StrCompare
bool Sha256StrCompare(void *a, void *b)
Definition: datasets-sha256.c:40
StringHash
uint32_t StringHash(uint32_t hash_seed, void *s)
Definition: datasets-string.c:87
DATASET_TYPE_MD5
@ DATASET_TYPE_MD5
Definition: datasets.h:34
DATASET_TYPE_STRING
@ DATASET_TYPE_STRING
Definition: datasets.h:33
THashDataGetResult::is_new
bool is_new
Definition: util-thash.h:193
IPv6Free
void IPv6Free(void *s)
Definition: datasets-ipv6.c:56
suricata.h
THashInit
THashTableContext * THashInit(const char *cnf_prefix, uint32_t data_size, int(*DataSet)(void *, void *), void(*DataFree)(void *), uint32_t(*DataHash)(uint32_t, void *), bool(*DataCompare)(void *, void *), bool(*DataExpired)(void *, SCTime_t), uint32_t(*DataSize)(void *), bool reset_memcap, uint64_t memcap, uint32_t hashsize)
Definition: util-thash.c:302
SCConfNode_::name
char * name
Definition: conf.h:33
IPv4Type::ipv4
uint8_t ipv4[4]
Definition: datasets-ipv4.h:30
DatasetAddSerialized
int DatasetAddSerialized(Dataset *set, const char *string)
add serialized data to set
Definition: datasets.c:1423
Dataset
Definition: datasets.h:41
Dataset::from_yaml
bool from_yaml
Definition: datasets.h:45
IPv4Type::rep
DataRepType rep
Definition: datasets-ipv4.h:31
SC_ATOMIC_GET
#define SC_ATOMIC_GET(name)
Get the value from the atomic variable.
Definition: util-atomic.h:375
util-misc.h
util-thash.h
Dataset::load
char load[PATH_MAX]
Definition: datasets.h:49
SCCalloc
#define SCCalloc(nm, sz)
Definition: util-mem.h:53
SCConfNode_
Definition: conf.h:32
Sha256StrHash
uint32_t Sha256StrHash(uint32_t hash_seed, void *s)
Definition: datasets-sha256.c:48
SCConfNode_::val
char * val
Definition: conf.h:34
Md5StrFree
void Md5StrFree(void *s)
Definition: datasets-md5.c:57
SCMutex
#define SCMutex
Definition: threads-debug.h:114
DEBUG_VALIDATE_BUG_ON
#define DEBUG_VALIDATE_BUG_ON(exp)
Definition: util-validate.h:102
StringFree
void StringFree(void *s)
Definition: datasets-string.c:100
DatasetLookupwRep
DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
Definition: datasets.c:1105
THashTableContext_::config
THashConfig config
Definition: util-thash.h:151