suricata
datasets.c
Go to the documentation of this file.
1 /* Copyright (C) 2017-2024 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Victor Julien <victor@inliniac.net>
22  */
23 
24 #include "suricata-common.h"
25 #include "suricata.h"
26 #include "rust.h"
27 #include "conf.h"
28 #include "datasets.h"
29 #include "datasets-string.h"
30 #include "datasets-ipv4.h"
31 #include "datasets-ipv6.h"
32 #include "datasets-md5.h"
33 #include "datasets-sha256.h"
34 #include "datasets-reputation.h"
35 #include "util-conf.h"
36 #include "util-thash.h"
37 #include "util-print.h"
38 #include "util-byte.h"
39 #include "util-misc.h"
40 #include "util-path.h"
41 #include "util-debug.h"
42 
44 static Dataset *sets = NULL;
45 static uint32_t set_ids = 0;
46 
47 int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep);
48 
49 static inline void DatasetUnlockData(THashData *d)
50 {
51  (void) THashDecrUsecnt(d);
52  THashDataUnlock(d);
53 }
54 static bool DatasetIsStatic(const char *save, const char *load);
55 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize);
56 
57 enum DatasetTypes DatasetGetTypeFromString(const char *s)
58 {
59  if (strcasecmp("md5", s) == 0)
60  return DATASET_TYPE_MD5;
61  if (strcasecmp("sha256", s) == 0)
62  return DATASET_TYPE_SHA256;
63  if (strcasecmp("string", s) == 0)
64  return DATASET_TYPE_STRING;
65  if (strcasecmp("ipv4", s) == 0)
66  return DATASET_TYPE_IPV4;
67  if (strcasecmp("ip", s) == 0)
68  return DATASET_TYPE_IPV6;
69  return DATASET_TYPE_NOTSET;
70 }
71 
72 static Dataset *DatasetAlloc(const char *name)
73 {
74  Dataset *set = SCCalloc(1, sizeof(*set));
75  if (set) {
76  set->id = set_ids++;
77  }
78  return set;
79 }
80 
81 static Dataset *DatasetSearchByName(const char *name)
82 {
83  Dataset *set = sets;
84  while (set) {
85  if (strcasecmp(name, set->name) == 0 && !set->hidden) {
86  return set;
87  }
88  set = set->next;
89  }
90  return NULL;
91 }
92 
/** \brief value of a single hex digit, or -1 if \a c is not a hex digit */
static inline int HexToRawNibble(uint8_t c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}

/**
 * \brief decode a hex string into raw bytes
 *
 * \param in hex characters (not necessarily NUL terminated)
 * \param ins number of characters in \a in; must be even and at least 2
 * \param out output buffer
 * \param outs size of \a out; must be exactly \a ins / 2
 *
 * Every input character has to be a hex digit. Anything else (whitespace,
 * signs, "0x" prefixes, other letters) is rejected instead of being
 * silently decoded as zero.
 *
 * \retval 0 success, \a out holds the decoded bytes
 * \retval -1 invalid length or invalid character; \a out is left untouched
 */
static int HexToRaw(const uint8_t *in, size_t ins, uint8_t *out, size_t outs)
{
    if (ins < 2)
        return -1;
    if (ins % 2 != 0)
        return -1;
    if (outs != ins / 2)
        return -1;

    /* validate the whole input first so 'out' is only written on success */
    for (size_t i = 0; i < ins; i++) {
        if (HexToRawNibble(in[i]) < 0) {
            SCLogError("invalid hex character 0x%02x in hash", in[i]);
            return -1;
        }
    }

    for (size_t i = 0, x = 0; i < ins; i += 2, x++) {
        out[x] = (uint8_t)((HexToRawNibble(in[i]) << 4) | HexToRawNibble(in[i + 1]));
    }
    return 0;
}
122 
123 static int DatasetLoadIPv4(Dataset *set)
124 {
125  if (strlen(set->load) == 0)
126  return 0;
127 
128  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
129  const char *fopen_mode = "r";
130  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
131  fopen_mode = "a+";
132  }
133 
134  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv4);
135  if (retval == -2) {
136  FatalErrorOnInit("dataset %s could not be processed", set->name);
137  } else if (retval == -1) {
138  return -1;
139  }
140 
142 
143  return 0;
144 }
145 
146 static int ParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6)
147 {
148  /* Checking IPv6 case */
149  char *got_colon = strchr(line, ':');
150  if (got_colon) {
151  uint32_t ip6addr[4];
152  if (inet_pton(AF_INET6, line, in6) != 1) {
153  FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
154  return -1;
155  }
156  memcpy(&ip6addr, in6->s6_addr, sizeof(ip6addr));
157  /* IPv4 in IPv6 notation needs transformation to internal Suricata storage */
158  if (ip6addr[0] == 0 && ip6addr[1] == 0 && ip6addr[2] == 0xFFFF0000) {
159  ip6addr[0] = ip6addr[3];
160  ip6addr[2] = 0;
161  ip6addr[3] = 0;
162  memcpy(in6, ip6addr, sizeof(struct in6_addr));
163  }
164  } else {
165  /* IPv4 case */
166  struct in_addr in;
167  if (inet_pton(AF_INET, line, &in) != 1) {
168  FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
169  return -1;
170  }
171  memset(in6, 0, sizeof(struct in6_addr));
172  memcpy(in6, &in, sizeof(struct in_addr));
173  }
174  return 0;
175 }
176 
177 static int DatasetLoadIPv6(Dataset *set)
178 {
179  if (strlen(set->load) == 0)
180  return 0;
181 
182  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
183  const char *fopen_mode = "r";
184  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
185  fopen_mode = "a+";
186  }
187 
188  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv6);
189  if (retval == -2) {
190  FatalErrorOnInit("dataset %s could not be processed", set->name);
191  } else if (retval == -1) {
192  return -1;
193  }
194 
196 
197  return 0;
198 }
199 
200 static int DatasetLoadMd5(Dataset *set)
201 {
202  if (strlen(set->load) == 0)
203  return 0;
204 
205  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
206  const char *fopen_mode = "r";
207  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
208  fopen_mode = "a+";
209  }
210 
211  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSMd5);
212  if (retval == -2) {
213  FatalErrorOnInit("dataset %s could not be processed", set->name);
214  } else if (retval == -1) {
215  return -1;
216  }
217 
219 
220  return 0;
221 }
222 
223 static int DatasetLoadSha256(Dataset *set)
224 {
225  if (strlen(set->load) == 0)
226  return 0;
227 
228  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
229  const char *fopen_mode = "r";
230  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
231  fopen_mode = "a+";
232  }
233 
234  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSSha256);
235  if (retval == -2) {
236  FatalErrorOnInit("dataset %s could not be processed", set->name);
237  } else if (retval == -1) {
238  return -1;
239  }
240 
242 
243  return 0;
244 }
245 
246 static int DatasetLoadString(Dataset *set)
247 {
248  if (strlen(set->load) == 0)
249  return 0;
250 
251  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
252 
253  const char *fopen_mode = "r";
254  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
255  fopen_mode = "a+";
256  }
257 
258  int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSString);
259  if (retval == -2) {
260  FatalErrorOnInit("dataset %s could not be processed", set->name);
261  } else if (retval == -1) {
262  return -1;
263  }
264 
266 
267  return 0;
268 }
269 
270 extern bool g_system;
271 
275 };
276 
277 static void DatasetGetPath(const char *in_path,
278  char *out_path, size_t out_size, enum DatasetGetPathType type)
279 {
280  char path[PATH_MAX];
281  struct stat st;
282 
283  if (PathIsAbsolute(in_path)) {
284  strlcpy(path, in_path, sizeof(path));
285  strlcpy(out_path, path, out_size);
286  return;
287  }
288 
289  const char *data_dir = ConfigGetDataDirectory();
290  if (stat(data_dir, &st) != 0) {
291  SCLogDebug("data-dir '%s': %s", data_dir, strerror(errno));
292  return;
293  }
294 
295  snprintf(path, sizeof(path), "%s/%s", data_dir, in_path); // TODO WINDOWS
296 
297  if (type == TYPE_LOAD) {
298  if (stat(path, &st) != 0) {
299  SCLogDebug("path %s: %s", path, strerror(errno));
300  if (!g_system) {
301  snprintf(path, sizeof(path), "%s", in_path);
302  }
303  }
304  }
305  strlcpy(out_path, path, out_size);
306  SCLogDebug("in_path \'%s\' => \'%s\'", in_path, out_path);
307 }
308 
309 /** \brief look for set by name without creating it */
311 {
313  Dataset *set = DatasetSearchByName(name);
314  if (set) {
315  if (set->type != type) {
317  return NULL;
318  }
319  }
321  return set;
322 }
323 
324 Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load,
325  uint64_t memcap, uint32_t hashsize)
326 {
327  uint64_t default_memcap = 0;
328  uint32_t default_hashsize = 0;
329  if (strlen(name) > DATASET_NAME_MAX_LEN) {
330  return NULL;
331  }
332 
334  Dataset *set = DatasetSearchByName(name);
335  if (set) {
336  if (type != DATASET_TYPE_NOTSET && set->type != type) {
337  SCLogError("dataset %s already "
338  "exists and is of type %u",
339  set->name, set->type);
341  return NULL;
342  }
343 
344  if ((save == NULL || strlen(save) == 0) &&
345  (load == NULL || strlen(load) == 0)) {
346  // OK, rule keyword doesn't have to set state/load,
347  // even when yaml set has set it.
348  } else {
349  if ((save == NULL && strlen(set->save) > 0) ||
350  (save != NULL && strcmp(set->save, save) != 0)) {
351  SCLogError("dataset %s save mismatch: %s != %s", set->name, set->save, save);
353  return NULL;
354  }
355  if ((load == NULL && strlen(set->load) > 0) ||
356  (load != NULL && strcmp(set->load, load) != 0)) {
357  SCLogError("dataset %s load mismatch: %s != %s", set->name, set->load, load);
359  return NULL;
360  }
361  }
362 
364  return set;
365  } else {
366  if (type == DATASET_TYPE_NOTSET) {
367  SCLogError("dataset %s not defined", name);
368  goto out_err;
369  }
370  }
371 
372  set = DatasetAlloc(name);
373  if (set == NULL) {
374  goto out_err;
375  }
376 
377  strlcpy(set->name, name, sizeof(set->name));
378  set->type = type;
379  if (save && strlen(save)) {
380  strlcpy(set->save, save, sizeof(set->save));
381  SCLogDebug("name %s save '%s'", name, set->save);
382  }
383  if (load && strlen(load)) {
384  strlcpy(set->load, load, sizeof(set->load));
385  SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load);
386  }
387 
388  char cnf_name[128];
389  snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
390 
391  GetDefaultMemcap(&default_memcap, &default_hashsize);
392  switch (type) {
393  case DATASET_TYPE_MD5:
394  set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet, Md5StrFree, Md5StrHash,
395  Md5StrCompare, NULL, NULL, load != NULL ? 1 : 0,
396  memcap > 0 ? memcap : default_memcap,
397  hashsize > 0 ? hashsize : default_hashsize);
398  if (set->hash == NULL)
399  goto out_err;
400  if (DatasetLoadMd5(set) < 0)
401  goto out_err;
402  break;
403  case DATASET_TYPE_STRING:
404  set->hash = THashInit(cnf_name, sizeof(StringType), StringSet, StringFree, StringHash,
405  StringCompare, NULL, StringGetLength, load != NULL ? 1 : 0,
406  memcap > 0 ? memcap : default_memcap,
407  hashsize > 0 ? hashsize : default_hashsize);
408  if (set->hash == NULL)
409  goto out_err;
410  if (DatasetLoadString(set) < 0)
411  goto out_err;
412  break;
413  case DATASET_TYPE_SHA256:
414  set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet, Sha256StrFree,
415  Sha256StrHash, Sha256StrCompare, NULL, NULL, load != NULL ? 1 : 0,
416  memcap > 0 ? memcap : default_memcap,
417  hashsize > 0 ? hashsize : default_hashsize);
418  if (set->hash == NULL)
419  goto out_err;
420  if (DatasetLoadSha256(set) < 0)
421  goto out_err;
422  break;
423  case DATASET_TYPE_IPV4:
424  set->hash =
425  THashInit(cnf_name, sizeof(IPv4Type), IPv4Set, IPv4Free, IPv4Hash, IPv4Compare,
426  NULL, NULL, load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
427  hashsize > 0 ? hashsize : default_hashsize);
428  if (set->hash == NULL)
429  goto out_err;
430  if (DatasetLoadIPv4(set) < 0)
431  goto out_err;
432  break;
433  case DATASET_TYPE_IPV6:
434  set->hash =
435  THashInit(cnf_name, sizeof(IPv6Type), IPv6Set, IPv6Free, IPv6Hash, IPv6Compare,
436  NULL, NULL, load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
437  hashsize > 0 ? hashsize : default_hashsize);
438  if (set->hash == NULL)
439  goto out_err;
440  if (DatasetLoadIPv6(set) < 0)
441  goto out_err;
442  break;
443  }
444 
445  if (set->hash && SC_ATOMIC_GET(set->hash->memcap_reached)) {
446  SCLogError("dataset too large for set memcap");
447  goto out_err;
448  }
449 
450  SCLogDebug("set %p/%s type %u save %s load %s",
451  set, set->name, set->type, set->save, set->load);
452 
453  set->next = sets;
454  sets = set;
455 
457  return set;
458 out_err:
459  if (set) {
460  if (set->hash) {
461  THashShutdown(set->hash);
462  }
463  SCFree(set);
464  }
466  return NULL;
467 }
468 
/**
 * \brief check whether a set is static
 *
 * A set is static when it has a load file but no save/state file.
 *
 * \param save save path, may be NULL or empty
 * \param load load path, may be NULL or empty
 * \retval true \a load is non-empty and \a save is NULL or empty
 * \retval false otherwise
 */
static bool DatasetIsStatic(const char *save, const char *load)
{
    const bool has_load = (load != NULL) && (load[0] != '\0');
    const bool has_save = (save != NULL) && (save[0] != '\0');

    return has_load && !has_save;
}
480 
481 void DatasetReload(void)
482 {
483  /* In order to reload the datasets, just mark the current sets as hidden
484  * and clean them up later.
485  * New datasets shall be created with the rule reload and do not require
486  * any intervention.
487  * */
489  Dataset *set = sets;
490  while (set) {
491  if (!DatasetIsStatic(set->save, set->load) || set->from_yaml) {
492  SCLogDebug("Not a static set, skipping %s", set->name);
493  set = set->next;
494  continue;
495  }
496  set->hidden = true;
497  SCLogDebug("Set %s at %p hidden successfully", set->name, set);
498  set = set->next;
499  }
501 }
502 
504 {
505  SCLogDebug("Post Reload Cleanup starting.. Hidden sets will be removed");
507  Dataset *cur = sets;
508  Dataset *prev = NULL;
509  while (cur) {
510  Dataset *next = cur->next;
511  if (!cur->hidden) {
512  prev = cur;
513  cur = next;
514  continue;
515  }
516  // Delete the set in case it was hidden
517  if (prev != NULL) {
518  prev->next = next;
519  } else {
520  sets = next;
521  }
522  THashShutdown(cur->hash);
523  SCFree(cur);
524  cur = next;
525  }
527 }
528 
/**
 * \brief read the configured dataset defaults
 *
 * \param memcap set from "datasets.defaults.memcap"
 * \param hashsize set from "datasets.defaults.hashsize"
 *
 * An output is only written when its option is present: to the parsed
 * value, or to 0 (with a warning) if the value cannot be parsed. Without
 * the option the output keeps the value the caller put there.
 */
static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize)
{
    const char *value = NULL;

    if (ConfGet("datasets.defaults.memcap", &value) == 1 &&
            ParseSizeStringU64(value, memcap) < 0) {
        SCLogWarning("memcap value cannot be deduced: %s, resetting to default", value);
        *memcap = 0;
    }

    if (ConfGet("datasets.defaults.hashsize", &value) == 1 &&
            ParseSizeStringU32(value, hashsize) < 0) {
        SCLogWarning("hashsize value cannot be deduced: %s, resetting to default", value);
        *hashsize = 0;
    }
}
549 
550 int DatasetsInit(void)
551 {
552  SCLogDebug("datasets start");
553  ConfNode *datasets = ConfGetNode("datasets");
554  uint64_t default_memcap = 0;
555  uint32_t default_hashsize = 0;
556  GetDefaultMemcap(&default_memcap, &default_hashsize);
557  if (datasets != NULL) {
558  int list_pos = 0;
559  ConfNode *iter = NULL;
560  TAILQ_FOREACH(iter, &datasets->head, next) {
561  if (iter->name == NULL) {
562  list_pos++;
563  continue;
564  }
565 
566  char save[PATH_MAX] = "";
567  char load[PATH_MAX] = "";
568  uint64_t memcap = 0;
569  uint32_t hashsize = 0;
570 
571  const char *set_name = iter->name;
572  if (strlen(set_name) > DATASET_NAME_MAX_LEN) {
574  "set name '%s' too long, max %d chars", set_name, DATASET_NAME_MAX_LEN);
575  continue;
576  }
577 
578  ConfNode *set_type =
579  ConfNodeLookupChild(iter, "type");
580  if (set_type == NULL) {
581  list_pos++;
582  continue;
583  }
584 
585  ConfNode *set_save =
586  ConfNodeLookupChild(iter, "state");
587  if (set_save) {
588  DatasetGetPath(set_save->val, save, sizeof(save), TYPE_STATE);
589  strlcpy(load, save, sizeof(load));
590  } else {
591  ConfNode *set_load =
592  ConfNodeLookupChild(iter, "load");
593  if (set_load) {
594  DatasetGetPath(set_load->val, load, sizeof(load), TYPE_LOAD);
595  }
596  }
597 
598  ConfNode *set_memcap = ConfNodeLookupChild(iter, "memcap");
599  if (set_memcap) {
600  if (ParseSizeStringU64(set_memcap->val, &memcap) < 0) {
601  SCLogWarning("memcap value cannot be"
602  " deduced: %s, resetting to default",
603  set_memcap->val);
604  memcap = 0;
605  }
606  }
607  ConfNode *set_hashsize = ConfNodeLookupChild(iter, "hashsize");
608  if (set_hashsize) {
609  if (ParseSizeStringU32(set_hashsize->val, &hashsize) < 0) {
610  SCLogWarning("hashsize value cannot be"
611  " deduced: %s, resetting to default",
612  set_hashsize->val);
613  hashsize = 0;
614  }
615  }
616  char conf_str[1024];
617  snprintf(conf_str, sizeof(conf_str), "datasets.%d.%s", list_pos, set_name);
618 
619  SCLogDebug("set %s type %s. Conf %s", set_name, set_type->val, conf_str);
620 
621  if (strcmp(set_type->val, "md5") == 0) {
622  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_MD5, save, load,
623  memcap > 0 ? memcap : default_memcap,
624  hashsize > 0 ? hashsize : default_hashsize);
625  if (dset == NULL) {
626  FatalErrorOnInit("failed to setup dataset for %s", set_name);
627  continue;
628  }
629  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
630  dset->from_yaml = true;
631 
632  } else if (strcmp(set_type->val, "sha256") == 0) {
633  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_SHA256, save, load,
634  memcap > 0 ? memcap : default_memcap,
635  hashsize > 0 ? hashsize : default_hashsize);
636  if (dset == NULL) {
637  FatalErrorOnInit("failed to setup dataset for %s", set_name);
638  continue;
639  }
640  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
641  dset->from_yaml = true;
642 
643  } else if (strcmp(set_type->val, "string") == 0) {
644  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_STRING, save, load,
645  memcap > 0 ? memcap : default_memcap,
646  hashsize > 0 ? hashsize : default_hashsize);
647  if (dset == NULL) {
648  FatalErrorOnInit("failed to setup dataset for %s", set_name);
649  continue;
650  }
651  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
652  dset->from_yaml = true;
653 
654  } else if (strcmp(set_type->val, "ipv4") == 0) {
655  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_IPV4, save, load,
656  memcap > 0 ? memcap : default_memcap,
657  hashsize > 0 ? hashsize : default_hashsize);
658  if (dset == NULL) {
659  FatalErrorOnInit("failed to setup dataset for %s", set_name);
660  continue;
661  }
662  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
663  dset->from_yaml = true;
664 
665  } else if (strcmp(set_type->val, "ip") == 0) {
666  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_IPV6, save, load,
667  memcap > 0 ? memcap : default_memcap,
668  hashsize > 0 ? hashsize : default_hashsize);
669  if (dset == NULL) {
670  FatalErrorOnInit("failed to setup dataset for %s", set_name);
671  continue;
672  }
673  SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
674  dset->from_yaml = true;
675  }
676 
677  list_pos++;
678  }
679  }
680  SCLogDebug("datasets done: %p", datasets);
681  return 0;
682 }
683 
684 void DatasetsDestroy(void)
685 {
686  SCLogDebug("destroying datasets: %p", sets);
688  Dataset *set = sets;
689  while (set) {
690  SCLogDebug("destroying set %s", set->name);
691  Dataset *next = set->next;
692  THashShutdown(set->hash);
693  SCFree(set);
694  set = next;
695  }
696  sets = NULL;
698  SCLogDebug("destroying datasets done: %p", sets);
699 }
700 
/**
 * \brief output callback writing one formatted entry to a file
 * \param ctx FILE pointer to write to, may be NULL
 * \param data bytes to write
 * \param data_len number of bytes in \a data
 * \retval n result of fwrite() for a single item of \a data_len bytes
 * \retval 0 \a ctx is NULL
 */
static int SaveCallback(void *ctx, const uint8_t *data, const uint32_t data_len)
{
    FILE *out = ctx;
    if (out == NULL) {
        return 0;
    }
    const size_t items = fwrite(data, data_len, 1, out);
    return (int)items;
}
710 
711 static int Md5AsAscii(const void *s, char *out, size_t out_size)
712 {
713  const Md5Type *md5 = s;
714  char str[256];
715  PrintHexString(str, sizeof(str), (uint8_t *)md5->md5, sizeof(md5->md5));
716  strlcat(out, str, out_size);
717  strlcat(out, "\n", out_size);
718  return (int)strlen(out);
719 }
720 
721 static int Sha256AsAscii(const void *s, char *out, size_t out_size)
722 {
723  const Sha256Type *sha = s;
724  char str[256];
725  PrintHexString(str, sizeof(str), (uint8_t *)sha->sha256, sizeof(sha->sha256));
726  strlcat(out, str, out_size);
727  strlcat(out, "\n", out_size);
728  return (int)strlen(out);
729 }
730 
731 static int IPv4AsAscii(const void *s, char *out, size_t out_size)
732 {
733  const IPv4Type *ip4 = s;
734  char str[256];
735  PrintInet(AF_INET, ip4->ipv4, str, sizeof(str));
736  strlcat(out, str, out_size);
737  strlcat(out, "\n", out_size);
738  return (int)strlen(out);
739 }
740 
741 static int IPv6AsAscii(const void *s, char *out, size_t out_size)
742 {
743  const IPv6Type *ip6 = s;
744  char str[256];
745  bool is_ipv4 = true;
746  for (int i = 4; i <= 15; i++) {
747  if (ip6->ipv6[i] != 0) {
748  is_ipv4 = false;
749  break;
750  }
751  }
752  if (is_ipv4) {
753  PrintInet(AF_INET, ip6->ipv6, str, sizeof(str));
754  } else {
755  PrintInet(AF_INET6, ip6->ipv6, str, sizeof(str));
756  }
757  strlcat(out, str, out_size);
758  strlcat(out, "\n", out_size);
759  return (int)strlen(out);
760 }
761 
762 void DatasetsSave(void)
763 {
764  SCLogDebug("saving datasets: %p", sets);
766  Dataset *set = sets;
767  while (set) {
768  if (strlen(set->save) == 0)
769  goto next;
770 
771  FILE *fp = fopen(set->save, "w");
772  if (fp == NULL)
773  goto next;
774 
775  SCLogDebug("dumping %s to %s", set->name, set->save);
776 
777  switch (set->type) {
778  case DATASET_TYPE_STRING:
779  THashWalk(set->hash, StringAsBase64, SaveCallback, fp);
780  break;
781  case DATASET_TYPE_MD5:
782  THashWalk(set->hash, Md5AsAscii, SaveCallback, fp);
783  break;
784  case DATASET_TYPE_SHA256:
785  THashWalk(set->hash, Sha256AsAscii, SaveCallback, fp);
786  break;
787  case DATASET_TYPE_IPV4:
788  THashWalk(set->hash, IPv4AsAscii, SaveCallback, fp);
789  break;
790  case DATASET_TYPE_IPV6:
791  THashWalk(set->hash, IPv6AsAscii, SaveCallback, fp);
792  break;
793  }
794 
795  fclose(fp);
796 
797  next:
798  set = set->next;
799  }
801 }
802 
803 static int DatasetLookupString(Dataset *set, const uint8_t *data, const uint32_t data_len)
804 {
805  if (set == NULL)
806  return -1;
807 
808  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep.value = 0 };
809  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
810  if (rdata) {
811  DatasetUnlockData(rdata);
812  return 1;
813  }
814  return 0;
815 }
816 
817 static DataRepResultType DatasetLookupStringwRep(Dataset *set,
818  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
819 {
820  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
821 
822  if (set == NULL)
823  return rrep;
824 
825  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = *rep };
826  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
827  if (rdata) {
828  StringType *found = rdata->data;
829  rrep.found = true;
830  rrep.rep = found->rep;
831  DatasetUnlockData(rdata);
832  return rrep;
833  }
834  return rrep;
835 }
836 
837 static int DatasetLookupIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
838 {
839  if (set == NULL)
840  return -1;
841 
842  if (data_len != 4)
843  return -1;
844 
845  IPv4Type lookup = { .rep.value = 0 };
846  memcpy(lookup.ipv4, data, 4);
847  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
848  if (rdata) {
849  DatasetUnlockData(rdata);
850  return 1;
851  }
852  return 0;
853 }
854 
855 static DataRepResultType DatasetLookupIPv4wRep(
856  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
857 {
858  DataRepResultType rrep = { .found = false, .rep = { .value = 0 } };
859 
860  if (set == NULL)
861  return rrep;
862 
863  if (data_len != 4)
864  return rrep;
865 
866  IPv4Type lookup = { .rep.value = 0 };
867  memcpy(lookup.ipv4, data, data_len);
868  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
869  if (rdata) {
870  IPv4Type *found = rdata->data;
871  rrep.found = true;
872  rrep.rep = found->rep;
873  DatasetUnlockData(rdata);
874  return rrep;
875  }
876  return rrep;
877 }
878 
879 static int DatasetLookupIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
880 {
881  if (set == NULL)
882  return -1;
883 
884  if (data_len != 16 && data_len != 4)
885  return -1;
886 
887  IPv6Type lookup = { .rep.value = 0 };
888  memcpy(lookup.ipv6, data, data_len);
889  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
890  if (rdata) {
891  DatasetUnlockData(rdata);
892  return 1;
893  }
894  return 0;
895 }
896 
897 static DataRepResultType DatasetLookupIPv6wRep(
898  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
899 {
900  DataRepResultType rrep = { .found = false, .rep = { .value = 0 } };
901 
902  if (set == NULL)
903  return rrep;
904 
905  if (data_len != 16 && data_len != 4)
906  return rrep;
907 
908  IPv6Type lookup = { .rep.value = 0 };
909  memcpy(lookup.ipv6, data, data_len);
910  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
911  if (rdata) {
912  IPv6Type *found = rdata->data;
913  rrep.found = true;
914  rrep.rep = found->rep;
915  DatasetUnlockData(rdata);
916  return rrep;
917  }
918  return rrep;
919 }
920 
921 static int DatasetLookupMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
922 {
923  if (set == NULL)
924  return -1;
925 
926  if (data_len != 16)
927  return -1;
928 
929  Md5Type lookup = { .rep.value = 0 };
930  memcpy(lookup.md5, data, data_len);
931  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
932  if (rdata) {
933  DatasetUnlockData(rdata);
934  return 1;
935  }
936  return 0;
937 }
938 
939 static DataRepResultType DatasetLookupMd5wRep(Dataset *set,
940  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
941 {
942  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
943 
944  if (set == NULL)
945  return rrep;
946 
947  if (data_len != 16)
948  return rrep;
949 
950  Md5Type lookup = { .rep.value = 0};
951  memcpy(lookup.md5, data, data_len);
952  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
953  if (rdata) {
954  Md5Type *found = rdata->data;
955  rrep.found = true;
956  rrep.rep = found->rep;
957  DatasetUnlockData(rdata);
958  return rrep;
959  }
960  return rrep;
961 }
962 
963 static int DatasetLookupSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
964 {
965  if (set == NULL)
966  return -1;
967 
968  if (data_len != 32)
969  return -1;
970 
971  Sha256Type lookup = { .rep.value = 0 };
972  memcpy(lookup.sha256, data, data_len);
973  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
974  if (rdata) {
975  DatasetUnlockData(rdata);
976  return 1;
977  }
978  return 0;
979 }
980 
981 static DataRepResultType DatasetLookupSha256wRep(Dataset *set,
982  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
983 {
984  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
985 
986  if (set == NULL)
987  return rrep;
988 
989  if (data_len != 32)
990  return rrep;
991 
992  Sha256Type lookup = { .rep.value = 0 };
993  memcpy(lookup.sha256, data, data_len);
994  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
995  if (rdata) {
996  Sha256Type *found = rdata->data;
997  rrep.found = true;
998  rrep.rep = found->rep;
999  DatasetUnlockData(rdata);
1000  return rrep;
1001  }
1002  return rrep;
1003 }
1004 
1005 /**
1006  * \brief see if \a data is part of the set
1007  * \param set dataset
1008  * \param data data to look up
1009  * \param data_len length in bytes of \a data
1010  * \retval -1 error
1011  * \retval 0 not found
1012  * \retval 1 found
1013  */
1014 int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
1015 {
1016  if (set == NULL)
1017  return -1;
1018 
1019  switch (set->type) {
1020  case DATASET_TYPE_STRING:
1021  return DatasetLookupString(set, data, data_len);
1022  case DATASET_TYPE_MD5:
1023  return DatasetLookupMd5(set, data, data_len);
1024  case DATASET_TYPE_SHA256:
1025  return DatasetLookupSha256(set, data, data_len);
1026  case DATASET_TYPE_IPV4:
1027  return DatasetLookupIPv4(set, data, data_len);
1028  case DATASET_TYPE_IPV6:
1029  return DatasetLookupIPv6(set, data, data_len);
1030  }
1031  return -1;
1032 }
1033 
1034 DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
1035  const DataRepType *rep)
1036 {
1037  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
1038  if (set == NULL)
1039  return rrep;
1040 
1041  switch (set->type) {
1042  case DATASET_TYPE_STRING:
1043  return DatasetLookupStringwRep(set, data, data_len, rep);
1044  case DATASET_TYPE_MD5:
1045  return DatasetLookupMd5wRep(set, data, data_len, rep);
1046  case DATASET_TYPE_SHA256:
1047  return DatasetLookupSha256wRep(set, data, data_len, rep);
1048  case DATASET_TYPE_IPV4:
1049  return DatasetLookupIPv4wRep(set, data, data_len, rep);
1050  case DATASET_TYPE_IPV6:
1051  return DatasetLookupIPv6wRep(set, data, data_len, rep);
1052  }
1053  return rrep;
1054 }
1055 
1056 /**
1057  * \retval 1 data was added to the hash
1058  * \retval 0 data was not added to the hash as it is already there
1059  * \retval -1 failed to add data to the hash
1060  */
1061 static int DatasetAddString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1062 {
1063  if (set == NULL)
1064  return -1;
1065 
1066  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1067  .rep.value = 0 };
1068  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1069  if (res.data) {
1070  DatasetUnlockData(res.data);
1071  return res.is_new ? 1 : 0;
1072  }
1073  return -1;
1074 }
1075 
1076 /**
1077  * \retval 1 data was added to the hash
1078  * \retval 0 data was not added to the hash as it is already there
1079  * \retval -1 failed to add data to the hash
1080  */
1081 static int DatasetAddStringwRep(
1082  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1083 {
1084  if (set == NULL)
1085  return -1;
1086 
1087  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1088  .rep = *rep };
1089  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1090  if (res.data) {
1091  DatasetUnlockData(res.data);
1092  return res.is_new ? 1 : 0;
1093  }
1094  return -1;
1095 }
1096 
1097 static int DatasetAddIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
1098 {
1099  if (set == NULL) {
1100  return -1;
1101  }
1102 
1103  if (data_len < 4) {
1104  return -2;
1105  }
1106 
1107  IPv4Type lookup = { .rep.value = 0 };
1108  memcpy(lookup.ipv4, data, 4);
1109  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1110  if (res.data) {
1111  DatasetUnlockData(res.data);
1112  return res.is_new ? 1 : 0;
1113  }
1114  return -1;
1115 }
1116 
1117 static int DatasetAddIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
1118 {
1119  if (set == NULL) {
1120  return -1;
1121  }
1122 
1123  if (data_len != 16) {
1124  return -2;
1125  }
1126 
1127  IPv6Type lookup = { .rep.value = 0 };
1128  memcpy(lookup.ipv6, data, 16);
1129  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1130  if (res.data) {
1131  DatasetUnlockData(res.data);
1132  return res.is_new ? 1 : 0;
1133  }
1134  return -1;
1135 }
1136 
1137 static int DatasetAddIPv4wRep(
1138  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1139 {
1140  if (set == NULL)
1141  return -1;
1142 
1143  if (data_len < 4)
1144  return -2;
1145 
1146  IPv4Type lookup = { .rep = *rep };
1147  memcpy(lookup.ipv4, data, 4);
1148  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1149  if (res.data) {
1150  DatasetUnlockData(res.data);
1151  return res.is_new ? 1 : 0;
1152  }
1153  return -1;
1154 }
1155 
1156 static int DatasetAddIPv6wRep(
1157  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1158 {
1159  if (set == NULL)
1160  return -1;
1161 
1162  if (data_len != 16)
1163  return -2;
1164 
1165  IPv6Type lookup = { .rep = *rep };
1166  memcpy(lookup.ipv6, data, 16);
1167  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1168  if (res.data) {
1169  DatasetUnlockData(res.data);
1170  return res.is_new ? 1 : 0;
1171  }
1172  return -1;
1173 }
1174 
1175 static int DatasetAddMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1176 {
1177  if (set == NULL)
1178  return -1;
1179 
1180  if (data_len != 16)
1181  return -2;
1182 
1183  Md5Type lookup = { .rep.value = 0 };
1184  memcpy(lookup.md5, data, 16);
1185  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1186  if (res.data) {
1187  DatasetUnlockData(res.data);
1188  return res.is_new ? 1 : 0;
1189  }
1190  return -1;
1191 }
1192 
1193 static int DatasetAddMd5wRep(
1194  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1195 {
1196  if (set == NULL)
1197  return -1;
1198 
1199  if (data_len != 16)
1200  return -2;
1201 
1202  Md5Type lookup = { .rep = *rep };
1203  memcpy(lookup.md5, data, 16);
1204  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1205  if (res.data) {
1206  DatasetUnlockData(res.data);
1207  return res.is_new ? 1 : 0;
1208  }
1209  return -1;
1210 }
1211 
1212 static int DatasetAddSha256wRep(
1213  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
1214 {
1215  if (set == NULL)
1216  return -1;
1217 
1218  if (data_len != 32)
1219  return -2;
1220 
1221  Sha256Type lookup = { .rep = *rep };
1222  memcpy(lookup.sha256, data, 32);
1223  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1224  if (res.data) {
1225  DatasetUnlockData(res.data);
1226  return res.is_new ? 1 : 0;
1227  }
1228  return -1;
1229 }
1230 
1231 static int DatasetAddSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1232 {
1233  if (set == NULL)
1234  return -1;
1235 
1236  if (data_len != 32)
1237  return -2;
1238 
1239  Sha256Type lookup = { .rep.value = 0 };
1240  memcpy(lookup.sha256, data, 32);
1241  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
1242  if (res.data) {
1243  DatasetUnlockData(res.data);
1244  return res.is_new ? 1 : 0;
1245  }
1246  return -1;
1247 }
1248 
1249 int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
1250 {
1251  if (set == NULL)
1252  return -1;
1253 
1254  switch (set->type) {
1255  case DATASET_TYPE_STRING:
1256  return DatasetAddString(set, data, data_len);
1257  case DATASET_TYPE_MD5:
1258  return DatasetAddMd5(set, data, data_len);
1259  case DATASET_TYPE_SHA256:
1260  return DatasetAddSha256(set, data, data_len);
1261  case DATASET_TYPE_IPV4:
1262  return DatasetAddIPv4(set, data, data_len);
1263  case DATASET_TYPE_IPV6:
1264  return DatasetAddIPv6(set, data, data_len);
1265  }
1266  return -1;
1267 }
1268 
1269 int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep)
1270 {
1271  if (set == NULL)
1272  return -1;
1273 
1274  switch (set->type) {
1275  case DATASET_TYPE_STRING:
1276  return DatasetAddStringwRep(set, data, data_len, rep);
1277  case DATASET_TYPE_MD5:
1278  return DatasetAddMd5wRep(set, data, data_len, rep);
1279  case DATASET_TYPE_SHA256:
1280  return DatasetAddSha256wRep(set, data, data_len, rep);
1281  case DATASET_TYPE_IPV4:
1282  return DatasetAddIPv4wRep(set, data, data_len, rep);
1283  case DATASET_TYPE_IPV6:
1284  return DatasetAddIPv6wRep(set, data, data_len, rep);
1285  }
1286  return -1;
1287 }
1288 
1289 typedef int (*DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len);
1290 
1291 static int DatasetOpSerialized(Dataset *set, const char *string, DatasetOpFunc DatasetOpString,
1292  DatasetOpFunc DatasetOpMd5, DatasetOpFunc DatasetOpSha256, DatasetOpFunc DatasetOpIPv4,
1293  DatasetOpFunc DatasetOpIPv6)
1294 {
1295  if (set == NULL)
1296  return -1;
1297  if (strlen(string) == 0)
1298  return -1;
1299 
1300  switch (set->type) {
1301  case DATASET_TYPE_STRING: {
1302  uint32_t decoded_size = SCBase64DecodeBufferSize(strlen(string));
1303  uint8_t decoded[decoded_size];
1304  uint32_t num_decoded = SCBase64Decode(
1305  (const uint8_t *)string, strlen(string), SCBase64ModeStrict, decoded);
1306  if (num_decoded == 0) {
1307  return -2;
1308  }
1309 
1310  return DatasetOpString(set, decoded, num_decoded);
1311  }
1312  case DATASET_TYPE_MD5: {
1313  if (strlen(string) != 32)
1314  return -2;
1315  uint8_t hash[16];
1316  if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0)
1317  return -2;
1318  return DatasetOpMd5(set, hash, 16);
1319  }
1320  case DATASET_TYPE_SHA256: {
1321  if (strlen(string) != 64)
1322  return -2;
1323  uint8_t hash[32];
1324  if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0)
1325  return -2;
1326  return DatasetOpSha256(set, hash, 32);
1327  }
1328  case DATASET_TYPE_IPV4: {
1329  struct in_addr in;
1330  if (inet_pton(AF_INET, string, &in) != 1)
1331  return -2;
1332  return DatasetOpIPv4(set, (uint8_t *)&in.s_addr, 4);
1333  }
1334  case DATASET_TYPE_IPV6: {
1335  struct in6_addr in6;
1336  if (ParseIpv6String(set, string, &in6) != 0) {
1337  SCLogError("Dataset failed to import %s as IPv6", string);
1338  return -2;
1339  }
1340  return DatasetOpIPv6(set, (uint8_t *)&in6.s6_addr, 16);
1341  }
1342  }
1343  return -1;
1344 }
1345 
1346 /** \brief add serialized data to set
1347  * \retval int 1 added
1348  * \retval int 0 already in hash
1349  * \retval int -1 API error (not added)
1350  * \retval int -2 DATA error
1351  */
1352 int DatasetAddSerialized(Dataset *set, const char *string)
1353 {
1354  return DatasetOpSerialized(set, string, DatasetAddString, DatasetAddMd5, DatasetAddSha256,
1355  DatasetAddIPv4, DatasetAddIPv6);
1356 }
1357 
1358 /** \brief add serialized data to set
1359  * \retval int 1 added
1360  * \retval int 0 already in hash
1361  * \retval int -1 API error (not added)
1362  * \retval int -2 DATA error
1363  */
1364 int DatasetLookupSerialized(Dataset *set, const char *string)
1365 {
1366  return DatasetOpSerialized(set, string, DatasetLookupString, DatasetLookupMd5,
1367  DatasetLookupSha256, DatasetLookupIPv4, DatasetLookupIPv6);
1368 }
1369 
1370 /**
1371  * \retval 1 data was removed from the hash
1372  * \retval 0 data not removed (busy)
1373  * \retval -1 data not found
1374  */
1375 static int DatasetRemoveString(Dataset *set, const uint8_t *data, const uint32_t data_len)
1376 {
1377  if (set == NULL)
1378  return -1;
1379 
1380  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
1381  .rep.value = 0 };
1382  return THashRemoveFromHash(set->hash, &lookup);
1383 }
1384 
1385 static int DatasetRemoveIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
1386 {
1387  if (set == NULL)
1388  return -1;
1389 
1390  if (data_len != 4)
1391  return -2;
1392 
1393  IPv4Type lookup = { .rep.value = 0 };
1394  memcpy(lookup.ipv4, data, 4);
1395  return THashRemoveFromHash(set->hash, &lookup);
1396 }
1397 
1398 static int DatasetRemoveIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
1399 {
1400  if (set == NULL)
1401  return -1;
1402 
1403  if (data_len != 16)
1404  return -2;
1405 
1406  IPv6Type lookup = { .rep.value = 0 };
1407  memcpy(lookup.ipv6, data, 16);
1408  return THashRemoveFromHash(set->hash, &lookup);
1409 }
1410 
1411 static int DatasetRemoveMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
1412 {
1413  if (set == NULL)
1414  return -1;
1415 
1416  if (data_len != 16)
1417  return -2;
1418 
1419  Md5Type lookup = { .rep.value = 0 };
1420  memcpy(lookup.md5, data, 16);
1421  return THashRemoveFromHash(set->hash, &lookup);
1422 }
1423 
1424 static int DatasetRemoveSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
1425 {
1426  if (set == NULL)
1427  return -1;
1428 
1429  if (data_len != 32)
1430  return -2;
1431 
1432  Sha256Type lookup = { .rep.value = 0 };
1433  memcpy(lookup.sha256, data, 32);
1434  return THashRemoveFromHash(set->hash, &lookup);
1435 }
1436 
1437 /** \brief remove serialized data from set
1438  * \retval int 1 removed
1439  * \retval int 0 found but busy (not removed)
1440  * \retval int -1 API error (not removed)
1441  * \retval int -2 DATA error */
1442 int DatasetRemoveSerialized(Dataset *set, const char *string)
1443 {
1444  return DatasetOpSerialized(set, string, DatasetRemoveString, DatasetRemoveMd5,
1445  DatasetRemoveSha256, DatasetRemoveIPv4, DatasetRemoveIPv6);
1446 }
1447 
1448 int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len)
1449 {
1450  if (set == NULL)
1451  return -1;
1452 
1453  switch (set->type) {
1454  case DATASET_TYPE_STRING:
1455  return DatasetRemoveString(set, data, data_len);
1456  case DATASET_TYPE_MD5:
1457  return DatasetRemoveMd5(set, data, data_len);
1458  case DATASET_TYPE_SHA256:
1459  return DatasetRemoveSha256(set, data, data_len);
1460  case DATASET_TYPE_IPV4:
1461  return DatasetRemoveIPv4(set, data, data_len);
1462  case DATASET_TYPE_IPV6:
1463  return DatasetRemoveIPv6(set, data, data_len);
1464  }
1465  return -1;
1466 }
util-byte.h
sets_lock
SCMutex sets_lock
Definition: datasets.c:43
StringType::rep
DataRepType rep
Definition: datasets-string.h:31
len
uint8_t len
Definition: app-layer-dnp3.h:2
datasets-string.h
DataRepResultType::rep
DataRepType rep
Definition: datasets-reputation.h:31
THashDataGetResult::data
THashData * data
Definition: util-thash.h:192
datasets-md5.h
Dataset::name
char name[DATASET_NAME_MAX_LEN+1]
Definition: datasets.h:42
ConfNode_::val
char * val
Definition: conf.h:34
Dataset::id
uint32_t id
Definition: datasets.h:44
Dataset::save
char save[PATH_MAX]
Definition: datasets.h:50
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:269
ParseSizeStringU64
int ParseSizeStringU64(const char *size, uint64_t *res)
Definition: util-misc.c:190
next
struct HtpBodyChunk_ * next
Definition: app-layer-htp.h:0
datasets-sha256.h
IPv6Compare
bool IPv6Compare(void *a, void *b)
Definition: datasets-ipv6.c:41
THashRemoveFromHash
int THashRemoveFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:875
TYPE_STATE
@ TYPE_STATE
Definition: datasets.c:273
Md5Type
Definition: datasets-md5.h:29
Dataset::hash
THashTableContext * hash
Definition: datasets.h:47
ConfGetNode
ConfNode * ConfGetNode(const char *name)
Get a ConfNode by name.
Definition: conf.c:181
ctx
struct Thresholds ctx
IPv4Hash
uint32_t IPv4Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv4.c:49
Sha256Type::sha256
uint8_t sha256[32]
Definition: datasets-sha256.h:30
Md5Type::rep
DataRepType rep
Definition: datasets-md5.h:31
DataRepResultType::found
bool found
Definition: datasets-reputation.h:30
PrintHexString
void PrintHexString(char *str, size_t size, uint8_t *buf, size_t buf_len)
Definition: util-print.c:255
Dataset::type
enum DatasetTypes type
Definition: datasets.h:43
TAILQ_FOREACH
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:252
THashConsolidateMemcap
void THashConsolidateMemcap(THashTableContext *ctx)
Definition: util-thash.c:353
SCMutexLock
#define SCMutexLock(mut)
Definition: threads-debug.h:117
rust.h
DATASET_TYPE_SHA256
@ DATASET_TYPE_SHA256
Definition: datasets.h:35
Sha256Type::rep
DataRepType rep
Definition: datasets-sha256.h:31
SCMUTEX_INITIALIZER
#define SCMUTEX_INITIALIZER
Definition: threads-debug.h:121
datasets-reputation.h
DatasetAddwRep
int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep)
Definition: datasets.c:1269
ConfigGetDataDirectory
const char * ConfigGetDataDirectory(void)
Definition: util-conf.c:80
Md5Type::md5
uint8_t md5[16]
Definition: datasets-md5.h:30
DATASET_TYPE_IPV6
@ DATASET_TYPE_IPV6
Definition: datasets.h:37
Md5StrCompare
bool Md5StrCompare(void *a, void *b)
Definition: datasets-md5.c:42
DatasetLookupSerialized
int DatasetLookupSerialized(Dataset *set, const char *string)
add serialized data to set
Definition: datasets.c:1364
strlcpy
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: util-strlcpyu.c:43
DataRepResultType
Definition: datasets-reputation.h:29
DatasetGet
Dataset * DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load, uint64_t memcap, uint32_t hashsize)
Definition: datasets.c:324
ConfGet
int ConfGet(const char *name, const char **vptr)
Retrieve the value of a configuration node.
Definition: conf.c:335
StringSet
int StringSet(void *dst, void *src)
Definition: datasets-string.c:60
DatasetRemove
int DatasetRemove(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1448
datasets.h
IPv6Set
int IPv6Set(void *dst, void *src)
Definition: datasets-ipv6.c:32
util-debug.h
TYPE_LOAD
@ TYPE_LOAD
Definition: datasets.c:274
DatasetAdd
int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1249
strlcat
size_t strlcat(char *, const char *src, size_t siz)
Definition: util-strlcatu.c:45
StringAsBase64
int StringAsBase64(const void *s, char *out, size_t out_size)
Definition: datasets-string.c:46
SCMutexUnlock
#define SCMutexUnlock(mut)
Definition: threads-debug.h:119
datasets-ipv6.h
IPv6Type::ipv6
uint8_t ipv6[16]
Definition: datasets-ipv6.h:30
DATASET_TYPE_NOTSET
#define DATASET_TYPE_NOTSET
Definition: datasets.h:32
IPv6Type::rep
DataRepType rep
Definition: datasets-ipv6.h:31
util-print.h
DatasetPostReloadCleanup
void DatasetPostReloadCleanup(void)
Definition: datasets.c:503
PrintInet
const char * PrintInet(int af, const void *src, char *dst, socklen_t size)
Definition: util-print.c:231
DatasetOpFunc
int(* DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:1289
datasets-ipv4.h
SCLogWarning
#define SCLogWarning(...)
Macro used to log WARNING messages.
Definition: util-debug.h:249
StringGetLength
uint32_t StringGetLength(void *s)
Definition: datasets-string.c:93
Sha256StrSet
int Sha256StrSet(void *dst, void *src)
Definition: datasets-sha256.c:31
DatasetsDestroy
void DatasetsDestroy(void)
Definition: datasets.c:684
Md5StrHash
uint32_t Md5StrHash(uint32_t hash_seed, void *s)
Definition: datasets-md5.c:50
THashDataGetResult
Definition: util-thash.h:191
StringType
Definition: datasets-string.h:29
IPv4Set
int IPv4Set(void *dst, void *src)
Definition: datasets-ipv4.c:32
type
uint16_t type
Definition: decode-vlan.c:106
DatasetsSave
void DatasetsSave(void)
Definition: datasets.c:762
conf.h
IPv6Type
Definition: datasets-ipv6.h:29
name
const char * name
Definition: tm-threads.c:2081
DatasetLookup
int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
see if data is part of the set
Definition: datasets.c:1014
DATASET_TYPE_IPV4
@ DATASET_TYPE_IPV4
Definition: datasets.h:36
StringType::ptr
uint8_t * ptr
Definition: datasets-string.h:32
DatasetRemoveSerialized
int DatasetRemoveSerialized(Dataset *set, const char *string)
remove serialized data from set
Definition: datasets.c:1442
g_system
bool g_system
Definition: suricata.c:188
ConfNodeLookupChild
ConfNode * ConfNodeLookupChild(const ConfNode *node, const char *name)
Lookup a child configuration node by name.
Definition: conf.c:781
THashShutdown
void THashShutdown(THashTableContext *ctx)
shutdown the flow engine
Definition: util-thash.c:362
DatasetTypes
DatasetTypes
Definition: datasets.h:31
Dataset::next
struct Dataset * next
Definition: datasets.h:52
THashData_::data
void * data
Definition: util-thash.h:92
util-conf.h
Sha256Type
Definition: datasets-sha256.h:29
Sha256StrFree
void Sha256StrFree(void *s)
Definition: datasets-sha256.c:55
THashData_
Definition: util-thash.h:85
IPv4Free
void IPv4Free(void *s)
Definition: datasets-ipv4.c:56
suricata-common.h
util-path.h
FatalErrorOnInit
#define FatalErrorOnInit(...)
Fatal error IF we're starting up, and configured to consider errors to be fatal errors.
Definition: util-debug.h:511
DATASET_NAME_MAX_LEN
#define DATASET_NAME_MAX_LEN
Definition: datasets.h:40
ConfNode_::name
char * name
Definition: conf.h:33
PathIsAbsolute
int PathIsAbsolute(const char *path)
Check if a path is absolute.
Definition: util-path.c:44
Md5StrSet
int Md5StrSet(void *dst, void *src)
Definition: datasets-md5.c:33
StringCompare
bool StringCompare(void *a, void *b)
Definition: datasets-string.c:76
THashGetFromHash
struct THashDataGetResult THashGetFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:622
hashsize
#define hashsize(n)
Definition: util-hash-lookup3.h:40
THashLookupFromHash
THashData * THashLookupFromHash(THashTableContext *ctx, void *data)
look up data in the hash
Definition: util-thash.c:732
IPv4Type
Definition: datasets-ipv4.h:29
ParseSizeStringU32
int ParseSizeStringU32(const char *size, uint32_t *res)
Definition: util-misc.c:173
THashDecrUsecnt
#define THashDecrUsecnt(h)
Definition: util-thash.h:170
IPv4Compare
bool IPv4Compare(void *a, void *b)
Definition: datasets-ipv4.c:41
DatasetFind
Dataset * DatasetFind(const char *name, enum DatasetTypes type)
look for set by name without creating it
Definition: datasets.c:310
SCLogConfig
struct SCLogConfig_ SCLogConfig
Holds the config state used by the logging api.
IPv6Hash
uint32_t IPv6Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv6.c:49
DatasetsInit
int DatasetsInit(void)
Definition: datasets.c:550
str
#define str(s)
Definition: suricata-common.h:300
DatasetGetTypeFromString
enum DatasetTypes DatasetGetTypeFromString(const char *s)
Definition: datasets.c:57
SCLogError
#define SCLogError(...)
Macro used to log ERROR messages.
Definition: util-debug.h:261
THashWalk
int THashWalk(THashTableContext *ctx, THashFormatFunc FormatterFunc, THashOutputFunc OutputterFunc, void *output_ctx)
Walk the hash.
Definition: util-thash.c:396
SCFree
#define SCFree(p)
Definition: util-mem.h:61
ConfNode_
Definition: conf.h:32
Dataset::hidden
bool hidden
Definition: datasets.h:46
DatasetReload
void DatasetReload(void)
Definition: datasets.c:481
DatasetGetPathType
DatasetGetPathType
Definition: datasets.c:272
Sha256StrCompare
bool Sha256StrCompare(void *a, void *b)
Definition: datasets-sha256.c:40
StringHash
uint32_t StringHash(uint32_t hash_seed, void *s)
Definition: datasets-string.c:87
DATASET_TYPE_MD5
@ DATASET_TYPE_MD5
Definition: datasets.h:34
DATASET_TYPE_STRING
@ DATASET_TYPE_STRING
Definition: datasets.h:33
THashDataGetResult::is_new
bool is_new
Definition: util-thash.h:193
IPv6Free
void IPv6Free(void *s)
Definition: datasets-ipv6.c:56
suricata.h
THashInit
THashTableContext * THashInit(const char *cnf_prefix, uint32_t data_size, int(*DataSet)(void *, void *), void(*DataFree)(void *), uint32_t(*DataHash)(uint32_t, void *), bool(*DataCompare)(void *, void *), bool(*DataExpired)(void *, SCTime_t), uint32_t(*DataSize)(void *), bool reset_memcap, uint64_t memcap, uint32_t hashsize)
Definition: util-thash.c:305
IPv4Type::ipv4
uint8_t ipv4[4]
Definition: datasets-ipv4.h:30
DatasetAddSerialized
int DatasetAddSerialized(Dataset *set, const char *string)
add serialized data to set
Definition: datasets.c:1352
Dataset
Definition: datasets.h:41
Dataset::from_yaml
bool from_yaml
Definition: datasets.h:45
IPv4Type::rep
DataRepType rep
Definition: datasets-ipv4.h:31
SC_ATOMIC_GET
#define SC_ATOMIC_GET(name)
Get the value from the atomic variable.
Definition: util-atomic.h:375
util-misc.h
util-thash.h
Dataset::load
char load[PATH_MAX]
Definition: datasets.h:49
SCCalloc
#define SCCalloc(nm, sz)
Definition: util-mem.h:53
Sha256StrHash
uint32_t Sha256StrHash(uint32_t hash_seed, void *s)
Definition: datasets-sha256.c:48
Md5StrFree
void Md5StrFree(void *s)
Definition: datasets-md5.c:57
SCMutex
#define SCMutex
Definition: threads-debug.h:114
StringFree
void StringFree(void *s)
Definition: datasets-string.c:100
DatasetLookupwRep
DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
Definition: datasets.c:1034