suricata
datasets.c
Go to the documentation of this file.
1 /* Copyright (C) 2017-2019 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Victor Julien <victor@inliniac.net>
22  */
23 
24 #include "suricata-common.h"
25 #include "conf.h"
26 #include "datasets.h"
27 #include "datasets-string.h"
28 #include "datasets-md5.h"
29 #include "datasets-sha256.h"
30 #include "datasets-reputation.h"
31 #include "util-thash.h"
32 #include "util-print.h"
33 #include "util-crypt.h" // encode base64
34 #include "util-base64.h" // decode base64
35 
37 static Dataset *sets = NULL;
38 static uint32_t set_ids = 0;
39 
40 static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
41  DataRepType *rep);
42 
44 {
45  if (strcasecmp("md5", s) == 0)
46  return DATASET_TYPE_MD5;
47  if (strcasecmp("sha256", s) == 0)
48  return DATASET_TYPE_SHA256;
49  if (strcasecmp("string", s) == 0)
50  return DATASET_TYPE_STRING;
51  return DATASET_TYPE_NOTSET;
52 }
53 
54 static Dataset *DatasetAlloc(const char *name)
55 {
56  Dataset *set = SCCalloc(1, sizeof(*set));
57  if (set) {
58  set->id = set_ids++;
59  }
60  return set;
61 }
62 
63 static Dataset *DatasetSearchByName(const char *name)
64 {
65  Dataset *set = sets;
66  while (set) {
67  if (strcasecmp(name, set->name) == 0) {
68  return set;
69  }
70  set = set->next;
71  }
72  return NULL;
73 }
74 
/** \brief numeric value of one ASCII hex digit, or -1 if \a c is not a hex digit */
static int HexNibble(uint8_t c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}

/**
 * \brief convert a hex string into raw bytes
 *
 * Every input character must be a hex digit (upper or lower case). Signs,
 * whitespace and other characters are rejected instead of being decoded
 * as 0 or as a partial number.
 *
 * \param in hex characters, does not need to be NUL terminated
 * \param ins number of characters in \a in, must be even
 * \param out destination buffer
 * \param outs size of \a out, must be exactly ins / 2
 *
 * \retval 0 success, \a out holds the decoded bytes
 * \retval -1 bad length or non-hex character; \a out is not modified
 */
static int HexToRaw(const uint8_t *in, size_t ins, uint8_t *out, size_t outs)
{
    if (ins % 2 != 0)
        return -1;
    if (outs != ins / 2)
        return -1;

    /* validate everything first so that out is only written when the
     * whole input is known to be good */
    for (size_t i = 0; i < ins; i++) {
        if (HexNibble(in[i]) < 0)
            return -1;
    }

    for (size_t x = 0; x < outs; x++) {
        const int hi = HexNibble(in[2 * x]);
        const int lo = HexNibble(in[2 * x + 1]);
        out[x] = (uint8_t)((hi << 4) | lo);
    }
    return 0;
}
101 
102 static int ParseRepLine(const char *in, size_t ins, DataRepType *rep_out)
103 {
104  SCLogDebug("in '%s'", in);
105  char raw[ins + 1];
106  memcpy(raw, in, ins);
107  raw[ins] = '\0';
108  char *line = raw;
109 
110  char *ptrs[1] = {NULL};
111  int idx = 0;
112 
113  size_t i = 0;
114  while (i < ins + 1) {
115  if (line[i] == ',' || line[i] == '\n' || line[i] == '\0') {
116  line[i] = '\0';
117  SCLogDebug("line '%s'", line);
118 
119  ptrs[idx] = line;
120  idx++;
121 
122  if (idx == 1)
123  break;
124  } else {
125  i++;
126  }
127  }
128 
129  if (idx != 1) {
130  SCLogDebug("idx %d", idx);
131  return -1;
132  }
133 
134  int v = atoi(ptrs[0]);
135  if (v < 0 || v > USHRT_MAX) {
136  SCLogDebug("v %d", v);
137  return -1;
138  }
139  SCLogDebug("v %d raw %s", v, ptrs[0]);
140 
141  rep_out->value = v;
142  return 0;
143 }
144 
145 static int DatasetLoadMd5(Dataset *set)
146 {
147  if (strlen(set->load) == 0)
148  return 0;
149 
150  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
151  const char *fopen_mode = "r";
152  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
153  fopen_mode = "a+";
154  }
155 
156  FILE *fp = fopen(set->load, fopen_mode);
157  if (fp == NULL) {
158  SCLogError(SC_ERR_DATASET, "fopen '%s' failed: %s",
159  set->load, strerror(errno));
160  return -1;
161  }
162 
163  uint32_t cnt = 0;
164  char line[1024];
165  while (fgets(line, (int)sizeof(line), fp) != NULL) {
166  /* straight black/white list */
167  if (strlen(line) == 33) {
168  line[strlen(line) - 1] = '\0';
169  SCLogDebug("line: '%s'", line);
170 
171  uint8_t hash[16];
172  if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0)
173  FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
174  set->name, set->load);
175 
176  if (DatasetAdd(set, (const uint8_t *)hash, 16) < 0)
177  FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
178  set->name, set->load);
179  cnt++;
180 
181  /* list with rep data */
182  } else if (strlen(line) > 33 && line[32] == ',') {
183  line[strlen(line) - 1] = '\0';
184  SCLogDebug("MD5 with REP line: '%s'", line);
185 
186  uint8_t hash[16];
187  if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0)
188  FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
189  set->name, set->load);
190 
191  DataRepType rep = { .value = 0};
192  if (ParseRepLine(line+33, strlen(line)-33, &rep) < 0)
193  FatalError(SC_ERR_FATAL, "bad rep for dataset %s/%s",
194  set->name, set->load);
195 
196  SCLogDebug("rep v:%u", rep.value);
197  if (DatasetAddwRep(set, hash, 16, &rep) < 0)
198  FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
199  set->name, set->load);
200 
201  cnt++;
202  }
203  else {
204  FatalError(SC_ERR_FATAL, "MD5 bad line len %u: '%s'",
205  (uint32_t)strlen(line), line);
206  }
207  }
208 
209  fclose(fp);
210  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
211  return 0;
212 }
213 
214 static int DatasetLoadSha256(Dataset *set)
215 {
216  if (strlen(set->load) == 0)
217  return 0;
218 
219  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
220  const char *fopen_mode = "r";
221  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
222  fopen_mode = "a+";
223  }
224 
225  FILE *fp = fopen(set->load, fopen_mode);
226  if (fp == NULL) {
227  SCLogError(SC_ERR_DATASET, "fopen '%s' failed: %s",
228  set->load, strerror(errno));
229  return -1;
230  }
231 
232  uint32_t cnt = 0;
233  char line[1024];
234  while (fgets(line, (int)sizeof(line), fp) != NULL) {
235  /* straight black/white list */
236  if (strlen(line) == 65) {
237  line[strlen(line) - 1] = '\0';
238  SCLogDebug("line: '%s'", line);
239 
240  uint8_t hash[32];
241  if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0)
242  FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
243  set->name, set->load);
244 
245  if (DatasetAdd(set, (const uint8_t *)hash, (uint32_t)32) < 0)
246  FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
247  set->name, set->load);
248  cnt++;
249 
250  /* list with rep data */
251  } else if (strlen(line) > 65 && line[64] == ',') {
252  line[strlen(line) - 1] = '\0';
253  SCLogDebug("SHA-256 with REP line: '%s'", line);
254 
255  uint8_t hash[32];
256  if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0)
257  FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
258  set->name, set->load);
259 
260  DataRepType rep = { .value = 0 };
261  if (ParseRepLine(line+65, strlen(line)-65, &rep) < 0)
262  FatalError(SC_ERR_FATAL, "bad rep for dataset %s/%s",
263  set->name, set->load);
264 
265  SCLogDebug("rep %u", rep.value);
266 
267  if (DatasetAddwRep(set, hash, 32, &rep) < 0)
268  FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
269  set->name, set->load);
270  cnt++;
271  }
272  }
273 
274  fclose(fp);
275  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
276  return 0;
277 }
278 
279 static int DatasetLoadString(Dataset *set)
280 {
281  if (strlen(set->load) == 0)
282  return 0;
283 
284  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
285  const char *fopen_mode = "r";
286  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
287  fopen_mode = "a+";
288  }
289 
290  FILE *fp = fopen(set->load, fopen_mode);
291  if (fp == NULL) {
292  SCLogError(SC_ERR_DATASET, "fopen '%s' failed: %s",
293  set->load, strerror(errno));
294  return -1;
295  }
296 
297  uint32_t cnt = 0;
298  char line[1024];
299  while (fgets(line, (int)sizeof(line), fp) != NULL) {
300  if (strlen(line) <= 1)
301  continue;
302 
303  char *r = strchr(line, ',');
304  if (r == NULL) {
305  line[strlen(line) - 1] = '\0';
306  SCLogDebug("line: '%s'", line);
307 
308  uint8_t decoded[strlen(line)];
309  uint32_t len = DecodeBase64(decoded, (const uint8_t *)line, strlen(line), 1);
310  if (len == 0)
311  FatalError(SC_ERR_FATAL, "bad base64 encoding %s/%s",
312  set->name, set->load);
313 
314  if (DatasetAdd(set, (const uint8_t *)decoded, len) < 0)
315  FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
316  set->name, set->load);
317  cnt++;
318  } else {
319  line[strlen(line) - 1] = '\0';
320  SCLogDebug("line: '%s'", line);
321 
322  *r = '\0';
323 
324  uint8_t decoded[strlen(line)];
325  uint32_t len = DecodeBase64(decoded, (const uint8_t *)line, strlen(line), 1);
326  if (len == 0)
327  FatalError(SC_ERR_FATAL, "bad base64 encoding %s/%s",
328  set->name, set->load);
329 
330  r++;
331  SCLogDebug("r '%s'", r);
332 
333  DataRepType rep = { .value = 0 };
334  if (ParseRepLine(r, strlen(r), &rep) < 0)
335  FatalError(SC_ERR_FATAL, "die: bad rep");
336  SCLogDebug("rep %u", rep.value);
337 
338  if (DatasetAddwRep(set, (const uint8_t *)decoded, len, &rep) < 0)
339  FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
340  set->name, set->load);
341  cnt++;
342 
343  SCLogDebug("line with rep %s, %s", line, r);
344  }
345  }
346 
347  fclose(fp);
348  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
349  return 0;
350 }
351 
352 extern bool g_system;
353 
357 };
358 
359 static void DatasetGetPath(const char *in_path,
360  char *out_path, size_t out_size, enum DatasetGetPathType type)
361 {
362  char path[PATH_MAX];
363  struct stat st;
364  int ret;
365 
366  if (PathIsAbsolute(in_path)) {
367  strlcpy(path, in_path, sizeof(path));
368  strlcpy(out_path, path, out_size);
369  return;
370  }
371 
372  const char *data_dir = ConfigGetDataDirectory();
373  if ((ret = stat(data_dir, &st)) != 0) {
374  SCLogDebug("data-dir '%s': %s", data_dir, strerror(errno));
375  return;
376  }
377 
378  snprintf(path, sizeof(path), "%s/%s", data_dir, in_path); // TODO WINDOWS
379 
380  if (type == TYPE_LOAD) {
381  if ((ret = stat(path, &st)) != 0) {
382  SCLogDebug("path %s: %s", path, strerror(errno));
383  if (!g_system) {
384  snprintf(path, sizeof(path), "%s", in_path);
385  }
386  }
387  }
388  strlcpy(out_path, path, out_size);
389  SCLogDebug("in_path \'%s\' => \'%s\'", in_path, out_path);
390 }
391 
392 /** \brief look for set by name without creating it */
393 Dataset *DatasetFind(const char *name, enum DatasetTypes type)
394 {
396  Dataset *set = DatasetSearchByName(name);
397  if (set) {
398  if (set->type != type) {
400  return NULL;
401  }
402  }
404  return set;
405 }
406 
407 Dataset *DatasetGet(const char *name, enum DatasetTypes type,
408  const char *save, const char *load)
409 {
411  Dataset *set = DatasetSearchByName(name);
412  if (set) {
413  if (type != DATASET_TYPE_NOTSET && set->type != type) {
414  SCLogError(SC_ERR_DATASET, "dataset %s already "
415  "exists and is of type %u",
416  set->name, set->type);
417  goto out_err;
418  }
419 
420  if ((save == NULL || strlen(save) == 0) &&
421  (load == NULL || strlen(load) == 0)) {
422  // OK, rule keyword doesn't have to set state/load,
423  // even when yaml set has set it.
424  } else {
425  if ((save == NULL && strlen(set->save) > 0) ||
426  (save != NULL && strcmp(set->save, save) != 0)) {
427  SCLogError(SC_ERR_DATASET, "dataset %s save mismatch: %s != %s",
428  set->name, set->save, save);
429  goto out_err;
430  }
431  if ((load == NULL && strlen(set->load) > 0) ||
432  (load != NULL && strcmp(set->load, load) != 0)) {
433  SCLogError(SC_ERR_DATASET, "dataset %s load mismatch: %s != %s",
434  set->name, set->load, load);
435  goto out_err;
436  }
437  }
438 
440  return set;
441  } else {
442  if (type == DATASET_TYPE_NOTSET) {
443  SCLogError(SC_ERR_DATASET, "dataset %s not defined", name);
444  goto out_err;
445  }
446  }
447 
448  set = DatasetAlloc(name);
449  if (set == NULL) {
450  goto out_err;
451  }
452 
453  strlcpy(set->name, name, sizeof(set->name));
454  set->type = type;
455  if (save && strlen(save)) {
456  strlcpy(set->save, save, sizeof(set->save));
457  SCLogDebug("name %s save '%s'", name, set->save);
458  }
459  if (load && strlen(load)) {
460  strlcpy(set->load, load, sizeof(set->load));
461  SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load);
462  }
463 
464  switch (type) {
465  case DATASET_TYPE_MD5:
466  set->hash = THashInit(name, sizeof(Md5Type), Md5StrSet,
468  if (set->hash == NULL)
469  goto out_err;
470  if (DatasetLoadMd5(set) < 0)
471  goto out_err;
472  break;
473  case DATASET_TYPE_STRING:
474  set->hash = THashInit(name, sizeof(StringType), StringSet,
476  if (set->hash == NULL)
477  goto out_err;
478  if (DatasetLoadString(set) < 0)
479  goto out_err;
480  break;
481  case DATASET_TYPE_SHA256:
482  set->hash = THashInit(name, sizeof(Sha256Type), Sha256StrSet,
484  if (set->hash == NULL)
485  goto out_err;
486  if (DatasetLoadSha256(set) < 0)
487  goto out_err;
488  break;
489  }
490 
491  SCLogDebug("set %p/%s type %u save %s load %s",
492  set, set->name, set->type, set->save, set->load);
493 
494  set->next = sets;
495  sets = set;
496 
498  return set;
499 out_err:
500  if (set) {
501  if (set->hash) {
502  THashShutdown(set->hash);
503  }
504  SCFree(set);
505  }
507  return NULL;
508 }
509 
510 #define SETNAME_MAX 63
511 
512 int DatasetsInit(void)
513 {
514  SCLogDebug("datasets start");
515  int n = 0;
516  ConfNode *datasets = ConfGetNode("datasets");
517  if (datasets != NULL) {
518  int list_pos = 0;
519  ConfNode *iter = NULL;
520  TAILQ_FOREACH(iter, &datasets->head, next) {
521  if (iter->val == NULL) {
522  list_pos++;
523  continue;
524  }
525 
526  char save[PATH_MAX] = "";
527  char load[PATH_MAX] = "";
528 
529  const char *set_name = iter->val;
530  if (strlen(set_name) > SETNAME_MAX) {
531  FatalError(SC_ERR_CONF_NAME_TOO_LONG, "set name '%s' too long, max %d chars",
532  set_name, SETNAME_MAX);
533  }
534 
535  ConfNode *set = ConfNodeLookupChild(iter, set_name);
536  if (set == NULL) {
537  list_pos++;
538  continue;
539  }
540 
541  ConfNode *set_type =
542  ConfNodeLookupChild(set, "type");
543  if (set_type == NULL) {
544  list_pos++;
545  continue;
546  }
547 
548  ConfNode *set_save =
549  ConfNodeLookupChild(set, "state");
550  if (set_save) {
551  DatasetGetPath(set_save->val, save, sizeof(save), TYPE_STATE);
552  strlcpy(load, save, sizeof(load));
553  } else {
554  ConfNode *set_load =
555  ConfNodeLookupChild(set, "load");
556  if (set_load) {
557  DatasetGetPath(set_load->val, load, sizeof(load), TYPE_LOAD);
558  }
559  }
560 
561  char conf_str[1024];
562  snprintf(conf_str, sizeof(conf_str), "datasets.%d.%s", list_pos, set_name);
563 
564  SCLogDebug("(%d) set %s type %s. Conf %s", n, set_name, set_type->val, conf_str);
565 
566  if (strcmp(set_type->val, "md5") == 0) {
567  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_MD5, save, load);
568  if (dset == NULL)
569  FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name);
570  SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val);
571  n++;
572 
573  } else if (strcmp(set_type->val, "sha256") == 0) {
574  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_SHA256, save, load);
575  if (dset == NULL)
576  FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name);
577  SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val);
578  n++;
579 
580  } else if (strcmp(set_type->val, "string") == 0) {
581  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_STRING, save, load);
582  if (dset == NULL)
583  FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name);
584  SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val);
585  n++;
586  }
587 
588  list_pos++;
589  }
590  }
591  SCLogDebug("datasets done: %p", datasets);
592  return 0;
593 }
594 
595 void DatasetsDestroy(void)
596 {
597  SCLogDebug("destroying datasets: %p", sets);
599  Dataset *set = sets;
600  while (set) {
601  SCLogDebug("destroying set %s", set->name);
602  Dataset *next = set->next;
603  THashShutdown(set->hash);
604  SCFree(set);
605  set = next;
606  }
607  sets = NULL;
609  SCLogDebug("destroying datasets done: %p", sets);
610 }
611 
/**
 * \brief write one formatted record to the output file
 *
 * \param ctx FILE pointer to write to, may be NULL
 * \param data bytes to write
 * \param data_len number of bytes in \a data
 *
 * \retval 0 ctx is NULL, or the record was not written completely
 * \retval 1 the record was written as one item
 */
static int SaveCallback(void *ctx, const uint8_t *data, const uint32_t data_len)
{
    FILE *out_fp = ctx;
    if (out_fp == NULL)
        return 0;

    /* the record is written as a single item, so fwrite yields 0 or 1 */
    return fwrite(data, data_len, 1, out_fp);
}
621 
622 static int Md5AsAscii(const void *s, char *out, size_t out_size)
623 {
624  const Md5Type *md5 = s;
625  uint32_t x;
626  int i;
627  char str[256];
628  for (i = 0, x = 0; x < sizeof(md5->md5); x++) {
629  i += snprintf(&str[i], 255-i, "%02x", md5->md5[x]);
630  }
631  strlcat(out, str, out_size);
632  strlcat(out, "\n", out_size);
633  return strlen(out);
634 }
635 
636 static int Sha256AsAscii(const void *s, char *out, size_t out_size)
637 {
638  const Sha256Type *sha = s;
639  uint32_t x;
640  int i;
641  char str[256];
642  for (i = 0, x = 0; x < sizeof(sha->sha256); x++) {
643  i += snprintf(&str[i], 255-i, "%02x", sha->sha256[x]);
644  }
645  strlcat(out, str, out_size);
646  strlcat(out, "\n", out_size);
647  return strlen(out);
648 }
649 
650 void DatasetsSave(void)
651 {
652  SCLogDebug("saving datasets: %p", sets);
654  Dataset *set = sets;
655  while (set) {
656  if (strlen(set->save) == 0)
657  goto next;
658 
659  FILE *fp = fopen(set->save, "w");
660  if (fp == NULL)
661  goto next;
662 
663  SCLogDebug("dumping %s to %s", set->name, set->save);
664 
665  switch (set->type) {
666  case DATASET_TYPE_STRING:
667  THashWalk(set->hash, StringAsBase64, SaveCallback, fp);
668  break;
669  case DATASET_TYPE_MD5:
670  THashWalk(set->hash, Md5AsAscii, SaveCallback, fp);
671  break;
672  case DATASET_TYPE_SHA256:
673  THashWalk(set->hash, Sha256AsAscii, SaveCallback, fp);
674  break;
675  }
676 
677  fclose(fp);
678 
679  next:
680  set = set->next;
681  }
683 }
684 
685 static int DatasetLookupString(Dataset *set, const uint8_t *data, const uint32_t data_len)
686 {
687  if (set == NULL)
688  return -1;
689 
690  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep.value = 0 };
691  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
692  if (rdata) {
693  THashDataUnlock(rdata);
694  return 1;
695  }
696  return -1;
697 }
698 
699 static DataRepResultType DatasetLookupStringwRep(Dataset *set,
700  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
701 {
702  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
703 
704  if (set == NULL)
705  return rrep;
706 
707  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = *rep };
708  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
709  if (rdata) {
710  StringType *found = rdata->data;
711  rrep.found = true;
712  rrep.rep = found->rep;
713  THashDataUnlock(rdata);
714  return rrep;
715  }
716  return rrep;
717 }
718 
719 static int DatasetLookupMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
720 {
721  if (set == NULL)
722  return -1;
723 
724  if (data_len != 16)
725  return 0;
726 
727  Md5Type lookup = { .rep.value = 0 };
728  memcpy(lookup.md5, data, data_len);
729  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
730  if (rdata) {
731  THashDataUnlock(rdata);
732  return 1;
733  }
734  return -1;
735 }
736 
737 static DataRepResultType DatasetLookupMd5wRep(Dataset *set,
738  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
739 {
740  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
741 
742  if (set == NULL)
743  return rrep;
744 
745  if (data_len != 16)
746  return rrep;
747 
748  Md5Type lookup = { .rep.value = 0};
749  memcpy(lookup.md5, data, data_len);
750  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
751  if (rdata) {
752  Md5Type *found = rdata->data;
753  rrep.found = true;
754  rrep.rep = found->rep;
755  THashDataUnlock(rdata);
756  return rrep;
757  }
758  return rrep;
759 }
760 
761 static int DatasetLookupSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
762 {
763  if (set == NULL)
764  return -1;
765 
766  if (data_len != 32)
767  return 0;
768 
769  Sha256Type lookup = { .rep.value = 0 };
770  memcpy(lookup.sha256, data, data_len);
771  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
772  if (rdata) {
773  THashDataUnlock(rdata);
774  return 1;
775  }
776  return -1;
777 }
778 
779 static DataRepResultType DatasetLookupSha256wRep(Dataset *set,
780  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
781 {
782  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
783 
784  if (set == NULL)
785  return rrep;
786 
787  if (data_len != 32)
788  return rrep;
789 
790  Sha256Type lookup = { .rep.value = 0 };
791  memcpy(lookup.sha256, data, data_len);
792  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
793  if (rdata) {
794  Sha256Type *found = rdata->data;
795  rrep.found = true;
796  rrep.rep = found->rep;
797  THashDataUnlock(rdata);
798  return rrep;
799  }
800  return rrep;
801 }
802 
803 int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
804 {
805  if (set == NULL)
806  return -1;
807 
808  switch (set->type) {
809  case DATASET_TYPE_STRING:
810  return DatasetLookupString(set, data, data_len);
811  case DATASET_TYPE_MD5:
812  return DatasetLookupMd5(set, data, data_len);
813  case DATASET_TYPE_SHA256:
814  return DatasetLookupSha256(set, data, data_len);
815  }
816  return -1;
817 }
818 
819 DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
820  const DataRepType *rep)
821 {
822  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
823  if (set == NULL)
824  return rrep;
825 
826  switch (set->type) {
827  case DATASET_TYPE_STRING:
828  return DatasetLookupStringwRep(set, data, data_len, rep);
829  case DATASET_TYPE_MD5:
830  return DatasetLookupMd5wRep(set, data, data_len, rep);
831  case DATASET_TYPE_SHA256:
832  return DatasetLookupSha256wRep(set, data, data_len, rep);
833  }
834  return rrep;
835 }
836 
837 /**
838  * \retval 1 data was added to the hash
839  * \retval 0 data was not added to the hash as it is already there
840  * \retval -1 failed to add data to the hash
841  */
842 static int DatasetAddString(Dataset *set, const uint8_t *data, const uint32_t data_len)
843 {
844  if (set == NULL)
845  return -1;
846 
847  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
848  .rep.value = 0 };
849  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
850  if (res.data) {
851  THashDataUnlock(res.data);
852  return res.is_new ? 1 : 0;
853  }
854  return -1;
855 }
856 
857 /**
858  * \retval 1 data was added to the hash
859  * \retval 0 data was not added to the hash as it is already there
860  * \retval -1 failed to add data to the hash
861  */
862 static int DatasetAddStringwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
863  DataRepType *rep)
864 {
865  if (set == NULL)
866  return -1;
867 
868  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
869  .rep = *rep };
870  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
871  if (res.data) {
872  THashDataUnlock(res.data);
873  return res.is_new ? 1 : 0;
874  }
875  return -1;
876 }
877 
878 static int DatasetAddMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
879 {
880  if (set == NULL)
881  return -1;
882 
883  if (data_len != 16)
884  return -1;
885 
886  Md5Type lookup = { .rep.value = 0 };
887  memcpy(lookup.md5, data, 16);
888  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
889  if (res.data) {
890  THashDataUnlock(res.data);
891  return res.is_new ? 1 : 0;
892  }
893  return -1;
894 }
895 
896 static int DatasetAddMd5wRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
897  DataRepType *rep)
898 {
899  if (set == NULL)
900  return -1;
901 
902  if (data_len != 16)
903  return -1;
904 
905  Md5Type lookup = { .rep = *rep };
906  memcpy(lookup.md5, data, 16);
907  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
908  if (res.data) {
909  THashDataUnlock(res.data);
910  return res.is_new ? 1 : 0;
911  }
912  return -1;
913 }
914 
915 static int DatasetAddSha256wRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
916  DataRepType *rep)
917 {
918  if (set == NULL)
919  return -1;
920 
921  if (data_len != 32)
922  return 0;
923 
924  Sha256Type lookup = { .rep = *rep };
925  memcpy(lookup.sha256, data, 32);
926  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
927  if (res.data) {
928  THashDataUnlock(res.data);
929  return res.is_new ? 1 : 0;
930  }
931  return -1;
932 }
933 
934 static int DatasetAddSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
935 {
936  if (set == NULL)
937  return -1;
938 
939  if (data_len != 32)
940  return 0;
941 
942  Sha256Type lookup = { .rep.value = 0 };
943  memcpy(lookup.sha256, data, 32);
944  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
945  if (res.data) {
946  THashDataUnlock(res.data);
947  return res.is_new ? 1 : 0;
948  }
949  return -1;
950 }
951 
952 int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
953 {
954  if (set == NULL)
955  return -1;
956 
957  switch (set->type) {
958  case DATASET_TYPE_STRING:
959  return DatasetAddString(set, data, data_len);
960  case DATASET_TYPE_MD5:
961  return DatasetAddMd5(set, data, data_len);
962  case DATASET_TYPE_SHA256:
963  return DatasetAddSha256(set, data, data_len);
964  }
965  return -1;
966 }
967 
968 static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
969  DataRepType *rep)
970 {
971  if (set == NULL)
972  return -1;
973 
974  switch (set->type) {
975  case DATASET_TYPE_STRING:
976  return DatasetAddStringwRep(set, data, data_len, rep);
977  case DATASET_TYPE_MD5:
978  return DatasetAddMd5wRep(set, data, data_len, rep);
979  case DATASET_TYPE_SHA256:
980  return DatasetAddSha256wRep(set, data, data_len, rep);
981  }
982  return -1;
983 }
984 
985 /** \brief add serialized data to set */
986 int DatasetAddSerialized(Dataset *set, const char *string)
987 {
988  if (set == NULL)
989  return -1;
990 
991  switch (set->type) {
992  case DATASET_TYPE_STRING: {
993  uint8_t decoded[strlen(string)];
994  uint32_t len = DecodeBase64(decoded, (const uint8_t *)string, strlen(string), 1);
995  if (len == 0) {
996  return -1;
997  }
998 
999  return DatasetAddString(set, decoded, len);
1000  }
1001  case DATASET_TYPE_MD5: {
1002  if (strlen(string) != 32)
1003  return -1;
1004  uint8_t hash[16];
1005  if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0)
1006  return -1;
1007  return DatasetAddMd5(set, hash, 16);
1008  }
1009  case DATASET_TYPE_SHA256: {
1010  if (strlen(string) != 64)
1011  return -1;
1012  uint8_t hash[32];
1013  if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0)
1014  return -1;
1015  return DatasetAddSha256(set, hash, 32);
1016  }
1017  }
1018  return -1;
1019 }
#define SCMutex
void DatasetsDestroy(void)
Definition: datasets.c:595
#define SCLogDebug(...)
Definition: util-debug.h:335
bool g_system
Definition: suricata.c:228
void THashShutdown(THashTableContext *ctx)
shutdown the flow engine
Definition: util-thash.c:327
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:350
struct HtpBodyChunk_ * next
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: util-strlcpyu.c:43
struct HtpBodyChunk_ * next
uint32_t Md5StrHash(void *s)
Definition: datasets-md5.c:50
int Sha256StrSet(void *dst, void *src)
DatasetTypes
Definition: datasets.h:28
uint8_t sha256[32]
struct THashDataGetResult THashGetFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:512
int THashWalk(THashTableContext *ctx, THashFormatFunc FormatterFunc, THashOutputFunc OutputterFunc, void *output_ctx)
Walk the hash.
Definition: util-thash.c:367
size_t strlcat(char *, const char *src, size_t siz)
Definition: util-strlcatu.c:45
char * val
Definition: conf.h:34
THashData * data
Definition: util-thash.h:205
ConfNode * ConfNodeLookupChild(const ConfNode *node, const char *name)
Lookup a child configuration node by name.
Definition: conf.c:815
int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:952
DatasetGetPathType
Definition: datasets.c:354
#define DATASET_TYPE_NOTSET
Definition: datasets.h:29
struct Dataset * next
Definition: datasets.h:45
DataRepType rep
#define SCMutexLock(mut)
Dataset * DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load)
Definition: datasets.c:407
enum DatasetTypes DatasetGetTypeFromString(const char *s)
Definition: datasets.c:43
#define str(s)
#define SCCalloc(nm, a)
Definition: util-mem.h:253
#define SCMutexUnlock(mut)
#define SCMUTEX_INITIALIZER
const char * ConfigGetDataDirectory()
Definition: util-conf.c:83
uint8_t type
#define SCLogError(err_code,...)
Macro used to log ERROR messages.
Definition: util-debug.h:294
int StringSet(void *dst, void *src)
void * data
Definition: util-thash.h:93
THashTableContext * THashInit(const char *cnf_prefix, size_t data_size, int(*DataSet)(void *, void *), void(*DataFree)(void *), uint32_t(*DataHash)(void *), _Bool(*DataCompare)(void *, void *))
Definition: util-thash.c:295
int PathIsAbsolute(const char *path)
Check if a path is absolute.
Definition: util-path.c:39
int StringAsBase64(const void *s, char *out, size_t out_size)
bool Md5StrCompare(void *a, void *b)
Definition: datasets-md5.c:42
void Md5StrFree(void *s)
Definition: datasets-md5.c:62
uint8_t * ptr
void DatasetsSave(void)
Definition: datasets.c:650
Dataset * DatasetFind(const char *name, enum DatasetTypes type)
look for set by name without creating it
Definition: datasets.c:393
Definition: conf.h:32
void StringFree(void *s)
int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:803
#define SCFree(a)
Definition: util-mem.h:322
void Sha256StrFree(void *s)
uint32_t StringHash(void *s)
DataRepType rep
Definition: datasets-md5.h:31
uint8_t md5[16]
Definition: datasets-md5.h:30
#define FatalError(x,...)
Definition: util-debug.h:539
bool StringCompare(void *a, void *b)
ConfNode * ConfGetNode(const char *name)
Get a ConfNode by name.
Definition: conf.c:176
int DatasetAddSerialized(Dataset *set, const char *string)
add serialized data to set
Definition: datasets.c:986
uint32_t Sha256StrHash(void *s)
int DatasetsInit(void)
Definition: datasets.c:512
bool Sha256StrCompare(void *a, void *b)
DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
Definition: datasets.c:819
#define SETNAME_MAX
Definition: datasets.c:510
uint8_t len
struct SCLogConfig_ SCLogConfig
Holds the config state used by the logging api.
SCMutex sets_lock
Definition: datasets.c:36
THashData * THashLookupFromHash(THashTableContext *ctx, void *data)
look up data in the hash
Definition: util-thash.c:620
uint32_t DecodeBase64(uint8_t *dest, const uint8_t *src, uint32_t len, int strict)
Decodes a base64-encoded string buffer into an ascii-encoded byte buffer.
Definition: util-base64.c:91
int Md5StrSet(void *dst, void *src)
Definition: datasets-md5.c:33
DataRepType rep