suricata
datasets.c
Go to the documentation of this file.
1 /* Copyright (C) 2017-2019 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Victor Julien <victor@inliniac.net>
22  */
23 
24 #include "suricata-common.h"
25 #include "conf.h"
26 #include "datasets.h"
27 #include "datasets-string.h"
28 #include "datasets-md5.h"
29 #include "datasets-sha256.h"
30 #include "datasets-reputation.h"
31 #include "util-thash.h"
32 #include "util-print.h"
33 #include "util-crypt.h" // encode base64
34 #include "util-base64.h" // decode base64
35 
37 static Dataset *sets = NULL;
38 static uint32_t set_ids = 0;
39 static bool experimental_warning = false;
40 
41 static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
42  DataRepType *rep);
43 
45 {
46  if (strcasecmp("md5", s) == 0)
47  return DATASET_TYPE_MD5;
48  if (strcasecmp("sha256", s) == 0)
49  return DATASET_TYPE_SHA256;
50  if (strcasecmp("string", s) == 0)
51  return DATASET_TYPE_STRING;
52  return DATASET_TYPE_NOTSET;
53 }
54 
55 static Dataset *DatasetAlloc(const char *name)
56 {
57  Dataset *set = SCCalloc(1, sizeof(*set));
58  if (set) {
59  set->id = set_ids++;
60  }
61  return set;
62 }
63 
64 static Dataset *DatasetSearchByName(const char *name)
65 {
66  Dataset *set = sets;
67  while (set) {
68  if (strcasecmp(name, set->name) == 0) {
69  return set;
70  }
71  set = set->next;
72  }
73  return NULL;
74 }
75 
76 static int HexToRaw(const uint8_t *in, size_t ins, uint8_t *out, size_t outs)
77 {
78  if (ins % 2 != 0)
79  return -1;
80  if (outs != ins / 2)
81  return -1;
82 
83  uint8_t hash[outs];
84  size_t i, x;
85  for (x = 0, i = 0; i < ins; i+=2, x++) {
86  char buf[3] = { 0, 0, 0 };
87  buf[0] = in[i];
88  buf[1] = in[i+1];
89 
90  long value = strtol(buf, NULL, 16);
91  if (value >= 0 && value <= 255)
92  hash[x] = (uint8_t)value;
93  else {
94  SCLogError(SC_ERR_INVALID_HASH, "hash byte out of range %ld", value);
95  return -1;
96  }
97  }
98 
99  memcpy(out, hash, outs);
100  return 0;
101 }
102 
103 static int ParseRepLine(const char *in, size_t ins, DataRepType *rep_out)
104 {
105  SCLogDebug("in '%s'", in);
106  char raw[ins + 1];
107  memcpy(raw, in, ins);
108  raw[ins] = '\0';
109  char *line = raw;
110 
111  char *ptrs[1] = {NULL};
112  int idx = 0;
113 
114  size_t i = 0;
115  while (i < ins + 1) {
116  if (line[i] == ',' || line[i] == '\n' || line[i] == '\0') {
117  line[i] = '\0';
118  SCLogDebug("line '%s'", line);
119 
120  ptrs[idx] = line;
121  idx++;
122 
123  if (idx == 1)
124  break;
125  } else {
126  i++;
127  }
128  }
129 
130  if (idx != 1) {
131  SCLogDebug("idx %d", idx);
132  return -1;
133  }
134 
135  int v = atoi(ptrs[0]);
136  if (v < 0 || v > USHRT_MAX) {
137  SCLogDebug("v %d", v);
138  return -1;
139  }
140  SCLogDebug("v %d raw %s", v, ptrs[0]);
141 
142  rep_out->value = v;
143  return 0;
144 }
145 
146 static int DatasetLoadMd5(Dataset *set)
147 {
148  if (strlen(set->load) == 0)
149  return 0;
150 
151  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
152  const char *fopen_mode = "r";
153  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
154  fopen_mode = "a+";
155  }
156 
157  FILE *fp = fopen(set->load, fopen_mode);
158  if (fp == NULL) {
159  SCLogError(SC_ERR_DATASET, "fopen '%s' failed: %s",
160  set->load, strerror(errno));
161  return -1;
162  }
163 
164  uint32_t cnt = 0;
165  char line[1024];
166  while (fgets(line, (int)sizeof(line), fp) != NULL) {
167  /* straight black/white list */
168  if (strlen(line) == 33) {
169  line[strlen(line) - 1] = '\0';
170  SCLogDebug("line: '%s'", line);
171 
172  uint8_t hash[16];
173  if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0)
174  FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
175  set->name, set->load);
176 
177  if (DatasetAdd(set, (const uint8_t *)hash, 16) < 0)
178  FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
179  set->name, set->load);
180  cnt++;
181 
182  /* list with rep data */
183  } else if (strlen(line) > 33 && line[32] == ',') {
184  line[strlen(line) - 1] = '\0';
185  SCLogDebug("MD5 with REP line: '%s'", line);
186 
187  uint8_t hash[16];
188  if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0)
189  FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
190  set->name, set->load);
191 
192  DataRepType rep = { .value = 0};
193  if (ParseRepLine(line+33, strlen(line)-33, &rep) < 0)
194  FatalError(SC_ERR_FATAL, "bad rep for dataset %s/%s",
195  set->name, set->load);
196 
197  SCLogDebug("rep v:%u", rep.value);
198  if (DatasetAddwRep(set, hash, 16, &rep) < 0)
199  FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
200  set->name, set->load);
201 
202  cnt++;
203  }
204  else {
205  FatalError(SC_ERR_FATAL, "MD5 bad line len %u: '%s'",
206  (uint32_t)strlen(line), line);
207  }
208  }
209 
210  fclose(fp);
211  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
212  return 0;
213 }
214 
215 static int DatasetLoadSha256(Dataset *set)
216 {
217  if (strlen(set->load) == 0)
218  return 0;
219 
220  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
221  const char *fopen_mode = "r";
222  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
223  fopen_mode = "a+";
224  }
225 
226  FILE *fp = fopen(set->load, fopen_mode);
227  if (fp == NULL) {
228  SCLogError(SC_ERR_DATASET, "fopen '%s' failed: %s",
229  set->load, strerror(errno));
230  return -1;
231  }
232 
233  uint32_t cnt = 0;
234  char line[1024];
235  while (fgets(line, (int)sizeof(line), fp) != NULL) {
236  /* straight black/white list */
237  if (strlen(line) == 65) {
238  line[strlen(line) - 1] = '\0';
239  SCLogDebug("line: '%s'", line);
240 
241  uint8_t hash[32];
242  if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0)
243  FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
244  set->name, set->load);
245 
246  if (DatasetAdd(set, (const uint8_t *)hash, (uint32_t)32) < 0)
247  FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
248  set->name, set->load);
249  cnt++;
250 
251  /* list with rep data */
252  } else if (strlen(line) > 65 && line[64] == ',') {
253  line[strlen(line) - 1] = '\0';
254  SCLogDebug("SHA-256 with REP line: '%s'", line);
255 
256  uint8_t hash[32];
257  if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0)
258  FatalError(SC_ERR_FATAL, "bad hash for dataset %s/%s",
259  set->name, set->load);
260 
261  DataRepType rep = { .value = 0 };
262  if (ParseRepLine(line+65, strlen(line)-65, &rep) < 0)
263  FatalError(SC_ERR_FATAL, "bad rep for dataset %s/%s",
264  set->name, set->load);
265 
266  SCLogDebug("rep %u", rep.value);
267 
268  if (DatasetAddwRep(set, hash, 32, &rep) < 0)
269  FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
270  set->name, set->load);
271  cnt++;
272  }
273  }
274 
275  fclose(fp);
276  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
277  return 0;
278 }
279 
280 static int DatasetLoadString(Dataset *set)
281 {
282  if (strlen(set->load) == 0)
283  return 0;
284 
285  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
286  const char *fopen_mode = "r";
287  if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
288  fopen_mode = "a+";
289  }
290 
291  FILE *fp = fopen(set->load, fopen_mode);
292  if (fp == NULL) {
293  SCLogError(SC_ERR_DATASET, "fopen '%s' failed: %s",
294  set->load, strerror(errno));
295  return -1;
296  }
297 
298  uint32_t cnt = 0;
299  char line[1024];
300  while (fgets(line, (int)sizeof(line), fp) != NULL) {
301  if (strlen(line) <= 1)
302  continue;
303 
304  char *r = strchr(line, ',');
305  if (r == NULL) {
306  line[strlen(line) - 1] = '\0';
307  SCLogDebug("line: '%s'", line);
308 
309  uint8_t decoded[strlen(line)];
310  uint32_t len = DecodeBase64(decoded, (const uint8_t *)line, strlen(line), 1);
311  if (len == 0)
312  FatalError(SC_ERR_FATAL, "bad base64 encoding %s/%s",
313  set->name, set->load);
314 
315  if (DatasetAdd(set, (const uint8_t *)decoded, len) < 0)
316  FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
317  set->name, set->load);
318  cnt++;
319  } else {
320  line[strlen(line) - 1] = '\0';
321  SCLogDebug("line: '%s'", line);
322 
323  *r = '\0';
324 
325  uint8_t decoded[strlen(line)];
326  uint32_t len = DecodeBase64(decoded, (const uint8_t *)line, strlen(line), 1);
327  if (len == 0)
328  FatalError(SC_ERR_FATAL, "bad base64 encoding %s/%s",
329  set->name, set->load);
330 
331  r++;
332  SCLogDebug("r '%s'", r);
333 
334  DataRepType rep = { .value = 0 };
335  if (ParseRepLine(r, strlen(r), &rep) < 0)
336  FatalError(SC_ERR_FATAL, "die: bad rep");
337  SCLogDebug("rep %u", rep.value);
338 
339  if (DatasetAddwRep(set, (const uint8_t *)decoded, len, &rep) < 0)
340  FatalError(SC_ERR_FATAL, "dataset data add failed %s/%s",
341  set->name, set->load);
342  cnt++;
343 
344  SCLogDebug("line with rep %s, %s", line, r);
345  }
346  }
347 
348  fclose(fp);
349  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
350  return 0;
351 }
352 
353 extern bool g_system;
354 
358 };
359 
360 static void DatasetGetPath(const char *in_path,
361  char *out_path, size_t out_size, enum DatasetGetPathType type)
362 {
363  char path[PATH_MAX];
364  struct stat st;
365  int ret;
366 
367  if (PathIsAbsolute(in_path)) {
368  strlcpy(path, in_path, sizeof(path));
369  strlcpy(out_path, path, out_size);
370  return;
371  }
372 
373  const char *data_dir = ConfigGetDataDirectory();
374  if ((ret = stat(data_dir, &st)) != 0) {
375  SCLogDebug("data-dir '%s': %s", data_dir, strerror(errno));
376  return;
377  }
378 
379  snprintf(path, sizeof(path), "%s/%s", data_dir, in_path); // TODO WINDOWS
380 
381  if (type == TYPE_LOAD) {
382  if ((ret = stat(path, &st)) != 0) {
383  SCLogDebug("path %s: %s", path, strerror(errno));
384  if (!g_system) {
385  snprintf(path, sizeof(path), "%s", in_path);
386  }
387  }
388  }
389  strlcpy(out_path, path, out_size);
390  SCLogDebug("in_path \'%s\' => \'%s\'", in_path, out_path);
391 }
392 
393 /** \brief look for set by name without creating it */
394 Dataset *DatasetFind(const char *name, enum DatasetTypes type)
395 {
397  Dataset *set = DatasetSearchByName(name);
398  if (set) {
399  if (set->type != type) {
401  return NULL;
402  }
403  }
405  return set;
406 }
407 
408 Dataset *DatasetGet(const char *name, enum DatasetTypes type,
409  const char *save, const char *load)
410 {
411  if (strlen(name) > DATASET_NAME_MAX_LEN) {
412  return NULL;
413  }
414 
416  if (!experimental_warning) {
417  SCLogNotice("dataset and datarep features are experimental and subject to change");
418  experimental_warning = true;
419  }
420  Dataset *set = DatasetSearchByName(name);
421  if (set) {
422  if (type != DATASET_TYPE_NOTSET && set->type != type) {
423  SCLogError(SC_ERR_DATASET, "dataset %s already "
424  "exists and is of type %u",
425  set->name, set->type);
427  return NULL;
428  }
429 
430  if ((save == NULL || strlen(save) == 0) &&
431  (load == NULL || strlen(load) == 0)) {
432  // OK, rule keyword doesn't have to set state/load,
433  // even when yaml set has set it.
434  } else {
435  if ((save == NULL && strlen(set->save) > 0) ||
436  (save != NULL && strcmp(set->save, save) != 0)) {
437  SCLogError(SC_ERR_DATASET, "dataset %s save mismatch: %s != %s",
438  set->name, set->save, save);
440  return NULL;
441  }
442  if ((load == NULL && strlen(set->load) > 0) ||
443  (load != NULL && strcmp(set->load, load) != 0)) {
444  SCLogError(SC_ERR_DATASET, "dataset %s load mismatch: %s != %s",
445  set->name, set->load, load);
447  return NULL;
448  }
449  }
450 
452  return set;
453  } else {
454  if (type == DATASET_TYPE_NOTSET) {
455  SCLogError(SC_ERR_DATASET, "dataset %s not defined", name);
456  goto out_err;
457  }
458  }
459 
460  set = DatasetAlloc(name);
461  if (set == NULL) {
462  goto out_err;
463  }
464 
465  strlcpy(set->name, name, sizeof(set->name));
466  set->type = type;
467  if (save && strlen(save)) {
468  strlcpy(set->save, save, sizeof(set->save));
469  SCLogDebug("name %s save '%s'", name, set->save);
470  }
471  if (load && strlen(load)) {
472  strlcpy(set->load, load, sizeof(set->load));
473  SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load);
474  }
475 
476  char cnf_name[128];
477  snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
478 
479  switch (type) {
480  case DATASET_TYPE_MD5:
481  set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet,
483  if (set->hash == NULL)
484  goto out_err;
485  if (DatasetLoadMd5(set) < 0)
486  goto out_err;
487  break;
488  case DATASET_TYPE_STRING:
489  set->hash = THashInit(cnf_name, sizeof(StringType), StringSet,
491  if (set->hash == NULL)
492  goto out_err;
493  if (DatasetLoadString(set) < 0)
494  goto out_err;
495  break;
496  case DATASET_TYPE_SHA256:
497  set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet,
499  if (set->hash == NULL)
500  goto out_err;
501  if (DatasetLoadSha256(set) < 0)
502  goto out_err;
503  break;
504  }
505 
506  SCLogDebug("set %p/%s type %u save %s load %s",
507  set, set->name, set->type, set->save, set->load);
508 
509  set->next = sets;
510  sets = set;
511 
513  return set;
514 out_err:
515  if (set) {
516  if (set->hash) {
517  THashShutdown(set->hash);
518  }
519  SCFree(set);
520  }
522  return NULL;
523 }
524 
525 int DatasetsInit(void)
526 {
527  SCLogDebug("datasets start");
528  int n = 0;
529  ConfNode *datasets = ConfGetNode("datasets");
530  if (datasets != NULL) {
531  int list_pos = 0;
532  ConfNode *iter = NULL;
533  TAILQ_FOREACH(iter, &datasets->head, next) {
534  if (iter->name == NULL) {
535  list_pos++;
536  continue;
537  }
538 
539  char save[PATH_MAX] = "";
540  char load[PATH_MAX] = "";
541 
542  const char *set_name = iter->name;
543  if (strlen(set_name) > DATASET_NAME_MAX_LEN) {
544  FatalError(SC_ERR_CONF_NAME_TOO_LONG, "set name '%s' too long, max %d chars",
545  set_name, DATASET_NAME_MAX_LEN);
546  }
547 
548  ConfNode *set_type =
549  ConfNodeLookupChild(iter, "type");
550  if (set_type == NULL) {
551  list_pos++;
552  continue;
553  }
554 
555  ConfNode *set_save =
556  ConfNodeLookupChild(iter, "state");
557  if (set_save) {
558  DatasetGetPath(set_save->val, save, sizeof(save), TYPE_STATE);
559  strlcpy(load, save, sizeof(load));
560  } else {
561  ConfNode *set_load =
562  ConfNodeLookupChild(iter, "load");
563  if (set_load) {
564  DatasetGetPath(set_load->val, load, sizeof(load), TYPE_LOAD);
565  }
566  }
567 
568  char conf_str[1024];
569  snprintf(conf_str, sizeof(conf_str), "datasets.%d.%s", list_pos, set_name);
570 
571  SCLogDebug("(%d) set %s type %s. Conf %s", n, set_name, set_type->val, conf_str);
572 
573  if (strcmp(set_type->val, "md5") == 0) {
574  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_MD5, save, load);
575  if (dset == NULL)
576  FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name);
577  SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val);
578  n++;
579 
580  } else if (strcmp(set_type->val, "sha256") == 0) {
581  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_SHA256, save, load);
582  if (dset == NULL)
583  FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name);
584  SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val);
585  n++;
586 
587  } else if (strcmp(set_type->val, "string") == 0) {
588  Dataset *dset = DatasetGet(set_name, DATASET_TYPE_STRING, save, load);
589  if (dset == NULL)
590  FatalError(SC_ERR_FATAL, "failed to setup dataset for %s", set_name);
591  SCLogDebug("dataset %s: id %d type %s", set_name, n, set_type->val);
592  n++;
593  }
594 
595  list_pos++;
596  }
597  }
598  SCLogDebug("datasets done: %p", datasets);
599  return 0;
600 }
601 
602 void DatasetsDestroy(void)
603 {
604  SCLogDebug("destroying datasets: %p", sets);
606  Dataset *set = sets;
607  while (set) {
608  SCLogDebug("destroying set %s", set->name);
609  Dataset *next = set->next;
610  THashShutdown(set->hash);
611  SCFree(set);
612  set = next;
613  }
614  sets = NULL;
616  SCLogDebug("destroying datasets done: %p", sets);
617 }
618 
/**
 * \brief output callback that writes one formatted record to a FILE
 *
 * \param ctx      the FILE pointer to write to, may be NULL
 * \param data     bytes to write
 * \param data_len number of bytes in \a data
 *
 * \retval 0 \a ctx is NULL, or fwrite() wrote no complete item
 * \retval 1 the record was written as one item
 */
static int SaveCallback(void *ctx, const uint8_t *data, const uint32_t data_len)
{
    FILE *out = ctx;
    if (out == NULL)
        return 0;

    /* written as a single item, so fwrite reports 0 or 1 */
    return (int)fwrite(data, data_len, 1, out);
}
628 
629 static int Md5AsAscii(const void *s, char *out, size_t out_size)
630 {
631  const Md5Type *md5 = s;
632  uint32_t x;
633  int i;
634  char str[256];
635  for (i = 0, x = 0; x < sizeof(md5->md5); x++) {
636  i += snprintf(&str[i], 255-i, "%02x", md5->md5[x]);
637  }
638  strlcat(out, str, out_size);
639  strlcat(out, "\n", out_size);
640  return strlen(out);
641 }
642 
643 static int Sha256AsAscii(const void *s, char *out, size_t out_size)
644 {
645  const Sha256Type *sha = s;
646  uint32_t x;
647  int i;
648  char str[256];
649  for (i = 0, x = 0; x < sizeof(sha->sha256); x++) {
650  i += snprintf(&str[i], 255-i, "%02x", sha->sha256[x]);
651  }
652  strlcat(out, str, out_size);
653  strlcat(out, "\n", out_size);
654  return strlen(out);
655 }
656 
657 void DatasetsSave(void)
658 {
659  SCLogDebug("saving datasets: %p", sets);
661  Dataset *set = sets;
662  while (set) {
663  if (strlen(set->save) == 0)
664  goto next;
665 
666  FILE *fp = fopen(set->save, "w");
667  if (fp == NULL)
668  goto next;
669 
670  SCLogDebug("dumping %s to %s", set->name, set->save);
671 
672  switch (set->type) {
673  case DATASET_TYPE_STRING:
674  THashWalk(set->hash, StringAsBase64, SaveCallback, fp);
675  break;
676  case DATASET_TYPE_MD5:
677  THashWalk(set->hash, Md5AsAscii, SaveCallback, fp);
678  break;
679  case DATASET_TYPE_SHA256:
680  THashWalk(set->hash, Sha256AsAscii, SaveCallback, fp);
681  break;
682  }
683 
684  fclose(fp);
685 
686  next:
687  set = set->next;
688  }
690 }
691 
692 static int DatasetLookupString(Dataset *set, const uint8_t *data, const uint32_t data_len)
693 {
694  if (set == NULL)
695  return -1;
696 
697  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep.value = 0 };
698  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
699  if (rdata) {
700  THashDataUnlock(rdata);
701  return 1;
702  }
703  return 0;
704 }
705 
706 static DataRepResultType DatasetLookupStringwRep(Dataset *set,
707  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
708 {
709  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
710 
711  if (set == NULL)
712  return rrep;
713 
714  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .rep = *rep };
715  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
716  if (rdata) {
717  StringType *found = rdata->data;
718  rrep.found = true;
719  rrep.rep = found->rep;
720  THashDataUnlock(rdata);
721  return rrep;
722  }
723  return rrep;
724 }
725 
726 static int DatasetLookupMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
727 {
728  if (set == NULL)
729  return -1;
730 
731  if (data_len != 16)
732  return -1;
733 
734  Md5Type lookup = { .rep.value = 0 };
735  memcpy(lookup.md5, data, data_len);
736  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
737  if (rdata) {
738  THashDataUnlock(rdata);
739  return 1;
740  }
741  return 0;
742 }
743 
744 static DataRepResultType DatasetLookupMd5wRep(Dataset *set,
745  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
746 {
747  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
748 
749  if (set == NULL)
750  return rrep;
751 
752  if (data_len != 16)
753  return rrep;
754 
755  Md5Type lookup = { .rep.value = 0};
756  memcpy(lookup.md5, data, data_len);
757  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
758  if (rdata) {
759  Md5Type *found = rdata->data;
760  rrep.found = true;
761  rrep.rep = found->rep;
762  THashDataUnlock(rdata);
763  return rrep;
764  }
765  return rrep;
766 }
767 
768 static int DatasetLookupSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
769 {
770  if (set == NULL)
771  return -1;
772 
773  if (data_len != 32)
774  return -1;
775 
776  Sha256Type lookup = { .rep.value = 0 };
777  memcpy(lookup.sha256, data, data_len);
778  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
779  if (rdata) {
780  THashDataUnlock(rdata);
781  return 1;
782  }
783  return 0;
784 }
785 
786 static DataRepResultType DatasetLookupSha256wRep(Dataset *set,
787  const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
788 {
789  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
790 
791  if (set == NULL)
792  return rrep;
793 
794  if (data_len != 32)
795  return rrep;
796 
797  Sha256Type lookup = { .rep.value = 0 };
798  memcpy(lookup.sha256, data, data_len);
799  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
800  if (rdata) {
801  Sha256Type *found = rdata->data;
802  rrep.found = true;
803  rrep.rep = found->rep;
804  THashDataUnlock(rdata);
805  return rrep;
806  }
807  return rrep;
808 }
809 
810 /**
811  * \brief see if \a data is part of the set
812  * \param set dataset
813  * \param data data to look up
814  * \param data_len length in bytes of \a data
815  * \retval -1 error
816  * \retval 0 not found
817  * \retval 1 found
818  */
819 int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
820 {
821  if (set == NULL)
822  return -1;
823 
824  switch (set->type) {
825  case DATASET_TYPE_STRING:
826  return DatasetLookupString(set, data, data_len);
827  case DATASET_TYPE_MD5:
828  return DatasetLookupMd5(set, data, data_len);
829  case DATASET_TYPE_SHA256:
830  return DatasetLookupSha256(set, data, data_len);
831  }
832  return -1;
833 }
834 
835 DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
836  const DataRepType *rep)
837 {
838  DataRepResultType rrep = { .found = false, .rep = { .value = 0 }};
839  if (set == NULL)
840  return rrep;
841 
842  switch (set->type) {
843  case DATASET_TYPE_STRING:
844  return DatasetLookupStringwRep(set, data, data_len, rep);
845  case DATASET_TYPE_MD5:
846  return DatasetLookupMd5wRep(set, data, data_len, rep);
847  case DATASET_TYPE_SHA256:
848  return DatasetLookupSha256wRep(set, data, data_len, rep);
849  }
850  return rrep;
851 }
852 
853 /**
854  * \retval 1 data was added to the hash
855  * \retval 0 data was not added to the hash as it is already there
856  * \retval -1 failed to add data to the hash
857  */
858 static int DatasetAddString(Dataset *set, const uint8_t *data, const uint32_t data_len)
859 {
860  if (set == NULL)
861  return -1;
862 
863  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
864  .rep.value = 0 };
865  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
866  if (res.data) {
867  THashDataUnlock(res.data);
868  return res.is_new ? 1 : 0;
869  }
870  return -1;
871 }
872 
873 /**
874  * \retval 1 data was added to the hash
875  * \retval 0 data was not added to the hash as it is already there
876  * \retval -1 failed to add data to the hash
877  */
878 static int DatasetAddStringwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
879  DataRepType *rep)
880 {
881  if (set == NULL)
882  return -1;
883 
884  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len,
885  .rep = *rep };
886  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
887  if (res.data) {
888  THashDataUnlock(res.data);
889  return res.is_new ? 1 : 0;
890  }
891  return -1;
892 }
893 
894 static int DatasetAddMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
895 {
896  if (set == NULL)
897  return -1;
898 
899  if (data_len != 16)
900  return -1;
901 
902  Md5Type lookup = { .rep.value = 0 };
903  memcpy(lookup.md5, data, 16);
904  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
905  if (res.data) {
906  THashDataUnlock(res.data);
907  return res.is_new ? 1 : 0;
908  }
909  return -1;
910 }
911 
912 static int DatasetAddMd5wRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
913  DataRepType *rep)
914 {
915  if (set == NULL)
916  return -1;
917 
918  if (data_len != 16)
919  return -1;
920 
921  Md5Type lookup = { .rep = *rep };
922  memcpy(lookup.md5, data, 16);
923  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
924  if (res.data) {
925  THashDataUnlock(res.data);
926  return res.is_new ? 1 : 0;
927  }
928  return -1;
929 }
930 
931 static int DatasetAddSha256wRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
932  DataRepType *rep)
933 {
934  if (set == NULL)
935  return -1;
936 
937  if (data_len != 32)
938  return 0;
939 
940  Sha256Type lookup = { .rep = *rep };
941  memcpy(lookup.sha256, data, 32);
942  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
943  if (res.data) {
944  THashDataUnlock(res.data);
945  return res.is_new ? 1 : 0;
946  }
947  return -1;
948 }
949 
950 static int DatasetAddSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
951 {
952  if (set == NULL)
953  return -1;
954 
955  if (data_len != 32)
956  return 0;
957 
958  Sha256Type lookup = { .rep.value = 0 };
959  memcpy(lookup.sha256, data, 32);
960  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
961  if (res.data) {
962  THashDataUnlock(res.data);
963  return res.is_new ? 1 : 0;
964  }
965  return -1;
966 }
967 
968 int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
969 {
970  if (set == NULL)
971  return -1;
972 
973  switch (set->type) {
974  case DATASET_TYPE_STRING:
975  return DatasetAddString(set, data, data_len);
976  case DATASET_TYPE_MD5:
977  return DatasetAddMd5(set, data, data_len);
978  case DATASET_TYPE_SHA256:
979  return DatasetAddSha256(set, data, data_len);
980  }
981  return -1;
982 }
983 
984 static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
985  DataRepType *rep)
986 {
987  if (set == NULL)
988  return -1;
989 
990  switch (set->type) {
991  case DATASET_TYPE_STRING:
992  return DatasetAddStringwRep(set, data, data_len, rep);
993  case DATASET_TYPE_MD5:
994  return DatasetAddMd5wRep(set, data, data_len, rep);
995  case DATASET_TYPE_SHA256:
996  return DatasetAddSha256wRep(set, data, data_len, rep);
997  }
998  return -1;
999 }
1000 
1001 /** \brief add serialized data to set */
1002 int DatasetAddSerialized(Dataset *set, const char *string)
1003 {
1004  if (set == NULL)
1005  return -1;
1006 
1007  switch (set->type) {
1008  case DATASET_TYPE_STRING: {
1009  uint8_t decoded[strlen(string)];
1010  uint32_t len = DecodeBase64(decoded, (const uint8_t *)string, strlen(string), 1);
1011  if (len == 0) {
1012  return -1;
1013  }
1014 
1015  return DatasetAddString(set, decoded, len);
1016  }
1017  case DATASET_TYPE_MD5: {
1018  if (strlen(string) != 32)
1019  return -1;
1020  uint8_t hash[16];
1021  if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0)
1022  return -1;
1023  return DatasetAddMd5(set, hash, 16);
1024  }
1025  case DATASET_TYPE_SHA256: {
1026  if (strlen(string) != 64)
1027  return -1;
1028  uint8_t hash[32];
1029  if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0)
1030  return -1;
1031  return DatasetAddSha256(set, hash, 32);
1032  }
1033  }
1034  return -1;
1035 }
#define SCMutex
void DatasetsDestroy(void)
Definition: datasets.c:602
#define SCLogDebug(...)
Definition: util-debug.h:335
bool g_system
Definition: suricata.c:226
void THashShutdown(THashTableContext *ctx)
shutdown the flow engine
Definition: util-thash.c:327
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:350
struct HtpBodyChunk_ * next
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: util-strlcpyu.c:43
struct HtpBodyChunk_ * next
uint32_t Md5StrHash(void *s)
Definition: datasets-md5.c:50
int Sha256StrSet(void *dst, void *src)
DatasetTypes
Definition: datasets.h:28
uint8_t sha256[32]
struct THashDataGetResult THashGetFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:512
int THashWalk(THashTableContext *ctx, THashFormatFunc FormatterFunc, THashOutputFunc OutputterFunc, void *output_ctx)
Walk the hash.
Definition: util-thash.c:367
size_t strlcat(char *, const char *src, size_t siz)
Definition: util-strlcatu.c:45
char * val
Definition: conf.h:34
THashData * data
Definition: util-thash.h:205
ConfNode * ConfNodeLookupChild(const ConfNode *node, const char *name)
Lookup a child configuration node by name.
Definition: conf.c:815
int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets.c:968
DatasetGetPathType
Definition: datasets.c:355
#define DATASET_TYPE_NOTSET
Definition: datasets.h:29
struct Dataset * next
Definition: datasets.h:46
DataRepType rep
#define SCMutexLock(mut)
Dataset * DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load)
Definition: datasets.c:408
enum DatasetTypes DatasetGetTypeFromString(const char *s)
Definition: datasets.c:44
#define str(s)
#define SCCalloc(nm, a)
Definition: util-mem.h:253
#define SCMutexUnlock(mut)
#define SCMUTEX_INITIALIZER
const char * ConfigGetDataDirectory()
Definition: util-conf.c:83
uint8_t type
#define SCLogError(err_code,...)
Macro used to log ERROR messages.
Definition: util-debug.h:294
int StringSet(void *dst, void *src)
void * data
Definition: util-thash.h:93
THashTableContext * THashInit(const char *cnf_prefix, size_t data_size, int(*DataSet)(void *, void *), void(*DataFree)(void *), uint32_t(*DataHash)(void *), _Bool(*DataCompare)(void *, void *))
Definition: util-thash.c:295
int PathIsAbsolute(const char *path)
Check if a path is absolute.
Definition: util-path.c:39
int StringAsBase64(const void *s, char *out, size_t out_size)
bool Md5StrCompare(void *a, void *b)
Definition: datasets-md5.c:42
void Md5StrFree(void *s)
Definition: datasets-md5.c:62
uint8_t * ptr
void DatasetsSave(void)
Definition: datasets.c:657
Dataset * DatasetFind(const char *name, enum DatasetTypes type)
look for set by name without creating it
Definition: datasets.c:394
Definition: conf.h:32
void StringFree(void *s)
int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
see if data is part of the set
Definition: datasets.c:819
#define SCFree(a)
Definition: util-mem.h:322
void Sha256StrFree(void *s)
#define SCLogNotice(...)
Macro used to log NOTICE messages.
Definition: util-debug.h:269
uint32_t StringHash(void *s)
DataRepType rep
Definition: datasets-md5.h:31
uint8_t md5[16]
Definition: datasets-md5.h:30
char * name
Definition: conf.h:33
#define FatalError(x,...)
Definition: util-debug.h:539
bool StringCompare(void *a, void *b)
ConfNode * ConfGetNode(const char *name)
Get a ConfNode by name.
Definition: conf.c:176
int DatasetAddSerialized(Dataset *set, const char *string)
add serialized data to set
Definition: datasets.c:1002
uint32_t Sha256StrHash(void *s)
int DatasetsInit(void)
Definition: datasets.c:525
bool Sha256StrCompare(void *a, void *b)
DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
Definition: datasets.c:835
uint8_t len
#define DATASET_NAME_MAX_LEN
Definition: datasets.h:35
struct SCLogConfig_ SCLogConfig
Holds the config state used by the logging api.
SCMutex sets_lock
Definition: datasets.c:36
THashData * THashLookupFromHash(THashTableContext *ctx, void *data)
look up data in the hash
Definition: util-thash.c:620
uint32_t DecodeBase64(uint8_t *dest, const uint8_t *src, uint32_t len, int strict)
Decodes a base64-encoded string buffer into an ascii-encoded byte buffer.
Definition: util-base64.c:91
int Md5StrSet(void *dst, void *src)
Definition: datasets-md5.c:33
DataRepType rep