suricata
datasets-context-json.c
Go to the documentation of this file.
1 /* Copyright (C) 2025 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Eric Leblond <el@stamus-networks.com>
22  */
23 
24 #include "suricata-common.h"
25 #include "suricata.h"
26 #include "rust.h"
27 #include "datasets.h"
28 #include "datasets-context-json.h"
29 #include "datasets-ipv4.h"
30 #include "datasets-ipv6.h"
31 #include "datasets-md5.h"
32 #include "datasets-sha256.h"
33 #include "datasets-string.h"
34 #include "util-byte.h"
35 #include "util-ip.h"
36 #include "util-debug.h"
37 
38 static int DatajsonAdd(
39  Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json);
40 
41 static inline void DatajsonUnlockData(THashData *d)
42 {
43  (void)THashDecrUsecnt(d);
44  THashDataUnlock(d);
45 }
46 
48 {
49  if (r->hashdata) {
50  DatajsonUnlockData(r->hashdata);
51  }
52 }
53 
55 {
56  dst->len = src->len;
57  dst->value = SCMalloc(dst->len + 1);
58  if (dst->value == NULL)
59  return -1;
60  memcpy(dst->value, src->value, dst->len);
61  dst->value[dst->len] = '\0'; // Ensure null-termination
62  return 0;
63 }
64 
/**
 * \brief Check whether a buffer holds a bare number (integer or float).
 *
 * The number is parsed with strtof(), so the accepted syntax is the one
 * strtof() understands. The whole buffer has to be consumed: after the
 * number only JSON whitespace (space, tab, CR, LF) may follow, so inputs
 * such as "1abc" are rejected.
 *
 * \param in  NUL-terminated input (strtof() reads up to the terminator)
 * \param ins number of bytes of `in` to consider
 *
 * \retval true  the buffer is a number
 * \retval false empty input, no number, or trailing garbage
 */
static bool IsFloat(const char *in, size_t ins)
{
    if (in == NULL || ins == 0) {
        return false;
    }

    char *endptr = NULL;
    (void)strtof(in, &endptr);
    if (endptr == in) {
        /* strtof() could not convert anything */
        return false;
    }

    /* whatever is left between the end of the number and the end of the
     * buffer must be whitespace only */
    for (size_t i = (size_t)(endptr - in); i < ins; i++) {
        const char c = in[i];
        if (c == '\0') {
            break;
        }
        if (c != ' ' && c != '\t' && c != '\n' && c != '\r') {
            return false;
        }
    }
    return true;
}
80 
81 static int ParseJsonLine(const char *in, size_t ins, DataJsonType *rep_out)
82 {
83  if (ins > DATAJSON_JSON_LENGTH) {
84  SCLogError("dataset: json string too long: %s", in);
85  return -1;
86  }
87 
88  json_error_t jerror;
89  json_t *msg = json_loads(in, 0, &jerror);
90  if (msg == NULL) {
91  /* JANSSON does not see an integer, float or a string as valid JSON.
92  So we need to exclude them from failure. */
93  if (!IsFloat(in, ins) && !((in[0] == '"') && (in[ins - 1] == '"'))) {
94  SCLogError("dataset: Invalid json: %s: '%s'", jerror.text, in);
95  return -1;
96  }
97  } else {
98  json_decref(msg);
99  }
100  rep_out->len = (uint16_t)ins;
101  rep_out->value = SCStrndup(in, ins);
102  if (rep_out->value == NULL) {
103  return -1;
104  }
105  return 0;
106 }
107 
108 static json_t *GetSubObjectByKey(json_t *json, const char *key)
109 {
110  if (!json || !key || !json_is_object(json)) {
111  return NULL;
112  }
113  if (strlen(key) > SIG_JSON_CONTENT_KEY_LEN) {
115  return NULL;
116  }
117 
118  const char *current_key = key;
119  json_t *current = json;
120  while (current_key) {
121  const char *dot = strchr(current_key, '.');
122 
123  size_t key_len = dot ? (size_t)(dot - current_key) : strlen(current_key);
124  char key_buffer[key_len + 1];
125  strlcpy(key_buffer, current_key, key_len + 1);
126 
127  if (json_is_object(current) == false) {
128  return NULL;
129  }
130  current = json_object_get(current, key_buffer);
131  if (current == NULL) {
132  return NULL;
133  }
134  current_key = dot ? dot + 1 : NULL;
135  }
136  return current;
137 }
138 
139 static int ParseJsonFile(const char *file, json_t **array, char *key)
140 {
141  json_t *json;
142  json_error_t error;
143  /* assume we have one single JSON element in FILE */
144  json = json_load_file(file, 0, &error);
145  if (json == NULL) {
146  FatalErrorOnInit("can't load JSON, error on line %d: %s", error.line, error.text);
147  return -1;
148  }
149 
150  if (key == NULL || strlen(key) == 0) {
151  *array = json;
152  } else {
153  *array = GetSubObjectByKey(json, key);
154  if (*array == NULL) {
155  SCLogError("dataset: %s failed to get key '%s'", file, key);
156  json_decref(json);
157  return -1;
158  }
159  json_incref(*array);
160  json_decref(json);
161  }
162  if (!json_is_array(*array)) {
163  FatalErrorOnInit("not an array");
164  json_decref(*array);
165  return -1;
166  }
167  return 0;
168 }
169 
170 static int DatajsonSetValue(
171  Dataset *set, const uint8_t *val, uint16_t val_len, json_t *value, const char *json_key)
172 {
173  DataJsonType elt = { .value = NULL, .len = 0 };
174  if (set->remove_key) {
175  json_object_del(value, json_key);
176  }
177 
178  elt.value = json_dumps(value, JSON_COMPACT);
179  if (elt.value == NULL) {
180  FatalErrorOnInit("json_dumps failed for %s/%s", set->name, set->load);
181  return 0;
182  }
183  if (strlen(elt.value) > DATAJSON_JSON_LENGTH) {
184  SCLogError("dataset: json string too long: %s/%s", set->name, set->load);
185  SCFree(elt.value);
186  elt.value = NULL;
187  return 0;
188  }
189  elt.len = (uint16_t)strlen(elt.value);
190 
191  int add_ret = DatajsonAdd(set, val, val_len, &elt);
192  if (add_ret < 0) {
193  FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
194  return 0;
195  }
196  return add_ret;
197 }
198 
199 /**
200  * \retval 1 data was added to the hash
201  * \retval 0 data was not added to the hash as it is already there
202  * \retval -1 failed to add data to the hash
203  */
204 static int DatajsonAddString(
205  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
206 {
207  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .json = *json };
208  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
209  if (res.data) {
210  DatajsonUnlockData(res.data);
211  return res.is_new ? 1 : 0;
212  }
213  return -1;
214 }
215 
216 static int DatajsonAddMd5(
217  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
218 {
219  if (data_len != SC_MD5_LEN)
220  return -2;
221 
222  Md5Type lookup = { .json = *json };
223  memcpy(lookup.md5, data, SC_MD5_LEN);
224  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
225  if (res.data) {
226  DatajsonUnlockData(res.data);
227  return res.is_new ? 1 : 0;
228  }
229  return -1;
230 }
231 
232 static int DatajsonAddSha256(
233  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
234 {
235  if (data_len != SC_SHA256_LEN)
236  return -2;
237 
238  Sha256Type lookup = { .json = *json };
239  memcpy(lookup.sha256, data, SC_SHA256_LEN);
240  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
241  if (res.data) {
242  DatajsonUnlockData(res.data);
243  return res.is_new ? 1 : 0;
244  }
245  return -1;
246 }
247 
248 static int DatajsonAddIPv4(
249  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
250 {
251  if (data_len < SC_IPV4_LEN)
252  return -2;
253 
254  IPv4Type lookup = { .json = *json };
255  memcpy(lookup.ipv4, data, SC_IPV4_LEN);
256  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
257  if (res.data) {
258  DatajsonUnlockData(res.data);
259  return res.is_new ? 1 : 0;
260  }
261  return -1;
262 }
263 
264 static int DatajsonAddIPv6(
265  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
266 {
267  if (data_len != SC_IPV6_LEN)
268  return -2;
269 
270  IPv6Type lookup = { .json = *json };
271  memcpy(lookup.ipv6, data, SC_IPV6_LEN);
272  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
273  if (res.data) {
274  DatajsonUnlockData(res.data);
275  return res.is_new ? 1 : 0;
276  }
277  return -1;
278 }
279 
280 /*
281  * \brief Add data to the dataset from a JSON object.
282  *
283  * \param set The dataset to add data to.
284  * \param data The data to add.
285  * \param data_len The length of the data.
286  * \param json The JSON object containing additional information.
287  *
288  * Memory allocated for the `json` parameter will be freed if the data
289  * is not added to the hash.
290  *
291  * \retval 1 Data was added to the hash.
292  * \retval 0 Data was not added to the hash as it is already there.
293  * \retval -1 Failed to add data to the hash.
294  */
295 static int DatajsonAdd(
296  Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json)
297 {
298  if (json == NULL)
299  return -1;
300  if (json->value == NULL)
301  return -1;
302 
303  if (set == NULL) {
304  if (json->value != NULL) {
305  SCFree(json->value);
306  json->value = NULL;
307  }
308  return -1;
309  }
310 
311  int add_ret = 0;
312  switch (set->type) {
313  case DATASET_TYPE_STRING:
314  add_ret = DatajsonAddString(set, data, data_len, json);
315  break;
316  case DATASET_TYPE_MD5:
317  add_ret = DatajsonAddMd5(set, data, data_len, json);
318  break;
319  case DATASET_TYPE_SHA256:
320  add_ret = DatajsonAddSha256(set, data, data_len, json);
321  break;
322  case DATASET_TYPE_IPV4:
323  add_ret = DatajsonAddIPv4(set, data, data_len, json);
324  break;
325  case DATASET_TYPE_IPV6:
326  add_ret = DatajsonAddIPv6(set, data, data_len, json);
327  break;
328  default:
329  add_ret = -1;
330  break;
331  }
332 
333  SCFree(json->value);
334  json->value = NULL;
335 
336  return add_ret;
337 }
338 
339 static int DatajsonLoadTypeFromJSON(Dataset *set, char *json_key, char *array_key,
340  uint32_t (*DatajsonAddTypeElement)(Dataset *, json_t *, char *, bool *))
341 {
342  if (strlen(set->load) == 0)
343  return 0;
344 
345  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
346 
347  uint32_t cnt = 0;
348  json_t *json;
349  bool found = false;
350  SCLogDebug("dataset: array_key '%s' %p", array_key, array_key);
351  if (ParseJsonFile(set->load, &json, array_key) == -1) {
352  SCLogError("dataset: %s failed to parse from '%s'", set->name, set->load);
353  return -1;
354  }
355 
356  size_t index;
357  json_t *value;
358  json_array_foreach (json, index, value) {
359  cnt += DatajsonAddTypeElement(set, value, json_key, &found);
360  }
361  json_decref(json);
362 
363  if (found == false) {
365  "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
366  return -1;
367  }
369 
370  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
371  return 0;
372 }
373 
374 static uint32_t DatajsonLoadTypeFromJsonline(Dataset *set, char *json_key,
375  uint32_t (*DatajsonAddTypeElement)(Dataset *, json_t *, char *, bool *))
376 {
377  uint32_t cnt = 0;
378  FILE *fp = fopen(set->load, "r");
379  bool found = false;
380 
381  if (fp == NULL) {
382  SCLogError("dataset: %s failed to open file '%s'", set->name, set->load);
383  return 0;
384  }
385 
386  char line[DATAJSON_JSON_LENGTH];
387  while (fgets(line, sizeof(line), fp) != NULL) {
388  json_t *json = json_loads(line, 0, NULL);
389  if (json == NULL) {
390  SCLogError("dataset: %s failed to parse line '%s'", set->name, line);
391  goto out_err;
392  }
393  cnt += DatajsonAddTypeElement(set, json, json_key, &found);
394  json_decref(json);
395  }
396  int close_op = fclose(fp);
397  if (close_op != 0) {
398  SCLogError("dataset: %s failed to close file '%s'", set->name, set->load);
399  return 0;
400  }
401 
402  if (found == false) {
404  "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
405  return 0;
406  }
407  return cnt;
408 out_err:
409  close_op = fclose(fp);
410  if (close_op != 0) {
411  SCLogError("dataset: %s failed to close file '%s'", set->name, set->load);
412  }
413  return 0;
414 }
415 
416 static uint32_t DatajsonAddStringElement(Dataset *set, json_t *value, char *json_key, bool *found)
417 {
418  json_t *key = GetSubObjectByKey(value, json_key);
419  if (key == NULL) {
420  /* ignore error as it can be a working mode where some entries
421  are not in the same format */
422  return 0;
423  }
424 
425  *found = true;
426 
427  const char *val_key = json_string_value(key);
428  if (val_key == NULL) {
429  FatalErrorOnInit("dataset: %s failed to get value for key '%s'", set->name, json_key);
430  return 0;
431  }
432  size_t val_len = strlen(val_key);
433 
434  json_incref(key);
435  int ret = DatajsonSetValue(set, (const uint8_t *)val_key, (uint16_t)val_len, value, json_key);
436  json_decref(key);
437  if (ret < 0) {
438  FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
439  return 0;
440  }
441  return ret;
442 }
443 
444 static int DatajsonLoadString(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
445 {
446  if (strlen(set->load) == 0)
447  return 0;
448 
449  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
450 
451  uint32_t cnt = 0;
452  if (format == DATASET_FORMAT_JSON) {
453  cnt = DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddStringElement);
454  } else if (format == DATASET_FORMAT_NDJSON) {
455  cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddStringElement);
456  }
458 
459  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
460  return 0;
461 }
462 
463 static uint32_t DatajsonAddMd5Element(Dataset *set, json_t *value, char *json_key, bool *found)
464 {
465  json_t *key = GetSubObjectByKey(value, json_key);
466  if (key == NULL) {
467  /* ignore error as it can be a working mode where some entries
468  are not in the same format */
469  return 0;
470  }
471 
472  *found = true;
473 
474  const char *hash_string = json_string_value(key);
475  if (strlen(hash_string) != SC_MD5_HEX_LEN) {
476  FatalErrorOnInit("Not correct length for a hash");
477  return 0;
478  }
479 
480  uint8_t hash[SC_MD5_LEN];
481  if (HexToRaw((const uint8_t *)hash_string, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0) {
482  FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
483  return 0;
484  }
485  return DatajsonSetValue(set, hash, SC_MD5_LEN, value, json_key);
486 }
487 
488 static int DatajsonLoadMd5(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
489 {
490  if (strlen(set->load) == 0)
491  return 0;
492 
493  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
494 
495  uint32_t cnt = 0;
496  if (format == DATASET_FORMAT_JSON) {
497  cnt = DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddMd5Element);
498  } else if (format == DATASET_FORMAT_NDJSON) {
499  cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddMd5Element);
500  }
502 
503  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
504  return 0;
505 }
506 
507 static uint32_t DatajsonAddSha256Element(Dataset *set, json_t *value, char *json_key, bool *found)
508 {
509  json_t *key = GetSubObjectByKey(value, json_key);
510  if (key == NULL) {
511  /* ignore error as it can be a working mode where some entries
512  are not in the same format */
513  return 0;
514  }
515 
516  *found = true;
517 
518  const char *hash_string = json_string_value(key);
519  if (strlen(hash_string) != SC_SHA256_HEX_LEN) {
520  FatalErrorOnInit("Not correct length for a hash");
521  return 0;
522  }
523 
524  uint8_t hash[SC_SHA256_LEN];
525  if (HexToRaw((const uint8_t *)hash_string, SC_SHA256_HEX_LEN, hash, sizeof(hash)) < 0) {
526  FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
527  return 0;
528  }
529 
530  return DatajsonSetValue(set, hash, SC_SHA256_LEN, value, json_key);
531 }
532 
533 static int DatajsonLoadSha256(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
534 {
535  if (strlen(set->load) == 0)
536  return 0;
537 
538  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
539 
540  uint32_t cnt = 0;
541  if (format == DATASET_FORMAT_JSON) {
542  cnt = DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddSha256Element);
543  } else if (format == DATASET_FORMAT_NDJSON) {
544  cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddSha256Element);
545  }
547 
548  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
549  return 0;
550 }
551 
552 static uint32_t DatajsonAddIpv4Element(Dataset *set, json_t *value, char *json_key, bool *found)
553 {
554  json_t *key = GetSubObjectByKey(value, json_key);
555  if (key == NULL) {
556  /* ignore error as it can be a working mode where some entries
557  are not in the same format */
558  return 0;
559  }
560 
561  *found = true;
562 
563  const char *ip_string = json_string_value(key);
564  struct in_addr in;
565  if (inet_pton(AF_INET, ip_string, &in) != 1) {
566  FatalErrorOnInit("datajson IPv4 parse failed %s/%s: %s", set->name, set->load, ip_string);
567  return 0;
568  }
569 
570  return DatajsonSetValue(set, (const uint8_t *)&in.s_addr, SC_IPV4_LEN, value, json_key);
571 }
572 
573 static int DatajsonLoadIPv4(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
574 {
575  if (strlen(set->load) == 0)
576  return 0;
577 
578  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
579  uint32_t cnt = 0;
580 
581  if (format == DATASET_FORMAT_JSON) {
582  cnt = DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddIpv4Element);
583  } else if (format == DATASET_FORMAT_NDJSON) {
584  cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddIpv4Element);
585  }
587 
588  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
589  return 0;
590 }
591 
592 static uint32_t DatajsonAddIPv6Element(Dataset *set, json_t *value, char *json_key, bool *found)
593 {
594  json_t *key = GetSubObjectByKey(value, json_key);
595  if (key == NULL) {
596  /* ignore error as it can be a working mode where some entries
597  are not in the same format */
598  return 0;
599  }
600 
601  *found = true;
602 
603  const char *ip_string = json_string_value(key);
604  struct in6_addr in6;
605  int ret = DatasetParseIpv6String(set, ip_string, &in6);
606  if (ret < 0) {
607  FatalErrorOnInit("unable to parse IP address");
608  return 0;
609  }
610 
611  return DatajsonSetValue(set, (const uint8_t *)&in6.s6_addr, SC_IPV6_LEN, value, json_key);
612 }
613 
614 static int DatajsonLoadIPv6(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
615 {
616  if (strlen(set->load) == 0)
617  return 0;
618 
619  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
620 
621  uint32_t cnt = 0;
622 
623  if (format == DATASET_FORMAT_JSON) {
624  cnt = DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddIPv6Element);
625  } else if (format == DATASET_FORMAT_NDJSON) {
626  cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddIPv6Element);
627  }
628 
630 
631  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
632  return 0;
633 }
634 
635 Dataset *DatajsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap,
636  uint32_t hashsize, char *json_key_value, char *json_array_key, DatasetFormats format,
637  bool remove_key)
638 {
639  Dataset *set = NULL;
640 
641  DatasetLock();
642  int ret = DatasetGetOrCreate(name, type, NULL, load, &memcap, &hashsize, &set);
643  if (ret < 0) {
644  SCLogError("dataset with JSON %s creation failed", name);
645  DatasetUnlock();
646  return NULL;
647  }
648  if (ret == 1) {
649  SCLogDebug("dataset %s already exists", name);
650  if (set->remove_key != remove_key) {
651  SCLogError("dataset %s remove_key mismatch: %d != %d", set->name, set->remove_key,
652  remove_key);
653  DatasetUnlock();
654  return NULL;
655  }
656  DatasetUnlock();
657  return set;
658  }
659 
660  set->remove_key = remove_key;
661 
662  char cnf_name[128];
663  snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
664  switch (type) {
665  case DATASET_TYPE_MD5:
666  set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrJsonSet, Md5StrJsonFree,
667  Md5StrHash, Md5StrCompare, NULL, Md5StrJsonGetLength, load != NULL ? 1 : 0,
668  memcap, hashsize);
669  if (set->hash == NULL)
670  goto out_err;
671  if (DatajsonLoadMd5(set, json_key_value, json_array_key, format) < 0)
672  goto out_err;
673  break;
674  case DATASET_TYPE_STRING:
675  set->hash = THashInit(cnf_name, sizeof(StringType), StringJsonSet, StringJsonFree,
676  StringHash, StringCompare, NULL, StringJsonGetLength, load != NULL ? 1 : 0,
677  memcap, hashsize);
678  if (set->hash == NULL)
679  goto out_err;
680  if (DatajsonLoadString(set, json_key_value, json_array_key, format) < 0) {
681  SCLogError("dataset %s loading failed", name);
682  goto out_err;
683  }
684  break;
685  case DATASET_TYPE_SHA256:
686  set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrJsonSet, Sha256StrJsonFree,
688  load != NULL ? 1 : 0, memcap, hashsize);
689  if (set->hash == NULL)
690  goto out_err;
691  if (DatajsonLoadSha256(set, json_key_value, json_array_key, format) < 0)
692  goto out_err;
693  break;
694  case DATASET_TYPE_IPV4:
695  set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4JsonSet, IPv4JsonFree, IPv4Hash,
696  IPv4Compare, NULL, IPv4JsonGetLength, load != NULL ? 1 : 0, memcap, hashsize);
697  if (set->hash == NULL)
698  goto out_err;
699  if (DatajsonLoadIPv4(set, json_key_value, json_array_key, format) < 0)
700  goto out_err;
701  break;
702  case DATASET_TYPE_IPV6:
703  set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6JsonSet, IPv6JsonFree, IPv6Hash,
704  IPv6Compare, NULL, IPv6JsonGetLength, load != NULL ? 1 : 0, memcap, hashsize);
705  if (set->hash == NULL)
706  goto out_err;
707  if (DatajsonLoadIPv6(set, json_key_value, json_array_key, format) < 0)
708  goto out_err;
709  break;
710  }
711 
712  SCLogDebug(
713  "set %p/%s type %u save %s load %s", set, set->name, set->type, set->save, set->load);
714 
715  if (DatasetAppendSet(set) < 0) {
716  SCLogError("dataset %s append failed", name);
717  goto out_err;
718  }
719 
720  DatasetUnlock();
721  return set;
722 out_err:
723  if (set->hash) {
724  THashShutdown(set->hash);
725  }
726  SCFree(set);
727  DatasetUnlock();
728  return NULL;
729 }
730 
731 static DataJsonResultType DatajsonLookupString(
732  Dataset *set, const uint8_t *data, const uint32_t data_len)
733 {
734  DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
735 
736  if (set == NULL)
737  return rrep;
738 
739  StringType lookup = {
740  .ptr = (uint8_t *)data, .len = data_len, .json.value = NULL, .json.len = 0
741  };
742  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
743  if (rdata) {
744  StringType *found = rdata->data;
745  rrep.found = true;
746  rrep.json = found->json;
747  rrep.hashdata = rdata;
748  return rrep;
749  }
750  return rrep;
751 }
752 
753 static DataJsonResultType DatajsonLookupMd5(
754  Dataset *set, const uint8_t *data, const uint32_t data_len)
755 {
756  DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
757 
758  if (set == NULL)
759  return rrep;
760 
761  if (data_len != SC_MD5_LEN)
762  return rrep;
763 
764  Md5Type lookup = { .json.value = NULL, .json.len = 0 };
765  memcpy(lookup.md5, data, data_len);
766  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
767  if (rdata) {
768  Md5Type *found = rdata->data;
769  rrep.found = true;
770  rrep.json = found->json;
771  rrep.hashdata = rdata;
772  return rrep;
773  }
774  return rrep;
775 }
776 
777 static DataJsonResultType DatajsonLookupSha256(
778  Dataset *set, const uint8_t *data, const uint32_t data_len)
779 {
780  DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
781 
782  if (set == NULL)
783  return rrep;
784 
785  if (data_len != SC_SHA256_LEN)
786  return rrep;
787 
788  Sha256Type lookup = { .json.value = NULL, .json.len = 0 };
789  memcpy(lookup.sha256, data, data_len);
790  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
791  if (rdata) {
792  Sha256Type *found = rdata->data;
793  rrep.found = true;
794  rrep.json = found->json;
795  rrep.hashdata = rdata;
796  return rrep;
797  }
798  return rrep;
799 }
800 
801 static DataJsonResultType DatajsonLookupIPv4(
802  Dataset *set, const uint8_t *data, const uint32_t data_len)
803 {
804  DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
805 
806  if (set == NULL)
807  return rrep;
808 
809  if (data_len != SC_IPV4_LEN)
810  return rrep;
811 
812  IPv4Type lookup = { .json.value = NULL, .json.len = 0 };
813  memcpy(lookup.ipv4, data, data_len);
814  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
815  if (rdata) {
816  IPv4Type *found = rdata->data;
817  rrep.found = true;
818  rrep.json = found->json;
819  rrep.hashdata = rdata;
820  return rrep;
821  }
822  return rrep;
823 }
824 
825 static DataJsonResultType DatajsonLookupIPv6(
826  Dataset *set, const uint8_t *data, const uint32_t data_len)
827 {
828  DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
829 
830  if (set == NULL)
831  return rrep;
832 
833  /* We can have IPv4 or IPV6 here due to ip.src and ip.dst implementation */
834  if (data_len != SC_IPV6_LEN && data_len != SC_IPV4_LEN)
835  return rrep;
836 
837  IPv6Type lookup = { .json.value = NULL, .json.len = 0 };
838  memcpy(lookup.ipv6, data, data_len);
839  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
840  if (rdata) {
841  IPv6Type *found = rdata->data;
842  rrep.found = true;
843  rrep.json = found->json;
844  rrep.hashdata = rdata;
845  return rrep;
846  }
847  return rrep;
848 }
849 
850 DataJsonResultType DatajsonLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
851 {
852  DataJsonResultType rrep = { .found = false, .json = { .value = 0 } };
853  if (set == NULL)
854  return rrep;
855 
856  switch (set->type) {
857  case DATASET_TYPE_STRING:
858  return DatajsonLookupString(set, data, data_len);
859  case DATASET_TYPE_MD5:
860  return DatajsonLookupMd5(set, data, data_len);
861  case DATASET_TYPE_SHA256:
862  return DatajsonLookupSha256(set, data, data_len);
863  case DATASET_TYPE_IPV4:
864  return DatajsonLookupIPv4(set, data, data_len);
865  case DATASET_TYPE_IPV6:
866  return DatajsonLookupIPv6(set, data, data_len);
867  default:
868  break;
869  }
870  return rrep;
871 }
872 
873 /** \brief add serialized data to json set
874  * \retval int 1 added
875  * \retval int 0 already in hash
876  * \retval int -1 API error (not added)
877  * \retval int -2 DATA error
878  */
879 int DatajsonAddSerialized(Dataset *set, const char *value, const char *json)
880 {
881  if (set == NULL)
882  return -1;
883 
884  if (strlen(value) == 0)
885  return -1;
886 
887  DataJsonType jvalue = { .value = NULL, .len = 0 };
888  if (json) {
889  if (ParseJsonLine(json, strlen(json), &jvalue) < 0) {
890  SCLogNotice("bad json value for dataset %s/%s", set->name, set->load);
891  return -1;
892  }
893  }
894 
895  int ret = -1;
896  switch (set->type) {
897  case DATASET_TYPE_STRING: {
898  uint32_t decoded_size = SCBase64DecodeBufferSize((uint32_t)strlen(value));
899  uint8_t decoded[decoded_size];
900  uint32_t num_decoded = SCBase64Decode(
901  (const uint8_t *)value, strlen(value), SCBase64ModeStrict, decoded);
902  if (num_decoded == 0)
903  goto operror;
904  ret = DatajsonAdd(set, decoded, num_decoded, &jvalue);
905  break;
906  }
907  case DATASET_TYPE_MD5: {
908  if (strlen(value) != SC_MD5_HEX_LEN)
909  goto operror;
910  uint8_t hash[SC_MD5_LEN];
911  if (HexToRaw((const uint8_t *)value, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0)
912  goto operror;
913  ret = DatajsonAdd(set, hash, SC_MD5_LEN, &jvalue);
914  break;
915  }
916  case DATASET_TYPE_SHA256: {
917  if (strlen(value) != SC_SHA256_HEX_LEN)
918  goto operror;
919  uint8_t hash[SC_SHA256_LEN];
920  if (HexToRaw((const uint8_t *)value, SC_SHA256_HEX_LEN, hash, sizeof(hash)) < 0)
921  goto operror;
922  ret = DatajsonAdd(set, hash, SC_SHA256_LEN, &jvalue);
923  break;
924  }
925  case DATASET_TYPE_IPV4: {
926  struct in_addr in;
927  if (inet_pton(AF_INET, value, &in) != 1)
928  goto operror;
929  ret = DatajsonAdd(set, (uint8_t *)&in.s_addr, SC_IPV4_LEN, &jvalue);
930  break;
931  }
932  case DATASET_TYPE_IPV6: {
933  struct in6_addr in6;
934  if (DatasetParseIpv6String(set, value, &in6) != 0) {
935  SCLogError("Dataset failed to import %s as IPv6", value);
936  goto operror;
937  }
938  ret = DatajsonAdd(set, (uint8_t *)&in6.s6_addr, SC_IPV6_LEN, &jvalue);
939  break;
940  }
941  }
942  SCFree(jvalue.value);
943  return ret;
944 operror:
945  SCFree(jvalue.value);
946  return -2;
947 }
DatajsonLookup
DataJsonResultType DatajsonLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets-context-json.c:850
Sha256StrJsonSet
int Sha256StrJsonSet(void *dst, void *src)
Definition: datasets-sha256.c:41
DataJsonType::len
uint16_t len
Definition: datasets-context-json.h:34
util-byte.h
len
uint8_t len
Definition: app-layer-dnp3.h:2
datasets-string.h
THashDataGetResult::data
THashData * data
Definition: util-thash.h:192
datasets-md5.h
IPv4JsonSet
int IPv4JsonSet(void *dst, void *src)
Definition: datasets-ipv4.c:41
Dataset::name
char name[DATASET_NAME_MAX_LEN+1]
Definition: datasets.h:56
Dataset::save
char save[PATH_MAX]
Definition: datasets.h:65
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:279
datasets-sha256.h
IPv6Compare
bool IPv6Compare(void *a, void *b)
Definition: datasets-ipv6.c:56
name
const char * name
Definition: detect-engine-proto.c:48
StringJsonFree
void StringJsonFree(void *s)
Definition: datasets-string.c:125
HexToRaw
int HexToRaw(const uint8_t *in, size_t ins, uint8_t *out, size_t outs)
Definition: util-byte.c:771
SC_SHA256_LEN
#define SC_SHA256_LEN
Definition: util-file.h:97
Md5Type
Definition: datasets-md5.h:30
Dataset::hash
THashTableContext * hash
Definition: datasets.h:62
IPv4JsonFree
void IPv4JsonFree(void *s)
Definition: datasets-ipv4.c:74
DatasetFormats
DatasetFormats
Definition: datasets.h:39
IPv4Hash
uint32_t IPv4Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv4.c:63
Sha256Type::sha256
uint8_t sha256[32]
Definition: datasets-sha256.h:31
Dataset::type
enum DatasetTypes type
Definition: datasets.h:57
THashConsolidateMemcap
void THashConsolidateMemcap(THashTableContext *ctx)
Definition: util-thash.c:345
SC_IPV6_LEN
#define SC_IPV6_LEN
Definition: util-ip.h:29
rust.h
DATASET_TYPE_SHA256
@ DATASET_TYPE_SHA256
Definition: datasets.h:49
StringJsonGetLength
uint32_t StringJsonGetLength(void *s)
Definition: datasets-string.c:134
Md5Type::md5
uint8_t md5[16]
Definition: datasets-md5.h:31
DataJsonResultType::found
bool found
Definition: datasets-context-json.h:38
DATASET_TYPE_IPV6
@ DATASET_TYPE_IPV6
Definition: datasets.h:51
Md5StrCompare
bool Md5StrCompare(void *a, void *b)
Definition: datasets-md5.c:57
DatajsonUnlockElt
void DatajsonUnlockElt(DataJsonResultType *r)
Definition: datasets-context-json.c:47
strlcpy
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: util-strlcpyu.c:43
DataJsonResultType::hashdata
THashData * hashdata
Definition: datasets-context-json.h:40
DataJsonResultType
Definition: datasets-context-json.h:37
datasets.h
IPv4JsonGetLength
uint32_t IPv4JsonGetLength(void *s)
Definition: datasets-ipv4.c:82
util-debug.h
DatasetGetOrCreate
int DatasetGetOrCreate(const char *name, enum DatasetTypes type, const char *save, const char *load, uint64_t *memcap, uint32_t *hashsize, Dataset **ret_set)
Definition: datasets.c:369
datasets-ipv6.h
IPv6Type::ipv6
uint8_t ipv6[16]
Definition: datasets-ipv6.h:31
SIG_JSON_CONTENT_KEY_LEN
#define SIG_JSON_CONTENT_KEY_LEN
Definition: detect.h:1234
util-ip.h
DataJsonResultType::json
DataJsonType json
Definition: datasets-context-json.h:39
datasets-ipv4.h
IPv6JsonGetLength
uint32_t IPv6JsonGetLength(void *s)
Definition: datasets-ipv6.c:83
Md5StrHash
uint32_t Md5StrHash(uint32_t hash_seed, void *s)
Definition: datasets-md5.c:65
THashDataGetResult
Definition: util-thash.h:191
StringType
Definition: datasets-string.h:30
Md5Type::json
DataJsonType json
Definition: datasets-md5.h:34
Sha256Type::json
DataJsonType json
Definition: datasets-sha256.h:34
type
uint16_t type
Definition: decode-vlan.c:106
DatasetLock
void DatasetLock(void)
Definition: datasets.c:102
IPv6Type
Definition: datasets-ipv6.h:30
IPv4Type::json
DataJsonType json
Definition: datasets-ipv4.h:34
DATASET_TYPE_IPV4
@ DATASET_TYPE_IPV4
Definition: datasets.h:50
StringType::ptr
uint8_t * ptr
Definition: datasets-string.h:36
IPv6JsonFree
void IPv6JsonFree(void *s)
Definition: datasets-ipv6.c:75
StringType::json
DataJsonType json
Definition: datasets-string.h:34
THashShutdown
void THashShutdown(THashTableContext *ctx)
shutdown the flow engine
Definition: util-thash.c:354
DatasetTypes
DatasetTypes
Definition: datasets.h:45
Sha256StrJsonGetLength
uint32_t Sha256StrJsonGetLength(void *s)
Definition: datasets-sha256.c:83
THashData_::data
void * data
Definition: util-thash.h:92
cnt
uint32_t cnt
Definition: tmqh-packetpool.h:7
Sha256Type
Definition: datasets-sha256.h:30
THashData_
Definition: util-thash.h:85
Dataset::remove_key
bool remove_key
Definition: datasets.h:61
Sha256StrJsonFree
void Sha256StrJsonFree(void *s)
Definition: datasets-sha256.c:75
DATASET_FORMAT_NDJSON
@ DATASET_FORMAT_NDJSON
Definition: datasets.h:42
DATASET_FORMAT_JSON
@ DATASET_FORMAT_JSON
Definition: datasets.h:41
suricata-common.h
SCStrndup
#define SCStrndup(s, n)
Definition: util-mem.h:59
datasets-context-json.h
FatalErrorOnInit
#define FatalErrorOnInit(...)
Fatal error IF we're starting up, and configured to consider errors to be fatal errors.
Definition: util-debug.h:523
StringCompare
bool StringCompare(void *a, void *b)
Definition: datasets-string.c:95
THashGetFromHash
struct THashDataGetResult THashGetFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:618
DataJsonType
Definition: datasets-context-json.h:32
hashsize
#define hashsize(n)
Definition: util-hash-lookup3.h:40
THashLookupFromHash
THashData * THashLookupFromHash(THashTableContext *ctx, void *data)
look up data in the hash
Definition: util-thash.c:728
IPv4Type
Definition: datasets-ipv4.h:30
THashDecrUsecnt
#define THashDecrUsecnt(h)
Definition: util-thash.h:170
IPv4Compare
bool IPv4Compare(void *a, void *b)
Definition: datasets-ipv4.c:55
DatajsonCopyJson
int DatajsonCopyJson(DataJsonType *dst, DataJsonType *src)
Definition: datasets-context-json.c:54
SCMalloc
#define SCMalloc(sz)
Definition: util-mem.h:47
SCLogConfig
struct SCLogConfig_ SCLogConfig
Holds the config state used by the logging api.
IPv6Hash
uint32_t IPv6Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv6.c:64
SCLogError
#define SCLogError(...)
Macro used to log ERROR messages.
Definition: util-debug.h:271
SCFree
#define SCFree(p)
Definition: util-mem.h:61
Sha256StrCompare
bool Sha256StrCompare(void *a, void *b)
Definition: datasets-sha256.c:55
DatajsonGet
Dataset * DatajsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap, uint32_t hashsize, char *json_key_value, char *json_array_key, DatasetFormats format, bool remove_key)
Definition: datasets-context-json.c:635
StringHash
uint32_t StringHash(uint32_t hash_seed, void *s)
Definition: datasets-string.c:106
src
uint16_t src
Definition: app-layer-dnp3.h:5
DataJsonType::value
char * value
Definition: datasets-context-json.h:33
IPv6JsonSet
int IPv6JsonSet(void *dst, void *src)
Definition: datasets-ipv6.c:42
DATASET_TYPE_MD5
@ DATASET_TYPE_MD5
Definition: datasets.h:48
DATASET_TYPE_STRING
@ DATASET_TYPE_STRING
Definition: datasets.h:47
DatasetUnlock
void DatasetUnlock(void)
Definition: datasets.c:107
THashDataGetResult::is_new
bool is_new
Definition: util-thash.h:193
suricata.h
THashInit
THashTableContext * THashInit(const char *cnf_prefix, uint32_t data_size, int(*DataSet)(void *, void *), void(*DataFree)(void *), uint32_t(*DataHash)(uint32_t, void *), bool(*DataCompare)(void *, void *), bool(*DataExpired)(void *, SCTime_t), uint32_t(*DataSize)(void *), bool reset_memcap, uint64_t memcap, uint32_t hashsize)
Definition: util-thash.c:302
StringJsonSet
int StringJsonSet(void *dst, void *src)
Definition: datasets-string.c:79
IPv4Type::ipv4
uint8_t ipv4[4]
Definition: datasets-ipv4.h:31
Dataset
Definition: datasets.h:55
SC_MD5_LEN
#define SC_MD5_LEN
Definition: util-file.h:103
IPv6Type::json
DataJsonType json
Definition: datasets-ipv6.h:34
dst
uint16_t dst
Definition: app-layer-dnp3.h:4
SCLogNotice
#define SCLogNotice(...)
Macro used to log NOTICE messages.
Definition: util-debug.h:247
Md5StrJsonFree
void Md5StrJsonFree(void *s)
Definition: datasets-md5.c:76
Dataset::load
char load[PATH_MAX]
Definition: datasets.h:64
Sha256StrHash
uint32_t Sha256StrHash(uint32_t hash_seed, void *s)
Definition: datasets-sha256.c:63
SC_IPV4_LEN
#define SC_IPV4_LEN
Definition: util-ip.h:28
Md5StrJsonGetLength
uint32_t Md5StrJsonGetLength(void *s)
Definition: datasets-md5.c:84
DatajsonAddSerialized
int DatajsonAddSerialized(Dataset *set, const char *value, const char *json)
add serialized data to json set
Definition: datasets-context-json.c:879
DatasetAppendSet
int DatasetAppendSet(Dataset *set)
Definition: datasets.c:79
DEBUG_VALIDATE_BUG_ON
#define DEBUG_VALIDATE_BUG_ON(exp)
Definition: util-validate.h:102
Md5StrJsonSet
int Md5StrJsonSet(void *dst, void *src)
Definition: datasets-md5.c:43
DatasetParseIpv6String
int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6)
Definition: datasets.c:156
DATAJSON_JSON_LENGTH
#define DATAJSON_JSON_LENGTH
Definition: datasets-context-json.h:30