suricata
datasets-context-json.c
Go to the documentation of this file.
1 /* Copyright (C) 2025 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Eric Leblond <el@stamus-networks.com>
22  */
23 
24 #include "suricata-common.h"
25 #include "suricata.h"
26 #include "rust.h"
27 #include "datasets.h"
28 #include "datasets-context-json.h"
29 #include "datasets-ipv4.h"
30 #include "datasets-ipv6.h"
31 #include "datasets-md5.h"
32 #include "datasets-sha256.h"
33 #include "datasets-string.h"
34 #include "util-byte.h"
35 #include "util-ip.h"
36 #include "util-debug.h"
37 
38 static int DatajsonAdd(
39  Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json);
40 
41 static inline void DatajsonUnlockData(THashData *d)
42 {
43  (void)THashDecrUsecnt(d);
44  THashDataUnlock(d);
45 }
46 
48 {
49  if (r->hashdata) {
50  DatajsonUnlockData(r->hashdata);
51  }
52 }
53 
55 {
56  dst->len = src->len;
57  dst->value = SCMalloc(dst->len + 1);
58  if (dst->value == NULL)
59  return -1;
60  memcpy(dst->value, src->value, dst->len);
61  dst->value[dst->len] = '\0'; // Ensure null-termination
62  return 0;
63 }
64 
/**
 * \brief Check whether a string is an integer or floating point number.
 *
 * The value is parsed with strtof(). The input is accepted only if at least
 * one character was converted and everything between the end of the number
 * and `in + ins` is whitespace, so inputs such as "1abc" are rejected while
 * a trailing newline is tolerated.
 *
 * \param in NUL-terminated string to check
 * \param ins length of \p in (the caller in this file passes strlen(in))
 *
 * \retval true the string holds a number
 * \retval false empty input, nothing parsed, or trailing garbage
 */
static bool IsFloat(const char *in, size_t ins)
{
    if (in == NULL || ins == 0) {
        return false;
    }

    char *endptr = NULL;
    (void)strtof(in, &endptr);
    if (endptr == in) {
        /* no conversion was performed */
        return false;
    }

    /* anything left after the number must be whitespace only */
    const char *limit = in + ins;
    for (const char *p = endptr; p < limit; p++) {
        if (!isspace((unsigned char)*p)) {
            return false;
        }
    }
    return true;
}
80 
81 static int ParseJsonLine(const char *in, size_t ins, DataJsonType *rep_out)
82 {
83  if (ins > DATAJSON_JSON_LENGTH) {
84  SCLogError("dataset: json string too long: %s", in);
85  return -1;
86  }
87 
88  json_error_t jerror;
89  json_t *msg = json_loads(in, 0, &jerror);
90  if (msg == NULL) {
91  /* JANSSON does not see an integer, float or a string as valid JSON.
92  So we need to exclude them from failure. */
93  if (!IsFloat(in, ins) && !((in[0] == '"') && (in[ins - 1] == '"'))) {
94  SCLogError("dataset: Invalid json: %s: '%s'", jerror.text, in);
95  return -1;
96  }
97  } else {
98  json_decref(msg);
99  }
100  rep_out->len = (uint16_t)ins;
101  rep_out->value = SCStrndup(in, ins);
102  if (rep_out->value == NULL) {
103  return -1;
104  }
105  return 0;
106 }
107 
108 static json_t *GetSubObjectByKey(json_t *json, const char *key)
109 {
110  if (!json || !key || !json_is_object(json)) {
111  return NULL;
112  }
113 
114  const char *current_key = key;
115  json_t *current = json;
116  while (current_key) {
117  const char *dot = strchr(current_key, '.');
118 
119  size_t key_len = dot ? (size_t)(dot - current_key) : strlen(current_key);
120  char key_buffer[key_len + 1];
121  strlcpy(key_buffer, current_key, key_len + 1);
122 
123  if (json_is_object(current) == false) {
124  return NULL;
125  }
126  current = json_object_get(current, key_buffer);
127  if (current == NULL) {
128  return NULL;
129  }
130  current_key = dot ? dot + 1 : NULL;
131  }
132  return current;
133 }
134 
135 static int ParseJsonFile(const char *file, json_t **array, char *key)
136 {
137  json_t *json;
138  json_error_t error;
139  /* assume we have one single JSON element in FILE */
140  json = json_load_file(file, 0, &error);
141  if (json == NULL) {
142  FatalErrorOnInit("can't load JSON, error on line %d: %s", error.line, error.text);
143  return -1;
144  }
145 
146  if (key == NULL || strlen(key) == 0) {
147  *array = json;
148  } else {
149  *array = GetSubObjectByKey(json, key);
150  if (*array == NULL) {
151  SCLogError("dataset: %s failed to get key '%s'", file, key);
152  json_decref(json);
153  return -1;
154  }
155  json_incref(*array);
156  json_decref(json);
157  }
158  if (!json_is_array(*array)) {
159  FatalErrorOnInit("not an array");
160  json_decref(*array);
161  return -1;
162  }
163  return 0;
164 }
165 
166 static int DatajsonSetValue(
167  Dataset *set, const uint8_t *val, uint16_t val_len, json_t *value, const char *json_key)
168 {
169  DataJsonType elt = { .value = NULL, .len = 0 };
170  if (set->remove_key) {
171  json_object_del(value, json_key);
172  }
173 
174  elt.value = json_dumps(value, JSON_COMPACT);
175  if (elt.value == NULL) {
176  FatalErrorOnInit("json_dumps failed for %s/%s", set->name, set->load);
177  return 0;
178  }
179  if (strlen(elt.value) > DATAJSON_JSON_LENGTH) {
180  SCLogError("dataset: json string too long: %s/%s", set->name, set->load);
181  SCFree(elt.value);
182  elt.value = NULL;
183  return 0;
184  }
185  elt.len = (uint16_t)strlen(elt.value);
186 
187  int add_ret = DatajsonAdd(set, val, val_len, &elt);
188  if (add_ret < 0) {
189  FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
190  return 0;
191  }
192  return add_ret;
193 }
194 
195 /**
196  * \retval 1 data was added to the hash
197  * \retval 0 data was not added to the hash as it is already there
198  * \retval -1 failed to add data to the hash
199  */
200 static int DatajsonAddString(
201  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
202 {
203  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .json = *json };
204  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
205  if (res.data) {
206  DatajsonUnlockData(res.data);
207  return res.is_new ? 1 : 0;
208  }
209  return -1;
210 }
211 
212 static int DatajsonAddMd5(
213  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
214 {
215  if (data_len != SC_MD5_LEN)
216  return -2;
217 
218  Md5Type lookup = { .json = *json };
219  memcpy(lookup.md5, data, SC_MD5_LEN);
220  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
221  if (res.data) {
222  DatajsonUnlockData(res.data);
223  return res.is_new ? 1 : 0;
224  }
225  return -1;
226 }
227 
228 static int DatajsonAddSha256(
229  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
230 {
231  if (data_len != SC_SHA256_LEN)
232  return -2;
233 
234  Sha256Type lookup = { .json = *json };
235  memcpy(lookup.sha256, data, SC_SHA256_LEN);
236  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
237  if (res.data) {
238  DatajsonUnlockData(res.data);
239  return res.is_new ? 1 : 0;
240  }
241  return -1;
242 }
243 
244 static int DatajsonAddIPv4(
245  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
246 {
247  if (data_len < SC_IPV4_LEN)
248  return -2;
249 
250  IPv4Type lookup = { .json = *json };
251  memcpy(lookup.ipv4, data, SC_IPV4_LEN);
252  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
253  if (res.data) {
254  DatajsonUnlockData(res.data);
255  return res.is_new ? 1 : 0;
256  }
257  return -1;
258 }
259 
260 static int DatajsonAddIPv6(
261  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
262 {
263  if (data_len != SC_IPV6_LEN)
264  return -2;
265 
266  IPv6Type lookup = { .json = *json };
267  memcpy(lookup.ipv6, data, SC_IPV6_LEN);
268  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
269  if (res.data) {
270  DatajsonUnlockData(res.data);
271  return res.is_new ? 1 : 0;
272  }
273  return -1;
274 }
275 
276 /*
277  * \brief Add data to the dataset from a JSON object.
278  *
279  * \param set The dataset to add data to.
280  * \param data The data to add.
281  * \param data_len The length of the data.
282  * \param json The JSON object containing additional information.
283  *
284  * Memory allocated for the `json` parameter will be freed if the data
285  * is not added to the hash.
286  *
287  * \retval 1 Data was added to the hash.
288  * \retval 0 Data was not added to the hash as it is already there.
289  * \retval -1 Failed to add data to the hash.
290  */
291 static int DatajsonAdd(
292  Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json)
293 {
294  if (json == NULL)
295  return -1;
296  if (json->value == NULL)
297  return -1;
298 
299  if (set == NULL) {
300  if (json->value != NULL) {
301  SCFree(json->value);
302  json->value = NULL;
303  }
304  return -1;
305  }
306 
307  int add_ret = 0;
308  switch (set->type) {
309  case DATASET_TYPE_STRING:
310  add_ret = DatajsonAddString(set, data, data_len, json);
311  break;
312  case DATASET_TYPE_MD5:
313  add_ret = DatajsonAddMd5(set, data, data_len, json);
314  break;
315  case DATASET_TYPE_SHA256:
316  add_ret = DatajsonAddSha256(set, data, data_len, json);
317  break;
318  case DATASET_TYPE_IPV4:
319  add_ret = DatajsonAddIPv4(set, data, data_len, json);
320  break;
321  case DATASET_TYPE_IPV6:
322  add_ret = DatajsonAddIPv6(set, data, data_len, json);
323  break;
324  default:
325  add_ret = -1;
326  break;
327  }
328 
329  SCFree(json->value);
330  json->value = NULL;
331 
332  return add_ret;
333 }
334 
335 static int DatajsonLoadTypeFromJSON(Dataset *set, char *json_key, char *array_key,
336  uint32_t (*DatajsonAddTypeElement)(Dataset *, json_t *, char *, bool *))
337 {
338  if (strlen(set->load) == 0)
339  return 0;
340 
341  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
342 
343  uint32_t cnt = 0;
344  json_t *json;
345  bool found = false;
346  SCLogDebug("dataset: array_key '%s' %p", array_key, array_key);
347  if (ParseJsonFile(set->load, &json, array_key) == -1) {
348  SCLogError("dataset: %s failed to parse from '%s'", set->name, set->load);
349  return -1;
350  }
351 
352  size_t index;
353  json_t *value;
354  json_array_foreach (json, index, value) {
355  cnt += DatajsonAddTypeElement(set, value, json_key, &found);
356  }
357  json_decref(json);
358 
359  if (found == false) {
361  "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
362  return -1;
363  }
365 
366  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
367  return 0;
368 }
369 
370 static uint32_t DatajsonLoadTypeFromJsonline(Dataset *set, char *json_key,
371  uint32_t (*DatajsonAddTypeElement)(Dataset *, json_t *, char *, bool *))
372 {
373  uint32_t cnt = 0;
374  FILE *fp = fopen(set->load, "r");
375  bool found = false;
376 
377  if (fp == NULL) {
378  SCLogError("dataset: %s failed to open file '%s'", set->name, set->load);
379  return 0;
380  }
381 
382  char line[DATAJSON_JSON_LENGTH];
383  while (fgets(line, sizeof(line), fp) != NULL) {
384  json_t *json = json_loads(line, 0, NULL);
385  if (json == NULL) {
386  SCLogError("dataset: %s failed to parse line '%s'", set->name, line);
387  goto out_err;
388  }
389  cnt += DatajsonAddTypeElement(set, json, json_key, &found);
390  json_decref(json);
391  }
392  int close_op = fclose(fp);
393  if (close_op != 0) {
394  SCLogError("dataset: %s failed to close file '%s'", set->name, set->load);
395  return 0;
396  }
397 
398  if (found == false) {
400  "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
401  return 0;
402  }
403  return cnt;
404 out_err:
405  close_op = fclose(fp);
406  if (close_op != 0) {
407  SCLogError("dataset: %s failed to close file '%s'", set->name, set->load);
408  }
409  return 0;
410 }
411 
412 static uint32_t DatajsonAddStringElement(Dataset *set, json_t *value, char *json_key, bool *found)
413 {
414  json_t *key = GetSubObjectByKey(value, json_key);
415  if (key == NULL) {
416  /* ignore error as it can be a working mode where some entries
417  are not in the same format */
418  return 0;
419  }
420 
421  *found = true;
422 
423  const char *val_key = json_string_value(key);
424  if (val_key == NULL) {
425  FatalErrorOnInit("dataset: %s failed to get value for key '%s'", set->name, json_key);
426  return 0;
427  }
428  size_t val_len = strlen(val_key);
429 
430  json_incref(key);
431  int ret = DatajsonSetValue(set, (const uint8_t *)val_key, (uint16_t)val_len, value, json_key);
432  json_decref(key);
433  if (ret < 0) {
434  FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
435  return 0;
436  }
437  return ret;
438 }
439 
440 static int DatajsonLoadString(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
441 {
442  if (strlen(set->load) == 0)
443  return 0;
444 
445  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
446 
447  uint32_t cnt = 0;
448  if (format == DATASET_FORMAT_JSON) {
449  cnt = DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddStringElement);
450  } else if (format == DATASET_FORMAT_NDJSON) {
451  cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddStringElement);
452  }
454 
455  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
456  return 0;
457 }
458 
459 static uint32_t DatajsonAddMd5Element(Dataset *set, json_t *value, char *json_key, bool *found)
460 {
461  json_t *key = GetSubObjectByKey(value, json_key);
462  if (key == NULL) {
463  /* ignore error as it can be a working mode where some entries
464  are not in the same format */
465  return 0;
466  }
467 
468  *found = true;
469 
470  const char *hash_string = json_string_value(key);
471  if (strlen(hash_string) != SC_MD5_HEX_LEN) {
472  FatalErrorOnInit("Not correct length for a hash");
473  return 0;
474  }
475 
476  uint8_t hash[SC_MD5_LEN];
477  if (HexToRaw((const uint8_t *)hash_string, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0) {
478  FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
479  return 0;
480  }
481  return DatajsonSetValue(set, hash, SC_MD5_LEN, value, json_key);
482 }
483 
484 static int DatajsonLoadMd5(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
485 {
486  if (strlen(set->load) == 0)
487  return 0;
488 
489  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
490 
491  uint32_t cnt = 0;
492  if (format == DATASET_FORMAT_JSON) {
493  cnt = DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddMd5Element);
494  } else if (format == DATASET_FORMAT_NDJSON) {
495  cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddMd5Element);
496  }
498 
499  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
500  return 0;
501 }
502 
503 static uint32_t DatajsonAddSha256Element(Dataset *set, json_t *value, char *json_key, bool *found)
504 {
505  json_t *key = GetSubObjectByKey(value, json_key);
506  if (key == NULL) {
507  /* ignore error as it can be a working mode where some entries
508  are not in the same format */
509  return 0;
510  }
511 
512  *found = true;
513 
514  const char *hash_string = json_string_value(key);
515  if (strlen(hash_string) != SC_SHA256_HEX_LEN) {
516  FatalErrorOnInit("Not correct length for a hash");
517  return 0;
518  }
519 
520  uint8_t hash[SC_SHA256_LEN];
521  if (HexToRaw((const uint8_t *)hash_string, SC_SHA256_HEX_LEN, hash, sizeof(hash)) < 0) {
522  FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
523  return 0;
524  }
525 
526  return DatajsonSetValue(set, hash, SC_SHA256_LEN, value, json_key);
527 }
528 
529 static int DatajsonLoadSha256(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
530 {
531  if (strlen(set->load) == 0)
532  return 0;
533 
534  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
535 
536  uint32_t cnt = 0;
537  if (format == DATASET_FORMAT_JSON) {
538  cnt = DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddSha256Element);
539  } else if (format == DATASET_FORMAT_NDJSON) {
540  cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddSha256Element);
541  }
543 
544  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
545  return 0;
546 }
547 
548 static uint32_t DatajsonAddIpv4Element(Dataset *set, json_t *value, char *json_key, bool *found)
549 {
550  json_t *key = GetSubObjectByKey(value, json_key);
551  if (key == NULL) {
552  /* ignore error as it can be a working mode where some entries
553  are not in the same format */
554  return 0;
555  }
556 
557  *found = true;
558 
559  const char *ip_string = json_string_value(key);
560  struct in_addr in;
561  if (inet_pton(AF_INET, ip_string, &in) != 1) {
562  FatalErrorOnInit("datajson IPv4 parse failed %s/%s: %s", set->name, set->load, ip_string);
563  return 0;
564  }
565 
566  return DatajsonSetValue(set, (const uint8_t *)&in.s_addr, SC_IPV4_LEN, value, json_key);
567 }
568 
569 static int DatajsonLoadIPv4(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
570 {
571  if (strlen(set->load) == 0)
572  return 0;
573 
574  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
575  uint32_t cnt = 0;
576 
577  if (format == DATASET_FORMAT_JSON) {
578  cnt = DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddIpv4Element);
579  } else if (format == DATASET_FORMAT_NDJSON) {
580  cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddIpv4Element);
581  }
583 
584  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
585  return 0;
586 }
587 
588 static uint32_t DatajsonAddIPv6Element(Dataset *set, json_t *value, char *json_key, bool *found)
589 {
590  json_t *key = GetSubObjectByKey(value, json_key);
591  if (key == NULL) {
592  /* ignore error as it can be a working mode where some entries
593  are not in the same format */
594  return 0;
595  }
596 
597  *found = true;
598 
599  const char *ip_string = json_string_value(key);
600  struct in6_addr in6;
601  int ret = DatasetParseIpv6String(set, ip_string, &in6);
602  if (ret < 0) {
603  FatalErrorOnInit("unable to parse IP address");
604  return 0;
605  }
606 
607  return DatajsonSetValue(set, (const uint8_t *)&in6.s6_addr, SC_IPV6_LEN, value, json_key);
608 }
609 
610 static int DatajsonLoadIPv6(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
611 {
612  if (strlen(set->load) == 0)
613  return 0;
614 
615  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
616 
617  uint32_t cnt = 0;
618 
619  if (format == DATASET_FORMAT_JSON) {
620  cnt = DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddIPv6Element);
621  } else if (format == DATASET_FORMAT_NDJSON) {
622  cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddIPv6Element);
623  }
624 
626 
627  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
628  return 0;
629 }
630 
631 Dataset *DatajsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap,
632  uint32_t hashsize, char *json_key_value, char *json_array_key, DatasetFormats format,
633  bool remove_key)
634 {
635  Dataset *set = NULL;
636 
637  DatasetLock();
638  int ret = DatasetGetOrCreate(name, type, NULL, load, &memcap, &hashsize, &set);
639  if (ret < 0) {
640  SCLogError("dataset with JSON %s creation failed", name);
641  DatasetUnlock();
642  return NULL;
643  }
644  if (ret == 1) {
645  SCLogDebug("dataset %s already exists", name);
646  if (set->remove_key != remove_key) {
647  SCLogError("dataset %s remove_key mismatch: %d != %d", set->name, set->remove_key,
648  remove_key);
649  DatasetUnlock();
650  return NULL;
651  }
652  DatasetUnlock();
653  return set;
654  }
655 
656  set->remove_key = remove_key;
657 
658  char cnf_name[128];
659  snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
660  switch (type) {
661  case DATASET_TYPE_MD5:
662  set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrJsonSet, Md5StrJsonFree,
663  Md5StrHash, Md5StrCompare, NULL, Md5StrJsonGetLength, load != NULL ? 1 : 0,
664  memcap, hashsize);
665  if (set->hash == NULL)
666  goto out_err;
667  if (DatajsonLoadMd5(set, json_key_value, json_array_key, format) < 0)
668  goto out_err;
669  break;
670  case DATASET_TYPE_STRING:
671  set->hash = THashInit(cnf_name, sizeof(StringType), StringJsonSet, StringJsonFree,
672  StringHash, StringCompare, NULL, StringJsonGetLength, load != NULL ? 1 : 0,
673  memcap, hashsize);
674  if (set->hash == NULL)
675  goto out_err;
676  if (DatajsonLoadString(set, json_key_value, json_array_key, format) < 0) {
677  SCLogError("dataset %s loading failed", name);
678  goto out_err;
679  }
680  break;
681  case DATASET_TYPE_SHA256:
682  set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrJsonSet, Sha256StrJsonFree,
684  load != NULL ? 1 : 0, memcap, hashsize);
685  if (set->hash == NULL)
686  goto out_err;
687  if (DatajsonLoadSha256(set, json_key_value, json_array_key, format) < 0)
688  goto out_err;
689  break;
690  case DATASET_TYPE_IPV4:
691  set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4JsonSet, IPv4JsonFree, IPv4Hash,
692  IPv4Compare, NULL, IPv4JsonGetLength, load != NULL ? 1 : 0, memcap, hashsize);
693  if (set->hash == NULL)
694  goto out_err;
695  if (DatajsonLoadIPv4(set, json_key_value, json_array_key, format) < 0)
696  goto out_err;
697  break;
698  case DATASET_TYPE_IPV6:
699  set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6JsonSet, IPv6JsonFree, IPv6Hash,
700  IPv6Compare, NULL, IPv6JsonGetLength, load != NULL ? 1 : 0, memcap, hashsize);
701  if (set->hash == NULL)
702  goto out_err;
703  if (DatajsonLoadIPv6(set, json_key_value, json_array_key, format) < 0)
704  goto out_err;
705  break;
706  }
707 
708  SCLogDebug(
709  "set %p/%s type %u save %s load %s", set, set->name, set->type, set->save, set->load);
710 
711  if (DatasetAppendSet(set) < 0) {
712  SCLogError("dataset %s append failed", name);
713  goto out_err;
714  }
715 
716  DatasetUnlock();
717  return set;
718 out_err:
719  if (set->hash) {
720  THashShutdown(set->hash);
721  }
722  SCFree(set);
723  DatasetUnlock();
724  return NULL;
725 }
726 
727 static DataJsonResultType DatajsonLookupString(
728  Dataset *set, const uint8_t *data, const uint32_t data_len)
729 {
730  DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
731 
732  if (set == NULL)
733  return rrep;
734 
735  StringType lookup = {
736  .ptr = (uint8_t *)data, .len = data_len, .json.value = NULL, .json.len = 0
737  };
738  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
739  if (rdata) {
740  StringType *found = rdata->data;
741  rrep.found = true;
742  rrep.json = found->json;
743  rrep.hashdata = rdata;
744  return rrep;
745  }
746  return rrep;
747 }
748 
749 static DataJsonResultType DatajsonLookupMd5(
750  Dataset *set, const uint8_t *data, const uint32_t data_len)
751 {
752  DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
753 
754  if (set == NULL)
755  return rrep;
756 
757  if (data_len != SC_MD5_LEN)
758  return rrep;
759 
760  Md5Type lookup = { .json.value = NULL, .json.len = 0 };
761  memcpy(lookup.md5, data, data_len);
762  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
763  if (rdata) {
764  Md5Type *found = rdata->data;
765  rrep.found = true;
766  rrep.json = found->json;
767  rrep.hashdata = rdata;
768  return rrep;
769  }
770  return rrep;
771 }
772 
773 static DataJsonResultType DatajsonLookupSha256(
774  Dataset *set, const uint8_t *data, const uint32_t data_len)
775 {
776  DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
777 
778  if (set == NULL)
779  return rrep;
780 
781  if (data_len != SC_SHA256_LEN)
782  return rrep;
783 
784  Sha256Type lookup = { .json.value = NULL, .json.len = 0 };
785  memcpy(lookup.sha256, data, data_len);
786  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
787  if (rdata) {
788  Sha256Type *found = rdata->data;
789  rrep.found = true;
790  rrep.json = found->json;
791  rrep.hashdata = rdata;
792  return rrep;
793  }
794  return rrep;
795 }
796 
797 static DataJsonResultType DatajsonLookupIPv4(
798  Dataset *set, const uint8_t *data, const uint32_t data_len)
799 {
800  DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
801 
802  if (set == NULL)
803  return rrep;
804 
805  if (data_len != SC_IPV4_LEN)
806  return rrep;
807 
808  IPv4Type lookup = { .json.value = NULL, .json.len = 0 };
809  memcpy(lookup.ipv4, data, data_len);
810  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
811  if (rdata) {
812  IPv4Type *found = rdata->data;
813  rrep.found = true;
814  rrep.json = found->json;
815  rrep.hashdata = rdata;
816  return rrep;
817  }
818  return rrep;
819 }
820 
821 static DataJsonResultType DatajsonLookupIPv6(
822  Dataset *set, const uint8_t *data, const uint32_t data_len)
823 {
824  DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
825 
826  if (set == NULL)
827  return rrep;
828 
829  /* We can have IPv4 or IPV6 here due to ip.src and ip.dst implementation */
830  if (data_len != SC_IPV6_LEN && data_len != SC_IPV4_LEN)
831  return rrep;
832 
833  IPv6Type lookup = { .json.value = NULL, .json.len = 0 };
834  memcpy(lookup.ipv6, data, data_len);
835  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
836  if (rdata) {
837  IPv6Type *found = rdata->data;
838  rrep.found = true;
839  rrep.json = found->json;
840  rrep.hashdata = rdata;
841  return rrep;
842  }
843  return rrep;
844 }
845 
846 DataJsonResultType DatajsonLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
847 {
848  DataJsonResultType rrep = { .found = false, .json = { .value = 0 } };
849  if (set == NULL)
850  return rrep;
851 
852  switch (set->type) {
853  case DATASET_TYPE_STRING:
854  return DatajsonLookupString(set, data, data_len);
855  case DATASET_TYPE_MD5:
856  return DatajsonLookupMd5(set, data, data_len);
857  case DATASET_TYPE_SHA256:
858  return DatajsonLookupSha256(set, data, data_len);
859  case DATASET_TYPE_IPV4:
860  return DatajsonLookupIPv4(set, data, data_len);
861  case DATASET_TYPE_IPV6:
862  return DatajsonLookupIPv6(set, data, data_len);
863  default:
864  break;
865  }
866  return rrep;
867 }
868 
869 /** \brief add serialized data to json set
870  * \retval int 1 added
871  * \retval int 0 already in hash
872  * \retval int -1 API error (not added)
873  * \retval int -2 DATA error
874  */
875 int DatajsonAddSerialized(Dataset *set, const char *value, const char *json)
876 {
877  if (set == NULL)
878  return -1;
879 
880  if (strlen(value) == 0)
881  return -1;
882 
883  DataJsonType jvalue = { .value = NULL, .len = 0 };
884  if (json) {
885  if (ParseJsonLine(json, strlen(json), &jvalue) < 0) {
886  SCLogNotice("bad json value for dataset %s/%s", set->name, set->load);
887  return -1;
888  }
889  }
890 
891  int ret = -1;
892  switch (set->type) {
893  case DATASET_TYPE_STRING: {
894  uint32_t decoded_size = SCBase64DecodeBufferSize((uint32_t)strlen(value));
895  uint8_t decoded[decoded_size];
896  uint32_t num_decoded = SCBase64Decode(
897  (const uint8_t *)value, strlen(value), SCBase64ModeStrict, decoded);
898  if (num_decoded == 0)
899  goto operror;
900  ret = DatajsonAdd(set, decoded, num_decoded, &jvalue);
901  break;
902  }
903  case DATASET_TYPE_MD5: {
904  if (strlen(value) != SC_MD5_HEX_LEN)
905  goto operror;
906  uint8_t hash[SC_MD5_LEN];
907  if (HexToRaw((const uint8_t *)value, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0)
908  goto operror;
909  ret = DatajsonAdd(set, hash, SC_MD5_LEN, &jvalue);
910  break;
911  }
912  case DATASET_TYPE_SHA256: {
913  if (strlen(value) != SC_SHA256_HEX_LEN)
914  goto operror;
915  uint8_t hash[SC_SHA256_LEN];
916  if (HexToRaw((const uint8_t *)value, SC_SHA256_HEX_LEN, hash, sizeof(hash)) < 0)
917  goto operror;
918  ret = DatajsonAdd(set, hash, SC_SHA256_LEN, &jvalue);
919  break;
920  }
921  case DATASET_TYPE_IPV4: {
922  struct in_addr in;
923  if (inet_pton(AF_INET, value, &in) != 1)
924  goto operror;
925  ret = DatajsonAdd(set, (uint8_t *)&in.s_addr, SC_IPV4_LEN, &jvalue);
926  break;
927  }
928  case DATASET_TYPE_IPV6: {
929  struct in6_addr in6;
930  if (DatasetParseIpv6String(set, value, &in6) != 0) {
931  SCLogError("Dataset failed to import %s as IPv6", value);
932  goto operror;
933  }
934  ret = DatajsonAdd(set, (uint8_t *)&in6.s6_addr, SC_IPV6_LEN, &jvalue);
935  break;
936  }
937  }
938  SCFree(jvalue.value);
939  return ret;
940 operror:
941  SCFree(jvalue.value);
942  return -2;
943 }
DatajsonLookup
DataJsonResultType DatajsonLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets-context-json.c:846
Sha256StrJsonSet
int Sha256StrJsonSet(void *dst, void *src)
Definition: datasets-sha256.c:41
DataJsonType::len
uint16_t len
Definition: datasets-context-json.h:34
util-byte.h
len
uint8_t len
Definition: app-layer-dnp3.h:2
datasets-string.h
THashDataGetResult::data
THashData * data
Definition: util-thash.h:192
datasets-md5.h
IPv4JsonSet
int IPv4JsonSet(void *dst, void *src)
Definition: datasets-ipv4.c:41
Dataset::name
char name[DATASET_NAME_MAX_LEN+1]
Definition: datasets.h:48
Dataset::save
char save[PATH_MAX]
Definition: datasets.h:57
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:275
datasets-sha256.h
IPv6Compare
bool IPv6Compare(void *a, void *b)
Definition: datasets-ipv6.c:56
StringJsonFree
void StringJsonFree(void *s)
Definition: datasets-string.c:125
HexToRaw
int HexToRaw(const uint8_t *in, size_t ins, uint8_t *out, size_t outs)
Definition: util-byte.c:806
SC_SHA256_LEN
#define SC_SHA256_LEN
Definition: util-file.h:37
Md5Type
Definition: datasets-md5.h:30
Dataset::hash
THashTableContext * hash
Definition: datasets.h:54
IPv4JsonFree
void IPv4JsonFree(void *s)
Definition: datasets-ipv4.c:74
DatasetFormats
DatasetFormats
Definition: datasets.h:31
IPv4Hash
uint32_t IPv4Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv4.c:63
Sha256Type::sha256
uint8_t sha256[32]
Definition: datasets-sha256.h:31
Dataset::type
enum DatasetTypes type
Definition: datasets.h:49
THashConsolidateMemcap
void THashConsolidateMemcap(THashTableContext *ctx)
Definition: util-thash.c:345
SC_IPV6_LEN
#define SC_IPV6_LEN
Definition: util-ip.h:29
rust.h
DATASET_TYPE_SHA256
@ DATASET_TYPE_SHA256
Definition: datasets.h:41
StringJsonGetLength
uint32_t StringJsonGetLength(void *s)
Definition: datasets-string.c:134
Md5Type::md5
uint8_t md5[16]
Definition: datasets-md5.h:31
DataJsonResultType::found
bool found
Definition: datasets-context-json.h:38
DATASET_TYPE_IPV6
@ DATASET_TYPE_IPV6
Definition: datasets.h:43
Md5StrCompare
bool Md5StrCompare(void *a, void *b)
Definition: datasets-md5.c:57
DatajsonUnlockElt
void DatajsonUnlockElt(DataJsonResultType *r)
Definition: datasets-context-json.c:47
strlcpy
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: util-strlcpyu.c:43
DataJsonResultType::hashdata
THashData * hashdata
Definition: datasets-context-json.h:40
DataJsonResultType
Definition: datasets-context-json.h:37
datasets.h
IPv4JsonGetLength
uint32_t IPv4JsonGetLength(void *s)
Definition: datasets-ipv4.c:82
util-debug.h
DatasetGetOrCreate
int DatasetGetOrCreate(const char *name, enum DatasetTypes type, const char *save, const char *load, uint64_t *memcap, uint32_t *hashsize, Dataset **ret_set)
Definition: datasets.c:369
datasets-ipv6.h
IPv6Type::ipv6
uint8_t ipv6[16]
Definition: datasets-ipv6.h:31
util-ip.h
DataJsonResultType::json
DataJsonType json
Definition: datasets-context-json.h:39
datasets-ipv4.h
IPv6JsonGetLength
uint32_t IPv6JsonGetLength(void *s)
Definition: datasets-ipv6.c:83
Md5StrHash
uint32_t Md5StrHash(uint32_t hash_seed, void *s)
Definition: datasets-md5.c:65
THashDataGetResult
Definition: util-thash.h:191
StringType
Definition: datasets-string.h:30
Md5Type::json
DataJsonType json
Definition: datasets-md5.h:34
Sha256Type::json
DataJsonType json
Definition: datasets-sha256.h:34
type
uint16_t type
Definition: decode-vlan.c:106
DatasetLock
void DatasetLock(void)
Definition: datasets.c:102
IPv6Type
Definition: datasets-ipv6.h:30
name
const char * name
Definition: tm-threads.c:2163
IPv4Type::json
DataJsonType json
Definition: datasets-ipv4.h:34
DATASET_TYPE_IPV4
@ DATASET_TYPE_IPV4
Definition: datasets.h:42
StringType::ptr
uint8_t * ptr
Definition: datasets-string.h:36
IPv6JsonFree
void IPv6JsonFree(void *s)
Definition: datasets-ipv6.c:75
StringType::json
DataJsonType json
Definition: datasets-string.h:34
THashShutdown
void THashShutdown(THashTableContext *ctx)
shut down the hash table
Definition: util-thash.c:354
DatasetTypes
DatasetTypes
Definition: datasets.h:37
Sha256StrJsonGetLength
uint32_t Sha256StrJsonGetLength(void *s)
Definition: datasets-sha256.c:83
THashData_::data
void * data
Definition: util-thash.h:92
cnt
uint32_t cnt
Definition: tmqh-packetpool.h:7
Sha256Type
Definition: datasets-sha256.h:30
THashData_
Definition: util-thash.h:85
Dataset::remove_key
bool remove_key
Definition: datasets.h:53
Sha256StrJsonFree
void Sha256StrJsonFree(void *s)
Definition: datasets-sha256.c:75
DATASET_FORMAT_NDJSON
@ DATASET_FORMAT_NDJSON
Definition: datasets.h:34
DATASET_FORMAT_JSON
@ DATASET_FORMAT_JSON
Definition: datasets.h:33
suricata-common.h
SCStrndup
#define SCStrndup(s, n)
Definition: util-mem.h:59
datasets-context-json.h
FatalErrorOnInit
#define FatalErrorOnInit(...)
Fatal error IF we're starting up, and configured to consider errors to be fatal errors.
Definition: util-debug.h:519
StringCompare
bool StringCompare(void *a, void *b)
Definition: datasets-string.c:95
THashGetFromHash
struct THashDataGetResult THashGetFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:618
DataJsonType
Definition: datasets-context-json.h:32
hashsize
#define hashsize(n)
Definition: util-hash-lookup3.h:40
THashLookupFromHash
THashData * THashLookupFromHash(THashTableContext *ctx, void *data)
look up data in the hash
Definition: util-thash.c:728
IPv4Type
Definition: datasets-ipv4.h:30
THashDecrUsecnt
#define THashDecrUsecnt(h)
Definition: util-thash.h:170
IPv4Compare
bool IPv4Compare(void *a, void *b)
Definition: datasets-ipv4.c:55
DatajsonCopyJson
int DatajsonCopyJson(DataJsonType *dst, DataJsonType *src)
Definition: datasets-context-json.c:54
SCMalloc
#define SCMalloc(sz)
Definition: util-mem.h:47
SCLogConfig
struct SCLogConfig_ SCLogConfig
Holds the config state used by the logging api.
IPv6Hash
uint32_t IPv6Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv6.c:64
SCLogError
#define SCLogError(...)
Macro used to log ERROR messages.
Definition: util-debug.h:267
SCFree
#define SCFree(p)
Definition: util-mem.h:61
Sha256StrCompare
bool Sha256StrCompare(void *a, void *b)
Definition: datasets-sha256.c:55
DatajsonGet
Dataset * DatajsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap, uint32_t hashsize, char *json_key_value, char *json_array_key, DatasetFormats format, bool remove_key)
Definition: datasets-context-json.c:631
StringHash
uint32_t StringHash(uint32_t hash_seed, void *s)
Definition: datasets-string.c:106
src
uint16_t src
Definition: app-layer-dnp3.h:5
DataJsonType::value
char * value
Definition: datasets-context-json.h:33
IPv6JsonSet
int IPv6JsonSet(void *dst, void *src)
Definition: datasets-ipv6.c:42
DATASET_TYPE_MD5
@ DATASET_TYPE_MD5
Definition: datasets.h:40
DATASET_TYPE_STRING
@ DATASET_TYPE_STRING
Definition: datasets.h:39
DatasetUnlock
void DatasetUnlock(void)
Definition: datasets.c:107
THashDataGetResult::is_new
bool is_new
Definition: util-thash.h:193
suricata.h
THashInit
THashTableContext * THashInit(const char *cnf_prefix, uint32_t data_size, int(*DataSet)(void *, void *), void(*DataFree)(void *), uint32_t(*DataHash)(uint32_t, void *), bool(*DataCompare)(void *, void *), bool(*DataExpired)(void *, SCTime_t), uint32_t(*DataSize)(void *), bool reset_memcap, uint64_t memcap, uint32_t hashsize)
Definition: util-thash.c:302
StringJsonSet
int StringJsonSet(void *dst, void *src)
Definition: datasets-string.c:79
IPv4Type::ipv4
uint8_t ipv4[4]
Definition: datasets-ipv4.h:31
Dataset
Definition: datasets.h:47
SC_MD5_LEN
#define SC_MD5_LEN
Definition: util-file.h:43
IPv6Type::json
DataJsonType json
Definition: datasets-ipv6.h:34
dst
uint16_t dst
Definition: app-layer-dnp3.h:4
SCLogNotice
#define SCLogNotice(...)
Macro used to log NOTICE messages.
Definition: util-debug.h:243
Md5StrJsonFree
void Md5StrJsonFree(void *s)
Definition: datasets-md5.c:76
Dataset::load
char load[PATH_MAX]
Definition: datasets.h:56
Sha256StrHash
uint32_t Sha256StrHash(uint32_t hash_seed, void *s)
Definition: datasets-sha256.c:63
SC_IPV4_LEN
#define SC_IPV4_LEN
Definition: util-ip.h:28
Md5StrJsonGetLength
uint32_t Md5StrJsonGetLength(void *s)
Definition: datasets-md5.c:84
DatajsonAddSerialized
int DatajsonAddSerialized(Dataset *set, const char *value, const char *json)
add serialized data to json set
Definition: datasets-context-json.c:875
DatasetAppendSet
int DatasetAppendSet(Dataset *set)
Definition: datasets.c:79
Md5StrJsonSet
int Md5StrJsonSet(void *dst, void *src)
Definition: datasets-md5.c:43
DatasetParseIpv6String
int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6)
Definition: datasets.c:156
DATAJSON_JSON_LENGTH
#define DATAJSON_JSON_LENGTH
Definition: datasets-context-json.h:30