suricata
datasets-context-json.c
Go to the documentation of this file.
1 /* Copyright (C) 2025 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Eric Leblond <el@stamus-networks.com>
22  */
23 
24 #include "suricata-common.h"
25 #include "suricata.h"
26 #include "rust.h"
27 #include "datasets.h"
28 #include "datasets-context-json.h"
29 #include "datasets-ipv4.h"
30 #include "datasets-ipv6.h"
31 #include "datasets-md5.h"
32 #include "datasets-sha256.h"
33 #include "datasets-string.h"
34 #include "util-byte.h"
35 #include "util-ip.h"
36 #include "util-debug.h"
37 
38 static int DatajsonAdd(
39  Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json);
40 
41 static inline void DatajsonUnlockData(THashData *d)
42 {
43  (void)THashDecrUsecnt(d);
44  THashDataUnlock(d);
45 }
46 
48 {
49  if (r->hashdata) {
50  DatajsonUnlockData(r->hashdata);
51  }
52 }
53 
55 {
56  dst->len = src->len;
57  dst->value = SCMalloc(dst->len + 1);
58  if (dst->value == NULL)
59  return -1;
60  memcpy(dst->value, src->value, dst->len);
61  dst->value[dst->len] = '\0'; // Ensure null-termination
62  return 0;
63 }
64 
/**
 * \brief Check whether a buffer holds exactly one number (integer or float).
 *
 * The whole buffer has to be a number: anything other than JSON whitespace
 * following the parsed number makes the check fail, so inputs like "1abc"
 * are rejected.
 *
 * \param in NUL-terminated input string
 * \param ins length of the input, not counting the terminator
 *
 * \retval true the input is a number, optionally followed by whitespace
 * \retval false empty input, no number at the start, or trailing garbage
 */
static bool IsFloat(const char *in, size_t ins)
{
    if (in == NULL || ins == 0) {
        return false;
    }

    /* NOTE(review): strtof also accepts "inf", "nan" and hex floats, which
     * are not valid JSON numbers; those are still let through here. */
    char *endptr = NULL;
    (void)strtof(in, &endptr);

    /* endptr == in means strtof could not convert anything */
    if (endptr == in) {
        return false;
    }

    /* the parsed number must span the buffer, whitespace padding aside */
    const char *end = in + ins;
    if (endptr > end) {
        return false;
    }
    for (const char *p = endptr; p < end; p++) {
        if (*p != ' ' && *p != '\t' && *p != '\n' && *p != '\r') {
            return false;
        }
    }
    return true;
}
77 
78 static int ParseJsonLine(const char *in, size_t ins, DataJsonType *rep_out)
79 {
80  if (ins > DATAJSON_JSON_LENGTH) {
81  SCLogError("dataset: json string too long: %s", in);
82  return -1;
83  }
84 
85  json_error_t jerror;
86  json_t *msg = json_loads(in, 0, &jerror);
87  if (msg == NULL) {
88  /* JANSSON does not see an integer, float or a string as valid JSON.
89  So we need to exclude them from failure. */
90  if (!IsFloat(in, ins) && !((in[0] == '"') && (in[ins - 1] == '"'))) {
91  SCLogError("dataset: Invalid json: %s: '%s'", jerror.text, in);
92  return -1;
93  }
94  } else {
95  json_decref(msg);
96  }
97  rep_out->len = (uint16_t)ins;
98  rep_out->value = SCStrndup(in, ins);
99  if (rep_out->value == NULL) {
100  return -1;
101  }
102  return 0;
103 }
104 
105 static json_t *GetSubObjectByKey(json_t *json, const char *key)
106 {
107  if (!json || !key || !json_is_object(json)) {
108  return NULL;
109  }
110  if (strlen(key) > SIG_JSON_CONTENT_KEY_LEN) {
112  return NULL;
113  }
114 
115  const char *current_key = key;
116  json_t *current = json;
117  while (current_key) {
118  const char *dot = strchr(current_key, '.');
119 
120  size_t key_len = dot ? (size_t)(dot - current_key) : strlen(current_key);
121  char key_buffer[key_len + 1];
122  strlcpy(key_buffer, current_key, key_len + 1);
123 
124  if (json_is_object(current) == false) {
125  return NULL;
126  }
127  current = json_object_get(current, key_buffer);
128  if (current == NULL) {
129  return NULL;
130  }
131  current_key = dot ? dot + 1 : NULL;
132  }
133  return current;
134 }
135 
136 static int ParseJsonFile(const char *file, json_t **array, char *key)
137 {
138  json_t *json;
139  json_error_t error;
140  /* assume we have one single JSON element in FILE */
141  json = json_load_file(file, 0, &error);
142  if (json == NULL) {
143  FatalErrorOnInit("can't load JSON, error on line %d: %s", error.line, error.text);
144  return -1;
145  }
146 
147  if (key == NULL || strlen(key) == 0) {
148  *array = json;
149  } else {
150  *array = GetSubObjectByKey(json, key);
151  if (*array == NULL) {
152  SCLogError("dataset: %s failed to get key '%s'", file, key);
153  json_decref(json);
154  return -1;
155  }
156  json_incref(*array);
157  json_decref(json);
158  }
159  if (!json_is_array(*array)) {
160  FatalErrorOnInit("not an array");
161  json_decref(*array);
162  return -1;
163  }
164  return 0;
165 }
166 
167 static int DatajsonSetValue(
168  Dataset *set, const uint8_t *val, uint16_t val_len, json_t *value, const char *json_key)
169 {
170  DataJsonType elt = { .value = NULL, .len = 0 };
171  if (set->remove_key) {
172  json_object_del(value, json_key);
173  }
174 
175  elt.value = json_dumps(value, JSON_COMPACT);
176  if (elt.value == NULL) {
177  FatalErrorOnInit("json_dumps failed for %s/%s", set->name, set->load);
178  return 0;
179  }
180  if (strlen(elt.value) > DATAJSON_JSON_LENGTH) {
181  SCLogError("dataset: json string too long: %s/%s", set->name, set->load);
182  SCFree(elt.value);
183  elt.value = NULL;
184  return 0;
185  }
186  elt.len = (uint16_t)strlen(elt.value);
187 
188  int add_ret = DatajsonAdd(set, val, val_len, &elt);
189  if (add_ret < 0) {
190  FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
191  return 0;
192  }
193  return add_ret;
194 }
195 
196 /**
197  * \retval 1 data was added to the hash
198  * \retval 0 data was not added to the hash as it is already there
199  * \retval -1 failed to add data to the hash
200  */
201 static int DatajsonAddString(
202  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
203 {
204  StringType lookup = { .ptr = (uint8_t *)data, .len = data_len, .json = *json };
205  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
206  if (res.data) {
207  DatajsonUnlockData(res.data);
208  return res.is_new ? 1 : 0;
209  }
210  return -1;
211 }
212 
213 static int DatajsonAddMd5(
214  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
215 {
216  if (data_len != SC_MD5_LEN)
217  return -2;
218 
219  Md5Type lookup = { .json = *json };
220  memcpy(lookup.md5, data, SC_MD5_LEN);
221  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
222  if (res.data) {
223  DatajsonUnlockData(res.data);
224  return res.is_new ? 1 : 0;
225  }
226  return -1;
227 }
228 
229 static int DatajsonAddSha256(
230  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
231 {
232  if (data_len != SC_SHA256_LEN)
233  return -2;
234 
235  Sha256Type lookup = { .json = *json };
236  memcpy(lookup.sha256, data, SC_SHA256_LEN);
237  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
238  if (res.data) {
239  DatajsonUnlockData(res.data);
240  return res.is_new ? 1 : 0;
241  }
242  return -1;
243 }
244 
245 static int DatajsonAddIPv4(
246  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
247 {
248  if (data_len < SC_IPV4_LEN)
249  return -2;
250 
251  IPv4Type lookup = { .json = *json };
252  memcpy(lookup.ipv4, data, SC_IPV4_LEN);
253  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
254  if (res.data) {
255  DatajsonUnlockData(res.data);
256  return res.is_new ? 1 : 0;
257  }
258  return -1;
259 }
260 
261 static int DatajsonAddIPv6(
262  Dataset *set, const uint8_t *data, const uint32_t data_len, const DataJsonType *json)
263 {
264  if (data_len != SC_IPV6_LEN)
265  return -2;
266 
267  IPv6Type lookup = { .json = *json };
268  memcpy(lookup.ipv6, data, SC_IPV6_LEN);
269  struct THashDataGetResult res = THashGetFromHash(set->hash, &lookup);
270  if (res.data) {
271  DatajsonUnlockData(res.data);
272  return res.is_new ? 1 : 0;
273  }
274  return -1;
275 }
276 
277 /*
278  * \brief Add data to the dataset from a JSON object.
279  *
280  * \param set The dataset to add data to.
281  * \param data The data to add.
282  * \param data_len The length of the data.
283  * \param json The JSON object containing additional information.
284  *
285  * Memory allocated for the `json` parameter will be freed if the data
286  * is not added to the hash.
287  *
288  * \retval 1 Data was added to the hash.
289  * \retval 0 Data was not added to the hash as it is already there.
290  * \retval -1 Failed to add data to the hash.
291  */
292 static int DatajsonAdd(
293  Dataset *set, const uint8_t *data, const uint32_t data_len, DataJsonType *json)
294 {
295  if (json == NULL)
296  return -1;
297  if (json->value == NULL)
298  return -1;
299 
300  if (set == NULL) {
301  if (json->value != NULL) {
302  SCFree(json->value);
303  json->value = NULL;
304  }
305  return -1;
306  }
307 
308  int add_ret = 0;
309  switch (set->type) {
310  case DATASET_TYPE_STRING:
311  add_ret = DatajsonAddString(set, data, data_len, json);
312  break;
313  case DATASET_TYPE_MD5:
314  add_ret = DatajsonAddMd5(set, data, data_len, json);
315  break;
316  case DATASET_TYPE_SHA256:
317  add_ret = DatajsonAddSha256(set, data, data_len, json);
318  break;
319  case DATASET_TYPE_IPV4:
320  add_ret = DatajsonAddIPv4(set, data, data_len, json);
321  break;
322  case DATASET_TYPE_IPV6:
323  add_ret = DatajsonAddIPv6(set, data, data_len, json);
324  break;
325  default:
326  add_ret = -1;
327  break;
328  }
329 
330  SCFree(json->value);
331  json->value = NULL;
332 
333  return add_ret;
334 }
335 
336 static int DatajsonLoadTypeFromJSON(Dataset *set, char *json_key, char *array_key,
337  uint32_t (*DatajsonAddTypeElement)(Dataset *, json_t *, char *, bool *))
338 {
339  if (strlen(set->load) == 0)
340  return 0;
341 
342  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
343 
344  uint32_t cnt = 0;
345  json_t *json;
346  bool found = false;
347  SCLogDebug("dataset: array_key '%s' %p", array_key, array_key);
348  if (ParseJsonFile(set->load, &json, array_key) == -1) {
349  SCLogError("dataset: %s failed to parse from '%s'", set->name, set->load);
350  return -1;
351  }
352 
353  size_t index;
354  json_t *value;
355  json_array_foreach (json, index, value) {
356  cnt += DatajsonAddTypeElement(set, value, json_key, &found);
357  }
358  json_decref(json);
359 
360  if (found == false) {
362  "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
363  return -1;
364  }
366 
367  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
368  return 0;
369 }
370 
371 static uint32_t DatajsonLoadTypeFromJsonline(Dataset *set, char *json_key,
372  uint32_t (*DatajsonAddTypeElement)(Dataset *, json_t *, char *, bool *))
373 {
374  uint32_t cnt = 0;
375  FILE *fp = fopen(set->load, "r");
376  bool found = false;
377 
378  if (fp == NULL) {
379  SCLogError("dataset: %s failed to open file '%s'", set->name, set->load);
380  return 0;
381  }
382 
383  char line[DATAJSON_JSON_LENGTH];
384  while (fgets(line, sizeof(line), fp) != NULL) {
385  json_t *json = json_loads(line, 0, NULL);
386  if (json == NULL) {
387  SCLogError("dataset: %s failed to parse line '%s'", set->name, line);
388  goto out_err;
389  }
390  cnt += DatajsonAddTypeElement(set, json, json_key, &found);
391  json_decref(json);
392  }
393  int close_op = fclose(fp);
394  if (close_op != 0) {
395  SCLogError("dataset: %s failed to close file '%s'", set->name, set->load);
396  return 0;
397  }
398 
399  if (found == false) {
401  "No valid entries for key '%s' found in the file '%s'", json_key, set->load);
402  return 0;
403  }
404  return cnt;
405 out_err:
406  close_op = fclose(fp);
407  if (close_op != 0) {
408  SCLogError("dataset: %s failed to close file '%s'", set->name, set->load);
409  }
410  return 0;
411 }
412 
413 static uint32_t DatajsonAddStringElement(Dataset *set, json_t *value, char *json_key, bool *found)
414 {
415  json_t *key = GetSubObjectByKey(value, json_key);
416  if (key == NULL) {
417  /* ignore error as it can be a working mode where some entries
418  are not in the same format */
419  return 0;
420  }
421 
422  *found = true;
423 
424  const char *val_key = json_string_value(key);
425  if (val_key == NULL) {
426  FatalErrorOnInit("dataset: %s failed to get value for key '%s'", set->name, json_key);
427  return 0;
428  }
429  size_t val_len = strlen(val_key);
430 
431  json_incref(key);
432  int ret = DatajsonSetValue(set, (const uint8_t *)val_key, (uint16_t)val_len, value, json_key);
433  json_decref(key);
434  if (ret < 0) {
435  FatalErrorOnInit("datajson data add failed %s/%s", set->name, set->load);
436  return 0;
437  }
438  return ret;
439 }
440 
441 static int DatajsonLoadString(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
442 {
443  if (strlen(set->load) == 0)
444  return 0;
445 
446  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
447 
448  uint32_t cnt = 0;
449  if (format == DATASET_FORMAT_JSON) {
450  cnt = DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddStringElement);
451  } else if (format == DATASET_FORMAT_NDJSON) {
452  cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddStringElement);
453  }
455 
456  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
457  return 0;
458 }
459 
460 static uint32_t DatajsonAddMd5Element(Dataset *set, json_t *value, char *json_key, bool *found)
461 {
462  json_t *key = GetSubObjectByKey(value, json_key);
463  if (key == NULL) {
464  /* ignore error as it can be a working mode where some entries
465  are not in the same format */
466  return 0;
467  }
468 
469  *found = true;
470 
471  const char *hash_string = json_string_value(key);
472  if (strlen(hash_string) != SC_MD5_HEX_LEN) {
473  FatalErrorOnInit("Not correct length for a hash");
474  return 0;
475  }
476 
477  uint8_t hash[SC_MD5_LEN];
478  if (HexToRaw((const uint8_t *)hash_string, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0) {
479  FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
480  return 0;
481  }
482  return DatajsonSetValue(set, hash, SC_MD5_LEN, value, json_key);
483 }
484 
485 static int DatajsonLoadMd5(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
486 {
487  if (strlen(set->load) == 0)
488  return 0;
489 
490  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
491 
492  uint32_t cnt = 0;
493  if (format == DATASET_FORMAT_JSON) {
494  cnt = DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddMd5Element);
495  } else if (format == DATASET_FORMAT_NDJSON) {
496  cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddMd5Element);
497  }
499 
500  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
501  return 0;
502 }
503 
504 static uint32_t DatajsonAddSha256Element(Dataset *set, json_t *value, char *json_key, bool *found)
505 {
506  json_t *key = GetSubObjectByKey(value, json_key);
507  if (key == NULL) {
508  /* ignore error as it can be a working mode where some entries
509  are not in the same format */
510  return 0;
511  }
512 
513  *found = true;
514 
515  const char *hash_string = json_string_value(key);
516  if (strlen(hash_string) != SC_SHA256_HEX_LEN) {
517  FatalErrorOnInit("Not correct length for a hash");
518  return 0;
519  }
520 
521  uint8_t hash[SC_SHA256_LEN];
522  if (HexToRaw((const uint8_t *)hash_string, SC_SHA256_HEX_LEN, hash, sizeof(hash)) < 0) {
523  FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
524  return 0;
525  }
526 
527  return DatajsonSetValue(set, hash, SC_SHA256_LEN, value, json_key);
528 }
529 
530 static int DatajsonLoadSha256(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
531 {
532  if (strlen(set->load) == 0)
533  return 0;
534 
535  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
536 
537  uint32_t cnt = 0;
538  if (format == DATASET_FORMAT_JSON) {
539  cnt = DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddSha256Element);
540  } else if (format == DATASET_FORMAT_NDJSON) {
541  cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddSha256Element);
542  }
544 
545  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
546  return 0;
547 }
548 
549 static uint32_t DatajsonAddIpv4Element(Dataset *set, json_t *value, char *json_key, bool *found)
550 {
551  json_t *key = GetSubObjectByKey(value, json_key);
552  if (key == NULL) {
553  /* ignore error as it can be a working mode where some entries
554  are not in the same format */
555  return 0;
556  }
557 
558  *found = true;
559 
560  const char *ip_string = json_string_value(key);
561  struct in_addr in;
562  if (inet_pton(AF_INET, ip_string, &in) != 1) {
563  FatalErrorOnInit("datajson IPv4 parse failed %s/%s: %s", set->name, set->load, ip_string);
564  return 0;
565  }
566 
567  return DatajsonSetValue(set, (const uint8_t *)&in.s_addr, SC_IPV4_LEN, value, json_key);
568 }
569 
570 static int DatajsonLoadIPv4(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
571 {
572  if (strlen(set->load) == 0)
573  return 0;
574 
575  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
576  uint32_t cnt = 0;
577 
578  if (format == DATASET_FORMAT_JSON) {
579  cnt = DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddIpv4Element);
580  } else if (format == DATASET_FORMAT_NDJSON) {
581  cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddIpv4Element);
582  }
584 
585  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
586  return 0;
587 }
588 
589 static uint32_t DatajsonAddIPv6Element(Dataset *set, json_t *value, char *json_key, bool *found)
590 {
591  json_t *key = GetSubObjectByKey(value, json_key);
592  if (key == NULL) {
593  /* ignore error as it can be a working mode where some entries
594  are not in the same format */
595  return 0;
596  }
597 
598  *found = true;
599 
600  const char *ip_string = json_string_value(key);
601  struct in6_addr in6;
602  int ret = DatasetParseIpv6String(set, ip_string, &in6);
603  if (ret < 0) {
604  FatalErrorOnInit("unable to parse IP address");
605  return 0;
606  }
607 
608  return DatajsonSetValue(set, (const uint8_t *)&in6.s6_addr, SC_IPV6_LEN, value, json_key);
609 }
610 
611 static int DatajsonLoadIPv6(Dataset *set, char *json_key, char *array_key, DatasetFormats format)
612 {
613  if (strlen(set->load) == 0)
614  return 0;
615 
616  SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
617 
618  uint32_t cnt = 0;
619 
620  if (format == DATASET_FORMAT_JSON) {
621  cnt = DatajsonLoadTypeFromJSON(set, json_key, array_key, DatajsonAddIPv6Element);
622  } else if (format == DATASET_FORMAT_NDJSON) {
623  cnt = DatajsonLoadTypeFromJsonline(set, json_key, DatajsonAddIPv6Element);
624  }
625 
627 
628  SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
629  return 0;
630 }
631 
632 Dataset *DatajsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap,
633  uint32_t hashsize, char *json_key_value, char *json_array_key, DatasetFormats format,
634  bool remove_key)
635 {
636  Dataset *set = NULL;
637 
638  DatasetLock();
639  int ret = DatasetGetOrCreate(name, type, NULL, load, &memcap, &hashsize, &set);
640  if (ret < 0) {
641  SCLogError("dataset with JSON %s creation failed", name);
642  DatasetUnlock();
643  return NULL;
644  }
645  if (ret == 1) {
646  SCLogDebug("dataset %s already exists", name);
647  if (set->remove_key != remove_key) {
648  SCLogError("dataset %s remove_key mismatch: %d != %d", set->name, set->remove_key,
649  remove_key);
650  DatasetUnlock();
651  return NULL;
652  }
653  DatasetUnlock();
654  return set;
655  }
656 
657  set->remove_key = remove_key;
658 
659  char cnf_name[128];
660  snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
661  switch (type) {
662  case DATASET_TYPE_MD5:
663  set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrJsonSet, Md5StrJsonFree,
664  Md5StrHash, Md5StrCompare, NULL, Md5StrJsonGetLength, load != NULL ? 1 : 0,
665  memcap, hashsize);
666  if (set->hash == NULL)
667  goto out_err;
668  if (DatajsonLoadMd5(set, json_key_value, json_array_key, format) < 0)
669  goto out_err;
670  break;
671  case DATASET_TYPE_STRING:
672  set->hash = THashInit(cnf_name, sizeof(StringType), StringJsonSet, StringJsonFree,
673  StringHash, StringCompare, NULL, StringJsonGetLength, load != NULL ? 1 : 0,
674  memcap, hashsize);
675  if (set->hash == NULL)
676  goto out_err;
677  if (DatajsonLoadString(set, json_key_value, json_array_key, format) < 0) {
678  SCLogError("dataset %s loading failed", name);
679  goto out_err;
680  }
681  break;
682  case DATASET_TYPE_SHA256:
683  set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrJsonSet, Sha256StrJsonFree,
685  load != NULL ? 1 : 0, memcap, hashsize);
686  if (set->hash == NULL)
687  goto out_err;
688  if (DatajsonLoadSha256(set, json_key_value, json_array_key, format) < 0)
689  goto out_err;
690  break;
691  case DATASET_TYPE_IPV4:
692  set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4JsonSet, IPv4JsonFree, IPv4Hash,
693  IPv4Compare, NULL, IPv4JsonGetLength, load != NULL ? 1 : 0, memcap, hashsize);
694  if (set->hash == NULL)
695  goto out_err;
696  if (DatajsonLoadIPv4(set, json_key_value, json_array_key, format) < 0)
697  goto out_err;
698  break;
699  case DATASET_TYPE_IPV6:
700  set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6JsonSet, IPv6JsonFree, IPv6Hash,
701  IPv6Compare, NULL, IPv6JsonGetLength, load != NULL ? 1 : 0, memcap, hashsize);
702  if (set->hash == NULL)
703  goto out_err;
704  if (DatajsonLoadIPv6(set, json_key_value, json_array_key, format) < 0)
705  goto out_err;
706  break;
707  }
708 
709  SCLogDebug(
710  "set %p/%s type %u save %s load %s", set, set->name, set->type, set->save, set->load);
711 
712  if (DatasetAppendSet(set) < 0) {
713  SCLogError("dataset %s append failed", name);
714  goto out_err;
715  }
716 
717  DatasetUnlock();
718  return set;
719 out_err:
720  if (set->hash) {
721  THashShutdown(set->hash);
722  }
723  SCFree(set);
724  DatasetUnlock();
725  return NULL;
726 }
727 
728 static DataJsonResultType DatajsonLookupString(
729  Dataset *set, const uint8_t *data, const uint32_t data_len)
730 {
731  DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
732 
733  if (set == NULL)
734  return rrep;
735 
736  StringType lookup = {
737  .ptr = (uint8_t *)data, .len = data_len, .json.value = NULL, .json.len = 0
738  };
739  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
740  if (rdata) {
741  StringType *found = rdata->data;
742  rrep.found = true;
743  rrep.json = found->json;
744  rrep.hashdata = rdata;
745  return rrep;
746  }
747  return rrep;
748 }
749 
750 static DataJsonResultType DatajsonLookupMd5(
751  Dataset *set, const uint8_t *data, const uint32_t data_len)
752 {
753  DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
754 
755  if (set == NULL)
756  return rrep;
757 
758  if (data_len != SC_MD5_LEN)
759  return rrep;
760 
761  Md5Type lookup = { .json.value = NULL, .json.len = 0 };
762  memcpy(lookup.md5, data, data_len);
763  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
764  if (rdata) {
765  Md5Type *found = rdata->data;
766  rrep.found = true;
767  rrep.json = found->json;
768  rrep.hashdata = rdata;
769  return rrep;
770  }
771  return rrep;
772 }
773 
774 static DataJsonResultType DatajsonLookupSha256(
775  Dataset *set, const uint8_t *data, const uint32_t data_len)
776 {
777  DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
778 
779  if (set == NULL)
780  return rrep;
781 
782  if (data_len != SC_SHA256_LEN)
783  return rrep;
784 
785  Sha256Type lookup = { .json.value = NULL, .json.len = 0 };
786  memcpy(lookup.sha256, data, data_len);
787  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
788  if (rdata) {
789  Sha256Type *found = rdata->data;
790  rrep.found = true;
791  rrep.json = found->json;
792  rrep.hashdata = rdata;
793  return rrep;
794  }
795  return rrep;
796 }
797 
798 static DataJsonResultType DatajsonLookupIPv4(
799  Dataset *set, const uint8_t *data, const uint32_t data_len)
800 {
801  DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
802 
803  if (set == NULL)
804  return rrep;
805 
806  if (data_len != SC_IPV4_LEN)
807  return rrep;
808 
809  IPv4Type lookup = { .json.value = NULL, .json.len = 0 };
810  memcpy(lookup.ipv4, data, data_len);
811  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
812  if (rdata) {
813  IPv4Type *found = rdata->data;
814  rrep.found = true;
815  rrep.json = found->json;
816  rrep.hashdata = rdata;
817  return rrep;
818  }
819  return rrep;
820 }
821 
822 static DataJsonResultType DatajsonLookupIPv6(
823  Dataset *set, const uint8_t *data, const uint32_t data_len)
824 {
825  DataJsonResultType rrep = { .found = false, .json = { .value = NULL, .len = 0 } };
826 
827  if (set == NULL)
828  return rrep;
829 
830  /* We can have IPv4 or IPV6 here due to ip.src and ip.dst implementation */
831  if (data_len != SC_IPV6_LEN && data_len != SC_IPV4_LEN)
832  return rrep;
833 
834  IPv6Type lookup = { .json.value = NULL, .json.len = 0 };
835  memcpy(lookup.ipv6, data, data_len);
836  THashData *rdata = THashLookupFromHash(set->hash, &lookup);
837  if (rdata) {
838  IPv6Type *found = rdata->data;
839  rrep.found = true;
840  rrep.json = found->json;
841  rrep.hashdata = rdata;
842  return rrep;
843  }
844  return rrep;
845 }
846 
847 DataJsonResultType DatajsonLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
848 {
849  DataJsonResultType rrep = { .found = false, .json = { .value = 0 } };
850  if (set == NULL)
851  return rrep;
852 
853  switch (set->type) {
854  case DATASET_TYPE_STRING:
855  return DatajsonLookupString(set, data, data_len);
856  case DATASET_TYPE_MD5:
857  return DatajsonLookupMd5(set, data, data_len);
858  case DATASET_TYPE_SHA256:
859  return DatajsonLookupSha256(set, data, data_len);
860  case DATASET_TYPE_IPV4:
861  return DatajsonLookupIPv4(set, data, data_len);
862  case DATASET_TYPE_IPV6:
863  return DatajsonLookupIPv6(set, data, data_len);
864  default:
865  break;
866  }
867  return rrep;
868 }
869 
870 /** \brief add serialized data to json set
871  * \retval int 1 added
872  * \retval int 0 already in hash
873  * \retval int -1 API error (not added)
874  * \retval int -2 DATA error
875  */
876 int DatajsonAddSerialized(Dataset *set, const char *value, const char *json)
877 {
878  if (set == NULL)
879  return -1;
880 
881  if (strlen(value) == 0)
882  return -1;
883 
884  DataJsonType jvalue = { .value = NULL, .len = 0 };
885  if (json) {
886  if (ParseJsonLine(json, strlen(json), &jvalue) < 0) {
887  SCLogNotice("bad json value for dataset %s/%s", set->name, set->load);
888  return -1;
889  }
890  }
891 
892  int ret = -1;
893  switch (set->type) {
894  case DATASET_TYPE_STRING: {
895  if (strlen(value) > UINT16_MAX) {
896  // size check before stack allocation
897  // should never happen as unix socket callers limits it to 4k
898  SCFree(jvalue.value);
899  return -1;
900  }
901  uint32_t decoded_size = SCBase64DecodeBufferSize((uint32_t)strlen(value));
902  uint8_t decoded[decoded_size];
903  uint32_t num_decoded = SCBase64Decode(
904  (const uint8_t *)value, strlen(value), SCBase64ModeStrict, decoded);
905  if (num_decoded == 0)
906  goto operror;
907  ret = DatajsonAdd(set, decoded, num_decoded, &jvalue);
908  break;
909  }
910  case DATASET_TYPE_MD5: {
911  if (strlen(value) != SC_MD5_HEX_LEN)
912  goto operror;
913  uint8_t hash[SC_MD5_LEN];
914  if (HexToRaw((const uint8_t *)value, SC_MD5_HEX_LEN, hash, sizeof(hash)) < 0)
915  goto operror;
916  ret = DatajsonAdd(set, hash, SC_MD5_LEN, &jvalue);
917  break;
918  }
919  case DATASET_TYPE_SHA256: {
920  if (strlen(value) != SC_SHA256_HEX_LEN)
921  goto operror;
922  uint8_t hash[SC_SHA256_LEN];
923  if (HexToRaw((const uint8_t *)value, SC_SHA256_HEX_LEN, hash, sizeof(hash)) < 0)
924  goto operror;
925  ret = DatajsonAdd(set, hash, SC_SHA256_LEN, &jvalue);
926  break;
927  }
928  case DATASET_TYPE_IPV4: {
929  struct in_addr in;
930  if (inet_pton(AF_INET, value, &in) != 1)
931  goto operror;
932  ret = DatajsonAdd(set, (uint8_t *)&in.s_addr, SC_IPV4_LEN, &jvalue);
933  break;
934  }
935  case DATASET_TYPE_IPV6: {
936  struct in6_addr in6;
937  if (DatasetParseIpv6String(set, value, &in6) != 0) {
938  SCLogError("Dataset failed to import %s as IPv6", value);
939  goto operror;
940  }
941  ret = DatajsonAdd(set, (uint8_t *)&in6.s6_addr, SC_IPV6_LEN, &jvalue);
942  break;
943  }
944  }
945  SCFree(jvalue.value);
946  return ret;
947 operror:
948  SCFree(jvalue.value);
949  return -2;
950 }
DatajsonLookup
DataJsonResultType DatajsonLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
Definition: datasets-context-json.c:847
Sha256StrJsonSet
int Sha256StrJsonSet(void *dst, void *src)
Definition: datasets-sha256.c:41
DataJsonType::len
uint16_t len
Definition: datasets-context-json.h:34
util-byte.h
len
uint8_t len
Definition: app-layer-dnp3.h:2
datasets-string.h
THashDataGetResult::data
THashData * data
Definition: util-thash.h:192
datasets-md5.h
IPv4JsonSet
int IPv4JsonSet(void *dst, void *src)
Definition: datasets-ipv4.c:41
Dataset::name
char name[DATASET_NAME_MAX_LEN+1]
Definition: datasets.h:56
Dataset::save
char save[PATH_MAX]
Definition: datasets.h:65
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:282
datasets-sha256.h
IPv6Compare
bool IPv6Compare(void *a, void *b)
Definition: datasets-ipv6.c:56
name
const char * name
Definition: detect-engine-proto.c:48
StringJsonFree
void StringJsonFree(void *s)
Definition: datasets-string.c:127
HexToRaw
int HexToRaw(const uint8_t *in, size_t ins, uint8_t *out, size_t outs)
Definition: util-byte.c:771
SC_SHA256_LEN
#define SC_SHA256_LEN
Definition: util-file.h:104
Md5Type
Definition: datasets-md5.h:30
Dataset::hash
THashTableContext * hash
Definition: datasets.h:62
IPv4JsonFree
void IPv4JsonFree(void *s)
Definition: datasets-ipv4.c:74
DatasetFormats
DatasetFormats
Definition: datasets.h:39
IPv4Hash
uint32_t IPv4Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv4.c:63
Sha256Type::sha256
uint8_t sha256[32]
Definition: datasets-sha256.h:31
Dataset::type
enum DatasetTypes type
Definition: datasets.h:57
THashConsolidateMemcap
void THashConsolidateMemcap(THashTableContext *ctx)
Definition: util-thash.c:345
SC_IPV6_LEN
#define SC_IPV6_LEN
Definition: util-ip.h:29
rust.h
DATASET_TYPE_SHA256
@ DATASET_TYPE_SHA256
Definition: datasets.h:49
StringJsonGetLength
uint32_t StringJsonGetLength(void *s)
Definition: datasets-string.c:136
Md5Type::md5
uint8_t md5[16]
Definition: datasets-md5.h:31
DataJsonResultType::found
bool found
Definition: datasets-context-json.h:38
DATASET_TYPE_IPV6
@ DATASET_TYPE_IPV6
Definition: datasets.h:51
Md5StrCompare
bool Md5StrCompare(void *a, void *b)
Definition: datasets-md5.c:57
DatajsonUnlockElt
void DatajsonUnlockElt(DataJsonResultType *r)
Definition: datasets-context-json.c:47
strlcpy
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: util-strlcpyu.c:43
DataJsonResultType::hashdata
THashData * hashdata
Definition: datasets-context-json.h:40
DataJsonResultType
Definition: datasets-context-json.h:37
datasets.h
IPv4JsonGetLength
uint32_t IPv4JsonGetLength(void *s)
Definition: datasets-ipv4.c:82
util-debug.h
DatasetGetOrCreate
int DatasetGetOrCreate(const char *name, enum DatasetTypes type, const char *save, const char *load, uint64_t *memcap, uint32_t *hashsize, Dataset **ret_set)
Definition: datasets.c:369
datasets-ipv6.h
IPv6Type::ipv6
uint8_t ipv6[16]
Definition: datasets-ipv6.h:31
SIG_JSON_CONTENT_KEY_LEN
#define SIG_JSON_CONTENT_KEY_LEN
Definition: detect.h:1234
util-ip.h
DataJsonResultType::json
DataJsonType json
Definition: datasets-context-json.h:39
datasets-ipv4.h
IPv6JsonGetLength
uint32_t IPv6JsonGetLength(void *s)
Definition: datasets-ipv6.c:83
Md5StrHash
uint32_t Md5StrHash(uint32_t hash_seed, void *s)
Definition: datasets-md5.c:65
THashDataGetResult
Definition: util-thash.h:191
StringType
Definition: datasets-string.h:30
Md5Type::json
DataJsonType json
Definition: datasets-md5.h:34
Sha256Type::json
DataJsonType json
Definition: datasets-sha256.h:34
type
uint16_t type
Definition: decode-vlan.c:106
DatasetLock
void DatasetLock(void)
Definition: datasets.c:102
IPv6Type
Definition: datasets-ipv6.h:30
IPv4Type::json
DataJsonType json
Definition: datasets-ipv4.h:34
DATASET_TYPE_IPV4
@ DATASET_TYPE_IPV4
Definition: datasets.h:50
StringType::ptr
uint8_t * ptr
Definition: datasets-string.h:36
IPv6JsonFree
void IPv6JsonFree(void *s)
Definition: datasets-ipv6.c:75
StringType::json
DataJsonType json
Definition: datasets-string.h:34
THashShutdown
void THashShutdown(THashTableContext *ctx)
shut down the hash table
Definition: util-thash.c:354
DatasetTypes
DatasetTypes
Definition: datasets.h:45
Sha256StrJsonGetLength
uint32_t Sha256StrJsonGetLength(void *s)
Definition: datasets-sha256.c:83
THashData_::data
void * data
Definition: util-thash.h:92
cnt
uint32_t cnt
Definition: tmqh-packetpool.h:7
Sha256Type
Definition: datasets-sha256.h:30
THashData_
Definition: util-thash.h:85
Dataset::remove_key
bool remove_key
Definition: datasets.h:61
Sha256StrJsonFree
void Sha256StrJsonFree(void *s)
Definition: datasets-sha256.c:75
DATASET_FORMAT_NDJSON
@ DATASET_FORMAT_NDJSON
Definition: datasets.h:42
DATASET_FORMAT_JSON
@ DATASET_FORMAT_JSON
Definition: datasets.h:41
suricata-common.h
SCStrndup
#define SCStrndup(s, n)
Definition: util-mem.h:59
datasets-context-json.h
FatalErrorOnInit
#define FatalErrorOnInit(...)
Raises a fatal error if we are starting up and are configured to treat errors as fatal.
Definition: util-debug.h:526
StringCompare
bool StringCompare(void *a, void *b)
Definition: datasets-string.c:97
THashGetFromHash
struct THashDataGetResult THashGetFromHash(THashTableContext *ctx, void *data)
Definition: util-thash.c:637
DataJsonType
Definition: datasets-context-json.h:32
hashsize
#define hashsize(n)
Definition: util-hash-lookup3.h:40
THashLookupFromHash
THashData * THashLookupFromHash(THashTableContext *ctx, void *data)
look up data in the hash
Definition: util-thash.c:747
IPv4Type
Definition: datasets-ipv4.h:30
THashDecrUsecnt
#define THashDecrUsecnt(h)
Definition: util-thash.h:170
IPv4Compare
bool IPv4Compare(void *a, void *b)
Definition: datasets-ipv4.c:55
DatajsonCopyJson
int DatajsonCopyJson(DataJsonType *dst, DataJsonType *src)
Definition: datasets-context-json.c:54
SCMalloc
#define SCMalloc(sz)
Definition: util-mem.h:47
SCLogConfig
struct SCLogConfig_ SCLogConfig
Holds the config state used by the logging API.
IPv6Hash
uint32_t IPv6Hash(uint32_t hash_seed, void *s)
Definition: datasets-ipv6.c:64
SCLogError
#define SCLogError(...)
Macro used to log ERROR messages.
Definition: util-debug.h:274
SCFree
#define SCFree(p)
Definition: util-mem.h:61
Sha256StrCompare
bool Sha256StrCompare(void *a, void *b)
Definition: datasets-sha256.c:55
DatajsonGet
Dataset * DatajsonGet(const char *name, enum DatasetTypes type, const char *load, uint64_t memcap, uint32_t hashsize, char *json_key_value, char *json_array_key, DatasetFormats format, bool remove_key)
Definition: datasets-context-json.c:632
StringHash
uint32_t StringHash(uint32_t hash_seed, void *s)
Definition: datasets-string.c:108
src
uint16_t src
Definition: app-layer-dnp3.h:5
DataJsonType::value
char * value
Definition: datasets-context-json.h:33
IPv6JsonSet
int IPv6JsonSet(void *dst, void *src)
Definition: datasets-ipv6.c:42
DATASET_TYPE_MD5
@ DATASET_TYPE_MD5
Definition: datasets.h:48
DATASET_TYPE_STRING
@ DATASET_TYPE_STRING
Definition: datasets.h:47
DatasetUnlock
void DatasetUnlock(void)
Definition: datasets.c:107
THashDataGetResult::is_new
bool is_new
Definition: util-thash.h:193
suricata.h
THashInit
THashTableContext * THashInit(const char *cnf_prefix, uint32_t data_size, int(*DataSet)(void *, void *), void(*DataFree)(void *), uint32_t(*DataHash)(uint32_t, void *), bool(*DataCompare)(void *, void *), bool(*DataExpired)(void *, SCTime_t), uint32_t(*DataSize)(void *), bool reset_memcap, uint64_t memcap, uint32_t hashsize)
Definition: util-thash.c:302
StringJsonSet
int StringJsonSet(void *dst, void *src)
Definition: datasets-string.c:81
IPv4Type::ipv4
uint8_t ipv4[4]
Definition: datasets-ipv4.h:31
Dataset
Definition: datasets.h:55
SC_MD5_LEN
#define SC_MD5_LEN
Definition: util-file.h:110
IPv6Type::json
DataJsonType json
Definition: datasets-ipv6.h:34
dst
uint16_t dst
Definition: app-layer-dnp3.h:4
SCLogNotice
#define SCLogNotice(...)
Macro used to log NOTICE messages.
Definition: util-debug.h:250
Md5StrJsonFree
void Md5StrJsonFree(void *s)
Definition: datasets-md5.c:76
Dataset::load
char load[PATH_MAX]
Definition: datasets.h:64
Sha256StrHash
uint32_t Sha256StrHash(uint32_t hash_seed, void *s)
Definition: datasets-sha256.c:63
SC_IPV4_LEN
#define SC_IPV4_LEN
Definition: util-ip.h:28
Md5StrJsonGetLength
uint32_t Md5StrJsonGetLength(void *s)
Definition: datasets-md5.c:84
DatajsonAddSerialized
int DatajsonAddSerialized(Dataset *set, const char *value, const char *json)
add serialized data to a JSON dataset
Definition: datasets-context-json.c:876
DatasetAppendSet
int DatasetAppendSet(Dataset *set)
Definition: datasets.c:79
DEBUG_VALIDATE_BUG_ON
#define DEBUG_VALIDATE_BUG_ON(exp)
Definition: util-validate.h:102
Md5StrJsonSet
int Md5StrJsonSet(void *dst, void *src)
Definition: datasets-md5.c:43
DatasetParseIpv6String
int DatasetParseIpv6String(Dataset *set, const char *line, struct in6_addr *in6)
Definition: datasets.c:156
DATAJSON_JSON_LENGTH
#define DATAJSON_JSON_LENGTH
Definition: datasets-context-json.h:30