46 static uint32_t set_ids = 0;
54 static inline void DatasetUnlockData(
THashData *d)
59 static bool DatasetIsStatic(
const char *save,
const char *load);
60 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *
hashsize);
64 if (strcasecmp(
"md5", s) == 0)
66 if (strcasecmp(
"sha256", s) == 0)
68 if (strcasecmp(
"string", s) == 0)
70 if (strcasecmp(
"ipv4", s) == 0)
72 if (strcasecmp(
"ip", s) == 0)
86 static Dataset *DatasetSearchByName(
const char *
name)
98 static int HexToRaw(
const uint8_t *in,
size_t ins, uint8_t *out,
size_t outs)
108 memset(hash, 0, outs);
110 for (x = 0, i = 0; i < ins; i+=2, x++) {
111 char buf[3] = { 0, 0, 0 };
115 long value = strtol(buf, NULL, 16);
116 if (value >= 0 && value <= 255)
117 hash[x] = (uint8_t)value;
119 SCLogError(
"hash byte out of range %ld", value);
124 memcpy(out, hash, outs);
128 static int DatasetLoadIPv4(
Dataset *set)
130 if (strlen(set->
load) == 0)
134 const char *fopen_mode =
"r";
135 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
139 int retval = ParseDatasets(set, set->
name, set->
load, fopen_mode, DSIpv4);
142 }
else if (retval == -1) {
151 static int ParseIpv6String(
Dataset *set,
const char *line,
struct in6_addr *in6)
154 char *got_colon = strchr(line,
':');
157 if (inet_pton(AF_INET6, line, in6) != 1) {
161 memcpy(&ip6addr, in6->s6_addr,
sizeof(ip6addr));
163 if (ip6addr[0] == 0 && ip6addr[1] == 0 && ip6addr[2] == 0xFFFF0000) {
164 ip6addr[0] = ip6addr[3];
167 memcpy(in6, ip6addr,
sizeof(
struct in6_addr));
172 if (inet_pton(AF_INET, line, &in) != 1) {
176 memset(in6, 0,
sizeof(
struct in6_addr));
177 memcpy(in6, &in,
sizeof(
struct in_addr));
182 static int DatasetLoadIPv6(
Dataset *set)
184 if (strlen(set->
load) == 0)
188 const char *fopen_mode =
"r";
189 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
193 int retval = ParseDatasets(set, set->
name, set->
load, fopen_mode, DSIpv6);
196 }
else if (retval == -1) {
205 static int DatasetLoadMd5(
Dataset *set)
207 if (strlen(set->
load) == 0)
211 const char *fopen_mode =
"r";
212 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
216 int retval = ParseDatasets(set, set->
name, set->
load, fopen_mode, DSMd5);
219 }
else if (retval == -1) {
228 static int DatasetLoadSha256(
Dataset *set)
230 if (strlen(set->
load) == 0)
234 const char *fopen_mode =
"r";
235 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
239 int retval = ParseDatasets(set, set->
name, set->
load, fopen_mode, DSSha256);
242 }
else if (retval == -1) {
251 static int DatasetLoadString(
Dataset *set)
253 if (strlen(set->
load) == 0)
258 const char *fopen_mode =
"r";
259 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
263 int retval = ParseDatasets(set, set->
name, set->
load, fopen_mode, DSString);
266 }
else if (retval == -1) {
282 static void DatasetGetPath(
const char *in_path,
289 strlcpy(path, in_path,
sizeof(path));
290 strlcpy(out_path, path, out_size);
295 if (stat(data_dir, &st) != 0) {
296 SCLogDebug(
"data-dir '%s': %s", data_dir, strerror(errno));
300 snprintf(path,
sizeof(path),
"%s/%s", data_dir, in_path);
303 if (stat(path, &st) != 0) {
304 SCLogDebug(
"path %s: %s", path, strerror(errno));
306 snprintf(path,
sizeof(path),
"%s", in_path);
310 strlcpy(out_path, path, out_size);
311 SCLogDebug(
"in_path \'%s\' => \'%s\'", in_path, out_path);
329 static bool DatasetCheckHashsize(
const char *
name, uint32_t hash_size)
332 SCLogError(
"hashsize %u in dataset '%s' exceeds configured 'single-hashsize' limit (%u)",
340 SCLogError(
"hashsize %u in dataset '%s' exceeds configured 'total-hashsizes' limit (%u, in "
349 static void DatasetUpdateHashsize(
const char *
name, uint32_t hash_size)
360 uint64_t default_memcap = 0;
361 uint32_t default_hashsize = 0;
371 "exists and is of type %u",
377 if ((save == NULL || strlen(save) == 0) &&
378 (load == NULL || strlen(load) == 0)) {
382 if ((save == NULL && strlen(set->
save) > 0) ||
383 (save != NULL && strcmp(set->
save, save) != 0)) {
388 if ((load == NULL && strlen(set->
load) > 0) ||
389 (load != NULL && strcmp(set->
load, load) != 0)) {
405 GetDefaultMemcap(&default_memcap, &default_hashsize);
414 set = DatasetAlloc(
name);
421 if (save && strlen(save)) {
425 if (load && strlen(load)) {
431 snprintf(cnf_name,
sizeof(cnf_name),
"datasets.%s.hash",
name);
437 memcap > 0 ? memcap : default_memcap,
hashsize);
438 if (set->
hash == NULL)
440 if (DatasetLoadMd5(set) < 0)
446 memcap > 0 ? memcap : default_memcap,
hashsize);
447 if (set->
hash == NULL)
449 if (DatasetLoadString(set) < 0)
455 memcap > 0 ? memcap : default_memcap,
hashsize);
456 if (set->
hash == NULL)
458 if (DatasetLoadSha256(set) < 0)
464 memcap > 0 ? memcap : default_memcap,
hashsize);
465 if (set->
hash == NULL)
467 if (DatasetLoadIPv4(set) < 0)
473 memcap > 0 ? memcap : default_memcap,
hashsize);
474 if (set->
hash == NULL)
476 if (DatasetLoadIPv6(set) < 0)
480 if (set->
hash == NULL) {
485 SCLogError(
"dataset too large for set memcap");
489 SCLogDebug(
"set %p/%s type %u save %s load %s",
512 static bool DatasetIsStatic(
const char *save,
const char *load)
517 if ((load != NULL && strlen(load) > 0) &&
518 (save == NULL || strlen(save) == 0)) {
552 SCLogDebug(
"Post Reload Cleanup starting.. Hidden sets will be removed");
578 #define DATASETS_HASHSIZE_DEFAULT 4096
580 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *
hashsize)
582 const char *
str = NULL;
583 if (
ConfGet(
"datasets.defaults.memcap", &
str) == 1) {
586 " resetting to default",
593 if (
ConfGet(
"datasets.defaults.hashsize", &
str) == 1) {
597 " resetting to default: %u",
607 uint64_t default_memcap = 0;
608 uint32_t default_hashsize = 0;
609 GetDefaultMemcap(&default_memcap, &default_hashsize);
610 if (datasets != NULL) {
611 const char *
str = NULL;
612 if (
ConfGet(
"datasets.limits.total-hashsizes", &
str) == 1) {
614 FatalError(
"failed to parse datasets.limits.total-hashsizes value: %s",
str);
617 if (
ConfGet(
"datasets.limits.single-hashsize", &
str) == 1) {
619 FatalError(
"failed to parse datasets.limits.single-hashsize value: %s",
str);
624 FatalError(
"total-hashsizes (%u) cannot be smaller than single-hashsize (%u)",
635 if (iter->
name == NULL) {
640 char save[PATH_MAX] =
"";
641 char load[PATH_MAX] =
"";
645 const char *set_name = iter->
name;
654 if (set_type == NULL) {
662 DatasetGetPath(set_save->
val, save,
sizeof(save),
TYPE_STATE);
663 strlcpy(load, save,
sizeof(load));
668 DatasetGetPath(set_load->
val, load,
sizeof(load),
TYPE_LOAD);
676 " deduced: %s, resetting to default",
685 " deduced: %s, resetting to default",
691 snprintf(conf_str,
sizeof(conf_str),
"datasets.%d.%s", list_pos, set_name);
693 SCLogDebug(
"set %s type %s. Conf %s", set_name, set_type->
val, conf_str);
695 if (strcmp(set_type->
val,
"md5") == 0) {
697 memcap > 0 ? memcap : default_memcap,
703 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
706 }
else if (strcmp(set_type->
val,
"sha256") == 0) {
708 memcap > 0 ? memcap : default_memcap,
714 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
717 }
else if (strcmp(set_type->
val,
"string") == 0) {
719 memcap > 0 ? memcap : default_memcap,
725 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
728 }
else if (strcmp(set_type->
val,
"ipv4") == 0) {
730 memcap > 0 ? memcap : default_memcap,
736 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
739 }
else if (strcmp(set_type->
val,
"ip") == 0) {
741 memcap > 0 ? memcap : default_memcap,
747 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
772 SCLogDebug(
"destroying datasets done: %p", sets);
775 static int SaveCallback(
void *
ctx,
const uint8_t *data,
const uint32_t data_len)
780 return (
int)fwrite(data, data_len, 1, fp);
785 static int Md5AsAscii(
const void *s,
char *out,
size_t out_size)
792 return (
int)strlen(out);
795 static int Sha256AsAscii(
const void *s,
char *out,
size_t out_size)
802 return (
int)strlen(out);
805 static int IPv4AsAscii(
const void *s,
char *out,
size_t out_size)
812 return (
int)strlen(out);
815 static int IPv6AsAscii(
const void *s,
char *out,
size_t out_size)
820 for (
int i = 4; i <= 15; i++) {
821 if (ip6->
ipv6[i] != 0) {
833 return (
int)strlen(out);
842 if (strlen(set->
save) == 0)
845 FILE *fp = fopen(set->
save,
"w");
877 static int DatasetLookupString(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
882 StringType lookup = { .
ptr = (uint8_t *)data, .
len = data_len, .rep.value = 0 };
885 DatasetUnlockData(rdata);
892 const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
899 StringType lookup = { .
ptr = (uint8_t *)data, .
len = data_len, .rep = *rep };
905 DatasetUnlockData(rdata);
911 static int DatasetLookupIPv4(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
920 memcpy(lookup.
ipv4, data, 4);
923 DatasetUnlockData(rdata);
930 Dataset *set,
const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
941 memcpy(lookup.
ipv4, data, data_len);
947 DatasetUnlockData(rdata);
953 static int DatasetLookupIPv6(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
958 if (data_len != 16 && data_len != 4)
962 memcpy(lookup.
ipv6, data, data_len);
965 DatasetUnlockData(rdata);
972 Dataset *set,
const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
979 if (data_len != 16 && data_len != 4)
983 memcpy(lookup.
ipv6, data, data_len);
989 DatasetUnlockData(rdata);
995 static int DatasetLookupMd5(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1004 memcpy(lookup.
md5, data, data_len);
1007 DatasetUnlockData(rdata);
1014 const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
1025 memcpy(lookup.
md5, data, data_len);
1031 DatasetUnlockData(rdata);
1037 static int DatasetLookupSha256(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1046 memcpy(lookup.
sha256, data, data_len);
1049 DatasetUnlockData(rdata);
1056 const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
1067 memcpy(lookup.
sha256, data, data_len);
1073 DatasetUnlockData(rdata);
1093 switch (set->
type) {
1095 return DatasetLookupString(set, data, data_len);
1097 return DatasetLookupMd5(set, data, data_len);
1099 return DatasetLookupSha256(set, data, data_len);
1101 return DatasetLookupIPv4(set, data, data_len);
1103 return DatasetLookupIPv6(set, data, data_len);
1109 const DataRepType *rep)
1115 switch (set->
type) {
1117 return DatasetLookupStringwRep(set, data, data_len, rep);
1119 return DatasetLookupMd5wRep(set, data, data_len, rep);
1121 return DatasetLookupSha256wRep(set, data, data_len, rep);
1123 return DatasetLookupIPv4wRep(set, data, data_len, rep);
1125 return DatasetLookupIPv6wRep(set, data, data_len, rep);
1135 static int DatasetAddString(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1140 StringType lookup = { .ptr = (uint8_t *)data, .
len = data_len,
1144 DatasetUnlockData(res.
data);
1145 return res.
is_new ? 1 : 0;
1155 static int DatasetAddStringwRep(
1156 Dataset *set,
const uint8_t *
data,
const uint32_t data_len,
const DataRepType *rep)
1165 DatasetUnlockData(res.
data);
1166 return res.
is_new ? 1 : 0;
1171 static int DatasetAddIPv4(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1185 DatasetUnlockData(res.
data);
1186 return res.
is_new ? 1 : 0;
1191 static int DatasetAddIPv6(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1197 if (data_len != 16) {
1205 DatasetUnlockData(res.
data);
1206 return res.
is_new ? 1 : 0;
1211 static int DatasetAddIPv4wRep(
1212 Dataset *set,
const uint8_t *
data,
const uint32_t data_len,
const DataRepType *rep)
1224 DatasetUnlockData(res.
data);
1225 return res.
is_new ? 1 : 0;
1230 static int DatasetAddIPv6wRep(
1231 Dataset *set,
const uint8_t *
data,
const uint32_t data_len,
const DataRepType *rep)
1243 DatasetUnlockData(res.
data);
1244 return res.
is_new ? 1 : 0;
1249 static int DatasetAddMd5(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1261 DatasetUnlockData(res.
data);
1262 return res.
is_new ? 1 : 0;
1267 static int DatasetAddMd5wRep(
1268 Dataset *set,
const uint8_t *
data,
const uint32_t data_len,
const DataRepType *rep)
1280 DatasetUnlockData(res.
data);
1281 return res.
is_new ? 1 : 0;
1286 static int DatasetAddSha256wRep(
1287 Dataset *set,
const uint8_t *
data,
const uint32_t data_len,
const DataRepType *rep)
1299 DatasetUnlockData(res.
data);
1300 return res.
is_new ? 1 : 0;
1305 static int DatasetAddSha256(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1317 DatasetUnlockData(res.
data);
1318 return res.
is_new ? 1 : 0;
1328 switch (set->
type) {
1330 return DatasetAddString(set,
data, data_len);
1332 return DatasetAddMd5(set,
data, data_len);
1334 return DatasetAddSha256(set,
data, data_len);
1336 return DatasetAddIPv4(set,
data, data_len);
1338 return DatasetAddIPv6(set,
data, data_len);
1348 switch (set->
type) {
1350 return DatasetAddStringwRep(set,
data, data_len, rep);
1352 return DatasetAddMd5wRep(set,
data, data_len, rep);
1354 return DatasetAddSha256wRep(set,
data, data_len, rep);
1356 return DatasetAddIPv4wRep(set,
data, data_len, rep);
1358 return DatasetAddIPv6wRep(set,
data, data_len, rep);
1365 static int DatasetOpSerialized(
Dataset *set,
const char *
string,
DatasetOpFunc DatasetOpString,
1371 if (strlen(
string) == 0)
1374 switch (set->
type) {
1376 uint32_t decoded_size = SCBase64DecodeBufferSize(strlen(
string));
1377 uint8_t decoded[decoded_size];
1378 uint32_t num_decoded = SCBase64Decode(
1379 (
const uint8_t *)
string, strlen(
string), SCBase64ModeStrict, decoded);
1380 if (num_decoded == 0) {
1384 return DatasetOpString(set, decoded, num_decoded);
1387 if (strlen(
string) != 32)
1390 if (HexToRaw((
const uint8_t *)
string, 32, hash,
sizeof(hash)) < 0)
1392 return DatasetOpMd5(set, hash, 16);
1395 if (strlen(
string) != 64)
1398 if (HexToRaw((
const uint8_t *)
string, 64, hash,
sizeof(hash)) < 0)
1400 return DatasetOpSha256(set, hash, 32);
1404 if (inet_pton(AF_INET,
string, &in) != 1)
1406 return DatasetOpIPv4(set, (uint8_t *)&in.s_addr, 4);
1409 struct in6_addr in6;
1410 if (ParseIpv6String(set,
string, &in6) != 0) {
1411 SCLogError(
"Dataset failed to import %s as IPv6",
string);
1414 return DatasetOpIPv6(set, (uint8_t *)&in6.s6_addr, 16);
1428 return DatasetOpSerialized(set,
string, DatasetAddString, DatasetAddMd5, DatasetAddSha256,
1429 DatasetAddIPv4, DatasetAddIPv6);
1440 return DatasetOpSerialized(set,
string, DatasetLookupString, DatasetLookupMd5,
1441 DatasetLookupSha256, DatasetLookupIPv4, DatasetLookupIPv6);
1449 static int DatasetRemoveString(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1454 StringType lookup = { .ptr = (uint8_t *)data, .
len = data_len,
1459 static int DatasetRemoveIPv4(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1468 memcpy(lookup.
ipv4, data, 4);
1472 static int DatasetRemoveIPv6(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1481 memcpy(lookup.
ipv6, data, 16);
1485 static int DatasetRemoveMd5(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1494 memcpy(lookup.
md5, data, 16);
1498 static int DatasetRemoveSha256(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1507 memcpy(lookup.
sha256, data, 32);
1518 return DatasetOpSerialized(set,
string, DatasetRemoveString, DatasetRemoveMd5,
1519 DatasetRemoveSha256, DatasetRemoveIPv4, DatasetRemoveIPv6);
1527 switch (set->
type) {
1529 return DatasetRemoveString(set, data, data_len);
1531 return DatasetRemoveMd5(set, data, data_len);
1533 return DatasetRemoveSha256(set, data, data_len);
1535 return DatasetRemoveIPv4(set, data, data_len);
1537 return DatasetRemoveIPv6(set, data, data_len);