/* File-scope 32-bit counter, zero-initialised.
 * NOTE(review): presumably the source of per-dataset ids; its use is not
 * visible in this excerpt -- confirm against the allocation code. */
46 static uint32_t set_ids = 0;
54 static inline void DatasetUnlockData(
THashData *d)
/* Forward declaration; the definition appears further down in this file.
 * Both arguments may be NULL: the definition checks each pointer for NULL
 * before taking strlen(). Its visible condition looks for a non-empty
 * `load` path combined with a NULL or empty `save` path. */
59 static bool DatasetIsStatic(
const char *save,
const char *load);
/* Forward declaration; the definition appears further down in this file.
 * `memcap` and `hashsize` are output pointers: visible callers pass the
 * addresses of locals that were initialised to 0 before the call. */
60 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *
hashsize);
64 if (strcasecmp(
"md5", s) == 0)
66 if (strcasecmp(
"sha256", s) == 0)
68 if (strcasecmp(
"string", s) == 0)
70 if (strcasecmp(
"ipv4", s) == 0)
72 if (strcasecmp(
"ip", s) == 0)
86 static Dataset *DatasetSearchByName(
const char *
name)
98 static int HexToRaw(
const uint8_t *in,
size_t ins, uint8_t *out,
size_t outs)
108 memset(hash, 0, outs);
110 for (x = 0, i = 0; i < ins; i+=2, x++) {
111 char buf[3] = { 0, 0, 0 };
115 long value = strtol(buf, NULL, 16);
116 if (value >= 0 && value <= 255)
117 hash[x] = (uint8_t)value;
119 SCLogError(
"hash byte out of range %ld", value);
124 memcpy(out, hash, outs);
128 static int DatasetLoadIPv4(
Dataset *set)
130 if (strlen(set->
load) == 0)
134 const char *fopen_mode =
"r";
135 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
139 int retval = ParseDatasets(set, set->
name, set->
load, fopen_mode, DSIpv4);
142 }
else if (retval == -1) {
151 static int ParseIpv6String(
Dataset *set,
const char *line,
struct in6_addr *in6)
154 char *got_colon = strchr(line,
':');
157 if (inet_pton(AF_INET6, line, in6) != 1) {
161 memcpy(&ip6addr, in6->s6_addr,
sizeof(ip6addr));
163 if (ip6addr[0] == 0 && ip6addr[1] == 0 && ip6addr[2] == 0xFFFF0000) {
164 ip6addr[0] = ip6addr[3];
167 memcpy(in6, ip6addr,
sizeof(
struct in6_addr));
172 if (inet_pton(AF_INET, line, &in) != 1) {
176 memset(in6, 0,
sizeof(
struct in6_addr));
177 memcpy(in6, &in,
sizeof(
struct in_addr));
182 static int DatasetLoadIPv6(
Dataset *set)
184 if (strlen(set->
load) == 0)
188 const char *fopen_mode =
"r";
189 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
193 int retval = ParseDatasets(set, set->
name, set->
load, fopen_mode, DSIpv6);
196 }
else if (retval == -1) {
205 static int DatasetLoadMd5(
Dataset *set)
207 if (strlen(set->
load) == 0)
211 const char *fopen_mode =
"r";
212 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
216 int retval = ParseDatasets(set, set->
name, set->
load, fopen_mode, DSMd5);
219 }
else if (retval == -1) {
228 static int DatasetLoadSha256(
Dataset *set)
230 if (strlen(set->
load) == 0)
234 const char *fopen_mode =
"r";
235 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
239 int retval = ParseDatasets(set, set->
name, set->
load, fopen_mode, DSSha256);
242 }
else if (retval == -1) {
251 static int DatasetLoadString(
Dataset *set)
253 if (strlen(set->
load) == 0)
258 const char *fopen_mode =
"r";
259 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
263 int retval = ParseDatasets(set, set->
name, set->
load, fopen_mode, DSString);
266 }
else if (retval == -1) {
282 static void DatasetGetPath(
const char *in_path,
289 strlcpy(path, in_path,
sizeof(path));
290 strlcpy(out_path, path, out_size);
295 if (stat(data_dir, &st) != 0) {
296 SCLogDebug(
"data-dir '%s': %s", data_dir, strerror(errno));
300 snprintf(path,
sizeof(path),
"%s/%s", data_dir, in_path);
303 if (stat(path, &st) != 0) {
304 SCLogDebug(
"path %s: %s", path, strerror(errno));
306 snprintf(path,
sizeof(path),
"%s", in_path);
310 strlcpy(out_path, path, out_size);
311 SCLogDebug(
"in_path \'%s\' => \'%s\'", in_path, out_path);
329 static bool DatasetCheckHashsize(
const char *
name, uint32_t hash_size)
332 SCLogError(
"hashsize %u in dataset '%s' exceeds configured 'single-hashsize' limit (%u)",
340 SCLogError(
"hashsize %u in dataset '%s' exceeds configured 'total-hashsizes' limit (%u, in "
349 static void DatasetUpdateHashsize(
const char *
name, uint32_t hash_size)
360 uint64_t default_memcap = 0;
361 uint32_t default_hashsize = 0;
371 "exists and is of type %u",
377 if ((save == NULL || strlen(save) == 0) &&
378 (load == NULL || strlen(load) == 0)) {
382 if ((save == NULL && strlen(set->
save) > 0) ||
383 (save != NULL && strcmp(set->
save, save) != 0)) {
388 if ((load == NULL && strlen(set->
load) > 0) ||
389 (load != NULL && strcmp(set->
load, load) != 0)) {
405 GetDefaultMemcap(&default_memcap, &default_hashsize);
414 set = DatasetAlloc(
name);
421 if (save && strlen(save)) {
425 if (load && strlen(load)) {
431 snprintf(cnf_name,
sizeof(cnf_name),
"datasets.%s.hash",
name);
437 memcap > 0 ? memcap : default_memcap,
hashsize);
438 if (set->
hash == NULL)
440 if (DatasetLoadMd5(set) < 0)
446 memcap > 0 ? memcap : default_memcap,
hashsize);
447 if (set->
hash == NULL)
449 if (DatasetLoadString(set) < 0)
455 memcap > 0 ? memcap : default_memcap,
hashsize);
456 if (set->
hash == NULL)
458 if (DatasetLoadSha256(set) < 0)
464 memcap > 0 ? memcap : default_memcap,
hashsize);
465 if (set->
hash == NULL)
467 if (DatasetLoadIPv4(set) < 0)
473 memcap > 0 ? memcap : default_memcap,
hashsize);
474 if (set->
hash == NULL)
476 if (DatasetLoadIPv6(set) < 0)
480 if (set->
hash == NULL) {
485 SCLogError(
"dataset too large for set memcap");
489 SCLogDebug(
"set %p/%s type %u save %s load %s",
512 static bool DatasetIsStatic(
const char *save,
const char *load)
517 if ((load != NULL && strlen(load) > 0) &&
518 (save == NULL || strlen(save) == 0)) {
552 SCLogDebug(
"Post Reload Cleanup starting.. Hidden sets will be removed");
/* Hash size constant (4096).
 * NOTE(review): by its name this is the fallback hash size when none is
 * configured; the place where it is applied is not visible in this
 * excerpt -- confirm against GetDefaultMemcap(). */
578 #define DATASETS_HASHSIZE_DEFAULT 4096
580 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *
hashsize)
582 const char *
str = NULL;
586 " resetting to default",
593 if (
SCConfGet(
"datasets.defaults.hashsize", &
str) == 1) {
597 " resetting to default: %u",
607 uint64_t default_memcap = 0;
608 uint32_t default_hashsize = 0;
609 GetDefaultMemcap(&default_memcap, &default_hashsize);
610 if (datasets != NULL) {
611 const char *
str = NULL;
612 if (
SCConfGet(
"datasets.limits.total-hashsizes", &
str) == 1) {
614 FatalError(
"failed to parse datasets.limits.total-hashsizes value: %s",
str);
617 if (
SCConfGet(
"datasets.limits.single-hashsize", &
str) == 1) {
619 FatalError(
"failed to parse datasets.limits.single-hashsize value: %s",
str);
624 FatalError(
"total-hashsizes (%u) cannot be smaller than single-hashsize (%u)",
635 if (iter->
name == NULL) {
640 char save[PATH_MAX] =
"";
641 char load[PATH_MAX] =
"";
645 const char *set_name = iter->
name;
653 if (set_type == NULL) {
660 DatasetGetPath(set_save->
val, save,
sizeof(save),
TYPE_STATE);
661 strlcpy(load, save,
sizeof(load));
665 DatasetGetPath(set_load->
val, load,
sizeof(load),
TYPE_LOAD);
673 " deduced: %s, resetting to default",
682 " deduced: %s, resetting to default",
688 snprintf(conf_str,
sizeof(conf_str),
"datasets.%d.%s", list_pos, set_name);
690 SCLogDebug(
"set %s type %s. Conf %s", set_name, set_type->
val, conf_str);
692 if (strcmp(set_type->
val,
"md5") == 0) {
694 memcap > 0 ? memcap : default_memcap,
700 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
703 }
else if (strcmp(set_type->
val,
"sha256") == 0) {
705 memcap > 0 ? memcap : default_memcap,
711 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
714 }
else if (strcmp(set_type->
val,
"string") == 0) {
716 memcap > 0 ? memcap : default_memcap,
722 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
725 }
else if (strcmp(set_type->
val,
"ipv4") == 0) {
727 memcap > 0 ? memcap : default_memcap,
733 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
736 }
else if (strcmp(set_type->
val,
"ip") == 0) {
738 memcap > 0 ? memcap : default_memcap,
744 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
769 SCLogDebug(
"destroying datasets done: %p", sets);
772 static int SaveCallback(
void *
ctx,
const uint8_t *data,
const uint32_t data_len)
777 return (
int)fwrite(data, data_len, 1, fp);
782 static int Md5AsAscii(
const void *s,
char *out,
size_t out_size)
789 return (
int)strlen(out);
792 static int Sha256AsAscii(
const void *s,
char *out,
size_t out_size)
799 return (
int)strlen(out);
802 static int IPv4AsAscii(
const void *s,
char *out,
size_t out_size)
809 return (
int)strlen(out);
812 static int IPv6AsAscii(
const void *s,
char *out,
size_t out_size)
817 for (
int i = 4; i <= 15; i++) {
818 if (ip6->
ipv6[i] != 0) {
830 return (
int)strlen(out);
839 if (strlen(set->
save) == 0)
842 FILE *fp = fopen(set->
save,
"w");
874 static int DatasetLookupString(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
879 StringType lookup = { .
ptr = (uint8_t *)data, .
len = data_len, .rep.value = 0 };
882 DatasetUnlockData(rdata);
889 const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
896 StringType lookup = { .
ptr = (uint8_t *)data, .
len = data_len, .rep = *rep };
902 DatasetUnlockData(rdata);
908 static int DatasetLookupIPv4(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
917 memcpy(lookup.
ipv4, data, 4);
920 DatasetUnlockData(rdata);
927 Dataset *set,
const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
938 memcpy(lookup.
ipv4, data, data_len);
944 DatasetUnlockData(rdata);
950 static int DatasetLookupIPv6(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
955 if (data_len != 16 && data_len != 4)
959 memcpy(lookup.
ipv6, data, data_len);
962 DatasetUnlockData(rdata);
969 Dataset *set,
const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
976 if (data_len != 16 && data_len != 4)
980 memcpy(lookup.
ipv6, data, data_len);
986 DatasetUnlockData(rdata);
992 static int DatasetLookupMd5(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1001 memcpy(lookup.
md5, data, data_len);
1004 DatasetUnlockData(rdata);
1011 const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
1022 memcpy(lookup.
md5, data, data_len);
1028 DatasetUnlockData(rdata);
1034 static int DatasetLookupSha256(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1043 memcpy(lookup.
sha256, data, data_len);
1046 DatasetUnlockData(rdata);
1053 const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
1064 memcpy(lookup.
sha256, data, data_len);
1070 DatasetUnlockData(rdata);
1090 switch (set->
type) {
1092 return DatasetLookupString(set, data, data_len);
1094 return DatasetLookupMd5(set, data, data_len);
1096 return DatasetLookupSha256(set, data, data_len);
1098 return DatasetLookupIPv4(set, data, data_len);
1100 return DatasetLookupIPv6(set, data, data_len);
1106 const DataRepType *rep)
1112 switch (set->
type) {
1114 return DatasetLookupStringwRep(set, data, data_len, rep);
1116 return DatasetLookupMd5wRep(set, data, data_len, rep);
1118 return DatasetLookupSha256wRep(set, data, data_len, rep);
1120 return DatasetLookupIPv4wRep(set, data, data_len, rep);
1122 return DatasetLookupIPv6wRep(set, data, data_len, rep);
1132 static int DatasetAddString(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1137 StringType lookup = { .ptr = (uint8_t *)data, .
len = data_len,
1141 DatasetUnlockData(res.
data);
1142 return res.
is_new ? 1 : 0;
1152 static int DatasetAddStringwRep(
1153 Dataset *set,
const uint8_t *
data,
const uint32_t data_len,
const DataRepType *rep)
1162 DatasetUnlockData(res.
data);
1163 return res.
is_new ? 1 : 0;
1168 static int DatasetAddIPv4(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1182 DatasetUnlockData(res.
data);
1183 return res.
is_new ? 1 : 0;
1188 static int DatasetAddIPv6(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1194 if (data_len != 16) {
1202 DatasetUnlockData(res.
data);
1203 return res.
is_new ? 1 : 0;
1208 static int DatasetAddIPv4wRep(
1209 Dataset *set,
const uint8_t *
data,
const uint32_t data_len,
const DataRepType *rep)
1221 DatasetUnlockData(res.
data);
1222 return res.
is_new ? 1 : 0;
1227 static int DatasetAddIPv6wRep(
1228 Dataset *set,
const uint8_t *
data,
const uint32_t data_len,
const DataRepType *rep)
1240 DatasetUnlockData(res.
data);
1241 return res.
is_new ? 1 : 0;
1246 static int DatasetAddMd5(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1258 DatasetUnlockData(res.
data);
1259 return res.
is_new ? 1 : 0;
1264 static int DatasetAddMd5wRep(
1265 Dataset *set,
const uint8_t *
data,
const uint32_t data_len,
const DataRepType *rep)
1277 DatasetUnlockData(res.
data);
1278 return res.
is_new ? 1 : 0;
1283 static int DatasetAddSha256wRep(
1284 Dataset *set,
const uint8_t *
data,
const uint32_t data_len,
const DataRepType *rep)
1296 DatasetUnlockData(res.
data);
1297 return res.
is_new ? 1 : 0;
1302 static int DatasetAddSha256(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1314 DatasetUnlockData(res.
data);
1315 return res.
is_new ? 1 : 0;
1325 switch (set->
type) {
1327 return DatasetAddString(set,
data, data_len);
1329 return DatasetAddMd5(set,
data, data_len);
1331 return DatasetAddSha256(set,
data, data_len);
1333 return DatasetAddIPv4(set,
data, data_len);
1335 return DatasetAddIPv6(set,
data, data_len);
1345 switch (set->
type) {
1347 return DatasetAddStringwRep(set,
data, data_len, rep);
1349 return DatasetAddMd5wRep(set,
data, data_len, rep);
1351 return DatasetAddSha256wRep(set,
data, data_len, rep);
1353 return DatasetAddIPv4wRep(set,
data, data_len, rep);
1355 return DatasetAddIPv6wRep(set,
data, data_len, rep);
1362 static int DatasetOpSerialized(
Dataset *set,
const char *
string,
DatasetOpFunc DatasetOpString,
1368 if (strlen(
string) == 0)
1371 switch (set->
type) {
1373 uint32_t decoded_size = SCBase64DecodeBufferSize(strlen(
string));
1374 uint8_t decoded[decoded_size];
1375 uint32_t num_decoded = SCBase64Decode(
1376 (
const uint8_t *)
string, strlen(
string), SCBase64ModeStrict, decoded);
1377 if (num_decoded == 0) {
1381 return DatasetOpString(set, decoded, num_decoded);
1384 if (strlen(
string) != 32)
1387 if (HexToRaw((
const uint8_t *)
string, 32, hash,
sizeof(hash)) < 0)
1389 return DatasetOpMd5(set, hash, 16);
1392 if (strlen(
string) != 64)
1395 if (HexToRaw((
const uint8_t *)
string, 64, hash,
sizeof(hash)) < 0)
1397 return DatasetOpSha256(set, hash, 32);
1401 if (inet_pton(AF_INET,
string, &in) != 1)
1403 return DatasetOpIPv4(set, (uint8_t *)&in.s_addr, 4);
1406 struct in6_addr in6;
1407 if (ParseIpv6String(set,
string, &in6) != 0) {
1408 SCLogError(
"Dataset failed to import %s as IPv6",
string);
1411 return DatasetOpIPv6(set, (uint8_t *)&in6.s6_addr, 16);
1425 return DatasetOpSerialized(set,
string, DatasetAddString, DatasetAddMd5, DatasetAddSha256,
1426 DatasetAddIPv4, DatasetAddIPv6);
1437 return DatasetOpSerialized(set,
string, DatasetLookupString, DatasetLookupMd5,
1438 DatasetLookupSha256, DatasetLookupIPv4, DatasetLookupIPv6);
1446 static int DatasetRemoveString(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1451 StringType lookup = { .ptr = (uint8_t *)data, .
len = data_len,
1456 static int DatasetRemoveIPv4(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1465 memcpy(lookup.
ipv4, data, 4);
1469 static int DatasetRemoveIPv6(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1478 memcpy(lookup.
ipv6, data, 16);
1482 static int DatasetRemoveMd5(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1491 memcpy(lookup.
md5, data, 16);
1495 static int DatasetRemoveSha256(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1504 memcpy(lookup.
sha256, data, 32);
1515 return DatasetOpSerialized(set,
string, DatasetRemoveString, DatasetRemoveMd5,
1516 DatasetRemoveSha256, DatasetRemoveIPv4, DatasetRemoveIPv6);
1524 switch (set->
type) {
1526 return DatasetRemoveString(set, data, data_len);
1528 return DatasetRemoveMd5(set, data, data_len);
1530 return DatasetRemoveSha256(set, data, data_len);
1532 return DatasetRemoveIPv4(set, data, data_len);
1534 return DatasetRemoveIPv6(set, data, data_len);