46 static uint32_t set_ids = 0;
48 static int DatasetAddwRep(
Dataset *set,
const uint8_t *data,
const uint32_t data_len,
51 static inline void DatasetUnlockData(
THashData *d)
56 static bool DatasetIsStatic(
const char *save,
const char *load);
57 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *
hashsize);
61 if (strcasecmp(
"md5", s) == 0)
63 if (strcasecmp(
"sha256", s) == 0)
65 if (strcasecmp(
"string", s) == 0)
67 if (strcasecmp(
"ipv4", s) == 0)
69 if (strcasecmp(
"ip", s) == 0)
74 static Dataset *DatasetAlloc(
const char *name)
83 static Dataset *DatasetSearchByName(
const char *name)
87 if (strcasecmp(name, set->
name) == 0 && set->
hidden ==
false) {
95 static int HexToRaw(
const uint8_t *in,
size_t ins, uint8_t *out,
size_t outs)
105 memset(hash, 0, outs);
107 for (x = 0, i = 0; i < ins; i+=2, x++) {
108 char buf[3] = { 0, 0, 0 };
112 long value = strtol(buf, NULL, 16);
113 if (value >= 0 && value <= 255)
114 hash[x] = (uint8_t)value;
116 SCLogError(
"hash byte out of range %ld", value);
121 memcpy(out, hash, outs);
125 static int ParseRepLine(
const char *in,
size_t ins,
DataRepType *rep_out)
129 memcpy(raw, in, ins);
133 char *ptrs[1] = {NULL};
137 while (i < ins + 1) {
138 if (line[i] ==
',' || line[i] ==
'\n' || line[i] ==
'\0') {
159 if (r != (
int)strlen(ptrs[0])) {
160 SCLogError(
"'%s' is not a valid reputation value (0-65535)", ptrs[0]);
169 static int DatasetLoadIPv4(
Dataset *set)
171 if (strlen(set->
load) == 0)
175 const char *fopen_mode =
"r";
176 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
180 FILE *fp = fopen(set->
load, fopen_mode);
188 while (fgets(line, (
int)
sizeof(line), fp) != NULL) {
189 char *r = strchr(line,
',');
191 line[strlen(line) - 1] =
'\0';
195 if (inet_pton(AF_INET, line, &in) != 1) {
200 if (
DatasetAdd(set, (
const uint8_t *)&in.s_addr, 4) < 0) {
208 line[strlen(line) - 1] =
'\0';
214 if (inet_pton(AF_INET, line, &in) != 1) {
222 if (ParseRepLine(r, strlen(r), &rep) < 0) {
228 if (DatasetAddwRep(set, (
const uint8_t *)&in.s_addr, 4, &rep) < 0) {
243 static int ParseIpv6String(
Dataset *set,
char *line,
struct in6_addr *in6)
246 char *got_colon = strchr(line,
':');
249 if (inet_pton(AF_INET6, line, in6) != 1) {
253 memcpy(&ip6addr, in6->s6_addr,
sizeof(ip6addr));
255 if (ip6addr[0] == 0 && ip6addr[1] == 0 && ip6addr[2] == 0xFFFF0000) {
256 ip6addr[0] = ip6addr[3];
259 memcpy(in6, ip6addr,
sizeof(
struct in6_addr));
264 if (inet_pton(AF_INET, line, &in) != 1) {
268 memset(in6, 0,
sizeof(
struct in6_addr));
269 memcpy(in6, &in,
sizeof(
struct in_addr));
274 static int DatasetLoadIPv6(
Dataset *set)
276 if (strlen(set->
load) == 0)
280 const char *fopen_mode =
"r";
281 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
285 FILE *fp = fopen(set->
load, fopen_mode);
293 while (fgets(line, (
int)
sizeof(line), fp) != NULL) {
294 char *r = strchr(line,
',');
296 line[strlen(line) - 1] =
'\0';
300 int ret = ParseIpv6String(set, line, &in6);
306 if (
DatasetAdd(set, (
const uint8_t *)&in6.s6_addr, 16) < 0) {
314 line[strlen(line) - 1] =
'\0';
320 int ret = ParseIpv6String(set, line, &in6);
329 if (ParseRepLine(r, strlen(r), &rep) < 0) {
335 if (DatasetAddwRep(set, (
const uint8_t *)&in6.s6_addr, 16, &rep) < 0) {
350 static int DatasetLoadMd5(
Dataset *set)
352 if (strlen(set->
load) == 0)
356 const char *fopen_mode =
"r";
357 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
361 FILE *fp = fopen(set->
load, fopen_mode);
369 while (fgets(line, (
int)
sizeof(line), fp) != NULL) {
371 if (strlen(line) == 33) {
372 line[strlen(line) - 1] =
'\0';
376 if (HexToRaw((
const uint8_t *)line, 32, hash,
sizeof(hash)) < 0) {
381 if (
DatasetAdd(set, (
const uint8_t *)hash, 16) < 0) {
388 }
else if (strlen(line) > 33 && line[32] ==
',') {
389 line[strlen(line) - 1] =
'\0';
393 if (HexToRaw((
const uint8_t *)line, 32, hash,
sizeof(hash)) < 0) {
399 if (ParseRepLine(line + 33, strlen(line) - 33, &rep) < 0) {
405 if (DatasetAddwRep(set, hash, 16, &rep) < 0) {
413 FatalErrorOnInit(
"MD5 bad line len %u: '%s'", (uint32_t)strlen(line), line);
424 static int DatasetLoadSha256(
Dataset *set)
426 if (strlen(set->
load) == 0)
430 const char *fopen_mode =
"r";
431 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
435 FILE *fp = fopen(set->
load, fopen_mode);
443 while (fgets(line, (
int)
sizeof(line), fp) != NULL) {
445 if (strlen(line) == 65) {
446 line[strlen(line) - 1] =
'\0';
450 if (HexToRaw((
const uint8_t *)line, 64, hash,
sizeof(hash)) < 0) {
455 if (
DatasetAdd(set, (
const uint8_t *)hash, (uint32_t)32) < 0) {
462 }
else if (strlen(line) > 65 && line[64] ==
',') {
463 line[strlen(line) - 1] =
'\0';
464 SCLogDebug(
"SHA-256 with REP line: '%s'", line);
467 if (HexToRaw((
const uint8_t *)line, 64, hash,
sizeof(hash)) < 0) {
473 if (ParseRepLine(line + 65, strlen(line) - 65, &rep) < 0) {
480 if (DatasetAddwRep(set, hash, 32, &rep) < 0) {
494 static int DatasetLoadString(
Dataset *set)
496 if (strlen(set->
load) == 0)
500 const char *fopen_mode =
"r";
501 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
505 FILE *fp = fopen(set->
load, fopen_mode);
513 while (fgets(line, (
int)
sizeof(line), fp) != NULL) {
514 if (strlen(line) <= 1)
517 char *r = strchr(line,
',');
519 line[strlen(line) - 1] =
'\0';
523 uint8_t decoded[strlen(line)];
524 uint32_t consumed = 0, num_decoded = 0;
526 (uint32_t)strlen(line), &consumed, &num_decoded, Base64ModeStrict);
532 if (
DatasetAdd(set, (
const uint8_t *)decoded, num_decoded) < 0) {
538 line[strlen(line) - 1] =
'\0';
544 uint8_t decoded[strlen(line)];
545 uint32_t consumed = 0, num_decoded = 0;
547 (uint32_t)strlen(line), &consumed, &num_decoded, Base64ModeStrict);
557 if (ParseRepLine(r, strlen(r), &rep) < 0) {
563 if (DatasetAddwRep(set, (
const uint8_t *)decoded, num_decoded, &rep) < 0) {
586 static void DatasetGetPath(
const char *in_path,
593 strlcpy(path, in_path,
sizeof(path));
594 strlcpy(out_path, path, out_size);
599 if (stat(data_dir, &st) != 0) {
600 SCLogDebug(
"data-dir '%s': %s", data_dir, strerror(errno));
604 snprintf(path,
sizeof(path),
"%s/%s", data_dir, in_path);
607 if (stat(path, &st) != 0) {
608 SCLogDebug(
"path %s: %s", path, strerror(errno));
610 snprintf(path,
sizeof(path),
"%s", in_path);
614 strlcpy(out_path, path, out_size);
615 SCLogDebug(
"in_path \'%s\' => \'%s\'", in_path, out_path);
622 Dataset *set = DatasetSearchByName(name);
636 uint64_t default_memcap = 0;
637 uint32_t default_hashsize = 0;
643 Dataset *set = DatasetSearchByName(name);
647 "exists and is of type %u",
653 if ((save == NULL || strlen(save) == 0) &&
654 (load == NULL || strlen(load) == 0)) {
658 if ((save == NULL && strlen(set->
save) > 0) ||
659 (save != NULL && strcmp(set->
save, save) != 0)) {
664 if ((load == NULL && strlen(set->
load) > 0) ||
665 (load != NULL && strcmp(set->
load, load) != 0)) {
681 set = DatasetAlloc(name);
688 if (save && strlen(save)) {
692 if (load && strlen(load)) {
698 snprintf(cnf_name,
sizeof(cnf_name),
"datasets.%s.hash", name);
700 GetDefaultMemcap(&default_memcap, &default_hashsize);
705 memcap > 0 ? memcap : default_memcap,
707 if (set->
hash == NULL)
709 if (DatasetLoadMd5(set) < 0)
715 memcap > 0 ? memcap : default_memcap,
717 if (set->
hash == NULL)
719 if (DatasetLoadString(set) < 0)
725 memcap > 0 ? memcap : default_memcap,
727 if (set->
hash == NULL)
729 if (DatasetLoadSha256(set) < 0)
735 NULL, NULL, load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
737 if (set->
hash == NULL)
739 if (DatasetLoadIPv4(set) < 0)
745 NULL, NULL, load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
747 if (set->
hash == NULL)
749 if (DatasetLoadIPv6(set) < 0)
755 SCLogError(
"dataset too large for set memcap");
759 SCLogDebug(
"set %p/%s type %u save %s load %s",
778 static bool DatasetIsStatic(
const char *save,
const char *load)
783 if ((load != NULL && strlen(load) > 0) &&
784 (save == NULL || strlen(save) == 0)) {
814 SCLogDebug(
"Post Reload Cleanup starting.. Hidden sets will be removed");
820 if (cur->
hidden ==
false) {
838 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *
hashsize)
840 const char *
str = NULL;
841 if (
ConfGet(
"datasets.defaults.memcap", &
str) == 1) {
844 " resetting to default",
849 if (
ConfGet(
"datasets.defaults.hashsize", &
str) == 1) {
852 " resetting to default",
863 uint64_t default_memcap = 0;
864 uint32_t default_hashsize = 0;
865 GetDefaultMemcap(&default_memcap, &default_hashsize);
866 if (datasets != NULL) {
870 if (iter->
name == NULL) {
875 char save[PATH_MAX] =
"";
876 char load[PATH_MAX] =
"";
880 const char *set_name = iter->
name;
889 if (set_type == NULL) {
897 DatasetGetPath(set_save->
val, save,
sizeof(save),
TYPE_STATE);
898 strlcpy(load, save,
sizeof(load));
903 DatasetGetPath(set_load->
val, load,
sizeof(load),
TYPE_LOAD);
911 " deduced: %s, resetting to default",
920 " deduced: %s, resetting to default",
926 snprintf(conf_str,
sizeof(conf_str),
"datasets.%d.%s", list_pos, set_name);
928 SCLogDebug(
"set %s type %s. Conf %s", set_name, set_type->
val, conf_str);
930 if (strcmp(set_type->
val,
"md5") == 0) {
932 memcap > 0 ? memcap : default_memcap,
938 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
941 }
else if (strcmp(set_type->
val,
"sha256") == 0) {
943 memcap > 0 ? memcap : default_memcap,
949 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
952 }
else if (strcmp(set_type->
val,
"string") == 0) {
954 memcap > 0 ? memcap : default_memcap,
960 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
963 }
else if (strcmp(set_type->
val,
"ipv4") == 0) {
965 memcap > 0 ? memcap : default_memcap,
971 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
974 }
else if (strcmp(set_type->
val,
"ip") == 0) {
976 memcap > 0 ? memcap : default_memcap,
982 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
1007 SCLogDebug(
"destroying datasets done: %p", sets);
1010 static int SaveCallback(
void *
ctx,
const uint8_t *data,
const uint32_t data_len)
1015 return (
int)fwrite(data, data_len, 1, fp);
1020 static int Md5AsAscii(
const void *s,
char *out,
size_t out_size)
1027 return (
int)strlen(out);
1030 static int Sha256AsAscii(
const void *s,
char *out,
size_t out_size)
1037 return (
int)strlen(out);
1040 static int IPv4AsAscii(
const void *s,
char *out,
size_t out_size)
1047 return (
int)strlen(out);
1050 static int IPv6AsAscii(
const void *s,
char *out,
size_t out_size)
1054 bool is_ipv4 =
true;
1055 for (
int i = 4; i <= 15; i++) {
1056 if (ip6->
ipv6[i] != 0) {
1068 return (
int)strlen(out);
1077 if (strlen(set->
save) == 0)
1080 FILE *fp = fopen(set->
save,
"w");
1086 switch (set->
type) {
1112 static int DatasetLookupString(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1120 DatasetUnlockData(rdata);
1127 const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
1134 StringType lookup = { .
ptr = (uint8_t *)data, .
len = data_len, .rep = *rep };
1140 DatasetUnlockData(rdata);
1146 static int DatasetLookupIPv4(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1155 memcpy(lookup.
ipv4, data, 4);
1158 DatasetUnlockData(rdata);
1176 memcpy(lookup.
ipv4, data, data_len);
1182 DatasetUnlockData(rdata);
1188 static int DatasetLookupIPv6(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1193 if (data_len != 16 && data_len != 4)
1197 memcpy(lookup.
ipv6, data, data_len);
1200 DatasetUnlockData(rdata);
1214 if (data_len != 16 && data_len != 4)
1218 memcpy(lookup.
ipv6, data, data_len);
1224 DatasetUnlockData(rdata);
1230 static int DatasetLookupMd5(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1239 memcpy(lookup.
md5, data, data_len);
1242 DatasetUnlockData(rdata);
1249 const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
1260 memcpy(lookup.
md5, data, data_len);
1266 DatasetUnlockData(rdata);
1272 static int DatasetLookupSha256(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1281 memcpy(lookup.
sha256, data, data_len);
1284 DatasetUnlockData(rdata);
1291 const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
1302 memcpy(lookup.
sha256, data, data_len);
1308 DatasetUnlockData(rdata);
1328 switch (set->
type) {
1330 return DatasetLookupString(set, data, data_len);
1332 return DatasetLookupMd5(set, data, data_len);
1334 return DatasetLookupSha256(set, data, data_len);
1336 return DatasetLookupIPv4(set, data, data_len);
1338 return DatasetLookupIPv6(set, data, data_len);
1350 switch (set->
type) {
1352 return DatasetLookupStringwRep(set, data, data_len, rep);
1354 return DatasetLookupMd5wRep(set, data, data_len, rep);
1356 return DatasetLookupSha256wRep(set, data, data_len, rep);
1358 return DatasetLookupIPv4wRep(set, data, data_len, rep);
1360 return DatasetLookupIPv6wRep(set, data, data_len, rep);
1370 static int DatasetAddString(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1375 StringType lookup = { .ptr = (uint8_t *)data, .
len = data_len,
1379 DatasetUnlockData(res.
data);
1380 return res.
is_new ? 1 : 0;
1390 static int DatasetAddStringwRep(
1400 DatasetUnlockData(res.
data);
1401 return res.
is_new ? 1 : 0;
1406 static int DatasetAddIPv4(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1420 DatasetUnlockData(res.
data);
1421 return res.
is_new ? 1 : 0;
1426 static int DatasetAddIPv6(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1432 if (data_len != 16) {
1440 DatasetUnlockData(res.
data);
1441 return res.
is_new ? 1 : 0;
1446 static int DatasetAddIPv4wRep(
1459 DatasetUnlockData(res.
data);
1460 return res.
is_new ? 1 : 0;
1465 static int DatasetAddIPv6wRep(
1478 DatasetUnlockData(res.
data);
1479 return res.
is_new ? 1 : 0;
1484 static int DatasetAddMd5(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1496 DatasetUnlockData(res.
data);
1497 return res.
is_new ? 1 : 0;
1502 static int DatasetAddMd5wRep(
1515 DatasetUnlockData(res.
data);
1516 return res.
is_new ? 1 : 0;
1521 static int DatasetAddSha256wRep(
1534 DatasetUnlockData(res.
data);
1535 return res.
is_new ? 1 : 0;
1540 static int DatasetAddSha256(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1552 DatasetUnlockData(res.
data);
1553 return res.
is_new ? 1 : 0;
1563 switch (set->
type) {
1565 return DatasetAddString(set,
data, data_len);
1567 return DatasetAddMd5(set,
data, data_len);
1569 return DatasetAddSha256(set,
data, data_len);
1571 return DatasetAddIPv4(set,
data, data_len);
1573 return DatasetAddIPv6(set,
data, data_len);
1578 static int DatasetAddwRep(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len,
1584 switch (set->
type) {
1586 return DatasetAddStringwRep(set,
data, data_len, rep);
1588 return DatasetAddMd5wRep(set,
data, data_len, rep);
1590 return DatasetAddSha256wRep(set,
data, data_len, rep);
1592 return DatasetAddIPv4wRep(set,
data, data_len, rep);
1594 return DatasetAddIPv6wRep(set,
data, data_len, rep);
1601 static int DatasetOpSerialized(
Dataset *set,
const char *
string,
DatasetOpFunc DatasetOpString,
1608 switch (set->
type) {
1611 uint8_t decoded[strlen(
string)];
1612 uint32_t consumed = 0, num_decoded = 0;
1614 DecodeBase64(decoded, (uint32_t)strlen(
string), (
const uint8_t *)
string,
1615 (uint32_t)strlen(
string), &consumed, &num_decoded, Base64ModeStrict);
1620 return DatasetOpString(set, decoded, num_decoded);
1623 if (strlen(
string) != 32)
1626 if (HexToRaw((
const uint8_t *)
string, 32, hash,
sizeof(hash)) < 0)
1628 return DatasetOpMd5(set, hash, 16);
1631 if (strlen(
string) != 64)
1634 if (HexToRaw((
const uint8_t *)
string, 64, hash,
sizeof(hash)) < 0)
1636 return DatasetOpSha256(set, hash, 32);
1640 if (inet_pton(AF_INET,
string, &in) != 1)
1642 return DatasetOpIPv4(set, (uint8_t *)&in.s_addr, 4);
1646 if (inet_pton(AF_INET6,
string, &in) != 1)
1648 return DatasetOpIPv6(set, (uint8_t *)&in.s6_addr, 16);
1662 return DatasetOpSerialized(set,
string, DatasetAddString, DatasetAddMd5, DatasetAddSha256,
1663 DatasetAddIPv4, DatasetAddIPv6);
1674 return DatasetOpSerialized(set,
string, DatasetLookupString, DatasetLookupMd5,
1675 DatasetLookupSha256, DatasetLookupIPv4, DatasetLookupIPv6);
1683 static int DatasetRemoveString(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1688 StringType lookup = { .ptr = (uint8_t *)data, .
len = data_len,
1693 static int DatasetRemoveIPv4(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1702 memcpy(lookup.
ipv4, data, 4);
1706 static int DatasetRemoveIPv6(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1715 memcpy(lookup.
ipv6, data, 16);
1719 static int DatasetRemoveMd5(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1728 memcpy(lookup.
md5, data, 16);
1732 static int DatasetRemoveSha256(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1741 memcpy(lookup.
sha256, data, 32);
1752 return DatasetOpSerialized(set,
string, DatasetRemoveString, DatasetRemoveMd5,
1753 DatasetRemoveSha256, DatasetRemoveIPv4, DatasetRemoveIPv6);