45 static uint32_t set_ids = 0;
47 static int DatasetAddwRep(
Dataset *set,
const uint8_t *data,
const uint32_t data_len,
50 static inline void DatasetUnlockData(
THashData *d)
/* Forward declaration of DatasetIsStatic(); the definition appears further
 * down in this file. */
55 static bool DatasetIsStatic(
const char *save,
const char *load);
/* Forward declaration of GetDefaultMemcap(); callers in this file pass the
 * addresses of zero-initialised locals and read them back afterwards. */
56 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *
hashsize);
60 if (strcasecmp(
"md5", s) == 0)
62 if (strcasecmp(
"sha256", s) == 0)
64 if (strcasecmp(
"string", s) == 0)
66 if (strcasecmp(
"ipv4", s) == 0)
68 if (strcasecmp(
"ip", s) == 0)
73 static Dataset *DatasetAlloc(
const char *name)
82 static Dataset *DatasetSearchByName(
const char *name)
86 if (strcasecmp(name, set->
name) == 0 && set->
hidden ==
false) {
/*
 * HexToRaw - turn a run of ASCII hex digits into raw bytes.
 *
 * in   - hex characters to decode
 * ins  - number of characters in `in`; the loop below consumes two per step
 * out  - destination; receives `outs` bytes through the final memcpy
 * outs - size in bytes of the destination (also used to clear `hash`)
 *
 * Callers in this file treat a negative return as failure.  Only part of the
 * body is visible in this excerpt; the comments below describe just the
 * statements that are shown.
 */
94 static int HexToRaw(
const uint8_t *in,
size_t ins, uint8_t *out,
size_t outs)
/* Start from an all-zero scratch buffer. */
104 memset(hash, 0, outs);
/* i walks the input two characters at a time, x indexes the output bytes.
 * NOTE(review): hash[x] is written for x up to ins/2 - 1; confirm that a
 * check relating `ins` to `outs` exists in the lines not shown. */
106 for (x = 0, i = 0; i < ins; i+=2, x++) {
/* Room for two hex digits plus the NUL terminator strtol() needs. */
107 char buf[3] = { 0, 0, 0 };
/* NOTE(review): strtol() is called with a NULL end pointer, so this call
 * alone cannot tell "00" from non-hex text (both yield 0) - confirm the
 * input is validated elsewhere. */
111 long value = strtol(buf, NULL, 16);
112 if (value >= 0 && value <= 255)
113 hash[x] = (uint8_t)value;
115 SCLogError(
"hash byte out of range %ld", value);
/* Hand the decoded bytes to the caller. */
120 memcpy(out, hash, outs);
/*
 * ParseRepLine - parse the reputation field that follows the key on a
 * dataset line.
 *
 * in      - start of the text to parse
 * ins     - number of bytes of `in` to consider
 * rep_out - presumably receives the parsed value (the store is not visible
 *           in this excerpt - TODO confirm)
 *
 * Callers in this file treat a negative return as failure.
 */
124 static int ParseRepLine(
const char *in,
size_t ins,
DataRepType *rep_out)
/* Work on a private copy of the input.
 * NOTE(review): the size of `raw` is not visible here - confirm `ins` is
 * bounded against it before this copy. */
128 memcpy(raw, in, ins);
/* A single field pointer is collected. */
132 char *ptrs[1] = {NULL};
/* The scan runs one index past `ins`, so position `ins` itself is examined. */
136 while (i < ins + 1) {
/* A comma, a newline or a NUL ends the current field. */
137 if (line[i] ==
',' || line[i] ==
'\n' || line[i] ==
'\0') {
/* The field is rejected unless `r` equals its full length. */
158 if (r != (
int)strlen(ptrs[0])) {
159 SCLogError(
"'%s' is not a valid reputation value (0-65535)", ptrs[0]);
/*
 * DatasetLoadIPv4 - read IPv4 addresses from the file named by set->load and
 * add them to `set`, one entry per line.
 *
 * Two line forms are handled in the visible code: a bare address (added with
 * DatasetAdd) and an address followed by a reputation field (added with
 * DatasetAddwRep).  Only fragments of the body are shown in this excerpt.
 */
168 static int DatasetLoadIPv4(
Dataset *set)
/* Test for an empty load path. */
170 if (strlen(set->
load) == 0)
/* Read-only by default; the test below detects a non-empty save path that
 * equals the load path (the statement it guards is not visible here). */
174 const char *fopen_mode =
"r";
175 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
179 FILE *fp = fopen(set->
load, fopen_mode);
/* One record per fgets() call; fgets keeps the trailing newline. */
187 while (fgets(line, (
int)
sizeof(line), fp) != NULL) {
/* A comma marks the start of a reputation field. */
188 char *r = strchr(line,
',');
/* Drop the last character of the line (normally the newline).
 * NOTE(review): if the final line of the file has no newline this removes
 * a data character, and a zero-length line would index line[-1] - confirm
 * whether a guard exists in the lines not shown. */
190 line[strlen(line) - 1] =
'\0';
/* Textual address -> struct in_addr. */
194 if (inet_pton(AF_INET, line, &in) != 1) {
/* Store the 4 address bytes as the key. */
199 if (
DatasetAdd(set, (
const uint8_t *)&in.s_addr, 4) < 0) {
/* Second visible variant: same steps, then the text at `r` is parsed as a
 * reputation value and stored alongside the key.  The same
 * last-character caveat as above applies. */
207 line[strlen(line) - 1] =
'\0';
213 if (inet_pton(AF_INET, line, &in) != 1) {
221 if (ParseRepLine(r, strlen(r), &rep) < 0) {
227 if (DatasetAddwRep(set, (
const uint8_t *)&in.s_addr, 4, &rep) < 0) {
/*
 * ParseIpv6String - parse `line` as an IPv6 or IPv4 address into *in6.
 *
 * set  - dataset the address is destined for (not used in the visible lines)
 * line - textual address
 * in6  - output, always a 16-byte struct in6_addr
 *
 * The caller in DatasetOpSerialized treats a non-zero return as failure.
 * Only fragments of the body are visible in this excerpt.
 */
242 static int ParseIpv6String(
Dataset *set,
const char *line,
struct in6_addr *in6)
/* The presence of ':' is looked up first; the IPv6 and IPv4 parsing paths
 * both follow. */
245 char *got_colon = strchr(line,
':');
248 if (inet_pton(AF_INET6, line, in6) != 1) {
/* View the 16 address bytes through `ip6addr`, which is indexed and
 * compared as 32-bit words below. */
252 memcpy(&ip6addr, in6->s6_addr,
sizeof(ip6addr));
/* Matches words 0 and 1 being zero and word 2 reading 0xFFFF0000.
 * NOTE(review): this looks like a test for an IPv4-mapped address
 * (::ffff:a.b.c.d).  Because the words come from a memcpy of network-order
 * bytes, the constant 0xFFFF0000 matches on little-endian hosts only -
 * confirm behaviour on big-endian targets. */
254 if (ip6addr[0] == 0 && ip6addr[1] == 0 && ip6addr[2] == 0xFFFF0000) {
/* Move the last word (the embedded IPv4 address in that case) to the front. */
255 ip6addr[0] = ip6addr[3];
258 memcpy(in6, ip6addr,
sizeof(
struct in6_addr));
/* IPv4 text: parse it, clear *in6 and place the 4 address bytes at the start
 * of the 16-byte value. */
263 if (inet_pton(AF_INET, line, &in) != 1) {
267 memset(in6, 0,
sizeof(
struct in6_addr));
268 memcpy(in6, &in,
sizeof(
struct in_addr));
/*
 * DatasetLoadIPv6 - read addresses from the file named by set->load and add
 * them to `set` as 16-byte keys, one entry per line.
 *
 * Each line is parsed with ParseIpv6String().  A bare address is added with
 * DatasetAdd; an address followed by a reputation field is added with
 * DatasetAddwRep.  Only fragments of the body are shown in this excerpt.
 */
273 static int DatasetLoadIPv6(
Dataset *set)
/* Test for an empty load path. */
275 if (strlen(set->
load) == 0)
/* Read-only by default; the test below detects a non-empty save path that
 * equals the load path (the statement it guards is not visible here). */
279 const char *fopen_mode =
"r";
280 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
284 FILE *fp = fopen(set->
load, fopen_mode);
/* One record per fgets() call; fgets keeps the trailing newline. */
292 while (fgets(line, (
int)
sizeof(line), fp) != NULL) {
/* A comma marks the start of a reputation field. */
293 char *r = strchr(line,
',');
/* Drop the last character of the line (normally the newline).
 * NOTE(review): a final line without a newline loses a data character and
 * a zero-length line would index line[-1] - confirm whether a guard exists
 * in the lines not shown. */
295 line[strlen(line) - 1] =
'\0';
299 int ret = ParseIpv6String(set, line, &in6);
/* Store the 16 address bytes as the key. */
305 if (
DatasetAdd(set, (
const uint8_t *)&in6.s6_addr, 16) < 0) {
/* Second visible variant: same steps, then the text at `r` is parsed as a
 * reputation value and stored alongside the key.  The same
 * last-character caveat as above applies. */
313 line[strlen(line) - 1] =
'\0';
319 int ret = ParseIpv6String(set, line, &in6);
328 if (ParseRepLine(r, strlen(r), &rep) < 0) {
334 if (DatasetAddwRep(set, (
const uint8_t *)&in6.s6_addr, 16, &rep) < 0) {
/*
 * DatasetLoadMd5 - read MD5 hashes from the file named by set->load and add
 * them to `set` as 16-byte keys.
 *
 * Accepted line shapes in the visible code:
 *   - exactly 33 characters: 32 hex digits plus the character that is then
 *     stripped (the newline kept by fgets);
 *   - more than 33 characters with ',' at index 32: 32 hex digits, a comma
 *     and a reputation field.
 * Only fragments of the body are shown in this excerpt.
 */
349 static int DatasetLoadMd5(
Dataset *set)
/* Test for an empty load path. */
351 if (strlen(set->
load) == 0)
/* Read-only by default; the test below detects a non-empty save path that
 * equals the load path (the statement it guards is not visible here). */
355 const char *fopen_mode =
"r";
356 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
360 FILE *fp = fopen(set->
load, fopen_mode);
368 while (fgets(line, (
int)
sizeof(line), fp) != NULL) {
/* NOTE(review): the length tests count the trailing newline, so a final
 * line without one (32 characters) matches neither shape - confirm this is
 * intended. */
370 if (strlen(line) == 33) {
371 line[strlen(line) - 1] =
'\0';
/* 32 hex digits -> 16 raw bytes in `hash`. */
375 if (HexToRaw((
const uint8_t *)line, 32, hash,
sizeof(hash)) < 0) {
380 if (
DatasetAdd(set, (
const uint8_t *)hash, 16) < 0) {
387 }
else if (strlen(line) > 33 && line[32] ==
',') {
/* Hash followed by ",<rep>": strip the last character, decode the first 32
 * digits, then parse everything after the comma (offset 33) as the
 * reputation value. */
388 line[strlen(line) - 1] =
'\0';
392 if (HexToRaw((
const uint8_t *)line, 32, hash,
sizeof(hash)) < 0) {
398 if (ParseRepLine(line + 33, strlen(line) - 33, &rep) < 0) {
404 if (DatasetAddwRep(set, hash, 16, &rep) < 0) {
/* Reported for a line with an unexpected length. */
412 FatalErrorOnInit(
"MD5 bad line len %u: '%s'", (uint32_t)strlen(line), line);
/*
 * DatasetLoadSha256 - read SHA-256 hashes from the file named by set->load
 * and add them to `set` as 32-byte keys.
 *
 * Accepted line shapes in the visible code:
 *   - exactly 65 characters: 64 hex digits plus the character that is then
 *     stripped (the newline kept by fgets);
 *   - more than 65 characters with ',' at index 64: 64 hex digits, a comma
 *     and a reputation field.
 * Only fragments of the body are shown in this excerpt.
 */
423 static int DatasetLoadSha256(
Dataset *set)
/* Test for an empty load path. */
425 if (strlen(set->
load) == 0)
/* Read-only by default; the test below detects a non-empty save path that
 * equals the load path (the statement it guards is not visible here). */
429 const char *fopen_mode =
"r";
430 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
434 FILE *fp = fopen(set->
load, fopen_mode);
442 while (fgets(line, (
int)
sizeof(line), fp) != NULL) {
/* NOTE(review): the length tests count the trailing newline, so a final
 * line without one (64 characters) matches neither shape - confirm this is
 * intended. */
444 if (strlen(line) == 65) {
445 line[strlen(line) - 1] =
'\0';
/* 64 hex digits -> 32 raw bytes in `hash`. */
449 if (HexToRaw((
const uint8_t *)line, 64, hash,
sizeof(hash)) < 0) {
454 if (
DatasetAdd(set, (
const uint8_t *)hash, (uint32_t)32) < 0) {
461 }
else if (strlen(line) > 65 && line[64] ==
',') {
/* Hash followed by ",<rep>": strip the last character, decode the first 64
 * digits, then parse everything after the comma (offset 65) as the
 * reputation value. */
462 line[strlen(line) - 1] =
'\0';
463 SCLogDebug(
"SHA-256 with REP line: '%s'", line);
466 if (HexToRaw((
const uint8_t *)line, 64, hash,
sizeof(hash)) < 0) {
472 if (ParseRepLine(line + 65, strlen(line) - 65, &rep) < 0) {
479 if (DatasetAddwRep(set, hash, 32, &rep) < 0) {
/*
 * DatasetLoadString - read base64-encoded strings from the file named by
 * set->load and add the decoded bytes to `set`.
 *
 * A bare value is added with DatasetAdd; a value followed by a reputation
 * field is added with DatasetAddwRep.  Only fragments of the body are shown
 * in this excerpt.
 */
493 static int DatasetLoadString(
Dataset *set)
/* Test for an empty load path. */
495 if (strlen(set->
load) == 0)
/* Read-only by default; the test below detects a non-empty save path that
 * equals the load path (the statement it guards is not visible here). */
499 const char *fopen_mode =
"r";
500 if (strlen(set->
save) > 0 && strcmp(set->
save, set->
load) == 0) {
504 FILE *fp = fopen(set->
load, fopen_mode);
512 while (fgets(line, (
int)
sizeof(line), fp) != NULL) {
/* Lines of at most one character are singled out here, which also keeps the
 * strlen(line) - 1 index below from going negative. */
513 if (strlen(line) <= 1)
/* A comma marks the start of a reputation field. */
516 char *r = strchr(line,
',');
/* Drop the last character (normally the newline).
 * NOTE(review): a final line without a newline loses a data character. */
518 line[strlen(line) - 1] =
'\0';
/* NOTE(review): `decoded` is a variable-length array sized from the input
 * line; the size of `line` is not visible here - confirm the stack use is
 * bounded. */
520 uint32_t decoded_size = Base64DecodeBufferSize(strlen(line));
522 uint8_t decoded[decoded_size];
523 uint32_t num_decoded =
524 Base64Decode((
const uint8_t *)line, strlen(line), Base64ModeStrict, decoded);
/* Zero decoded bytes from non-empty input is checked for here. */
525 if (num_decoded == 0 && strlen(line) > 0) {
530 if (
DatasetAdd(set, (
const uint8_t *)decoded, num_decoded) < 0) {
/* Second visible variant (value with reputation): same stripping and
 * decoding steps.
 * NOTE(review): this variant tests num_decoded == 0 without the
 * strlen(line) > 0 qualifier used above - confirm the difference is
 * intended. */
536 line[strlen(line) - 1] =
'\0';
541 uint32_t decoded_size = Base64DecodeBufferSize(strlen(line));
542 uint8_t decoded[decoded_size];
543 uint32_t num_decoded =
544 Base64Decode((
const uint8_t *)line, strlen(line), Base64ModeStrict, decoded);
545 if (num_decoded == 0) {
/* The text at `r` is parsed as the reputation value. */
554 if (ParseRepLine(r, strlen(r), &rep) < 0) {
560 if (DatasetAddwRep(set, (
const uint8_t *)decoded, num_decoded, &rep) < 0) {
/*
 * DatasetGetPath - derive the path written to `out_path` from `in_path`.
 *
 * The visible statements show three candidates: `in_path` copied through
 * unchanged, "<data_dir>/<in_path>", and `in_path` again as a fallback.  The
 * conditions selecting between them, and the remaining parameters, are only
 * partly visible in this excerpt.
 */
583 static void DatasetGetPath(
const char *in_path,
/* Pass-through: `in_path` is copied to `out_path` via the local buffer. */
590 strlcpy(path, in_path,
sizeof(path));
591 strlcpy(out_path, path, out_size);
/* Probe the data directory itself; a failure is only logged at debug level
 * in the visible lines. */
596 if (stat(data_dir, &st) != 0) {
597 SCLogDebug(
"data-dir '%s': %s", data_dir, strerror(errno));
/* Candidate inside the data directory. */
601 snprintf(path,
sizeof(path),
"%s/%s", data_dir, in_path);
/* Probe that candidate. */
604 if (stat(path, &st) != 0) {
605 SCLogDebug(
"path %s: %s", path, strerror(errno));
/* Use `in_path` as given. */
607 snprintf(path,
sizeof(path),
"%s", in_path);
/* Publish the chosen path. */
611 strlcpy(out_path, path, out_size);
612 SCLogDebug(
"in_path \'%s\' => \'%s\'", in_path, out_path);
619 Dataset *set = DatasetSearchByName(name);
633 uint64_t default_memcap = 0;
634 uint32_t default_hashsize = 0;
640 Dataset *set = DatasetSearchByName(name);
644 "exists and is of type %u",
650 if ((save == NULL || strlen(save) == 0) &&
651 (load == NULL || strlen(load) == 0)) {
655 if ((save == NULL && strlen(set->
save) > 0) ||
656 (save != NULL && strcmp(set->
save, save) != 0)) {
661 if ((load == NULL && strlen(set->
load) > 0) ||
662 (load != NULL && strcmp(set->
load, load) != 0)) {
678 set = DatasetAlloc(name);
685 if (save && strlen(save)) {
689 if (load && strlen(load)) {
695 snprintf(cnf_name,
sizeof(cnf_name),
"datasets.%s.hash", name);
697 GetDefaultMemcap(&default_memcap, &default_hashsize);
702 memcap > 0 ? memcap : default_memcap,
704 if (set->
hash == NULL)
706 if (DatasetLoadMd5(set) < 0)
712 memcap > 0 ? memcap : default_memcap,
714 if (set->
hash == NULL)
716 if (DatasetLoadString(set) < 0)
722 memcap > 0 ? memcap : default_memcap,
724 if (set->
hash == NULL)
726 if (DatasetLoadSha256(set) < 0)
732 NULL, NULL, load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
734 if (set->
hash == NULL)
736 if (DatasetLoadIPv4(set) < 0)
742 NULL, NULL, load != NULL ? 1 : 0, memcap > 0 ? memcap : default_memcap,
744 if (set->
hash == NULL)
746 if (DatasetLoadIPv6(set) < 0)
752 SCLogError(
"dataset too large for set memcap");
756 SCLogDebug(
"set %p/%s type %u save %s load %s",
/*
 * DatasetIsStatic - classify a dataset from its save and load paths.
 *
 * The visible condition holds when `load` is a non-empty string and `save`
 * is NULL or empty.  The value returned for each outcome is in lines not
 * shown in this excerpt.
 */
775 static bool DatasetIsStatic(
const char *save,
const char *load)
780 if ((load != NULL && strlen(load) > 0) &&
781 (save == NULL || strlen(save) == 0)) {
811 SCLogDebug(
"Post Reload Cleanup starting.. Hidden sets will be removed");
817 if (cur->
hidden ==
false) {
/*
 * GetDefaultMemcap - look up the configured dataset defaults.
 *
 * memcap   - output pointer for the default memcap
 * hashsize - output pointer for the default hash size
 *
 * The visible lines query the configuration keys
 * "datasets.defaults.memcap" and "datasets.defaults.hashsize".  How the
 * values are parsed and stored through the output pointers is in lines not
 * shown here; the " resetting to default" message fragments suggest a
 * fallback when a value is rejected - TODO confirm.
 */
835 static void GetDefaultMemcap(uint64_t *memcap, uint32_t *
hashsize)
837 const char *
str = NULL;
/* ConfGet() returning 1 is taken to mean the key is present. */
838 if (
ConfGet(
"datasets.defaults.memcap", &
str) == 1) {
841 " resetting to default",
/* Same lookup for the hash size. */
846 if (
ConfGet(
"datasets.defaults.hashsize", &
str) == 1) {
849 " resetting to default",
860 uint64_t default_memcap = 0;
861 uint32_t default_hashsize = 0;
862 GetDefaultMemcap(&default_memcap, &default_hashsize);
863 if (datasets != NULL) {
867 if (iter->
name == NULL) {
872 char save[PATH_MAX] =
"";
873 char load[PATH_MAX] =
"";
877 const char *set_name = iter->
name;
886 if (set_type == NULL) {
894 DatasetGetPath(set_save->
val, save,
sizeof(save),
TYPE_STATE);
895 strlcpy(load, save,
sizeof(load));
900 DatasetGetPath(set_load->
val, load,
sizeof(load),
TYPE_LOAD);
908 " deduced: %s, resetting to default",
917 " deduced: %s, resetting to default",
923 snprintf(conf_str,
sizeof(conf_str),
"datasets.%d.%s", list_pos, set_name);
925 SCLogDebug(
"set %s type %s. Conf %s", set_name, set_type->
val, conf_str);
927 if (strcmp(set_type->
val,
"md5") == 0) {
929 memcap > 0 ? memcap : default_memcap,
935 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
938 }
else if (strcmp(set_type->
val,
"sha256") == 0) {
940 memcap > 0 ? memcap : default_memcap,
946 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
949 }
else if (strcmp(set_type->
val,
"string") == 0) {
951 memcap > 0 ? memcap : default_memcap,
957 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
960 }
else if (strcmp(set_type->
val,
"ipv4") == 0) {
962 memcap > 0 ? memcap : default_memcap,
968 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
971 }
else if (strcmp(set_type->
val,
"ip") == 0) {
973 memcap > 0 ? memcap : default_memcap,
979 SCLogDebug(
"dataset %s: id %u type %s", set_name, dset->
id, set_type->
val);
1004 SCLogDebug(
"destroying datasets done: %p", sets);
/*
 * SaveCallback - write one chunk of serialized data to the output stream.
 *
 * ctx      - opaque context; presumably the FILE * used as `fp` below (the
 *            conversion is not visible in this excerpt - TODO confirm)
 * data     - bytes to write
 * data_len - number of bytes
 *
 * Returns the fwrite() result cast to int.  Because the data is written as a
 * single item of `data_len` bytes, that is 1 when the whole chunk was
 * written and 0 otherwise (including when data_len is 0).
 */
1007 static int SaveCallback(
void *
ctx,
const uint8_t *data,
const uint32_t data_len)
1012 return (
int)fwrite(data, data_len, 1, fp);
1017 static int Md5AsAscii(
const void *s,
char *out,
size_t out_size)
1024 return (
int)strlen(out);
1027 static int Sha256AsAscii(
const void *s,
char *out,
size_t out_size)
1034 return (
int)strlen(out);
1037 static int IPv4AsAscii(
const void *s,
char *out,
size_t out_size)
1044 return (
int)strlen(out);
/*
 * IPv6AsAscii - format a stored 16-byte address as text in `out`.
 *
 * s        - pointer to the stored entry
 * out      - destination buffer
 * out_size - size of `out`
 *
 * Returns the length of the string left in `out`.  Only fragments of the
 * body are visible in this excerpt.
 */
1047 static int IPv6AsAscii(
const void *s,
char *out,
size_t out_size)
/* The entry is assumed to be IPv4 until a non-zero byte is found in
 * positions 4..15.
 * NOTE(review): a genuine IPv6 address whose last 12 bytes are all zero
 * would also satisfy this test - confirm that is acceptable. */
1051 bool is_ipv4 =
true;
1052 for (
int i = 4; i <= 15; i++) {
1053 if (ip6->
ipv6[i] != 0) {
1065 return (
int)strlen(out);
1074 if (strlen(set->
save) == 0)
1077 FILE *fp = fopen(set->
save,
"w");
1083 switch (set->
type) {
1109 static int DatasetLookupString(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1117 DatasetUnlockData(rdata);
1124 const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
1131 StringType lookup = { .
ptr = (uint8_t *)data, .
len = data_len, .rep = *rep };
1137 DatasetUnlockData(rdata);
1143 static int DatasetLookupIPv4(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1152 memcpy(lookup.
ipv4, data, 4);
1155 DatasetUnlockData(rdata);
1173 memcpy(lookup.
ipv4, data, data_len);
1179 DatasetUnlockData(rdata);
/*
 * DatasetLookupIPv6 - look up an address in an IPv6 dataset.
 *
 * set      - dataset to search
 * data     - address bytes
 * data_len - 16 or 4; any other length is singled out by the first test
 *
 * Only fragments of the body are visible in this excerpt.
 */
1185 static int DatasetLookupIPv6(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1190 if (data_len != 16 && data_len != 4)
/* Copy the key into the lookup record.
 * NOTE(review): for a 4-byte key only the first 4 bytes of lookup.ipv6 are
 * written - confirm `lookup` is zero-initialised in the lines not shown so
 * the remaining 12 bytes are zero. */
1194 memcpy(lookup.
ipv6, data, data_len);
/* Release the entry handle obtained for the lookup. */
1197 DatasetUnlockData(rdata);
1211 if (data_len != 16 && data_len != 4)
1215 memcpy(lookup.
ipv6, data, data_len);
1221 DatasetUnlockData(rdata);
1227 static int DatasetLookupMd5(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1236 memcpy(lookup.
md5, data, data_len);
1239 DatasetUnlockData(rdata);
1246 const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
1257 memcpy(lookup.
md5, data, data_len);
1263 DatasetUnlockData(rdata);
1269 static int DatasetLookupSha256(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1278 memcpy(lookup.
sha256, data, data_len);
1281 DatasetUnlockData(rdata);
1288 const uint8_t *data,
const uint32_t data_len,
const DataRepType *rep)
1299 memcpy(lookup.
sha256, data, data_len);
1305 DatasetUnlockData(rdata);
1325 switch (set->
type) {
1327 return DatasetLookupString(set, data, data_len);
1329 return DatasetLookupMd5(set, data, data_len);
1331 return DatasetLookupSha256(set, data, data_len);
1333 return DatasetLookupIPv4(set, data, data_len);
1335 return DatasetLookupIPv6(set, data, data_len);
1347 switch (set->
type) {
1349 return DatasetLookupStringwRep(set, data, data_len, rep);
1351 return DatasetLookupMd5wRep(set, data, data_len, rep);
1353 return DatasetLookupSha256wRep(set, data, data_len, rep);
1355 return DatasetLookupIPv4wRep(set, data, data_len, rep);
1357 return DatasetLookupIPv6wRep(set, data, data_len, rep);
1367 static int DatasetAddString(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1372 StringType lookup = { .ptr = (uint8_t *)data, .
len = data_len,
1376 DatasetUnlockData(res.
data);
1377 return res.
is_new ? 1 : 0;
1387 static int DatasetAddStringwRep(
1397 DatasetUnlockData(res.
data);
1398 return res.
is_new ? 1 : 0;
1403 static int DatasetAddIPv4(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1417 DatasetUnlockData(res.
data);
1418 return res.
is_new ? 1 : 0;
1423 static int DatasetAddIPv6(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1429 if (data_len != 16) {
1437 DatasetUnlockData(res.
data);
1438 return res.
is_new ? 1 : 0;
1443 static int DatasetAddIPv4wRep(
1456 DatasetUnlockData(res.
data);
1457 return res.
is_new ? 1 : 0;
1462 static int DatasetAddIPv6wRep(
1475 DatasetUnlockData(res.
data);
1476 return res.
is_new ? 1 : 0;
1481 static int DatasetAddMd5(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1493 DatasetUnlockData(res.
data);
1494 return res.
is_new ? 1 : 0;
1499 static int DatasetAddMd5wRep(
1512 DatasetUnlockData(res.
data);
1513 return res.
is_new ? 1 : 0;
1518 static int DatasetAddSha256wRep(
1531 DatasetUnlockData(res.
data);
1532 return res.
is_new ? 1 : 0;
1537 static int DatasetAddSha256(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len)
1549 DatasetUnlockData(res.
data);
1550 return res.
is_new ? 1 : 0;
1560 switch (set->
type) {
1562 return DatasetAddString(set,
data, data_len);
1564 return DatasetAddMd5(set,
data, data_len);
1566 return DatasetAddSha256(set,
data, data_len);
1568 return DatasetAddIPv4(set,
data, data_len);
1570 return DatasetAddIPv6(set,
data, data_len);
1575 static int DatasetAddwRep(
Dataset *set,
const uint8_t *
data,
const uint32_t data_len,
1581 switch (set->
type) {
1583 return DatasetAddStringwRep(set,
data, data_len, rep);
1585 return DatasetAddMd5wRep(set,
data, data_len, rep);
1587 return DatasetAddSha256wRep(set,
data, data_len, rep);
1589 return DatasetAddIPv4wRep(set,
data, data_len, rep);
1591 return DatasetAddIPv6wRep(set,
data, data_len, rep);
/*
 * DatasetOpSerialized - decode a textual value according to the dataset's
 * type and pass the raw bytes to the matching operation callback.
 *
 * set             - target dataset; set->type selects the decoding
 * string          - serialized value (base64 for strings, hex for hashes,
 *                   dotted/colon notation for addresses)
 * DatasetOpString - callback for string sets; the callbacks for the other
 *                   types (DatasetOpMd5, DatasetOpSha256, DatasetOpIPv4,
 *                   DatasetOpIPv6) are used below but their declarations are
 *                   not visible in this excerpt
 *
 * Elsewhere in this file the function is called with the Add, Lookup and
 * Remove families of callbacks.  The return value on the success paths is
 * whatever the selected callback returns.
 */
1598 static int DatasetOpSerialized(
Dataset *set,
const char *
string,
DatasetOpFunc DatasetOpString,
/* Test for an empty input string. */
1604 if (strlen(
string) == 0)
1607 switch (set->
type) {
/* String sets: strict base64 decode into a stack buffer sized from the
 * input length. */
1609 uint32_t decoded_size = Base64DecodeBufferSize(strlen(
string));
1610 uint8_t decoded[decoded_size];
1611 uint32_t num_decoded = Base64Decode(
1612 (
const uint8_t *)
string, strlen(
string), Base64ModeStrict, decoded);
1613 if (num_decoded == 0) {
1617 return DatasetOpString(set, decoded, num_decoded);
/* MD5: exactly 32 hex digits -> 16 bytes. */
1620 if (strlen(
string) != 32)
1623 if (HexToRaw((
const uint8_t *)
string, 32, hash,
sizeof(hash)) < 0)
1625 return DatasetOpMd5(set, hash, 16);
/* SHA-256: exactly 64 hex digits -> 32 bytes. */
1628 if (strlen(
string) != 64)
1631 if (HexToRaw((
const uint8_t *)
string, 64, hash,
sizeof(hash)) < 0)
1633 return DatasetOpSha256(set, hash, 32);
/* IPv4: textual address -> 4 bytes. */
1637 if (inet_pton(AF_INET,
string, &in) != 1)
1639 return DatasetOpIPv4(set, (uint8_t *)&in.s_addr, 4);
/* IPv6: ParseIpv6String() takes both IPv6 and IPv4 text and always yields
 * 16 bytes. */
1642 struct in6_addr in6;
1643 if (ParseIpv6String(set,
string, &in6) != 0) {
1644 SCLogError(
"Dataset failed to import %s as IPv6",
string);
1647 return DatasetOpIPv6(set, (uint8_t *)&in6.s6_addr, 16);
1661 return DatasetOpSerialized(set,
string, DatasetAddString, DatasetAddMd5, DatasetAddSha256,
1662 DatasetAddIPv4, DatasetAddIPv6);
1673 return DatasetOpSerialized(set,
string, DatasetLookupString, DatasetLookupMd5,
1674 DatasetLookupSha256, DatasetLookupIPv4, DatasetLookupIPv6);
1682 static int DatasetRemoveString(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1687 StringType lookup = { .ptr = (uint8_t *)data, .
len = data_len,
1692 static int DatasetRemoveIPv4(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1701 memcpy(lookup.
ipv4, data, 4);
1705 static int DatasetRemoveIPv6(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1714 memcpy(lookup.
ipv6, data, 16);
1718 static int DatasetRemoveMd5(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1727 memcpy(lookup.
md5, data, 16);
1731 static int DatasetRemoveSha256(
Dataset *set,
const uint8_t *data,
const uint32_t data_len)
1740 memcpy(lookup.
sha256, data, 32);
1751 return DatasetOpSerialized(set,
string, DatasetRemoveString, DatasetRemoveMd5,
1752 DatasetRemoveSha256, DatasetRemoveIPv4, DatasetRemoveIPv6);
1760 switch (set->
type) {
1762 return DatasetRemoveString(set, data, data_len);
1764 return DatasetRemoveMd5(set, data, data_len);
1766 return DatasetRemoveSha256(set, data, data_len);
1768 return DatasetRemoveIPv4(set, data, data_len);
1770 return DatasetRemoveIPv6(set, data, data_len);