33 #define SC_PCAP_DONT_INCLUDE_PCAP_H 1
38 #ifdef HAVE_PACKET_EBPF
41 #include <sys/resource.h>
54 #include <bpf/libbpf.h>
59 #define BPF_MAP_MAX_COUNT 16
61 #define BYPASSED_FLOW_TIMEOUT 60
/* Per-interface registry of loaded eBPF maps (name/fd/pin state), stored in
 * LiveDevice storage and released via BpfMapsInfoFree().
 * NOTE(review): the extraction elided lines here — the closing brace and the
 * 'last' element-count field used throughout this file are not visible. */
73 struct bpf_maps_info {
74 struct bpf_map_item array[BPF_MAP_MAX_COUNT];
/* Singly linked list of interfaces a bypassed flow was seen on; kept per
 * flow (see g_flow_storage_id) and freed by BypassedListFree(). */
78 typedef struct BypassedIfaceList_ {
80 struct BypassedIfaceList_ *
next;
/* Free callback for struct bpf_maps_info: for each recorded map, remove its
 * bpffs pin if this process created it (to_unlink set), then release the
 * strdup'ed name.  NOTE(review): the extraction elided several lines here
 * (loop-variable declaration, the snprintf/unlink error branches and the
 * final free of 'bpfinfo' itself) — confirm against the full source. */
83 static void BpfMapsInfoFree(
void *bpf)
85 struct bpf_maps_info *bpfinfo = (
struct bpf_maps_info *)bpf;
87 for (i = 0; i < bpfinfo->last; i ++) {
88 if (bpfinfo->array[i].name) {
/* Only unlink pins we created; pre-existing pinned maps are left alone. */
89 if (bpfinfo->array[i].to_unlink) {
90 char pinnedpath[PATH_MAX];
/* Pin-path convention used throughout this file:
 * /sys/fs/bpf/suricata-<iface>-<mapname>. */
91 int ret = snprintf(pinnedpath,
sizeof(pinnedpath),
92 "/sys/fs/bpf/suricata-%s-%s",
93 bpfinfo->array[i].iface,
94 bpfinfo->array[i].name);
97 ret = unlink(pinnedpath);
101 "Unable to remove %s: %s (%d)", pinnedpath, strerror(error), error);
104 SCLogWarning(
"Unable to remove map %s", bpfinfo->array[i].name);
107 SCFree(bpfinfo->array[i].name);
/* Flow-storage free callback for the bypassed-interface list: walks the
 * chain releasing each node (walk body elided by the extraction). */
113 static void BypassedListFree(
void *ifl)
115 BypassedIfaceList *mifl = (BypassedIfaceList *)ifl;
116 BypassedIfaceList *nifl;
/* Delete 'key' from the eBPF map referenced by 'fd'; logs a warning with
 * errno on failure.  The condition guarding the warning (presumably
 * ret < 0) was elided by the extraction. */
124 void EBPFDeleteKey(
int fd,
void *key)
126 int ret = bpf_map_delete_elem(fd, key);
128 SCLogWarning(
"Unable to delete entry: %s (%d)", strerror(errno), errno);
/* Look up the struct bpf_maps_info registered for 'iface'.  The lines
 * obtaining 'data' (presumably LiveDevice storage lookup) were elided;
 * callers treat a NULL return as "no maps for this interface". */
132 static struct bpf_maps_info *EBPFGetBpfMap(
const char *iface)
139 return (
struct bpf_maps_info *)data;
/* Return the file descriptor of eBPF map 'name' on interface 'iface'.
 * Returns the stored fd on success; the early-return values for NULL
 * arguments / unknown interface / unknown map were elided by the
 * extraction (presumably negative — confirm against the full source). */
149 int EBPFGetMapFDByName(
const char *iface,
const char *
name)
153 if (iface == NULL ||
name == NULL)
155 struct bpf_maps_info *bpf_maps = EBPFGetBpfMap(iface);
156 if (bpf_maps == NULL)
/* Linear scan is fine: BPF_MAP_MAX_COUNT is small (16). */
159 for (i = 0; i < BPF_MAP_MAX_COUNT; i++) {
160 if (!bpf_maps->array[i].name)
162 if (!strcmp(bpf_maps->array[i].name,
name)) {
163 SCLogDebug(
"Got fd %d for eBPF map '%s'", bpf_maps->array[i].fd,
name);
164 return bpf_maps->array[i].fd;
/* Open one pinned map from bpffs for 'livedev'/'file' and return its fd
 * (negative on error, per bpf_obj_get()).
 * NOTE(review): buffer is 1024 bytes here but PATH_MAX elsewhere in this
 * file, and the snprintf return is not checked for truncation — harmless
 * in practice, but inconsistent. */
171 static int EBPFLoadPinnedMapsFile(
LiveDevice *livedev,
const char *file)
173 char pinnedpath[1024];
174 snprintf(pinnedpath,
sizeof(pinnedpath),
175 "/sys/fs/bpf/suricata-%s-%s",
179 return bpf_obj_get(pinnedpath);
/* Recover map fds pinned in bpffs from a previous run so the already-loaded
 * eBPF program can be reused without reloading.  In XDP bypass mode the two
 * flow tables go in slots 0/1; cpu_map, cpus_available, tx_peer and
 * tx_peer_int are then appended.  NOTE(review): the extraction elided the
 * error-handling branches between steps — confirm cleanup paths (the loop
 * at the end frees names on failure) against the full source. */
182 static int EBPFLoadPinnedMaps(
LiveDevice *livedev,
struct ebpf_timeout_config *config)
184 int fd_v4 = -1, fd_v6 = -1;
/* A 'pinned_maps_name' acts as a sentinel: if present we expect the rest. */
187 if (config->pinned_maps_name) {
188 int ret = EBPFLoadPinnedMapsFile(livedev, config->pinned_maps_name);
195 if (config->mode == AFP_MODE_XDP_BYPASS) {
/* Flow bypass tables only exist in XDP bypass mode. */
197 fd_v4 = EBPFLoadPinnedMapsFile(livedev,
"flow_table_v4");
203 fd_v6 = EBPFLoadPinnedMapsFile(livedev,
"flow_table_v6");
205 SCLogWarning(
"Found a flow_table_v4 map but no flow_table_v6 map");
210 struct bpf_maps_info *bpf_map_data =
SCCalloc(1,
sizeof(*bpf_map_data));
211 if (bpf_map_data == NULL) {
216 if (config->mode == AFP_MODE_XDP_BYPASS) {
217 bpf_map_data->array[0].fd = fd_v4;
218 bpf_map_data->array[0].name =
SCStrdup(
"flow_table_v4");
219 if (bpf_map_data->array[0].name == NULL) {
222 bpf_map_data->array[1].fd = fd_v6;
223 bpf_map_data->array[1].name =
SCStrdup(
"flow_table_v6");
224 if (bpf_map_data->array[1].name == NULL) {
227 bpf_map_data->last = 2;
229 bpf_map_data->last = 0;
/* Remaining maps are common to both modes; appended at ->last. */
233 int fd = EBPFLoadPinnedMapsFile(livedev,
"cpu_map");
235 bpf_map_data->array[bpf_map_data->last].fd = fd;
236 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"cpu_map");
237 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
240 bpf_map_data->last++;
242 fd = EBPFLoadPinnedMapsFile(livedev,
"cpus_available");
244 bpf_map_data->array[bpf_map_data->last].fd = fd;
245 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"cpus_available");
246 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
249 bpf_map_data->last++;
251 fd = EBPFLoadPinnedMapsFile(livedev,
"tx_peer");
253 bpf_map_data->array[bpf_map_data->last].fd = fd;
254 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"tx_peer");
255 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
258 bpf_map_data->last++;
260 fd = EBPFLoadPinnedMapsFile(livedev,
"tx_peer_int");
262 bpf_map_data->array[bpf_map_data->last].fd = fd;
263 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"tx_peer_int");
264 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
267 bpf_map_data->last++;
/* Error path: release everything strdup'ed so far. */
278 for (
int i = 0; i < bpf_map_data->last; i++) {
279 SCFree(bpf_map_data->array[i].name);
281 bpf_map_data->last = 0;
/* Load the eBPF object at 'path', select the program in 'section', load it
 * into the kernel, record all of its maps, and return the program fd via
 * the elided out-parameter path.  In XDP mode with pinned maps, tries to
 * reuse a previously pinned setup first.  NOTE(review): many error-handling
 * and cleanup lines were elided by the extraction — statements below are
 * not contiguous in the real file. */
298 int EBPFLoadFile(
const char *iface,
const char *path,
const char * section,
299 int *val,
struct ebpf_timeout_config *config)
303 struct bpf_object *bpfobj = NULL;
304 struct bpf_program *bpfprog = NULL;
305 struct bpf_map *map = NULL;
/* Fast path: reuse maps pinned in bpffs by a previous run. */
313 if (config->flags & EBPF_XDP_CODE && config->flags & EBPF_PINNED_MAPS) {
315 if (EBPFLoadPinnedMaps(livedev, config) == 0) {
316 SCLogInfo(
"Loaded pinned maps, will use already loaded eBPF filter");
322 SCLogError(
"No file defined to load eBPF from");
/* eBPF maps live in locked memory; raise RLIMIT_MEMLOCK to unlimited. */
328 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
329 if (setrlimit(RLIMIT_MEMLOCK, &r) != 0) {
330 SCLogError(
"Unable to lock memory: %s (%d)", strerror(errno), errno);
335 bpfobj = bpf_object__open(path);
/* libbpf encodes errors in the returned pointer; decode with
 * libbpf_get_error()/libbpf_strerror(). */
336 long error = libbpf_get_error(bpfobj);
339 libbpf_strerror(error, err_buf,
341 SCLogError(
"Unable to load eBPF objects in '%s': %s", path, err_buf);
/* Hardware offload: bind programs and maps to the target ifindex. */
345 if (config->flags & EBPF_XDP_HW_MODE) {
346 unsigned int ifindex = if_nametoindex(iface);
347 bpf_object__for_each_program(bpfprog, bpfobj) {
348 bpf_program__set_ifindex(bpfprog, ifindex);
350 bpf_map__for_each(map, bpfobj) {
351 bpf_map__set_ifindex(map, ifindex);
/* Find the requested ELF section and set the program type; APIs differ
 * across libbpf versions, hence the feature-test #ifdefs. */
356 bpf_object__for_each_program(bpfprog, bpfobj) {
357 #ifdef HAVE_BPF_PROGRAM__SECTION_NAME
358 const char *title = bpf_program__section_name(bpfprog);
360 const char *title = bpf_program__title(bpfprog, 0);
362 if (!strcmp(title, section)) {
363 if (config->flags & EBPF_SOCKET_FILTER) {
364 #ifdef HAVE_BPF_PROGRAM__SET_TYPE
365 bpf_program__set_type(bpfprog, BPF_PROG_TYPE_SOCKET_FILTER);
368 bpf_program__set_socket_filter(bpfprog);
371 #ifdef HAVE_BPF_PROGRAM__SET_TYPE
372 bpf_program__set_type(bpfprog, BPF_PROG_TYPE_XDP);
375 bpf_program__set_xdp(bpfprog);
384 SCLogError(
"No section '%s' in '%s' file. Will not be able to use the file", section, path);
388 err = bpf_object__load(bpfobj);
/* EPERM usually means missing CAP_SYS_ADMIN/CAP_BPF or rlimit issues. */
391 SCLogError(
"Permission issue when loading eBPF object"
392 " (check libbpf error on stdout)");
395 libbpf_strerror(err, buf,
sizeof(buf));
396 SCLogError(
"Unable to load eBPF object: %s (%d)", buf, err);
/* Record every map of the object so lookups by name work later. */
404 struct bpf_maps_info *bpf_map_data =
SCCalloc(1,
sizeof(*bpf_map_data));
405 if (bpf_map_data == NULL) {
411 bpf_map__for_each(map, bpfobj) {
412 if (bpf_map_data->last == BPF_MAP_MAX_COUNT) {
413 SCLogError(
"Too many BPF maps in eBPF files");
/* Sanity-check: flow table key layout must match our userspace structs,
 * otherwise lookups would silently read garbage. */
416 if (strcmp(bpf_map__name(map),
"flow_table_v4") == 0) {
417 if (bpf_map__key_size(map) !=
sizeof(
struct flowv4_keys)) {
422 if (strcmp(bpf_map__name(map),
"flow_table_v6") == 0) {
423 if (bpf_map__key_size(map) !=
sizeof(
struct flowv6_keys)) {
428 SCLogDebug(
"Got a map '%s' with fd '%d'", bpf_map__name(map), bpf_map__fd(map));
429 bpf_map_data->array[bpf_map_data->last].fd = bpf_map__fd(map);
430 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(bpf_map__name(map));
431 snprintf(bpf_map_data->array[bpf_map_data->last].iface, IFNAMSIZ,
433 if (!bpf_map_data->array[bpf_map_data->last].name) {
435 BpfMapsInfoFree(bpf_map_data);
438 bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
439 if (config->flags & EBPF_PINNED_MAPS) {
440 SCLogConfig(
"Pinning: %d to %s", bpf_map_data->array[bpf_map_data->last].fd,
441 bpf_map_data->array[bpf_map_data->last].name);
443 snprintf(buf,
sizeof(buf),
"/sys/fs/bpf/suricata-%s-%s", iface,
444 bpf_map_data->array[bpf_map_data->last].name);
445 int ret = bpf_obj_pin(bpf_map_data->array[bpf_map_data->last].fd, buf);
/* XDP pins survive on purpose (reused at next start); socket-filter
 * pins are unlinked at teardown. */
450 if (config->flags & EBPF_XDP_CODE) {
451 bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
453 bpf_map_data->array[bpf_map_data->last].to_unlink = 1;
456 bpf_map_data->last++;
466 pfd = bpf_program__fd(bpfprog);
468 SCLogError(
"Unable to find %s section", section);
472 SCLogInfo(
"Successfully loaded eBPF file '%s' on '%s'", path, iface);
/* Attach the XDP program 'fd' to 'iface' with the given attach flags.
 * Uses the modern bpf_xdp_attach() when libbpf provides it, else the
 * deprecated bpf_set_link_xdp_fd().  Return value on success and the
 * failure branches were elided by the extraction. */
485 int EBPFSetupXDP(
const char *iface,
int fd, uint8_t
flags)
487 #ifdef HAVE_PACKET_XDP
488 unsigned int ifindex = if_nametoindex(iface);
493 #ifdef HAVE_BPF_XDP_ATTACH
494 int err = bpf_xdp_attach(ifindex, fd,
flags, NULL);
497 int err = bpf_set_link_xdp_fd(ifindex, fd,
flags);
501 libbpf_strerror(err, buf,
sizeof(buf));
502 SCLogError(
"Unable to set XDP on '%s': %s (%d)", iface, buf, err);
/* Fragment of the per-key flow-creation callback (signature matches the
 * OpFlowForKey typedef further down; the defining line with the function
 * name was elided — presumably EBPFCreateFlowForKey).  Allocates an
 * EBPFBypassData, copies the map key for one direction, and records the
 * map fd / CPU count used later by EBPFBypassUpdate().  The branch below
 * handles the second direction of an already-seen flow. */
515 size_t skey,
FlowKey *flow_key,
struct timespec *ctime,
516 uint64_t pkts_cnt, uint64_t bytes_cnt,
517 int mapfd,
int cpus_count)
541 EBPFBypassData *eb =
SCCalloc(1,
sizeof(EBPFBypassData));
/* Stash a copy of the eBPF map key so the flow can be deleted later. */
554 memcpy(mkey, key, skey);
557 eb->cpus_count = cpus_count;
565 EBPFBypassData *eb = (EBPFBypassData *) fc->
bypass_data;
/* Both directions already known: nothing more to record. */
572 if (eb->key[0] && eb->key[1]) {
583 memcpy(mkey, key, skey);
/* Free callback for EBPFBypassData attached to a bypassed flow (key
 * releases and the free of 'eb' itself were elided by the extraction). */
591 void EBPFBypassFree(
void *data)
593 EBPFBypassData *eb = (EBPFBypassData *)data;
/* Fragment of the half-flow activity check (function name elided —
 * presumably EBPFBypassCheckHalfFlow).  Reads the per-CPU counters for one
 * direction's key from the kernel map and sums packets/bytes across CPUs;
 * the caller uses the totals to decide whether the flow is still active. */
611 EBPFBypassData *eb,
void *key,
615 uint64_t pkts_cnt = 0;
616 uint64_t bytes_cnt = 0;
/* Per-CPU array map: one 'struct pair' slot per possible CPU. */
619 BPF_DECLARE_PERCPU(
struct pair, values_array, eb->cpus_count);
620 memset(values_array, 0,
sizeof(values_array));
621 int res = bpf_map_lookup_elem(eb->mapfd, key, values_array);
623 SCLogDebug(
"errno: (%d) %s", errno, strerror(errno));
626 for (i = 0; i < eb->cpus_count; i++) {
628 SCLogDebug(
"%d: Adding pkts %lu bytes %lu", i,
629 BPF_PERCPU(values_array, i).packets,
630 BPF_PERCPU(values_array, i).bytes);
631 pkts_cnt += BPF_PERCPU(values_array, i).packets;
632 bytes_cnt += BPF_PERCPU(values_array, i).bytes;
/* Periodic bypass-manager callback for one bypassed flow: checks both
 * directions for recent activity; when neither direction saw traffic the
 * kernel map entries are deleted so the flow can time out normally.
 * Returns whether the flow is still active (return lines elided). */
656 bool EBPFBypassUpdate(
Flow *f,
void *data, time_t tsec)
658 EBPFBypassData *eb = (EBPFBypassData *)data;
/* OR, not short-circuit by accident: both halves must always be polled
 * so their counters stay fresh. */
666 bool activity = EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[0], 0);
667 activity |= EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[1], 1);
671 EBPFDeleteKey(eb->mapfd, eb->key[0]);
672 EBPFDeleteKey(eb->mapfd, eb->key[1]);
/* Fragment of the OpFlowForKey callback typedef: the per-key operation
 * invoked by the flow-table walkers below (e.g. EBPFCreateFlowForKey).
 * Leading typedef line elided by the extraction. */
682 size_t skey,
FlowKey *flow_key,
struct timespec *ctime,
683 uint64_t pkts_cnt, uint64_t bytes_cnt,
684 int mapfd,
int cpus_count);
/* Fragment of EBPFForEachFlowV4Table (leading signature lines elided):
 * iterates every entry of the IPv4 bypass flow table via
 * bpf_map_get_next_key(), sums per-CPU packet/byte counters, rebuilds a
 * userspace FlowKey from the kernel key, and hands each entry to the
 * EBPFOpFlowForKey callback.  Dead entries are removed with
 * EBPFDeleteKey(). */
693 struct timespec *ctime,
694 struct ebpf_timeout_config *tcfg,
695 OpFlowForKey EBPFOpFlowForKey
699 int mapfd = EBPFGetMapFDByName(dev->
dev,
name);
703 struct flowv4_keys key = {}, next_key;
706 uint64_t hash_cnt = 0;
/* Without a CPU count we cannot size the per-CPU value array. */
708 if (tcfg->cpus_count == 0) {
712 bool dead_flow =
false;
713 while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
714 uint64_t bytes_cnt = 0;
715 uint64_t pkts_cnt = 0;
718 EBPFDeleteKey(mapfd, &key);
723 BPF_DECLARE_PERCPU(
struct pair, values_array, tcfg->cpus_count);
724 memset(values_array, 0,
sizeof(values_array));
725 int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
727 SCLogDebug(
"no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
728 SCLogDebug(
"errno: (%d) %s", errno, strerror(errno));
/* Sum counters across all CPUs for this flow entry. */
732 for (i = 0; i < tcfg->cpus_count; i++) {
734 SCLogDebug(
"%d: Adding pkts %lu bytes %lu", i,
735 BPF_PERCPU(values_array, i).packets,
736 BPF_PERCPU(values_array, i).bytes);
737 pkts_cnt += BPF_PERCPU(values_array, i).packets;
738 bytes_cnt += BPF_PERCPU(values_array, i).bytes;
/* XDP stores ports in network order and addresses host-side; the
 * socket-filter path is the inverse — hence the two branches. */
743 if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
744 flow_key.
sp = ntohs(next_key.port16[0]);
745 flow_key.
dp = ntohs(next_key.port16[1]);
746 flow_key.
src.addr_data32[0] = next_key.src;
747 flow_key.
dst.addr_data32[0] = next_key.dst;
749 flow_key.
sp = next_key.port16[0];
750 flow_key.
dp = next_key.port16[1];
751 flow_key.
src.addr_data32[0] = ntohl(next_key.src);
752 flow_key.
dst.addr_data32[0] = ntohl(next_key.dst);
/* IPv4: upper address words are always zero. */
755 flow_key.
src.addr_data32[1] = 0;
756 flow_key.
src.addr_data32[2] = 0;
757 flow_key.
src.addr_data32[3] = 0;
759 flow_key.
dst.addr_data32[1] = 0;
760 flow_key.
dst.addr_data32[2] = 0;
761 flow_key.
dst.addr_data32[3] = 0;
762 flow_key.
vlan_id[0] = next_key.vlan0;
763 flow_key.
vlan_id[1] = next_key.vlan1;
/* ip_proto is a TCP/UDP flag in the eBPF key (1 == TCP), not an
 * IPPROTO_* value — presumably set by the kernel-side program;
 * confirm against the eBPF source. */
764 if (next_key.ip_proto == 1) {
765 flow_key.
proto = IPPROTO_TCP;
767 flow_key.
proto = IPPROTO_UDP;
771 dead_flow = EBPFOpFlowForKey(&flowstats, dev, &next_key,
sizeof(next_key), &flow_key,
772 ctime, pkts_cnt, bytes_cnt,
773 mapfd, tcfg->cpus_count);
785 EBPFDeleteKey(mapfd, &key);
791 SCLogInfo(
"IPv4 bypassed flow table size: %" PRIu64, hash_cnt);
/* IPv6 counterpart of EBPFForEachFlowV4Table: walks the IPv6 bypass flow
 * table, sums per-CPU counters, rebuilds the FlowKey and invokes the
 * EBPFOpFlowForKey callback, deleting dead entries.
 * FIX(review): the "no entry" debug message said "v4 table" — copy-paste
 * from the IPv4 walker (which has the identical string); corrected to "v6".
 * NOTE(review): unlike the v4 walker (which assigns the callback result to
 * 'dead_flow'), this one assigns it to 'pkts_cnt' — looks like a latent
 * bug, but the surrounding elided lines are needed to fix it safely. */
802 static int EBPFForEachFlowV6Table(
ThreadVars *th_v,
804 struct timespec *ctime,
805 struct ebpf_timeout_config *tcfg,
806 OpFlowForKey EBPFOpFlowForKey
810 int mapfd = EBPFGetMapFDByName(dev->
dev,
name);
814 struct flowv6_keys key = {}, next_key;
817 uint64_t hash_cnt = 0;
/* Without a CPU count we cannot size the per-CPU value array. */
819 if (tcfg->cpus_count == 0) {
824 uint64_t pkts_cnt = 0;
825 while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
826 uint64_t bytes_cnt = 0;
829 EBPFDeleteKey(mapfd, &key);
834 BPF_DECLARE_PERCPU(
struct pair, values_array, tcfg->cpus_count);
835 memset(values_array, 0,
sizeof(values_array));
836 int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
838 SCLogDebug(
"no entry in v6 table for %d -> %d", key.port16[0], key.port16[1]);
/* Sum counters across all CPUs for this flow entry. */
842 for (i = 0; i < tcfg->cpus_count; i++) {
844 SCLogDebug(
"%d: Adding pkts %lu bytes %lu", i,
845 BPF_PERCPU(values_array, i).packets,
846 BPF_PERCPU(values_array, i).bytes);
847 pkts_cnt += BPF_PERCPU(values_array, i).packets;
848 bytes_cnt += BPF_PERCPU(values_array, i).bytes;
/* XDP stores ports in network order and addresses host-side; the
 * socket-filter path is the inverse — hence the two branches. */
853 if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
854 flow_key.
sp = ntohs(next_key.port16[0]);
855 flow_key.
dp = ntohs(next_key.port16[1]);
857 flow_key.
src.addr_data32[0] = next_key.src[0];
858 flow_key.
src.addr_data32[1] = next_key.src[1];
859 flow_key.
src.addr_data32[2] = next_key.src[2];
860 flow_key.
src.addr_data32[3] = next_key.src[3];
862 flow_key.
dst.addr_data32[0] = next_key.dst[0];
863 flow_key.
dst.addr_data32[1] = next_key.dst[1];
864 flow_key.
dst.addr_data32[2] = next_key.dst[2];
865 flow_key.
dst.addr_data32[3] = next_key.dst[3];
867 flow_key.
sp = next_key.port16[0];
868 flow_key.
dp = next_key.port16[1];
870 flow_key.
src.addr_data32[0] = ntohl(next_key.src[0]);
871 flow_key.
src.addr_data32[1] = ntohl(next_key.src[1]);
872 flow_key.
src.addr_data32[2] = ntohl(next_key.src[2]);
873 flow_key.
src.addr_data32[3] = ntohl(next_key.src[3]);
875 flow_key.
dst.addr_data32[0] = ntohl(next_key.dst[0]);
876 flow_key.
dst.addr_data32[1] = ntohl(next_key.dst[1]);
877 flow_key.
dst.addr_data32[2] = ntohl(next_key.dst[2]);
878 flow_key.
dst.addr_data32[3] = ntohl(next_key.dst[3]);
880 flow_key.
vlan_id[0] = next_key.vlan0;
881 flow_key.
vlan_id[1] = next_key.vlan1;
/* ip_proto is a TCP/UDP flag in the eBPF key (1 == TCP), not an
 * IPPROTO_* value — presumably set by the kernel-side program;
 * confirm against the eBPF source. */
882 if (next_key.ip_proto == 1) {
883 flow_key.
proto = IPPROTO_TCP;
885 flow_key.
proto = IPPROTO_UDP;
889 pkts_cnt = EBPFOpFlowForKey(&flowstats, dev, &next_key,
sizeof(next_key), &flow_key,
890 ctime, pkts_cnt, bytes_cnt,
891 mapfd, tcfg->cpus_count);
903 EBPFDeleteKey(mapfd, &key);
909 SCLogInfo(
"IPv6 bypassed flow table size: %" PRIu64, hash_cnt);
/* Bypass-check entry point: walks both bypass flow tables for each live
 * device (device-iteration lines elided), creating userspace flows for
 * kernel-bypassed entries via the EBPFCreateFlowForKey callback. */
914 int EBPFCheckBypassedFlowCreate(
ThreadVars *th_v,
struct timespec *curtime,
void *data)
917 struct ebpf_timeout_config *cfg = (
struct ebpf_timeout_config *)data;
919 EBPFForEachFlowV4Table(th_v, ldev,
"flow_table_v4",
921 cfg, EBPFCreateFlowForKey);
922 EBPFForEachFlowV6Table(th_v, ldev,
"flow_table_v6",
924 cfg, EBPFCreateFlowForKey);
930 void EBPFRegisterExtension(
void)
933 g_flow_storage_id =
FlowStorageRegister(
"bypassedlist",
sizeof(
void *), NULL, BypassedListFree);
937 #ifdef HAVE_PACKET_XDP
/* Running index of CPUs added to the redirect cpus_available map; reset per
 * interface in EBPFBuildCPUSet().  NOTE(review): plain (non-atomic) global —
 * presumably only touched from the single config-load path; confirm. */
939 static uint32_t g_redirect_iface_cpu_counter = 0;
/* Register CPU 'i' for XDP redirect on 'iface': sets its queue size in
 * 'cpu_map' and appends it to 'cpus_available' at the current counter slot.
 * Returns negative on failure (early-return lines elided). */
941 static int EBPFAddCPUToMap(
const char *iface, uint32_t i)
943 int cpumap = EBPFGetMapFDByName(iface,
"cpu_map");
/* Fixed per-CPU redirect queue size. */
944 uint32_t queue_size = 4096;
951 ret = bpf_map_update_elem(cpumap, &i, &queue_size, 0);
953 SCLogError(
"Create CPU entry failed (err:%d)", ret);
956 int cpus_available = EBPFGetMapFDByName(iface,
"cpus_available");
957 if (cpus_available < 0) {
962 ret = bpf_map_update_elem(cpus_available, &g_redirect_iface_cpu_counter, &i, 0);
964 SCLogError(
"Create CPU entry failed (err:%d)", ret);
/* Per-CPU callback for CPU-set iteration: 'data' is the interface name.
 * Only advances the global slot counter when the CPU was actually added. */
970 static void EBPFRedirectMapAddCPU(
int i,
void *data)
972 if (EBPFAddCPUToMap(data, i) < 0) {
975 g_redirect_iface_cpu_counter++;
/* Build the XDP CPU-redirect set for 'iface' from the config node: resets
 * the counter, writes 0 to 'cpus_count' (key0) before populating, then
 * writes the final count after the CPU-set parse (parse call elided). */
979 void EBPFBuildCPUSet(
SCConfNode *node,
char *iface)
982 int mapfd = EBPFGetMapFDByName(iface,
"cpus_count");
984 SCLogError(
"Unable to find 'cpus_count' map");
987 g_redirect_iface_cpu_counter = 0;
/* First write zeroes the count so the kernel side sees no CPUs while we
 * repopulate; second write publishes the final total. */
989 bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
994 SCLogWarning(
"Failed to parse XDP CPU redirect configuration");
997 bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
/* Configure XDP IPS peering: store out_iface's ifindex in 'tx_peer' and
 * 'tx_peer_int' at key0 so the eBPF program can redirect to the peer.
 * Returns negative on failure (early-return lines elided).
 * NOTE(review): both maps receive the same peer_index here — confirm
 * against the full source that tx_peer_int is not meant to carry a
 * different value. */
1013 int EBPFSetPeerIface(
const char *iface,
const char *out_iface)
1015 int mapfd = EBPFGetMapFDByName(iface,
"tx_peer");
1020 int intmapfd = EBPFGetMapFDByName(iface,
"tx_peer_int");
1022 SCLogError(
"Unable to find 'tx_peer_int' map");
1027 unsigned int peer_index = if_nametoindex(out_iface);
/* if_nametoindex() returns 0 when the interface does not exist. */
1028 if (peer_index == 0) {
1032 int ret = bpf_map_update_elem(mapfd, &key0, &peer_index, BPF_ANY);
1034 SCLogError(
"Create peer entry failed (err:%d)", ret);
1037 ret = bpf_map_update_elem(intmapfd, &key0, &peer_index, BPF_ANY);
1039 SCLogError(
"Create peer entry failed (err:%d)", ret);
1050 int EBPFUpdateFlow(
Flow *f,
Packet *p,
void *data)
1052 BypassedIfaceList *ifl = (BypassedIfaceList *)
FlowGetStorageById(f, g_flow_storage_id);
1063 BypassedIfaceList *ldev = ifl;
1074 BypassedIfaceList *nifl =
SCCalloc(1,
sizeof(*nifl));