33 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34 #define SC_PCAP_DONT_INCLUDE_PCAP_H 1
39 #ifdef HAVE_PACKET_EBPF
42 #include <sys/resource.h>
55 #include <bpf/libbpf.h>
/* Capacity of bpf_maps_info.array; also the scan bound in EBPFGetMapFDByName
 * and the upper limit on maps accepted by EBPFLoadFile. */
60 #define BPF_MAP_MAX_COUNT 16
/* NOTE(review): presumably a timeout, in seconds, applied to bypassed flows;
 * no use of this constant is visible in this excerpt — confirm. */
62 #define BYPASSED_FLOW_TIMEOUT 60
/**
 * \brief Table of the eBPF maps known for one interface.
 *
 * Users in this file read the per-entry fields fd, name, iface and to_unlink,
 * and a `last` member counting the filled slots. Members other than `array`
 * are not shown in this excerpt.
 */
74 struct bpf_maps_info {
75 struct bpf_map_item array[BPF_MAP_MAX_COUNT];
/**
 * \brief Node of a singly linked list (linked through `next`).
 *
 * NOTE(review): the payload member(s) are not visible in this excerpt;
 * presumably each node refers to an interface that bypassed a flow — confirm.
 */
79 typedef struct BypassedIfaceList_ {
81 struct BypassedIfaceList_ *
next;
/**
 * \brief Tear down a bpf_maps_info table.
 *
 * Walks the first `last` entries. For every entry that has a name it removes
 * the pinned file /sys/fs/bpf/suricata-<iface>-<name> when the entry is
 * flagged to_unlink, and then frees the name string. Failures to remove the
 * file are only logged as warnings.
 *
 * \param bpf opaque pointer, interpreted as a struct bpf_maps_info *
 *
 * NOTE(review): the conditions guarding the two warnings, the origin of
 * `error` (presumably a saved errno), and whether the bpf_maps_info itself is
 * released are on lines not shown in this excerpt — confirm.
 */
84 static void BpfMapsInfoFree(
void *bpf)
86 struct bpf_maps_info *bpfinfo = (
struct bpf_maps_info *)bpf;
/* only the slots filled so far (indices below `last`) are visited */
88 for (i = 0; i < bpfinfo->last; i ++) {
89 if (bpfinfo->array[i].name) {
90 if (bpfinfo->array[i].to_unlink) {
/* rebuild the pin path from the interface and map names stored in the entry */
91 char pinnedpath[PATH_MAX];
92 int ret = snprintf(pinnedpath,
sizeof(pinnedpath),
93 "/sys/fs/bpf/suricata-%s-%s",
94 bpfinfo->array[i].iface,
95 bpfinfo->array[i].name);
98 ret = unlink(pinnedpath);
102 "Unable to remove %s: %s (%d)", pinnedpath, strerror(error), error);
105 SCLogWarning(
"Unable to remove map %s", bpfinfo->array[i].name);
108 SCFree(bpfinfo->array[i].name);
/**
 * \brief Callback handed to FlowStorageRegister() for the "bypassedlist"
 *        storage (see EBPFRegisterExtension).
 *
 * \param ifl opaque pointer, interpreted as the head of a BypassedIfaceList
 *
 * NOTE(review): the body is not visible in this excerpt; presumably it walks
 * the list through `next` (using nifl as the look-ahead) and frees each
 * node — confirm.
 */
114 static void BypassedListFree(
void *ifl)
116 BypassedIfaceList *mifl = (BypassedIfaceList *)ifl;
117 BypassedIfaceList *nifl;
/**
 * \brief Remove one element from an eBPF map.
 *
 * Calls bpf_map_delete_elem() and reports a failure as a warning that
 * includes errno; nothing is returned to the caller.
 *
 * \param fd  file descriptor of the map
 * \param key pointer to the key of the element to delete
 *
 * NOTE(review): the test on `ret` that guards the warning is on a line not
 * shown in this excerpt.
 */
125 void EBPFDeleteKey(
int fd,
void *key)
127 int ret = bpf_map_delete_elem(fd, key);
129 SCLogWarning(
"Unable to delete entry: %s (%d)", strerror(errno), errno);
/**
 * \brief Fetch the bpf_maps_info table associated with an interface name.
 *
 * \param iface interface name
 * \retval the table, returned as a cast of `data`; callers in this file
 *         treat NULL as "nothing registered"
 *
 * NOTE(review): how `data` is obtained is on lines not shown in this
 * excerpt — presumably from per-device storage; confirm.
 */
133 static struct bpf_maps_info *EBPFGetBpfMap(
const char *iface)
140 return (
struct bpf_maps_info *)data;
/**
 * \brief Look up the file descriptor of a named eBPF map on an interface.
 *
 * \param iface interface name; NULL is rejected
 * \param name  map name; NULL is rejected
 * \retval the fd stored for the first entry whose name equals `name`
 *
 * NOTE(review): the value returned on the failure paths is on lines not
 * shown here; callers in this file test the result with `< 0`.
 */
150 int EBPFGetMapFDByName(
const char *iface,
const char *name)
154 if (iface == NULL || name == NULL)
156 struct bpf_maps_info *bpf_maps = EBPFGetBpfMap(iface);
157 if (bpf_maps == NULL)
/* scans the whole array rather than stopping at `last`; slots without a
 * name are skipped by the NULL check below */
160 for (i = 0; i < BPF_MAP_MAX_COUNT; i++) {
161 if (!bpf_maps->array[i].name)
163 if (!strcmp(bpf_maps->array[i].name, name)) {
164 SCLogDebug(
"Got fd %d for eBPF map '%s'", bpf_maps->array[i].fd, name);
165 return bpf_maps->array[i].fd;
/**
 * \brief Open one pinned eBPF object by name.
 *
 * Formats a path of the form /sys/fs/bpf/suricata-<...>-<...> and returns
 * whatever bpf_obj_get() returns for it.
 *
 * \param livedev device the map belongs to
 * \param file    map name
 * \retval result of bpf_obj_get() on the formatted path
 *
 * NOTE(review): the format arguments are on lines not shown here (presumably
 * the device name and `file`). The snprintf() result is not checked, and the
 * buffer is 1024 bytes whereas BpfMapsInfoFree uses PATH_MAX for the same
 * path pattern — a truncated path would be passed to bpf_obj_get() silently.
 */
172 static int EBPFLoadPinnedMapsFile(
LiveDevice *livedev,
const char *file)
174 char pinnedpath[1024];
175 snprintf(pinnedpath,
sizeof(pinnedpath),
176 "/sys/fs/bpf/suricata-%s-%s",
180 return bpf_obj_get(pinnedpath);
/**
 * \brief Reuse eBPF maps that are already pinned for a device.
 *
 * Probes the pinned map named by the configuration (if any), then, in XDP
 * bypass mode, the two flow tables. A fresh bpf_maps_info is allocated and
 * filled with the flow tables (slots 0 and 1) followed by the maps
 * "cpu_map", "cpus_available", "tx_peer" and "tx_peer_int".
 *
 * \param livedev device whose pinned maps are opened
 * \param config  timeout configuration; pinned_maps_name and mode are read
 * \retval 0 is what EBPFLoadFile treats as "pinned maps loaded"
 *
 * NOTE(review): the fd validity checks, the error returns/gotos and the place
 * where the table is attached to the device are on lines not shown here.
 */
183 static int EBPFLoadPinnedMaps(
LiveDevice *livedev,
struct ebpf_timeout_config *config)
185 int fd_v4 = -1, fd_v6 = -1;
/* optional user-designated pinned map is probed first */
188 if (config->pinned_maps_name) {
189 int ret = EBPFLoadPinnedMapsFile(livedev, config->pinned_maps_name);
/* the flow tables are only opened in XDP bypass mode */
196 if (config->mode == AFP_MODE_XDP_BYPASS) {
198 fd_v4 = EBPFLoadPinnedMapsFile(livedev,
"flow_table_v4");
204 fd_v6 = EBPFLoadPinnedMapsFile(livedev,
"flow_table_v6");
206 SCLogWarning(
"Found a flow_table_v4 map but no flow_table_v6 map");
/* zero-initialised table: unused slots keep a NULL name */
211 struct bpf_maps_info *bpf_map_data =
SCCalloc(1,
sizeof(*bpf_map_data));
212 if (bpf_map_data == NULL) {
/* bypass mode: flow tables occupy the first two slots */
217 if (config->mode == AFP_MODE_XDP_BYPASS) {
218 bpf_map_data->array[0].fd = fd_v4;
219 bpf_map_data->array[0].name =
SCStrdup(
"flow_table_v4");
220 if (bpf_map_data->array[0].name == NULL) {
223 bpf_map_data->array[1].fd = fd_v6;
224 bpf_map_data->array[1].name =
SCStrdup(
"flow_table_v6");
225 if (bpf_map_data->array[1].name == NULL) {
228 bpf_map_data->last = 2;
230 bpf_map_data->last = 0;
/* the remaining maps are appended at index `last`, which then advances */
234 int fd = EBPFLoadPinnedMapsFile(livedev,
"cpu_map");
236 bpf_map_data->array[bpf_map_data->last].fd = fd;
237 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"cpu_map");
238 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
241 bpf_map_data->last++;
243 fd = EBPFLoadPinnedMapsFile(livedev,
"cpus_available");
245 bpf_map_data->array[bpf_map_data->last].fd = fd;
246 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"cpus_available");
247 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
250 bpf_map_data->last++;
252 fd = EBPFLoadPinnedMapsFile(livedev,
"tx_peer");
254 bpf_map_data->array[bpf_map_data->last].fd = fd;
255 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"tx_peer");
256 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
259 bpf_map_data->last++;
261 fd = EBPFLoadPinnedMapsFile(livedev,
"tx_peer_int");
263 bpf_map_data->array[bpf_map_data->last].fd = fd;
264 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"tx_peer_int");
265 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
268 bpf_map_data->last++;
/* cleanup: release every name duplicated so far and mark the table empty */
279 for (
int i = 0; i < bpf_map_data->last; i++) {
280 SCFree(bpf_map_data->array[i].name);
282 bpf_map_data->last = 0;
/**
 * \brief Load an eBPF object file and record its maps for an interface.
 *
 * Steps visible in this excerpt:
 *  - when both EBPF_XDP_CODE and EBPF_PINNED_MAPS are set, try to reuse
 *    already pinned maps through EBPFLoadPinnedMaps();
 *  - lift RLIMIT_MEMLOCK to RLIM_INFINITY;
 *  - open the object with bpf_object__open();
 *  - with EBPF_XDP_HW_MODE, bind every program and map to the interface index;
 *  - set the type (socket filter or XDP) of the program whose section name
 *    equals `section`;
 *  - load the object, then store fd/name/iface of every map in a new
 *    bpf_maps_info, pinning each map under /sys/fs/bpf/suricata-<iface>-<name>
 *    when EBPF_PINNED_MAPS is set;
 *  - fetch the program fd.
 *
 * \param iface   interface name
 * \param path    path of the eBPF object file
 * \param section section name of the program to use
 * \param val     output parameter (NOTE(review): presumably receives the
 *                program fd; the assignment is not visible here — confirm)
 * \param config  flags driving the behaviour described above
 *
 * NOTE(review): return values and most error branches are on lines not shown
 * in this excerpt.
 */
299 int EBPFLoadFile(
const char *iface,
const char *path,
const char * section,
300 int *val,
struct ebpf_timeout_config *config)
304 struct bpf_object *bpfobj = NULL;
306 struct bpf_map *map = NULL;
314 if (config->flags & EBPF_XDP_CODE && config->flags & EBPF_PINNED_MAPS) {
316 if (EBPFLoadPinnedMaps(livedev, config) == 0) {
317 SCLogInfo(
"Loaded pinned maps, will use already loaded eBPF filter");
323 SCLogError(
"No file defined to load eBPF from");
/* remove the locked-memory limit before loading */
329 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
330 if (setrlimit(RLIMIT_MEMLOCK, &r) != 0) {
331 SCLogError(
"Unable to lock memory: %s (%d)", strerror(errno), errno);
336 bpfobj = bpf_object__open(path);
337 long error = libbpf_get_error(bpfobj);
340 libbpf_strerror(error, err_buf,
342 SCLogError(
"Unable to load eBPF objects in '%s': %s", path, err_buf);
/* NOTE(review): the if_nametoindex() result is used without a check for 0
 * here, unlike EBPFSetPeerIface which rejects index 0 */
346 if (config->flags & EBPF_XDP_HW_MODE) {
347 unsigned int ifindex = if_nametoindex(iface);
348 bpf_object__for_each_program(bpfprog, bpfobj) {
349 bpf_program__set_ifindex(bpfprog, ifindex);
351 bpf_map__for_each(map, bpfobj) {
352 bpf_map__set_ifindex(map, ifindex);
/* pick the program by section name; the accessor and type setters depend on
 * which libbpf functions were detected at build time */
357 bpf_object__for_each_program(bpfprog, bpfobj) {
358 #ifdef HAVE_BPF_PROGRAM__SECTION_NAME
359 const char *title = bpf_program__section_name(bpfprog);
361 const char *title = bpf_program__title(bpfprog, 0);
363 if (!strcmp(title, section)) {
364 if (config->flags & EBPF_SOCKET_FILTER) {
365 #ifdef HAVE_BPF_PROGRAM__SET_TYPE
366 bpf_program__set_type(bpfprog, BPF_PROG_TYPE_SOCKET_FILTER);
369 bpf_program__set_socket_filter(bpfprog);
372 #ifdef HAVE_BPF_PROGRAM__SET_TYPE
373 bpf_program__set_type(bpfprog, BPF_PROG_TYPE_XDP);
376 bpf_program__set_xdp(bpfprog);
384 if (found ==
false) {
385 SCLogError(
"No section '%s' in '%s' file. Will not be able to use the file", section, path);
389 err = bpf_object__load(bpfobj);
392 SCLogError(
"Permission issue when loading eBPF object"
393 " (check libbpf error on stdout)");
396 libbpf_strerror(err, buf,
sizeof(buf));
397 SCLogError(
"Unable to load eBPF object: %s (%d)", buf, err);
/* zero-initialised table that will describe the maps of the loaded object */
405 struct bpf_maps_info *bpf_map_data =
SCCalloc(1,
sizeof(*bpf_map_data));
406 if (bpf_map_data == NULL) {
412 bpf_map__for_each(map, bpfobj) {
/* the table has a fixed capacity of BPF_MAP_MAX_COUNT entries */
413 if (bpf_map_data->last == BPF_MAP_MAX_COUNT) {
414 SCLogError(
"Too many BPF maps in eBPF files");
417 SCLogDebug(
"Got a map '%s' with fd '%d'", bpf_map__name(map), bpf_map__fd(map));
418 bpf_map_data->array[bpf_map_data->last].fd = bpf_map__fd(map);
419 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(bpf_map__name(map));
420 snprintf(bpf_map_data->array[bpf_map_data->last].iface, IFNAMSIZ,
422 if (!bpf_map_data->array[bpf_map_data->last].name) {
424 BpfMapsInfoFree(bpf_map_data);
/* default: the pinned file, if any, is left in place at teardown */
427 bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
428 if (config->flags & EBPF_PINNED_MAPS) {
429 SCLogConfig(
"Pinning: %d to %s", bpf_map_data->array[bpf_map_data->last].fd,
430 bpf_map_data->array[bpf_map_data->last].name);
432 snprintf(buf,
sizeof(buf),
"/sys/fs/bpf/suricata-%s-%s", iface,
433 bpf_map_data->array[bpf_map_data->last].name);
434 int ret = bpf_obj_pin(bpf_map_data->array[bpf_map_data->last].fd, buf);
/* maps of XDP code keep their pin (to_unlink 0); other pinned maps are
 * flagged for removal by BpfMapsInfoFree */
439 if (config->flags & EBPF_XDP_CODE) {
440 bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
442 bpf_map_data->array[bpf_map_data->last].to_unlink = 1;
445 bpf_map_data->last++;
455 pfd = bpf_program__fd(bpfprog);
457 SCLogError(
"Unable to find %s section", section);
461 SCLogInfo(
"Successfully loaded eBPF file '%s' on '%s'", path, iface);
/**
 * \brief Attach an XDP program to an interface.
 *
 * The body is compiled only with HAVE_PACKET_XDP. The interface index is
 * resolved with if_nametoindex() and the program is attached with
 * bpf_xdp_attach() when HAVE_BPF_XDP_ATTACH is defined, otherwise with
 * bpf_set_link_xdp_fd(). A failure is logged with the libbpf error string.
 *
 * \param iface interface name
 * \param fd    file descriptor of the XDP program
 * \param flags flags passed unchanged to the attach call
 *
 * NOTE(review): the ifindex validity check, the test on `err` and the return
 * values are on lines not shown in this excerpt.
 */
474 int EBPFSetupXDP(
const char *iface,
int fd, uint8_t
flags)
476 #ifdef HAVE_PACKET_XDP
477 unsigned int ifindex = if_nametoindex(iface);
482 #ifdef HAVE_BPF_XDP_ATTACH
483 int err = bpf_xdp_attach(ifindex, fd,
flags, NULL);
486 int err = bpf_set_link_xdp_fd(ifindex, fd,
flags);
490 libbpf_strerror(err, buf,
sizeof(buf));
491 SCLogError(
"Unable to set XDP on '%s': %s (%d)", iface, buf, err);
/*
 * NOTE(review): the head of this function is not visible in this excerpt.
 * The parameter list matches the flow callback prototype used by the table
 * walkers, and EBPFCheckBypassedFlowCreate passes EBPFCreateFlowForKey as
 * that callback, so this is presumably its definition — confirm.
 *
 * Visible behaviour: an EBPFBypassData is zero-allocated and records
 * cpus_count; `skey` bytes of `key` are copied into `mkey` on two separate
 * paths, the second of which works on the bypass data already attached to
 * `fc` and tests whether both of its key slots are set.
 */
504 size_t skey,
FlowKey *flow_key,
struct timespec *ctime,
505 uint64_t pkts_cnt, uint64_t bytes_cnt,
506 int mapfd,
int cpus_count)
530 EBPFBypassData *eb =
SCCalloc(1,
sizeof(EBPFBypassData));
543 memcpy(mkey, key, skey);
546 eb->cpus_count = cpus_count;
554 EBPFBypassData *eb = (EBPFBypassData *) fc->
bypass_data;
561 if (eb->key[0] && eb->key[1]) {
572 memcpy(mkey, key, skey);
/**
 * \brief Release bypass data.
 *
 * \param data opaque pointer, interpreted as an EBPFBypassData *
 *
 * NOTE(review): the statements that free the structure (and presumably its
 * two keys) are on lines not shown in this excerpt — confirm.
 */
580 void EBPFBypassFree(
void *data)
582 EBPFBypassData *eb = (EBPFBypassData *)data;
/*
 * NOTE(review): the head of this function is not visible in this excerpt;
 * EBPFBypassUpdate calls EBPFBypassCheckHalfFlow(f, fc, eb, key, index) with
 * matching arguments, so this is presumably its definition — confirm.
 *
 * Visible behaviour: looks `key` up in the map eb->mapfd, receiving one
 * struct pair per CPU (eb->cpus_count of them), and sums the packets and
 * bytes fields over all CPUs into pkts_cnt and bytes_cnt. How the totals are
 * compared with the flow state is on lines not shown here.
 */
601 EBPFBypassData *eb,
void *key,
605 uint64_t pkts_cnt = 0;
606 uint64_t bytes_cnt = 0;
/* per-CPU value buffer, cleared before the lookup */
609 BPF_DECLARE_PERCPU(
struct pair, values_array, eb->cpus_count);
610 memset(values_array, 0,
sizeof(values_array));
611 int res = bpf_map_lookup_elem(eb->mapfd, key, values_array);
613 SCLogDebug(
"errno: (%d) %s", errno, strerror(errno));
616 for (i = 0; i < eb->cpus_count; i++) {
618 SCLogDebug(
"%d: Adding pkts %lu bytes %lu", i,
619 BPF_PERCPU(values_array, i).packets,
620 BPF_PERCPU(values_array, i).bytes);
621 pkts_cnt += BPF_PERCPU(values_array, i).packets;
622 bytes_cnt += BPF_PERCPU(values_array, i).bytes;
/**
 * \brief Refresh a bypassed flow from the counters kept in its eBPF map.
 *
 * \param f    flow being checked
 * \param data opaque pointer, interpreted as the flow's EBPFBypassData *
 * \param tsec time value supplied by the caller (its use is not visible here)
 *
 * NOTE(review): the condition under which both keys are deleted (presumably
 * "no activity on either half") and the returned value are on lines not
 * shown in this excerpt — confirm.
 */
646 bool EBPFBypassUpdate(
Flow *f,
void *data, time_t tsec)
648 EBPFBypassData *eb = (EBPFBypassData *)data;
/* `|=` rather than `||`: the second half is checked even when the first
 * half already reported activity */
656 bool activity = EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[0], 0);
657 activity |= EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[1], 1);
661 EBPFDeleteKey(eb->mapfd, eb->key[0]);
662 EBPFDeleteKey(eb->mapfd, eb->key[1]);
/*
 * NOTE(review): tail of a prototype whose head is not visible in this
 * excerpt; presumably the OpFlowForKey callback type taken by the table
 * walkers — confirm. The walkers pass the map key and its size, the decoded
 * FlowKey, the current time, the summed packet/byte counters, the map fd and
 * the CPU count.
 */
672 size_t skey,
FlowKey *flow_key,
struct timespec *ctime,
673 uint64_t pkts_cnt, uint64_t bytes_cnt,
674 int mapfd,
int cpus_count);
/*
 * NOTE(review): the head of this function is not visible in this excerpt;
 * EBPFCheckBypassedFlowCreate calls EBPFForEachFlowV4Table with matching
 * arguments, so this is presumably its definition — confirm.
 *
 * Visible behaviour: walks the IPv4 flow map named `name` on device `dev`
 * with bpf_map_get_next_key(), sums the per-CPU packet/byte counters of each
 * entry, converts the map key into a FlowKey and hands both to the
 * EBPFOpFlowForKey callback. The number of entries is logged at the end.
 * Bookkeeping of `found`, `hash_cnt`, `flowstats` and the cursor advance are
 * on lines not shown here.
 */
683 struct timespec *ctime,
684 struct ebpf_timeout_config *tcfg,
685 OpFlowForKey EBPFOpFlowForKey
689 int mapfd = EBPFGetMapFDByName(dev->
dev, name);
/* `key` starts zero-initialised and is the key passed to get_next_key;
 * `next_key` is the entry processed in the current round */
693 struct flowv4_keys key = {}, next_key;
696 uint64_t hash_cnt = 0;
/* the per-CPU buffer below is sized by cpus_count */
698 if (tcfg->cpus_count == 0) {
702 bool dead_flow =
false;
703 while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
/* counters are reset for every entry */
704 uint64_t bytes_cnt = 0;
705 uint64_t pkts_cnt = 0;
/* NOTE(review): deletes the previous key, not the current one; presumably
 * deferred until the cursor has moved past it — the guard is not shown */
708 EBPFDeleteKey(mapfd, &key);
713 BPF_DECLARE_PERCPU(
struct pair, values_array, tcfg->cpus_count);
714 memset(values_array, 0,
sizeof(values_array));
715 int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
717 SCLogDebug(
"no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
718 SCLogDebug(
"errno: (%d) %s", errno, strerror(errno));
722 for (i = 0; i < tcfg->cpus_count; i++) {
724 SCLogDebug(
"%d: Adding pkts %lu bytes %lu", i,
725 BPF_PERCPU(values_array, i).packets,
726 BPF_PERCPU(values_array, i).bytes);
727 pkts_cnt += BPF_PERCPU(values_array, i).packets;
728 bytes_cnt += BPF_PERCPU(values_array, i).bytes;
/* byte order handling depends on the mode: XDP bypass converts the ports
 * and copies the addresses as-is, the other branch does the opposite */
733 if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
734 flow_key.
sp = ntohs(next_key.port16[0]);
735 flow_key.
dp = ntohs(next_key.port16[1]);
736 flow_key.
src.addr_data32[0] = next_key.src;
737 flow_key.
dst.addr_data32[0] = next_key.dst;
739 flow_key.
sp = next_key.port16[0];
740 flow_key.
dp = next_key.port16[1];
741 flow_key.
src.addr_data32[0] = ntohl(next_key.src);
742 flow_key.
dst.addr_data32[0] = ntohl(next_key.dst);
/* an IPv4 address only uses the first 32-bit word; clear the other three */
745 flow_key.
src.addr_data32[1] = 0;
746 flow_key.
src.addr_data32[2] = 0;
747 flow_key.
src.addr_data32[3] = 0;
749 flow_key.
dst.addr_data32[1] = 0;
750 flow_key.
dst.addr_data32[2] = 0;
751 flow_key.
dst.addr_data32[3] = 0;
752 flow_key.
vlan_id[0] = next_key.vlan0;
753 flow_key.
vlan_id[1] = next_key.vlan1;
754 flow_key.
vlan_id[2] = next_key.vlan2;
/* ip_proto value 1 is mapped to TCP, any other value to UDP */
755 if (next_key.ip_proto == 1) {
756 flow_key.
proto = IPPROTO_TCP;
758 flow_key.
proto = IPPROTO_UDP;
/* the callback result is kept in dead_flow */
762 dead_flow = EBPFOpFlowForKey(&flowstats, dev, &next_key,
sizeof(next_key), &flow_key,
763 ctime, pkts_cnt, bytes_cnt,
764 mapfd, tcfg->cpus_count);
776 EBPFDeleteKey(mapfd, &key);
782 SCLogInfo(
"IPv4 bypassed flow table size: %" PRIu64, hash_cnt);
/**
 * \brief Walk the IPv6 bypassed-flow map of a device and run a callback on
 *        every entry.
 *
 * Iterates the map with bpf_map_get_next_key(), sums the per-CPU packet/byte
 * counters of each entry, converts the map key into a FlowKey and hands both
 * to EBPFOpFlowForKey. The number of entries is logged at the end.
 *
 * \param th_v             thread vars of the caller
 * \param ctime            current time, forwarded to the callback
 * \param tcfg             configuration; cpus_count and mode are read
 * \param EBPFOpFlowForKey callback run on each entry
 *
 * NOTE(review): the remaining parameters (device and map name), the
 * bookkeeping of `found`, `hash_cnt` and `flowstats`, the cursor advance and
 * the return values are on lines not shown in this excerpt.
 */
793 static int EBPFForEachFlowV6Table(
ThreadVars *th_v,
795 struct timespec *ctime,
796 struct ebpf_timeout_config *tcfg,
797 OpFlowForKey EBPFOpFlowForKey
801 int mapfd = EBPFGetMapFDByName(dev->
dev, name);
/* `key` starts zero-initialised and is the key passed to get_next_key;
 * `next_key` is the entry processed in the current round */
805 struct flowv6_keys key = {}, next_key;
808 uint64_t hash_cnt = 0;
/* the per-CPU buffer below is sized by cpus_count */
810 if (tcfg->cpus_count == 0) {
/* NOTE(review): pkts_cnt lives outside the loop here (the IPv4 walker
 * declares it per entry) and is later overwritten with the callback result,
 * so unless a line not shown resets it, the per-entry sum of later rounds
 * does not start from 0 — confirm intent */
815 uint64_t pkts_cnt = 0;
816 while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
817 uint64_t bytes_cnt = 0;
/* NOTE(review): deletes the previous key, not the current one; presumably
 * deferred until the cursor has moved past it — the guard is not shown */
820 EBPFDeleteKey(mapfd, &key);
825 BPF_DECLARE_PERCPU(
struct pair, values_array, tcfg->cpus_count);
826 memset(values_array, 0,
sizeof(values_array));
827 int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
829 SCLogDebug(
"no entry in v6 table for %d -> %d", key.port16[0], key.port16[1]);
833 for (i = 0; i < tcfg->cpus_count; i++) {
835 SCLogDebug(
"%d: Adding pkts %lu bytes %lu", i,
836 BPF_PERCPU(values_array, i).packets,
837 BPF_PERCPU(values_array, i).bytes);
838 pkts_cnt += BPF_PERCPU(values_array, i).packets;
839 bytes_cnt += BPF_PERCPU(values_array, i).bytes;
/* byte order handling depends on the mode: XDP bypass converts the ports
 * and copies the address words as-is, the other branch does the opposite */
844 if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
845 flow_key.
sp = ntohs(next_key.port16[0]);
846 flow_key.
dp = ntohs(next_key.port16[1]);
848 flow_key.
src.addr_data32[0] = next_key.src[0];
849 flow_key.
src.addr_data32[1] = next_key.src[1];
850 flow_key.
src.addr_data32[2] = next_key.src[2];
851 flow_key.
src.addr_data32[3] = next_key.src[3];
853 flow_key.
dst.addr_data32[0] = next_key.dst[0];
854 flow_key.
dst.addr_data32[1] = next_key.dst[1];
855 flow_key.
dst.addr_data32[2] = next_key.dst[2];
856 flow_key.
dst.addr_data32[3] = next_key.dst[3];
858 flow_key.
sp = next_key.port16[0];
859 flow_key.
dp = next_key.port16[1];
861 flow_key.
src.addr_data32[0] = ntohl(next_key.src[0]);
862 flow_key.
src.addr_data32[1] = ntohl(next_key.src[1]);
863 flow_key.
src.addr_data32[2] = ntohl(next_key.src[2]);
864 flow_key.
src.addr_data32[3] = ntohl(next_key.src[3]);
866 flow_key.
dst.addr_data32[0] = ntohl(next_key.dst[0]);
867 flow_key.
dst.addr_data32[1] = ntohl(next_key.dst[1]);
868 flow_key.
dst.addr_data32[2] = ntohl(next_key.dst[2]);
869 flow_key.
dst.addr_data32[3] = ntohl(next_key.dst[3]);
871 flow_key.
vlan_id[0] = next_key.vlan0;
872 flow_key.
vlan_id[1] = next_key.vlan1;
873 flow_key.
vlan_id[2] = next_key.vlan2;
/* ip_proto value 1 is mapped to TCP, any other value to UDP */
874 if (next_key.ip_proto == 1) {
875 flow_key.
proto = IPPROTO_TCP;
877 flow_key.
proto = IPPROTO_UDP;
/* the callback result replaces the summed packet count */
881 pkts_cnt = EBPFOpFlowForKey(&flowstats, dev, &next_key,
sizeof(next_key), &flow_key,
882 ctime, pkts_cnt, bytes_cnt,
883 mapfd, tcfg->cpus_count);
895 EBPFDeleteKey(mapfd, &key);
901 SCLogInfo(
"IPv6 bypassed flow table size: %" PRIu64, hash_cnt);
/**
 * \brief Walk the bypassed-flow maps and run EBPFCreateFlowForKey on every
 *        entry.
 *
 * Both the "flow_table_v4" and the "flow_table_v6" map are walked for `ldev`.
 *
 * \param th_v    thread vars, forwarded to the walkers
 * \param curtime current time (NOTE(review): presumably the argument on the
 *                call lines not shown here — confirm)
 * \param data    opaque pointer, interpreted as struct ebpf_timeout_config *
 *
 * NOTE(review): where `ldev` comes from (presumably a loop over the live
 * devices) and the return value are on lines not shown in this excerpt.
 */
906 int EBPFCheckBypassedFlowCreate(
ThreadVars *th_v,
struct timespec *curtime,
void *data)
909 struct ebpf_timeout_config *cfg = (
struct ebpf_timeout_config *)data;
911 EBPFForEachFlowV4Table(th_v, ldev,
"flow_table_v4",
913 cfg, EBPFCreateFlowForKey);
914 EBPFForEachFlowV6Table(th_v, ldev,
"flow_table_v6",
916 cfg, EBPFCreateFlowForKey);
/**
 * \brief Register the flow storage used by this module.
 *
 * Registers a pointer-sized flow storage named "bypassedlist" and keeps the
 * returned id in g_flow_storage_id. BypassedListFree is passed as the last
 * argument and NULL as the one before it (NOTE(review): presumably the free
 * and allocation callbacks respectively — confirm against
 * FlowStorageRegister).
 */
922 void EBPFRegisterExtension(
void)
925 g_flow_storage_id =
FlowStorageRegister(
"bypassedlist",
sizeof(
void *), NULL, BypassedListFree);
929 #ifdef HAVE_PACKET_XDP
/* Running count of CPUs added to the redirect set: reset by EBPFBuildCPUSet,
 * incremented by EBPFRedirectMapAddCPU, and used by EBPFAddCPUToMap as the
 * key of the next "cpus_available" slot. */
931 static uint32_t g_redirect_iface_cpu_counter = 0;
/**
 * \brief Register CPU `i` in the redirect maps of an interface.
 *
 * Two map updates are made: key `i` in "cpu_map" gets the value queue_size
 * (4096), and key g_redirect_iface_cpu_counter in "cpus_available" gets the
 * value `i`.
 *
 * \param iface interface name
 * \param i     CPU number
 *
 * NOTE(review): the checks on `cpumap` and `ret` and the return values are on
 * lines not shown in this excerpt; the caller tests the result with `< 0`.
 */
933 static int EBPFAddCPUToMap(
const char *iface, uint32_t i)
935 int cpumap = EBPFGetMapFDByName(iface,
"cpu_map");
936 uint32_t queue_size = 4096;
943 ret = bpf_map_update_elem(cpumap, &i, &queue_size, 0);
945 SCLogError(
"Create CPU entry failed (err:%d)", ret);
948 int cpus_available = EBPFGetMapFDByName(iface,
"cpus_available");
949 if (cpus_available < 0) {
/* the running counter is the key, so CPUs occupy consecutive slots */
954 ret = bpf_map_update_elem(cpus_available, &g_redirect_iface_cpu_counter, &i, 0);
956 SCLogError(
"Create CPU entry failed (err:%d)", ret);
/**
 * \brief Per-CPU callback used by EBPFBuildCPUSet.
 *
 * Adds CPU `i` through EBPFAddCPUToMap and increments
 * g_redirect_iface_cpu_counter.
 *
 * \param i    CPU number
 * \param data opaque pointer, passed on as the interface name
 *
 * NOTE(review): what happens in the failure branch (presumably an error log
 * and early return, so the counter only counts successes) is on lines not
 * shown in this excerpt — confirm.
 */
962 static void EBPFRedirectMapAddCPU(
int i,
void *data)
964 if (EBPFAddCPUToMap(data, i) < 0) {
967 g_redirect_iface_cpu_counter++;
/**
 * \brief Publish the set of redirect CPUs of an interface to its eBPF maps.
 *
 * Looks up the "cpus_count" map, resets g_redirect_iface_cpu_counter and
 * writes the counter to the map under `key0` twice: once right after the
 * reset and once after the step that involves EBPFRedirectMapAddCPU.
 *
 * \param node configuration node (NOTE(review): presumably describes the CPU
 *             set; its use is on lines not shown here — confirm)
 * \param iface interface name
 *
 * NOTE(review): the declaration of `key0`, the remaining arguments of both
 * updates and the call that takes EBPFRedirectMapAddCPU as callback are on
 * lines not shown in this excerpt.
 */
971 void EBPFBuildCPUSet(
ConfNode *node,
char *iface)
974 int mapfd = EBPFGetMapFDByName(iface,
"cpus_count");
976 SCLogError(
"Unable to find 'cpus_count' map");
979 g_redirect_iface_cpu_counter = 0;
981 bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
986 EBPFRedirectMapAddCPU,
988 bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
/**
 * \brief Record the peer (output) interface of `iface` in its eBPF maps.
 *
 * The index of `out_iface` is written under `key0` into both the "tx_peer"
 * and the "tx_peer_int" map of `iface`, using BPF_ANY.
 *
 * \param iface     interface whose maps are updated
 * \param out_iface peer interface; an index of 0 from if_nametoindex() is
 *                  treated as a special case
 *
 * NOTE(review): the checks on the two map fds and on `ret`, the declaration
 * of `key0` and the return values are on lines not shown in this excerpt.
 */
1004 int EBPFSetPeerIface(
const char *iface,
const char *out_iface)
1006 int mapfd = EBPFGetMapFDByName(iface,
"tx_peer");
1011 int intmapfd = EBPFGetMapFDByName(iface,
"tx_peer_int");
1013 SCLogError(
"Unable to find 'tx_peer_int' map");
1018 unsigned int peer_index = if_nametoindex(out_iface);
1019 if (peer_index == 0) {
1023 int ret = bpf_map_update_elem(mapfd, &key0, &peer_index, BPF_ANY);
1025 SCLogError(
"Create peer entry failed (err:%d)", ret);
1028 ret = bpf_map_update_elem(intmapfd, &key0, &peer_index, BPF_ANY);
1030 SCLogError(
"Create peer entry failed (err:%d)", ret);
1041 int EBPFUpdateFlow(
Flow *f,
Packet *p,
void *data)
1043 BypassedIfaceList *ifl = (BypassedIfaceList *)
FlowGetStorageById(f, g_flow_storage_id);
1054 BypassedIfaceList *ldev = ifl;
1065 BypassedIfaceList *nifl =
SCCalloc(1,
sizeof(*nifl));