33 #define SC_PCAP_DONT_INCLUDE_PCAP_H 1
38 #ifdef HAVE_PACKET_EBPF
41 #include <sys/resource.h>
54 #include <bpf/libbpf.h>
/* Capacity of the map table: sizes bpf_maps_info.array and is the limit
 * tested while maps are collected after an eBPF object is loaded. */
59 #define BPF_MAP_MAX_COUNT 16
/* NOTE(review): presumably a timeout, in seconds, applied to bypassed
 * flows -- confirm against the code that consumes it. */
61 #define BYPASSED_FLOW_TIMEOUT 60
/* Table of eBPF map entries. Each item is used in this file through its
 * name, fd, iface and to_unlink members; the table holds at most
 * BPF_MAP_MAX_COUNT items. */
73 struct bpf_maps_info {
74 struct bpf_map_item array[BPF_MAP_MAX_COUNT];
/* Node of a singly linked list; nodes are chained through `next`. */
78 typedef struct BypassedIfaceList_ {
80 struct BypassedIfaceList_ *
next;
/**
 * Release the per-entry resources of a bpf_maps_info table.
 *
 * Entries flagged `to_unlink` have their pinned file under /sys/fs/bpf
 * removed; the name string of every named entry is freed.
 *
 * \param bpf opaque pointer, used as a struct bpf_maps_info *
 */
83 static void BpfMapsInfoFree(
void *bpf)
85 struct bpf_maps_info *bpfinfo = (
struct bpf_maps_info *)bpf;
/* Only the first `last` slots of the array are visited. */
87 for (i = 0; i < bpfinfo->last; i ++) {
88 if (bpfinfo->array[i].name) {
89 if (bpfinfo->array[i].to_unlink) {
/* Rebuild the pin path from the interface name and the map name. */
90 char pinnedpath[PATH_MAX];
91 int ret = snprintf(pinnedpath,
sizeof(pinnedpath),
92 "/sys/fs/bpf/suricata-%s-%s",
93 bpfinfo->array[i].iface,
94 bpfinfo->array[i].name);
97 ret = unlink(pinnedpath);
101 "Unable to remove %s: %s (%d)", pinnedpath, strerror(error), error);
104 SCLogWarning(
"Unable to remove map %s", bpfinfo->array[i].name);
/* The name string belongs to the table entry. */
107 SCFree(bpfinfo->array[i].name);
/**
 * Callback handed to FlowStorageRegister() for the "bypassedlist" storage.
 *
 * NOTE(review): presumably walks the list and frees each node, with `nifl`
 * holding the following node -- confirm.
 *
 * \param ifl opaque pointer, used as a BypassedIfaceList *
 */
113 static void BypassedListFree(
void *ifl)
115 BypassedIfaceList *mifl = (BypassedIfaceList *)ifl;
116 BypassedIfaceList *nifl;
/**
 * Delete one entry from an eBPF map through bpf_map_delete_elem().
 *
 * A warning carrying errno and its text is logged for the failure case;
 * nothing is returned to the caller.
 *
 * \param fd  file descriptor of the map
 * \param key pointer to the key to remove
 */
124 void EBPFDeleteKey(
int fd,
void *key)
126 int ret = bpf_map_delete_elem(fd, key);
128 SCLogWarning(
"Unable to delete entry: %s (%d)", strerror(errno), errno);
/**
 * Fetch the map table associated with an interface.
 *
 * \param iface interface name
 * \return the stored opaque pointer cast to struct bpf_maps_info *;
 *         the caller in this file tests the result against NULL
 */
132 static struct bpf_maps_info *EBPFGetBpfMap(
const char *iface)
139 return (
struct bpf_maps_info *)data;
/**
 * Find the file descriptor of a named eBPF map attached to an interface.
 *
 * \param iface interface name used to locate the map table
 * \param name  name of the map to search for
 * \return the fd recorded for the first entry whose name matches;
 *         callers in this file treat a negative value as "not found"
 */
149 int EBPFGetMapFDByName(
const char *iface,
const char *
name)
/* Both arguments are required. */
153 if (iface == NULL ||
name == NULL)
155 struct bpf_maps_info *bpf_maps = EBPFGetBpfMap(iface);
156 if (bpf_maps == NULL)
/* Every slot is scanned, not only the first `last`; slots without a
 * name are tested separately before the comparison. */
159 for (i = 0; i < BPF_MAP_MAX_COUNT; i++) {
160 if (!bpf_maps->array[i].name)
162 if (!strcmp(bpf_maps->array[i].name,
name)) {
163 SCLogDebug(
"Got fd %d for eBPF map '%s'", bpf_maps->array[i].fd,
name);
164 return bpf_maps->array[i].fd;
/**
 * Open a pinned eBPF object located under /sys/fs/bpf.
 *
 * \param livedev device the pinned object belongs to
 * \param file    name of the pinned map
 * \return the value returned by bpf_obj_get() for the built path
 */
171 static int EBPFLoadPinnedMapsFile(
LiveDevice *livedev,
const char *file)
/* NOTE(review): this buffer is 1024 bytes while BpfMapsInfoFree builds the
 * same kind of path in a PATH_MAX buffer, and the snprintf() result is not
 * stored here, so truncation would go unnoticed -- confirm whether that is
 * acceptable. */
173 char pinnedpath[1024];
174 snprintf(pinnedpath,
sizeof(pinnedpath),
175 "/sys/fs/bpf/suricata-%s-%s",
179 return bpf_obj_get(pinnedpath);
/**
 * Build a map table from maps that are already pinned for a device.
 *
 * In XDP bypass mode the two flow tables fill slots 0 and 1; the maps
 * "cpu_map", "cpus_available", "tx_peer" and "tx_peer_int" are then
 * appended at index `last`.
 *
 * \param livedev device whose pinned maps are opened
 * \param config  configuration supplying `pinned_maps_name` and `mode`
 * \return 0 is what the caller in EBPFLoadFile() treats as success
 */
182 static int EBPFLoadPinnedMaps(
LiveDevice *livedev,
struct ebpf_timeout_config *config)
184 int fd_v4 = -1, fd_v6 = -1;
/* Open the explicitly configured pinned map first, when one is named. */
187 if (config->pinned_maps_name) {
188 int ret = EBPFLoadPinnedMapsFile(livedev, config->pinned_maps_name);
/* XDP bypass needs both flow tables. */
195 if (config->mode == AFP_MODE_XDP_BYPASS) {
197 fd_v4 = EBPFLoadPinnedMapsFile(livedev,
"flow_table_v4");
203 fd_v6 = EBPFLoadPinnedMapsFile(livedev,
"flow_table_v6");
205 SCLogWarning(
"Found a flow_table_v4 map but no flow_table_v6 map");
210 struct bpf_maps_info *bpf_map_data =
SCCalloc(1,
sizeof(*bpf_map_data));
211 if (bpf_map_data == NULL) {
/* Slots 0 and 1 are reserved for the flow tables in XDP bypass mode. */
216 if (config->mode == AFP_MODE_XDP_BYPASS) {
217 bpf_map_data->array[0].fd = fd_v4;
218 bpf_map_data->array[0].name =
SCStrdup(
"flow_table_v4");
219 if (bpf_map_data->array[0].name == NULL) {
222 bpf_map_data->array[1].fd = fd_v6;
223 bpf_map_data->array[1].name =
SCStrdup(
"flow_table_v6");
224 if (bpf_map_data->array[1].name == NULL) {
227 bpf_map_data->last = 2;
229 bpf_map_data->last = 0;
/* Further maps are stored at index `last`, which then advances. */
233 int fd = EBPFLoadPinnedMapsFile(livedev,
"cpu_map");
235 bpf_map_data->array[bpf_map_data->last].fd = fd;
236 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"cpu_map");
237 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
240 bpf_map_data->last++;
242 fd = EBPFLoadPinnedMapsFile(livedev,
"cpus_available");
244 bpf_map_data->array[bpf_map_data->last].fd = fd;
245 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"cpus_available");
246 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
249 bpf_map_data->last++;
251 fd = EBPFLoadPinnedMapsFile(livedev,
"tx_peer");
253 bpf_map_data->array[bpf_map_data->last].fd = fd;
254 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"tx_peer");
255 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
258 bpf_map_data->last++;
260 fd = EBPFLoadPinnedMapsFile(livedev,
"tx_peer_int");
262 bpf_map_data->array[bpf_map_data->last].fd = fd;
263 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"tx_peer_int");
264 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
267 bpf_map_data->last++;
/* Release every name stored so far and mark the table as empty.
 * NOTE(review): presumably the shared failure path -- confirm. */
278 for (
int i = 0; i < bpf_map_data->last; i++) {
279 SCFree(bpf_map_data->array[i].name);
281 bpf_map_data->last = 0;
/**
 * Load an eBPF object file for an interface and record its maps.
 *
 * Visible steps: optionally reuse pinned maps, lift the locked-memory
 * limit, open the object, select the program whose section name matches,
 * set its type, load the object, then copy each map (fd, name, iface)
 * into a freshly allocated table, pinning maps when requested.
 *
 * \param iface   interface the program is loaded for
 * \param path    path of the eBPF object file
 * \param section section name of the program to use
 * \param val     NOTE(review): presumably receives the program fd -- confirm
 * \param config  flags (EBPF_XDP_CODE, EBPF_PINNED_MAPS, EBPF_XDP_HW_MODE,
 *                EBPF_SOCKET_FILTER) steering the load
 */
298 int EBPFLoadFile(
const char *iface,
const char *path,
const char * section,
299 int *val,
struct ebpf_timeout_config *config)
303 struct bpf_object *bpfobj = NULL;
304 struct bpf_program *bpfprog = NULL;
305 struct bpf_map *map = NULL;
/* For pinned XDP code, first try the maps that are already pinned. */
313 if (config->flags & EBPF_XDP_CODE && config->flags & EBPF_PINNED_MAPS) {
315 if (EBPFLoadPinnedMaps(livedev, config) == 0) {
316 SCLogInfo(
"Loaded pinned maps, will use already loaded eBPF filter");
322 SCLogError(
"No file defined to load eBPF from");
/* Remove the RLIMIT_MEMLOCK limit before opening the object. */
328 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
329 if (setrlimit(RLIMIT_MEMLOCK, &r) != 0) {
330 SCLogError(
"Unable to lock memory: %s (%d)", strerror(errno), errno);
335 bpfobj = bpf_object__open(path);
336 long error = libbpf_get_error(bpfobj);
339 libbpf_strerror(error, err_buf,
341 SCLogError(
"Unable to load eBPF objects in '%s': %s", path, err_buf);
/* Hardware mode: bind every program and map to the interface index. */
345 if (config->flags & EBPF_XDP_HW_MODE) {
346 unsigned int ifindex = if_nametoindex(iface);
347 bpf_object__for_each_program(bpfprog, bpfobj) {
348 bpf_program__set_ifindex(bpfprog, ifindex);
350 bpf_map__for_each(map, bpfobj) {
351 bpf_map__set_ifindex(map, ifindex);
/* Pick the program by section name; the accessor and the type setter
 * depend on what the installed libbpf provides. */
356 bpf_object__for_each_program(bpfprog, bpfobj) {
357 #ifdef HAVE_BPF_PROGRAM__SECTION_NAME
358 const char *title = bpf_program__section_name(bpfprog);
360 const char *title = bpf_program__title(bpfprog, 0);
362 if (!strcmp(title, section)) {
363 if (config->flags & EBPF_SOCKET_FILTER) {
364 #ifdef HAVE_BPF_PROGRAM__SET_TYPE
365 bpf_program__set_type(bpfprog, BPF_PROG_TYPE_SOCKET_FILTER);
368 bpf_program__set_socket_filter(bpfprog);
371 #ifdef HAVE_BPF_PROGRAM__SET_TYPE
372 bpf_program__set_type(bpfprog, BPF_PROG_TYPE_XDP);
375 bpf_program__set_xdp(bpfprog);
383 if (found ==
false) {
384 SCLogError(
"No section '%s' in '%s' file. Will not be able to use the file", section, path);
388 err = bpf_object__load(bpfobj);
391 SCLogError(
"Permission issue when loading eBPF object"
392 " (check libbpf error on stdout)");
395 libbpf_strerror(err, buf,
sizeof(buf));
396 SCLogError(
"Unable to load eBPF object: %s (%d)", buf, err);
/* Table that will describe the maps of the loaded object. */
404 struct bpf_maps_info *bpf_map_data =
SCCalloc(1,
sizeof(*bpf_map_data));
405 if (bpf_map_data == NULL) {
411 bpf_map__for_each(map, bpfobj) {
/* The table has a fixed capacity. */
412 if (bpf_map_data->last == BPF_MAP_MAX_COUNT) {
413 SCLogError(
"Too many BPF maps in eBPF files");
416 SCLogDebug(
"Got a map '%s' with fd '%d'", bpf_map__name(map), bpf_map__fd(map));
417 bpf_map_data->array[bpf_map_data->last].fd = bpf_map__fd(map);
418 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(bpf_map__name(map));
419 snprintf(bpf_map_data->array[bpf_map_data->last].iface, IFNAMSIZ,
421 if (!bpf_map_data->array[bpf_map_data->last].name) {
423 BpfMapsInfoFree(bpf_map_data);
/* Entries are not unlinked unless flagged below. */
426 bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
427 if (config->flags & EBPF_PINNED_MAPS) {
428 SCLogConfig(
"Pinning: %d to %s", bpf_map_data->array[bpf_map_data->last].fd,
429 bpf_map_data->array[bpf_map_data->last].name);
431 snprintf(buf,
sizeof(buf),
"/sys/fs/bpf/suricata-%s-%s", iface,
432 bpf_map_data->array[bpf_map_data->last].name);
433 int ret = bpf_obj_pin(bpf_map_data->array[bpf_map_data->last].fd, buf);
/* The two assignments below choose whether BpfMapsInfoFree() removes the
 * pinned file: 0 is stored for XDP code, 1 is the other outcome. */
438 if (config->flags & EBPF_XDP_CODE) {
439 bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
441 bpf_map_data->array[bpf_map_data->last].to_unlink = 1;
444 bpf_map_data->last++;
454 pfd = bpf_program__fd(bpfprog);
456 SCLogError(
"Unable to find %s section", section);
460 SCLogInfo(
"Successfully loaded eBPF file '%s' on '%s'", path, iface);
/**
 * Attach an XDP program to an interface.
 *
 * \param iface interface name, resolved with if_nametoindex()
 * \param fd    file descriptor of the program to attach
 * \param flags flags forwarded unchanged to the libbpf attach call
 */
473 int EBPFSetupXDP(
const char *iface,
int fd, uint8_t
flags)
475 #ifdef HAVE_PACKET_XDP
476 unsigned int ifindex = if_nametoindex(iface);
/* bpf_xdp_attach() is used when the build detected it; the other call is
 * bpf_set_link_xdp_fd(). */
481 #ifdef HAVE_BPF_XDP_ATTACH
482 int err = bpf_xdp_attach(ifindex, fd,
flags, NULL);
485 int err = bpf_set_link_xdp_fd(ifindex, fd,
flags);
489 libbpf_strerror(err, buf,
sizeof(buf));
490 SCLogError(
"Unable to set XDP on '%s': %s (%d)", iface, buf, err);
/* Parameter list matching the flow-table callback type: key size, decoded
 * flow key, current time, packet/byte counters, map fd and CPU count.
 * NOTE(review): presumably the tail of EBPFCreateFlowForKey, the callback
 * passed by EBPFCheckBypassedFlowCreate() -- confirm. */
503 size_t skey,
FlowKey *flow_key,
struct timespec *ctime,
504 uint64_t pkts_cnt, uint64_t bytes_cnt,
505 int mapfd,
int cpus_count)
/* Fresh, zero-initialised bypass data. */
529 EBPFBypassData *eb =
SCCalloc(1,
sizeof(EBPFBypassData));
/* The map key is copied (skey bytes) rather than referenced. */
542 memcpy(mkey, key, skey);
545 eb->cpus_count = cpus_count;
/* Bypass data already attached to `fc`. */
553 EBPFBypassData *eb = (EBPFBypassData *) fc->
bypass_data;
/* Both key slots are already set. */
560 if (eb->key[0] && eb->key[1]) {
571 memcpy(mkey, key, skey);
/**
 * Release bypass data.
 *
 * \param data opaque pointer, used as an EBPFBypassData *
 */
579 void EBPFBypassFree(
void *data)
581 EBPFBypassData *eb = (EBPFBypassData *)data;
/* NOTE(review): presumably part of EBPFBypassCheckHalfFlow, which
 * EBPFBypassUpdate() calls once per key -- confirm.
 * `eb` supplies the map fd and CPU count, `key` the entry to read. */
599 EBPFBypassData *eb,
void *key,
603 uint64_t pkts_cnt = 0;
604 uint64_t bytes_cnt = 0;
/* One `struct pair` slot per CPU, zeroed before the lookup. */
607 BPF_DECLARE_PERCPU(
struct pair, values_array, eb->cpus_count);
608 memset(values_array, 0,
sizeof(values_array));
609 int res = bpf_map_lookup_elem(eb->mapfd, key, values_array);
611 SCLogDebug(
"errno: (%d) %s", errno, strerror(errno));
/* Sum the per-CPU packet and byte counters. */
614 for (i = 0; i < eb->cpus_count; i++) {
616 SCLogDebug(
"%d: Adding pkts %lu bytes %lu", i,
617 BPF_PERCPU(values_array, i).packets,
618 BPF_PERCPU(values_array, i).bytes);
619 pkts_cnt += BPF_PERCPU(values_array, i).packets;
620 bytes_cnt += BPF_PERCPU(values_array, i).bytes;
/**
 * Check both directions of a bypassed flow for activity.
 *
 * \param f    flow being checked
 * \param data opaque pointer, used as an EBPFBypassData *
 * \param tsec time value supplied by the caller
 */
644 bool EBPFBypassUpdate(
Flow *f,
void *data, time_t tsec)
646 EBPFBypassData *eb = (EBPFBypassData *)data;
/* key[0] is checked with index 0 and key[1] with index 1; activity on
 * either side is enough. */
654 bool activity = EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[0], 0);
655 activity |= EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[1], 1);
/* Both map entries are removed together. */
659 EBPFDeleteKey(eb->mapfd, eb->key[0]);
660 EBPFDeleteKey(eb->mapfd, eb->key[1]);
/* Parameter list of a callback declaration: key size, decoded flow key,
 * current time, packet/byte counters, map fd and CPU count.
 * NOTE(review): presumably the OpFlowForKey type taken by the table walk
 * functions -- confirm. */
670 size_t skey,
FlowKey *flow_key,
struct timespec *ctime,
671 uint64_t pkts_cnt, uint64_t bytes_cnt,
672 int mapfd,
int cpus_count);
/* Walk an IPv4 flow table map and pass every entry to a callback.
 * NOTE(review): presumably EBPFForEachFlowV4Table, as called from
 * EBPFCheckBypassedFlowCreate() -- confirm.
 * `ctime` is forwarded to the callback, `tcfg` supplies cpus_count and
 * mode, `EBPFOpFlowForKey` is the callback invoked per entry. */
681 struct timespec *ctime,
682 struct ebpf_timeout_config *tcfg,
683 OpFlowForKey EBPFOpFlowForKey
687 int mapfd = EBPFGetMapFDByName(dev->
dev,
name);
/* `key` starts zeroed; `next_key` is filled by each iteration step. */
691 struct flowv4_keys key = {}, next_key;
694 uint64_t hash_cnt = 0;
/* The per-CPU value array below is sized by cpus_count. */
696 if (tcfg->cpus_count == 0) {
700 bool dead_flow =
false;
701 while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
702 uint64_t bytes_cnt = 0;
703 uint64_t pkts_cnt = 0;
/* Deletion targets `key`, not `next_key`. */
706 EBPFDeleteKey(mapfd, &key);
711 BPF_DECLARE_PERCPU(
struct pair, values_array, tcfg->cpus_count);
712 memset(values_array, 0,
sizeof(values_array));
713 int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
/* NOTE(review): the lookup uses next_key but this message prints the
 * ports of key -- confirm which one is intended. */
715 SCLogDebug(
"no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
716 SCLogDebug(
"errno: (%d) %s", errno, strerror(errno));
/* Sum the per-CPU packet and byte counters. */
720 for (i = 0; i < tcfg->cpus_count; i++) {
722 SCLogDebug(
"%d: Adding pkts %lu bytes %lu", i,
723 BPF_PERCPU(values_array, i).packets,
724 BPF_PERCPU(values_array, i).bytes);
725 pkts_cnt += BPF_PERCPU(values_array, i).packets;
726 bytes_cnt += BPF_PERCPU(values_array, i).bytes;
/* XDP bypass mode: ports go through ntohs(), addresses are copied as is.
 * The other assignments keep the ports and pass addresses through
 * ntohl(). */
731 if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
732 flow_key.
sp = ntohs(next_key.port16[0]);
733 flow_key.
dp = ntohs(next_key.port16[1]);
734 flow_key.
src.addr_data32[0] = next_key.src;
735 flow_key.
dst.addr_data32[0] = next_key.dst;
737 flow_key.
sp = next_key.port16[0];
738 flow_key.
dp = next_key.port16[1];
739 flow_key.
src.addr_data32[0] = ntohl(next_key.src);
740 flow_key.
dst.addr_data32[0] = ntohl(next_key.dst);
/* Only word 0 carries the IPv4 address; the other words are cleared. */
743 flow_key.
src.addr_data32[1] = 0;
744 flow_key.
src.addr_data32[2] = 0;
745 flow_key.
src.addr_data32[3] = 0;
747 flow_key.
dst.addr_data32[1] = 0;
748 flow_key.
dst.addr_data32[2] = 0;
749 flow_key.
dst.addr_data32[3] = 0;
750 flow_key.
vlan_id[0] = next_key.vlan0;
751 flow_key.
vlan_id[1] = next_key.vlan1;
752 flow_key.
vlan_id[2] = next_key.vlan2;
/* ip_proto value 1 selects TCP; UDP is the other assignment. */
753 if (next_key.ip_proto == 1) {
754 flow_key.
proto = IPPROTO_TCP;
756 flow_key.
proto = IPPROTO_UDP;
/* The callback result is kept in dead_flow. */
760 dead_flow = EBPFOpFlowForKey(&flowstats, dev, &next_key,
sizeof(next_key), &flow_key,
761 ctime, pkts_cnt, bytes_cnt,
762 mapfd, tcfg->cpus_count);
774 EBPFDeleteKey(mapfd, &key);
780 SCLogInfo(
"IPv4 bypassed flow table size: %" PRIu64, hash_cnt);
/**
 * Walk an IPv6 flow table map and pass every entry to a callback.
 *
 * \param th_v             calling thread
 * \param ctime            current time, forwarded to the callback
 * \param tcfg             configuration supplying cpus_count and mode
 * \param EBPFOpFlowForKey callback invoked for each entry
 */
791 static int EBPFForEachFlowV6Table(
ThreadVars *th_v,
793 struct timespec *ctime,
794 struct ebpf_timeout_config *tcfg,
795 OpFlowForKey EBPFOpFlowForKey
799 int mapfd = EBPFGetMapFDByName(dev->
dev,
name);
/* `key` starts zeroed; `next_key` is filled by each iteration step. */
803 struct flowv6_keys key = {}, next_key;
806 uint64_t hash_cnt = 0;
/* The per-CPU value array below is sized by cpus_count. */
808 if (tcfg->cpus_count == 0) {
/* NOTE(review): pkts_cnt is declared outside the loop and later receives
 * the callback result, whereas the IPv4 walk keeps that result in a
 * separate bool (dead_flow) -- confirm the asymmetry is intended. */
813 uint64_t pkts_cnt = 0;
814 while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
815 uint64_t bytes_cnt = 0;
/* Deletion targets `key`, not `next_key`. */
818 EBPFDeleteKey(mapfd, &key);
823 BPF_DECLARE_PERCPU(
struct pair, values_array, tcfg->cpus_count);
824 memset(values_array, 0,
sizeof(values_array));
825 int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
/* NOTE(review): this message says "v4 table" inside the IPv6 walk and
 * prints the ports of key while the lookup uses next_key -- looks like a
 * copy of the IPv4 message; confirm. */
827 SCLogDebug(
"no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
/* Sum the per-CPU packet and byte counters. */
831 for (i = 0; i < tcfg->cpus_count; i++) {
833 SCLogDebug(
"%d: Adding pkts %lu bytes %lu", i,
834 BPF_PERCPU(values_array, i).packets,
835 BPF_PERCPU(values_array, i).bytes);
836 pkts_cnt += BPF_PERCPU(values_array, i).packets;
837 bytes_cnt += BPF_PERCPU(values_array, i).bytes;
/* XDP bypass mode: ports go through ntohs(), the four address words are
 * copied as is. The other assignments keep the ports and pass every
 * address word through ntohl(). */
842 if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
843 flow_key.
sp = ntohs(next_key.port16[0]);
844 flow_key.
dp = ntohs(next_key.port16[1]);
846 flow_key.
src.addr_data32[0] = next_key.src[0];
847 flow_key.
src.addr_data32[1] = next_key.src[1];
848 flow_key.
src.addr_data32[2] = next_key.src[2];
849 flow_key.
src.addr_data32[3] = next_key.src[3];
851 flow_key.
dst.addr_data32[0] = next_key.dst[0];
852 flow_key.
dst.addr_data32[1] = next_key.dst[1];
853 flow_key.
dst.addr_data32[2] = next_key.dst[2];
854 flow_key.
dst.addr_data32[3] = next_key.dst[3];
856 flow_key.
sp = next_key.port16[0];
857 flow_key.
dp = next_key.port16[1];
859 flow_key.
src.addr_data32[0] = ntohl(next_key.src[0]);
860 flow_key.
src.addr_data32[1] = ntohl(next_key.src[1]);
861 flow_key.
src.addr_data32[2] = ntohl(next_key.src[2]);
862 flow_key.
src.addr_data32[3] = ntohl(next_key.src[3]);
864 flow_key.
dst.addr_data32[0] = ntohl(next_key.dst[0]);
865 flow_key.
dst.addr_data32[1] = ntohl(next_key.dst[1]);
866 flow_key.
dst.addr_data32[2] = ntohl(next_key.dst[2]);
867 flow_key.
dst.addr_data32[3] = ntohl(next_key.dst[3]);
869 flow_key.
vlan_id[0] = next_key.vlan0;
870 flow_key.
vlan_id[1] = next_key.vlan1;
871 flow_key.
vlan_id[2] = next_key.vlan2;
/* ip_proto value 1 selects TCP; UDP is the other assignment. */
872 if (next_key.ip_proto == 1) {
873 flow_key.
proto = IPPROTO_TCP;
875 flow_key.
proto = IPPROTO_UDP;
/* The callback result overwrites pkts_cnt. */
879 pkts_cnt = EBPFOpFlowForKey(&flowstats, dev, &next_key,
sizeof(next_key), &flow_key,
880 ctime, pkts_cnt, bytes_cnt,
881 mapfd, tcfg->cpus_count);
893 EBPFDeleteKey(mapfd, &key);
899 SCLogInfo(
"IPv6 bypassed flow table size: %" PRIu64, hash_cnt);
/**
 * Run the flow-creation callback over the IPv4 and IPv6 flow tables.
 *
 * \param th_v    calling thread
 * \param curtime current time
 * \param data    opaque pointer, used as a struct ebpf_timeout_config *
 */
904 int EBPFCheckBypassedFlowCreate(
ThreadVars *th_v,
struct timespec *curtime,
void *data)
907 struct ebpf_timeout_config *cfg = (
struct ebpf_timeout_config *)data;
/* Same callback for both tables; only the map name differs. */
909 EBPFForEachFlowV4Table(th_v, ldev,
"flow_table_v4",
911 cfg, EBPFCreateFlowForKey);
912 EBPFForEachFlowV6Table(th_v, ldev,
"flow_table_v6",
914 cfg, EBPFCreateFlowForKey);
/**
 * Register the "bypassedlist" flow storage and remember its id in
 * g_flow_storage_id.
 *
 * The storage is pointer-sized; BypassedListFree is passed as the last
 * argument (presumably the free callback) and NULL for the one before it.
 */
920 void EBPFRegisterExtension(
void)
923 g_flow_storage_id =
FlowStorageRegister(
"bypassedlist",
sizeof(
void *), NULL, BypassedListFree);
927 #ifdef HAVE_PACKET_XDP
/* Running count of CPUs added to the redirect set: reset in
 * EBPFBuildCPUSet(), incremented in EBPFRedirectMapAddCPU(), and used as
 * the key into the "cpus_available" map in EBPFAddCPUToMap(). */
929 static uint32_t g_redirect_iface_cpu_counter = 0;
/**
 * Register one CPU in the "cpu_map" and "cpus_available" maps.
 *
 * \param iface interface whose maps are updated
 * \param i     CPU number
 * \return the caller in this file treats a negative value as failure
 */
931 static int EBPFAddCPUToMap(
const char *iface, uint32_t i)
933 int cpumap = EBPFGetMapFDByName(iface,
"cpu_map");
/* Value stored in cpu_map for this CPU. */
934 uint32_t queue_size = 4096;
941 ret = bpf_map_update_elem(cpumap, &i, &queue_size, 0);
943 SCLogError(
"Create CPU entry failed (err:%d)", ret);
946 int cpus_available = EBPFGetMapFDByName(iface,
"cpus_available");
947 if (cpus_available < 0) {
/* The slot is keyed by the running counter and holds the CPU number. */
952 ret = bpf_map_update_elem(cpus_available, &g_redirect_iface_cpu_counter, &i, 0);
954 SCLogError(
"Create CPU entry failed (err:%d)", ret);
/**
 * Per-CPU callback: add CPU `i` to the maps and advance the counter.
 *
 * \param i    CPU number
 * \param data opaque pointer, passed on as the interface name
 */
960 static void EBPFRedirectMapAddCPU(
int i,
void *data)
962 if (EBPFAddCPUToMap(data, i) < 0) {
965 g_redirect_iface_cpu_counter++;
/**
 * Populate the CPU redirect maps of an interface.
 *
 * \param node configuration node
 * \param iface interface whose "cpus_count" map is written
 */
969 void EBPFBuildCPUSet(
ConfNode *node,
char *iface)
972 int mapfd = EBPFGetMapFDByName(iface,
"cpus_count");
974 SCLogError(
"Unable to find 'cpus_count' map");
/* The counter restarts from zero for each build. */
977 g_redirect_iface_cpu_counter = 0;
/* The map is written at key0 with the counter both before and after
 * EBPFRedirectMapAddCPU is handed over as callback. */
979 bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
984 EBPFRedirectMapAddCPU,
986 bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
/**
 * Store the index of a peer interface in the "tx_peer" and "tx_peer_int"
 * maps of an interface.
 *
 * \param iface     interface whose maps are updated
 * \param out_iface peer interface, resolved with if_nametoindex()
 */
1002 int EBPFSetPeerIface(
const char *iface,
const char *out_iface)
1004 int mapfd = EBPFGetMapFDByName(iface,
"tx_peer");
1009 int intmapfd = EBPFGetMapFDByName(iface,
"tx_peer_int");
1011 SCLogError(
"Unable to find 'tx_peer_int' map");
/* An index of 0 gets its own check before the maps are written. */
1016 unsigned int peer_index = if_nametoindex(out_iface);
1017 if (peer_index == 0) {
/* Both maps receive the same index at key0, with BPF_ANY as flag. */
1021 int ret = bpf_map_update_elem(mapfd, &key0, &peer_index, BPF_ANY);
1023 SCLogError(
"Create peer entry failed (err:%d)", ret);
1026 ret = bpf_map_update_elem(intmapfd, &key0, &peer_index, BPF_ANY);
1028 SCLogError(
"Create peer entry failed (err:%d)", ret);
1039 int EBPFUpdateFlow(
Flow *f,
Packet *p,
void *data)
1041 BypassedIfaceList *ifl = (BypassedIfaceList *)
FlowGetStorageById(f, g_flow_storage_id);
1052 BypassedIfaceList *ldev = ifl;
1063 BypassedIfaceList *nifl =
SCCalloc(1,
sizeof(*nifl));