33 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34 #define SC_PCAP_DONT_INCLUDE_PCAP_H 1
39 #ifdef HAVE_PACKET_EBPF
42 #include <sys/resource.h>
55 #include <bpf/libbpf.h>
60 #define BPF_MAP_MAX_COUNT 16
62 #define BYPASSED_FLOW_TIMEOUT 60
/* Per-interface registry of loaded eBPF maps (fd, name, pin/unlink state).
 * NOTE(review): extraction is truncated — the `last` counter and any other
 * members used by later code are not visible here; confirm against the
 * full file. */
74 struct bpf_maps_info {
75 struct bpf_map_item array[BPF_MAP_MAX_COUNT];
/* Singly-linked list node tracking interfaces a bypassed flow was seen on;
 * stored in per-flow storage (see EBPFRegisterExtension/EBPFUpdateFlow).
 * NOTE(review): the payload member(s) between the tag and `next` were lost
 * in extraction — verify against the full file. */
79 typedef struct BypassedIfaceList_ {
81 struct BypassedIfaceList_ *
next;
/* Destructor for a struct bpf_maps_info: for every recorded map, remove its
 * bpffs pin when marked to_unlink, then free the strdup'ed map name.
 * NOTE(review): several lines are missing from this extraction (loop-variable
 * declaration, snprintf/unlink error handling between the visible fragments)
 * — do not assume the visible statements are adjacent in the real file. */
84 static void BpfMapsInfoFree(
void *bpf)
86 struct bpf_maps_info *bpfinfo = (
struct bpf_maps_info *)bpf;
88 for (i = 0; i < bpfinfo->last; i ++) {
89 if (bpfinfo->array[i].name) {
90 if (bpfinfo->array[i].to_unlink) {
/* Pin path convention: /sys/fs/bpf/suricata-<iface>-<mapname> */
91 char pinnedpath[PATH_MAX];
92 int ret = snprintf(pinnedpath,
sizeof(pinnedpath),
93 "/sys/fs/bpf/suricata-%s-%s",
94 bpfinfo->array[i].iface,
95 bpfinfo->array[i].name);
98 ret = unlink(pinnedpath);
102 "Unable to remove %s: %s (%d)",
109 bpfinfo->array[i].name);
112 SCFree(bpfinfo->array[i].name);
/* Flow-storage destructor: walks the BypassedIfaceList and frees each node.
 * NOTE(review): the walk/free loop body is missing from this extraction. */
118 static void BypassedListFree(
void *ifl)
120 BypassedIfaceList *mifl = (BypassedIfaceList *)ifl;
121 BypassedIfaceList *nifl;
/* Delete one entry from an eBPF map by key; logs on failure.
 * NOTE(review): the error-branch condition and log-call lines between the
 * visible fragments are missing from this extraction. */
129 void EBPFDeleteKey(
int fd,
void *key)
131 int ret = bpf_map_delete_elem(fd, key);
134 "Unable to delete entry: %s (%d)",
/* Look up the bpf_maps_info registry attached to a live device by interface
 * name. NOTE(review): the device lookup that produces `data` is missing from
 * this extraction — presumably via LiveDevice storage; confirm. */
140 static struct bpf_maps_info *EBPFGetBpfMap(
const char *iface)
147 return (
struct bpf_maps_info *)data;
/* Resolve an eBPF map name (e.g. "flow_table_v4") to its file descriptor for
 * the given interface. Returns the fd on match; the not-found return value is
 * outside this extraction (presumably negative — confirm).
 * Linear scan over the fixed-size registry; NULL name entries are skipped. */
157 int EBPFGetMapFDByName(
const char *iface,
const char *name)
161 if (iface == NULL || name == NULL)
163 struct bpf_maps_info *bpf_maps = EBPFGetBpfMap(iface);
164 if (bpf_maps == NULL)
167 for (i = 0; i < BPF_MAP_MAX_COUNT; i++) {
168 if (!bpf_maps->array[i].name)
170 if (!strcmp(bpf_maps->array[i].name, name)) {
171 SCLogDebug(
"Got fd %d for eBPF map '%s'", bpf_maps->array[i].fd, name);
172 return bpf_maps->array[i].fd;
/* Open a map pinned at /sys/fs/bpf/suricata-<iface>-<file> and return its fd
 * (bpf_obj_get return value: fd on success, negative on failure).
 * NOTE(review): the iface argument to snprintf (from livedev) is on a line
 * missing from this extraction. */
179 static int EBPFLoadPinnedMapsFile(
LiveDevice *livedev,
const char *file)
181 char pinnedpath[1024];
182 snprintf(pinnedpath,
sizeof(pinnedpath),
183 "/sys/fs/bpf/suricata-%s-%s",
187 return bpf_obj_get(pinnedpath);
/* Recover map fds from bpffs pins left by a previous run, so an already
 * loaded XDP filter can be reused without reloading the object file.
 * In XDP bypass mode the two flow tables are mandatory; the xdp_filter
 * support maps (cpu_map, cpus_available, tx_peer, tx_peer_int) are then
 * collected into a freshly allocated bpf_maps_info.
 * NOTE(review): error-handling lines between fragments (fd<0 checks, goto
 * cleanup labels) are missing from this extraction — the trailing loop that
 * frees names is almost certainly the error path, not the success path. */
190 static int EBPFLoadPinnedMaps(
LiveDevice *livedev,
struct ebpf_timeout_config *config)
192 int fd_v4 = -1, fd_v6 = -1;
/* Optional "probe" pin whose presence signals maps were pinned at all. */
195 if (config->pinned_maps_name) {
196 int ret = EBPFLoadPinnedMapsFile(livedev, config->pinned_maps_name);
203 if (config->mode == AFP_MODE_XDP_BYPASS) {
205 fd_v4 = EBPFLoadPinnedMapsFile(livedev,
"flow_table_v4");
211 fd_v6 = EBPFLoadPinnedMapsFile(livedev,
"flow_table_v6");
214 "Found a flow_table_v4 map but no flow_table_v6 map");
219 struct bpf_maps_info *bpf_map_data =
SCCalloc(1,
sizeof(*bpf_map_data));
220 if (bpf_map_data == NULL) {
225 if (config->mode == AFP_MODE_XDP_BYPASS) {
226 bpf_map_data->array[0].fd = fd_v4;
227 bpf_map_data->array[0].name =
SCStrdup(
"flow_table_v4");
228 if (bpf_map_data->array[0].name == NULL) {
231 bpf_map_data->array[1].fd = fd_v6;
232 bpf_map_data->array[1].name =
SCStrdup(
"flow_table_v6");
233 if (bpf_map_data->array[1].name == NULL) {
236 bpf_map_data->last = 2;
238 bpf_map_data->last = 0;
/* xdp_filter auxiliary maps, appended in order after the flow tables. */
242 int fd = EBPFLoadPinnedMapsFile(livedev,
"cpu_map");
244 bpf_map_data->array[bpf_map_data->last].fd = fd;
245 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"cpu_map");
246 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
249 bpf_map_data->last++;
251 fd = EBPFLoadPinnedMapsFile(livedev,
"cpus_available");
253 bpf_map_data->array[bpf_map_data->last].fd = fd;
254 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"cpus_available");
255 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
258 bpf_map_data->last++;
260 fd = EBPFLoadPinnedMapsFile(livedev,
"tx_peer");
262 bpf_map_data->array[bpf_map_data->last].fd = fd;
263 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"tx_peer");
264 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
267 bpf_map_data->last++;
269 fd = EBPFLoadPinnedMapsFile(livedev,
"tx_peer_int");
271 bpf_map_data->array[bpf_map_data->last].fd = fd;
272 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(
"tx_peer_int");
273 if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
276 bpf_map_data->last++;
/* Cleanup path (likely reached via goto on any failure above):
 * release all strdup'ed names and reset the registry. */
287 for (
int i = 0; i < bpf_map_data->last; i++) {
288 SCFree(bpf_map_data->array[i].name);
290 bpf_map_data->last = 0;
/* Load an eBPF object file, select the program in `section`, load it into
 * the kernel, record (and optionally pin) every map it defines, and return
 * the program fd through *val.
 * Steps visible here: optional reuse of pinned maps, RLIMIT_MEMLOCK raise,
 * bpf_object__open/load, per-program section match, map registration.
 * NOTE(review): many lines (error returns, `found` init, pfd checks, the
 * final return) are missing from this extraction — fragment order only. */
307 int EBPFLoadFile(
const char *iface,
const char *path,
const char * section,
308 int *val,
struct ebpf_timeout_config *config)
312 struct bpf_object *bpfobj = NULL;
314 struct bpf_map *map = NULL;
/* When XDP code uses pinned maps, try to adopt a previous run's maps and
 * skip reloading the filter entirely. */
322 if (config->flags & EBPF_XDP_CODE && config->flags & EBPF_PINNED_MAPS) {
324 if (EBPFLoadPinnedMaps(livedev, config) == 0) {
325 SCLogInfo(
"Loaded pinned maps, will use already loaded eBPF filter");
/* eBPF map/program memory counts against RLIMIT_MEMLOCK; lift the cap. */
337 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
338 if (setrlimit(RLIMIT_MEMLOCK, &r) != 0) {
340 strerror(errno), errno);
345 bpfobj = bpf_object__open(path);
346 long error = libbpf_get_error(bpfobj);
349 libbpf_strerror(error, err_buf,
352 "Unable to load eBPF objects in '%s': %s",
/* Hardware offload: bind every program and map to the target ifindex. */
357 if (config->flags & EBPF_XDP_HW_MODE) {
358 unsigned int ifindex = if_nametoindex(iface);
359 bpf_object__for_each_program(bpfprog, bpfobj) {
360 bpf_program__set_ifindex(bpfprog, ifindex);
362 bpf_map__for_each(map, bpfobj) {
363 bpf_map__set_ifindex(map, ifindex);
/* Find the requested ELF section and set the program type accordingly.
 * bpf_program__title() was deprecated in favor of section_name(). */
368 bpf_object__for_each_program(bpfprog, bpfobj) {
369 #ifdef HAVE_BPF_PROGRAM__SECTION_NAME
370 const char *title = bpf_program__section_name(bpfprog);
372 const char *title = bpf_program__title(bpfprog, 0);
374 if (!strcmp(title, section)) {
375 if (config->flags & EBPF_SOCKET_FILTER) {
376 bpf_program__set_socket_filter(bpfprog);
378 bpf_program__set_xdp(bpfprog);
385 if (found ==
false) {
387 "No section '%s' in '%s' file. Will not be able to use the file",
393 err = bpf_object__load(bpfobj);
397 "Permission issue when loading eBPF object"
398 " (check libbpf error on stdout)");
401 libbpf_strerror(err, buf,
sizeof(buf));
403 "Unable to load eBPF object: %s (%d)",
/* Register every map of the object so later code can fetch fds by name. */
413 struct bpf_maps_info *bpf_map_data =
SCCalloc(1,
sizeof(*bpf_map_data));
414 if (bpf_map_data == NULL) {
420 bpf_map__for_each(map, bpfobj) {
421 if (bpf_map_data->last == BPF_MAP_MAX_COUNT) {
425 SCLogDebug(
"Got a map '%s' with fd '%d'", bpf_map__name(map), bpf_map__fd(map));
426 bpf_map_data->array[bpf_map_data->last].fd = bpf_map__fd(map);
427 bpf_map_data->array[bpf_map_data->last].name =
SCStrdup(bpf_map__name(map));
428 snprintf(bpf_map_data->array[bpf_map_data->last].iface, IFNAMSIZ,
430 if (!bpf_map_data->array[bpf_map_data->last].name) {
432 BpfMapsInfoFree(bpf_map_data);
435 bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
436 if (config->flags & EBPF_PINNED_MAPS) {
437 SCLogConfig(
"Pinning: %d to %s", bpf_map_data->array[bpf_map_data->last].fd,
438 bpf_map_data->array[bpf_map_data->last].name);
440 snprintf(buf,
sizeof(buf),
"/sys/fs/bpf/suricata-%s-%s", iface,
441 bpf_map_data->array[bpf_map_data->last].name);
442 int ret = bpf_obj_pin(bpf_map_data->array[bpf_map_data->last].fd, buf);
/* XDP pins persist across restarts (to_unlink = 0); other modes clean
 * their pins up on exit (to_unlink = 1). */
447 if (config->flags & EBPF_XDP_CODE) {
448 bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
450 bpf_map_data->array[bpf_map_data->last].to_unlink = 1;
453 bpf_map_data->last++;
463 pfd = bpf_program__fd(bpfprog);
466 "Unable to find %s section", section);
470 SCLogInfo(
"Successfully loaded eBPF file '%s' on '%s'", path, iface);
/* Attach (or detach, fd semantics per bpf_set_link_xdp_fd) an XDP program to
 * the interface identified by name, using the given attach flags.
 * NOTE(review): error-branch conditions and return statements are missing
 * from this extraction. bpf_set_link_xdp_fd is deprecated in newer libbpf
 * in favor of bpf_xdp_attach — flag for future migration. */
483 int EBPFSetupXDP(
const char *iface,
int fd, uint8_t
flags)
485 #ifdef HAVE_PACKET_XDP
486 unsigned int ifindex = if_nametoindex(iface);
489 "Unknown interface '%s'", iface);
492 int err = bpf_set_link_xdp_fd(ifindex, fd,
flags);
495 libbpf_strerror(err, buf,
sizeof(buf));
/* NOTE(review): this extraction fragment spans (at least) two functions whose
 * opening lines are missing — by parameter shape the first matches the
 * OpFlowForKey callback signature (bypass-flow creation: allocates an
 * EBPFBypassData and copies the map key), the second updates an existing
 * flow's bypass data. Confirm names/boundaries against the full file. */
510 size_t skey,
FlowKey *flow_key,
struct timespec *ctime,
511 uint64_t pkts_cnt, uint64_t bytes_cnt,
512 int mapfd,
int cpus_count)
/* Allocate per-flow bypass bookkeeping; zeroed so key slots start NULL. */
536 EBPFBypassData *eb =
SCCalloc(1,
sizeof(EBPFBypassData));
549 memcpy(mkey, key, skey);
552 eb->cpus_count = cpus_count;
/* --- second function fragment: update path on an existing FlowBypassInfo --- */
560 EBPFBypassData *eb = (EBPFBypassData *) fc->
bypass_data;
/* Both direction keys already captured: nothing more to record. */
567 if (eb->key[0] && eb->key[1]) {
578 memcpy(mkey, key, skey);
/* Free an EBPFBypassData and (per the missing body) presumably its stored
 * direction keys — confirm against the full file. */
586 void EBPFBypassFree(
void *data)
588 EBPFBypassData *eb = (EBPFBypassData *)data;
/* NOTE(review): function name line is missing — by parameters and call site
 * in EBPFBypassUpdate this is EBPFBypassCheckHalfFlow: look up one direction
 * key in the per-CPU flow map and sum packet/byte counters across CPUs. */
607 EBPFBypassData *eb,
void *key,
611 uint64_t pkts_cnt = 0;
612 uint64_t bytes_cnt = 0;
/* Per-CPU array map: lookup returns one struct pair per possible CPU. */
615 BPF_DECLARE_PERCPU(
struct pair, values_array, eb->cpus_count);
616 memset(values_array, 0,
sizeof(values_array));
617 int res = bpf_map_lookup_elem(eb->mapfd, key, values_array);
619 SCLogDebug(
"errno: (%d) %s", errno, strerror(errno));
622 for (i = 0; i < eb->cpus_count; i++) {
624 SCLogDebug(
"%d: Adding pkts %lu bytes %lu", i,
625 BPF_PERCPU(values_array, i).packets,
626 BPF_PERCPU(values_array, i).bytes);
627 pkts_cnt += BPF_PERCPU(values_array, i).packets;
628 bytes_cnt += BPF_PERCPU(values_array, i).bytes;
/* Periodic bypass maintenance for one flow: probe both direction keys for
 * activity; when neither half saw traffic (branch condition missing from
 * this extraction), evict both entries from the kernel map.
 * `|=` (not `||`) ensures the second half-flow check always runs. */
652 bool EBPFBypassUpdate(
Flow *f,
void *data, time_t tsec)
654 EBPFBypassData *eb = (EBPFBypassData *)data;
662 bool activity = EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[0], 0);
663 activity |= EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[1], 1);
667 EBPFDeleteKey(eb->mapfd, eb->key[0]);
668 EBPFDeleteKey(eb->mapfd, eb->key[1]);
/* NOTE(review): fragment begins with the tail of the OpFlowForKey callback
 * typedef (677-680), then EBPFForEachFlowV4Table: iterate the IPv4 eBPF flow
 * table with bpf_map_get_next_key, aggregate per-CPU counters for each entry,
 * rebuild a FlowKey from the map key, and hand both to the callback (which
 * may mark the entry dead → deleted from the map). Many lines (declarations
 * of flow_key/flowstats, counters, early returns) are missing here. */
678 size_t skey,
FlowKey *flow_key,
struct timespec *ctime,
679 uint64_t pkts_cnt, uint64_t bytes_cnt,
680 int mapfd,
int cpus_count);
689 struct timespec *ctime,
690 struct ebpf_timeout_config *tcfg,
691 OpFlowForKey EBPFOpFlowForKey
695 int mapfd = EBPFGetMapFDByName(dev->
dev, name);
699 struct flowv4_keys key = {}, next_key;
702 uint64_t hash_cnt = 0;
/* cpus_count == 0 would make the per-CPU VLA below zero-sized; bail. */
704 if (tcfg->cpus_count == 0) {
708 bool dead_flow =
false;
709 while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
710 uint64_t bytes_cnt = 0;
711 uint64_t pkts_cnt = 0;
/* Deferred deletion: the previous iteration's key is removed only after
 * get_next_key has advanced past it, keeping iteration stable. */
714 EBPFDeleteKey(mapfd, &key);
719 BPF_DECLARE_PERCPU(
struct pair, values_array, tcfg->cpus_count);
720 memset(values_array, 0,
sizeof(values_array));
721 int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
723 SCLogDebug(
"no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
724 SCLogDebug(
"errno: (%d) %s", errno, strerror(errno));
728 for (i = 0; i < tcfg->cpus_count; i++) {
730 SCLogDebug(
"%d: Adding pkts %lu bytes %lu", i,
731 BPF_PERCPU(values_array, i).packets,
732 BPF_PERCPU(values_array, i).bytes);
733 pkts_cnt += BPF_PERCPU(values_array, i).packets;
734 bytes_cnt += BPF_PERCPU(values_array, i).bytes;
/* XDP bypass stores ports in network order and addresses in host-usable
 * form; other modes are the inverse — hence the two conversion branches. */
739 if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
740 flow_key.
sp = ntohs(next_key.port16[0]);
741 flow_key.
dp = ntohs(next_key.port16[1]);
742 flow_key.
src.addr_data32[0] = next_key.src;
743 flow_key.
dst.addr_data32[0] = next_key.dst;
745 flow_key.
sp = next_key.port16[0];
746 flow_key.
dp = next_key.port16[1];
747 flow_key.
src.addr_data32[0] = ntohl(next_key.src);
748 flow_key.
dst.addr_data32[0] = ntohl(next_key.dst);
/* IPv4: zero the upper address words. */
751 flow_key.
src.addr_data32[1] = 0;
752 flow_key.
src.addr_data32[2] = 0;
753 flow_key.
src.addr_data32[3] = 0;
755 flow_key.
dst.addr_data32[1] = 0;
756 flow_key.
dst.addr_data32[2] = 0;
757 flow_key.
dst.addr_data32[3] = 0;
758 flow_key.
vlan_id[0] = next_key.vlan0;
759 flow_key.
vlan_id[1] = next_key.vlan1;
/* NOTE(review): ip_proto == 1 maps to TCP here — this is the eBPF key
 * struct's own encoding, not the IANA protocol number; confirm against
 * the companion eBPF C source. */
760 if (next_key.ip_proto == 1) {
761 flow_key.
proto = IPPROTO_TCP;
763 flow_key.
proto = IPPROTO_UDP;
766 dead_flow = EBPFOpFlowForKey(&flowstats, dev, &next_key,
sizeof(next_key), &flow_key,
767 ctime, pkts_cnt, bytes_cnt,
768 mapfd, tcfg->cpus_count);
/* Final pending key removal after the loop exits. */
780 EBPFDeleteKey(mapfd, &key);
786 SCLogInfo(
"IPv4 bypassed flow table size: %" PRIu64, hash_cnt);
/* IPv6 twin of EBPFForEachFlowV4Table: walk the v6 eBPF flow table,
 * aggregate per-CPU counters, rebuild a FlowKey (all four address words),
 * and invoke the callback. Same deferred-deletion iteration pattern.
 * NOTE(review): extraction is missing many lines; see sibling function for
 * the shared structure. */
797 static int EBPFForEachFlowV6Table(
ThreadVars *th_v,
799 struct timespec *ctime,
800 struct ebpf_timeout_config *tcfg,
801 OpFlowForKey EBPFOpFlowForKey
805 int mapfd = EBPFGetMapFDByName(dev->
dev, name);
809 struct flowv6_keys key = {}, next_key;
812 uint64_t hash_cnt = 0;
814 if (tcfg->cpus_count == 0) {
/* NOTE(review): pkts_cnt is declared OUTSIDE the loop here, unlike the V4
 * function (inside) — and is later overwritten by the callback's return
 * value (see below); looks like a copy/paste divergence worth confirming. */
819 uint64_t pkts_cnt = 0;
820 while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
821 uint64_t bytes_cnt = 0;
824 EBPFDeleteKey(mapfd, &key);
829 BPF_DECLARE_PERCPU(
struct pair, values_array, tcfg->cpus_count);
830 memset(values_array, 0,
sizeof(values_array));
831 int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
/* NOTE(review): message says "v4 table" inside the v6 walker — apparent
 * copy/paste defect in the debug string (cannot be altered in a
 * doc-only pass; flag for a code fix). */
833 SCLogDebug(
"no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
837 for (i = 0; i < tcfg->cpus_count; i++) {
839 SCLogDebug(
"%d: Adding pkts %lu bytes %lu", i,
840 BPF_PERCPU(values_array, i).packets,
841 BPF_PERCPU(values_array, i).bytes);
842 pkts_cnt += BPF_PERCPU(values_array, i).packets;
843 bytes_cnt += BPF_PERCPU(values_array, i).bytes;
848 if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
849 flow_key.
sp = ntohs(next_key.port16[0]);
850 flow_key.
dp = ntohs(next_key.port16[1]);
852 flow_key.
src.addr_data32[0] = next_key.src[0];
853 flow_key.
src.addr_data32[1] = next_key.src[1];
854 flow_key.
src.addr_data32[2] = next_key.src[2];
855 flow_key.
src.addr_data32[3] = next_key.src[3];
857 flow_key.
dst.addr_data32[0] = next_key.dst[0];
858 flow_key.
dst.addr_data32[1] = next_key.dst[1];
859 flow_key.
dst.addr_data32[2] = next_key.dst[2];
860 flow_key.
dst.addr_data32[3] = next_key.dst[3];
862 flow_key.
sp = next_key.port16[0];
863 flow_key.
dp = next_key.port16[1];
865 flow_key.
src.addr_data32[0] = ntohl(next_key.src[0]);
866 flow_key.
src.addr_data32[1] = ntohl(next_key.src[1]);
867 flow_key.
src.addr_data32[2] = ntohl(next_key.src[2]);
868 flow_key.
src.addr_data32[3] = ntohl(next_key.src[3]);
870 flow_key.
dst.addr_data32[0] = ntohl(next_key.dst[0]);
871 flow_key.
dst.addr_data32[1] = ntohl(next_key.dst[1]);
872 flow_key.
dst.addr_data32[2] = ntohl(next_key.dst[2]);
873 flow_key.
dst.addr_data32[3] = ntohl(next_key.dst[3]);
875 flow_key.
vlan_id[0] = next_key.vlan0;
876 flow_key.
vlan_id[1] = next_key.vlan1;
877 if (next_key.ip_proto == 1) {
878 flow_key.
proto = IPPROTO_TCP;
880 flow_key.
proto = IPPROTO_UDP;
/* NOTE(review): V4 assigns this return to `dead_flow`; here it clobbers
 * pkts_cnt — suspected copy/paste bug (flag for a code-level fix). */
883 pkts_cnt = EBPFOpFlowForKey(&flowstats, dev, &next_key,
sizeof(next_key), &flow_key,
884 ctime, pkts_cnt, bytes_cnt,
885 mapfd, tcfg->cpus_count);
897 EBPFDeleteKey(mapfd, &key);
903 SCLogInfo(
"IPv6 bypassed flow table size: %" PRIu64, hash_cnt);
/* Periodic hook: sweep both eBPF flow tables and create Suricata flows for
 * kernel-bypassed entries via the EBPFCreateFlowForKey callback.
 * NOTE(review): the device-iteration loop producing `ldev` is missing from
 * this extraction. */
908 int EBPFCheckBypassedFlowCreate(
ThreadVars *th_v,
struct timespec *curtime,
void *data)
911 struct ebpf_timeout_config *cfg = (
struct ebpf_timeout_config *)data;
913 EBPFForEachFlowV4Table(th_v, ldev,
"flow_table_v4",
915 cfg, EBPFCreateFlowForKey);
916 EBPFForEachFlowV6Table(th_v, ldev,
"flow_table_v6",
918 cfg, EBPFCreateFlowForKey);
/* One-time init: register per-flow storage holding a BypassedIfaceList
 * pointer, freed by BypassedListFree when the flow is recycled. */
924 void EBPFRegisterExtension(
void)
927 g_flow_storage_id =
FlowStorageRegister(
"bypassedlist",
sizeof(
void *), NULL, BypassedListFree);
931 #ifdef HAVE_PACKET_XDP
/* Running index into cpus_available while building the CPU redirect set.
 * NOTE(review): file-scope mutable state — assumes single-threaded config
 * phase; confirm. */
933 static uint32_t g_redirect_iface_cpu_counter = 0;
/* Add CPU `i` to the interface's cpu_map (with a fixed redirect queue size)
 * and record it in the cpus_available array at the current counter slot.
 * Error-return lines are missing from this extraction. */
935 static int EBPFAddCPUToMap(
const char *iface, uint32_t i)
937 int cpumap = EBPFGetMapFDByName(iface,
"cpu_map");
/* Per-CPU redirect queue depth; magic constant shared with the XDP side. */
938 uint32_t queue_size = 4096;
945 ret = bpf_map_update_elem(cpumap, &i, &queue_size, 0);
950 int cpus_available = EBPFGetMapFDByName(iface,
"cpus_available");
951 if (cpus_available < 0) {
956 ret = bpf_map_update_elem(cpus_available, &g_redirect_iface_cpu_counter, &i, 0);
/* CPU-affinity iteration callback: `data` is the interface name. Counter is
 * only advanced on success, keeping cpus_available slots dense. */
964 static void EBPFRedirectMapAddCPU(
int i,
void *data)
966 if (EBPFAddCPUToMap(data, i) < 0) {
968 "Unable to add CPU %d to set", i);
970 g_redirect_iface_cpu_counter++;
/* Build the CPU redirect set for an interface from config: reset the
 * cpus_count map slot, add each configured CPU (via EBPFRedirectMapAddCPU),
 * then write the final count back.
 * NOTE(review): the BuildCpusetWithCallback invocation and key0 declaration
 * are partially missing from this extraction. */
974 void EBPFBuildCPUSet(
ConfNode *node,
char *iface)
977 int mapfd = EBPFGetMapFDByName(iface,
"cpus_count");
980 "Unable to find 'cpus_count' map");
983 g_redirect_iface_cpu_counter = 0;
/* First write zeroes the count before population... */
985 bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
990 EBPFRedirectMapAddCPU,
/* ...second write publishes the final CPU count. */
992 bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
/* Configure XDP redirect peer: store out_iface's ifindex in both tx_peer
 * (devmap used by bpf_redirect_map) and tx_peer_int (plain array mirror).
 * Error-return lines after each failed lookup/update are missing from this
 * extraction. */
1008 int EBPFSetPeerIface(
const char *iface,
const char *out_iface)
1010 int mapfd = EBPFGetMapFDByName(iface,
"tx_peer");
1013 "Unable to find 'tx_peer' map");
1016 int intmapfd = EBPFGetMapFDByName(iface,
"tx_peer_int");
1019 "Unable to find 'tx_peer_int' map");
/* if_nametoindex returns 0 when the peer interface does not exist. */
1024 unsigned int peer_index = if_nametoindex(out_iface);
1025 if (peer_index == 0) {
1029 int ret = bpf_map_update_elem(mapfd, &key0, &peer_index, BPF_ANY);
1034 ret = bpf_map_update_elem(intmapfd, &key0, &peer_index, BPF_ANY);
1047 int EBPFUpdateFlow(
Flow *f,
Packet *p,
void *data)
1049 BypassedIfaceList *ifl = (BypassedIfaceList *)
FlowGetStorageById(f, g_flow_storage_id);
1060 BypassedIfaceList *ldev = ifl;
1071 BypassedIfaceList *nifl =
SCCalloc(1,
sizeof(*nifl));