32 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
33 #define SC_PCAP_DONT_INCLUDE_PCAP_H 1
68 #if HAVE_LINUX_IF_ETHER_H
69 #include <linux/if_ether.h>
106 SCLogError(
"Error creating thread %s: you do not have "
107 "support for AF_XDP enabled, on Linux host please recompile "
108 "with --enable-af-xdp",
/* Capture tunables for the AF_XDP source. */
/* NOTE(review): presumably the poll() timeout in milliseconds -- the poll call is not visible here, confirm. */
115 #define POLL_TIMEOUT 100
/* Frame count and per-frame size are taken from the XSK default macros. */
116 #define NUM_FRAMES XSK_RING_PROD__DEFAULT_NUM_DESCS
117 #define FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
/*
 * Byte length of the UMEM buffer: room for 2 * NUM_FRAMES frames.
 * The same length is used for mmap(), xsk_umem__create() and munmap().
 */
118 #define MEM_BYTES (NUM_FRAMES * FRAME_SIZE * 2)
/* Delay handed to usleep() (microseconds) around reconnect attempts. */
119 #define RECONNECT_TIMEOUT 500000
/* Socket state values; AFXDPSwitchState() stores one of these in afxdp_state. */
122 enum state { AFXDP_STATE_DOWN, AFXDP_STATE_UP };
124 struct XskInitProtect {
131 struct xsk_umem *umem;
132 struct xsk_ring_prod fq;
133 struct xsk_ring_cons cq;
134 struct xsk_umem_config cfg;
135 int mmap_alignment_flag;
138 struct QueueAssignment {
144 struct xsk_ring_cons rx;
145 struct xsk_ring_prod tx;
146 struct xsk_socket *xsk;
149 struct QueueAssignment queue;
152 struct xsk_socket_config cfg;
153 bool enable_busy_poll;
154 uint32_t busy_poll_time;
155 uint32_t busy_poll_budget;
163 typedef struct AFXDPThreadVars_ {
176 struct UmemInfo umem;
177 struct XskSockInfo xsk;
178 uint32_t gro_flush_timeout;
179 uint32_t napi_defer_hard_irqs;
188 uint16_t capture_afxdp_packets;
189 uint16_t capture_kernel_drops;
190 uint16_t capture_afxdp_poll;
191 uint16_t capture_afxdp_poll_timeout;
192 uint16_t capture_afxdp_poll_failed;
193 uint16_t capture_afxdp_empty_reads;
194 uint16_t capture_afxdp_failed_reads;
195 uint16_t capture_afxdp_acquire_pkt_failed;
199 static void ReceiveAFXDPThreadExitStats(
ThreadVars *,
void *);
/*
 * Queries XDP_STATISTICS on this thread's XSK socket and, on success,
 * derives a combined drop total and adds ptv->pkts to the
 * capture_afxdp_packets counter.
 *
 * ptv: per-thread AF_XDP state; its XSK socket supplies the fd queried.
 */
239 static inline void AFXDPDumpCounters(AFXDPThreadVars *ptv)
241 struct xdp_statistics stats;
242 socklen_t
len =
sizeof(
struct xdp_statistics);
243 int fd = xsk_socket__fd(ptv->xsk.xsk);
/* Everything below is skipped when getsockopt() fails (returns < 0). */
245 if (getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &
len) >= 0) {
/* Drop total = rx_dropped + rx_invalid_descs + rx_ring_full. */
246 uint64_t rx_dropped = stats.rx_dropped + stats.rx_invalid_descs + stats.rx_ring_full;
/* ptv->pkts is added to the counter (not assigned). */
250 StatsAddUI64(ptv->tv, ptv->capture_afxdp_packets, ptv->pkts);
255 SCLogDebug(
"(%s) Kernel: Packets %" PRIu64
", bytes %" PRIu64
", dropped %" PRIu64
"",
/*
 * Gives this thread a queue number if it does not have one yet.
 *
 * When queue.assigned is false, the queue number is read atomically from
 * the shared xsk_protect.queue_num and the queue is then marked assigned,
 * so a later call leaves the number unchanged.
 * NOTE(review): how xsk_protect.queue_num is advanced between threads is
 * not visible in this view -- confirm against the full function.
 */
287 static TmEcode AFXDPAssignQueueID(AFXDPThreadVars *ptv)
289 if (ptv->xsk.queue.assigned ==
false) {
290 ptv->xsk.queue.queue_num =
SC_ATOMIC_GET(xsk_protect.queue_num);
294 ptv->xsk.queue.assigned =
true;
/*
 * Emits a debug message when this thread's queue number equals
 * ptv->threads - 1.
 * NOTE(review): presumably that identifies the last capture thread to
 * start -- confirm queue numbers are handed out in increasing order.
 */
299 static void AFXDPAllThreadsRunning(AFXDPThreadVars *ptv)
/* queue_num is cast to int so it is compared against threads - 1 as an int. */
302 if ((ptv->threads - 1) == (
int)ptv->xsk.queue.queue_num) {
303 SCLogDebug(
"All AF_XDP capture threads are running.");
/*
 * Maps the UMEM packet buffer for this thread.
 *
 * Requests a private, anonymous, read/write mapping of MEM_BYTES bytes and
 * stores the result in ptv->umem.buf. The mapping flags also include
 * ptv->umem.mmap_alignment_flag.
 */
308 static TmEcode AcquireBuffer(AFXDPThreadVars *ptv)
310 int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | ptv->umem.mmap_alignment_flag;
311 ptv->umem.buf = mmap(NULL, MEM_BYTES, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
/* mmap() signals failure with MAP_FAILED, not NULL. */
313 if (ptv->umem.buf == MAP_FAILED) {
/*
 * Creates the UMEM over ptv->umem.buf (MEM_BYTES long) with
 * xsk_umem__create(), which also receives the fill (fq) and completion (cq)
 * rings. An error with strerror(errno) is logged on the failure path.
 */
321 static TmEcode ConfigureXSKUmem(AFXDPThreadVars *ptv)
323 if (xsk_umem__create(&ptv->umem.umem, ptv->umem.buf, MEM_BYTES, &ptv->umem.fq, &ptv->umem.cq,
325 SCLogError(
"failed to create umem: %s", strerror(errno));
/*
 * Populates the UMEM fill ring with cnt frame addresses.
 *
 * ptv: per-thread AF_XDP state owning the fill ring (umem.fq).
 * cnt: number of fill-ring entries to reserve, fill and submit.
 */
332 static TmEcode InitFillRing(AFXDPThreadVars *ptv,
const uint32_t cnt)
/* Reserve cnt slots; idx_fq receives the index of the first reserved slot. */
336 uint32_t ret = xsk_ring_prod__reserve(&ptv->umem.fq, cnt, &idx_fq);
338 SCLogError(
"Failed to initialise the fill ring.");
/* Slot i is given byte offset i * FRAME_SIZE, i.e. consecutive frames. */
342 for (uint32_t i = 0; i < cnt; i++) {
343 *xsk_ring_prod__fill_addr(&ptv->umem.fq, idx_fq++) = i * FRAME_SIZE;
/* Submit all cnt filled entries on the fill ring in one call. */
346 xsk_ring_prod__submit(&ptv->umem.fq, cnt);
355 static TmEcode WriteLinuxTunables(AFXDPThreadVars *ptv)
/*
 * Applies the busy-poll socket options to this thread's XSK socket.
 *
 * The function branches first on ptv->xsk.enable_busy_poll being false.
 * The setsockopt() calls are compiled only when SO_PREFER_BUSY_POLL,
 * SO_BUSY_POLL and SO_BUSY_POLL_BUDGET are all defined.
 */
380 static TmEcode ConfigureBusyPolling(AFXDPThreadVars *ptv)
382 if (!ptv->xsk.enable_busy_poll) {
390 SCLogWarning(
"Kernel version older than required: v5.11,"
391 " upgrade kernel version to use 'enable-busy-poll' option.");
395 #if defined SO_PREFER_BUSY_POLL && defined SO_BUSY_POLL && defined SO_BUSY_POLL_BUDGET
396 const int fd = xsk_socket__fd(ptv->xsk.xsk);
/* Each option is set through the same sock_opt variable; a negative return is treated as failure. */
403 if (setsockopt(fd, SOL_SOCKET, SO_PREFER_BUSY_POLL, (
void *)&sock_opt,
sizeof(sock_opt)) < 0) {
/* SO_BUSY_POLL takes the configured busy_poll_time. */
407 sock_opt = ptv->xsk.busy_poll_time;
408 if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL, (
void *)&sock_opt,
sizeof(sock_opt)) < 0) {
/* SO_BUSY_POLL_BUDGET takes the configured busy_poll_budget. */
412 sock_opt = ptv->xsk.busy_poll_budget;
413 if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL_BUDGET, (
void *)&sock_opt,
sizeof(sock_opt)) < 0) {
420 "Kernel does not support busy poll, upgrade kernel or disable \"enable-busy-poll\".");
/*
 * Records a new socket state in ptv->afxdp_state.
 *
 * state: callers in this file pass AFXDP_STATE_UP or AFXDP_STATE_DOWN.
 */
425 static void AFXDPSwitchState(AFXDPThreadVars *ptv,
int state)
427 ptv->afxdp_state = state;
/*
 * Creates the XSK socket for this thread and marks the thread state UP.
 *
 * The socket is created on device ptv->livedev->dev for queue
 * ptv->xsk.queue.queue_num, using the thread's UMEM, its rx/tx rings and
 * the socket config in ptv->xsk.cfg.
 */
430 static TmEcode OpenXSKSocket(AFXDPThreadVars *ptv)
/* A non-zero return is a failure; its negation is passed to strerror(). */
441 if ((ret = xsk_socket__create(&ptv->xsk.xsk, ptv->livedev->dev, ptv->xsk.queue.queue_num,
442 ptv->umem.umem, &ptv->xsk.rx, &ptv->xsk.tx, &ptv->xsk.cfg))) {
443 SCLogError(
"Failed to create socket: %s", strerror(-ret));
446 SCLogDebug(
"bind to %s on queue %u", ptv->iface, ptv->xsk.queue.queue_num);
/* Store the socket fd with POLLIN as the requested event. */
449 ptv->xsk.fd.fd = xsk_socket__fd(ptv->xsk.xsk);
450 ptv->xsk.fd.events = POLLIN;
453 AFXDPSwitchState(ptv, AFXDP_STATE_UP);
/*
 * Releases the XSK socket and UMEM handle of this thread and clears the
 * fill and completion ring structures.
 */
459 static void AFXDPCloseSocket(AFXDPThreadVars *ptv)
462 xsk_socket__delete(ptv->xsk.xsk);
/* The UMEM is deleted only if one exists; the pointer is then reset to NULL. */
466 if (ptv->umem.umem) {
467 xsk_umem__delete(ptv->umem.umem);
468 ptv->umem.umem = NULL;
/* Zero both ring structs. */
471 memset(&ptv->umem.fq, 0,
sizeof(
struct xsk_ring_prod));
472 memset(&ptv->umem.cq, 0,
sizeof(
struct xsk_ring_cons));
/*
 * Socket bring-up sequence for one capture thread.
 *
 * Visible steps: fill the fill ring with NUM_FRAMES * 2 entries, then
 * look up the XDP program id for ptv->ifindex into ptv->prog_id.
 */
475 static TmEcode AFXDPSocketCreation(AFXDPThreadVars *ptv)
481 if (InitFillRing(ptv, NUM_FRAMES * 2) !=
TM_ECODE_OK) {
492 " performance may be reduced.");
/*
 * Two lookup calls are present; bpf_xdp_query_id() is used when
 * HAVE_BPF_XDP_QUERY_ID is defined.
 * NOTE(review): bpf_get_link_xdp_id() is presumably the #else branch --
 * the directive is not visible here, confirm.
 * NOTE(review): both error messages say "attach" although the calls look
 * like id queries -- confirm the wording is intended.
 */
496 #ifdef HAVE_BPF_XDP_QUERY_ID
497 if (bpf_xdp_query_id(ptv->ifindex, ptv->xsk.cfg.xdp_flags, &ptv->prog_id)) {
498 SCLogError(
"Failed to attach eBPF program to interface: %s", ptv->livedev->dev);
502 if (bpf_get_link_xdp_id(ptv->ifindex, &ptv->prog_id, ptv->xsk.cfg.xdp_flags)) {
503 SCLogError(
"Failed to attach eBPF program to interface: %s", ptv->livedev->dev);
/*
 * Attempts to bring the capture socket back after it went down.
 *
 * Closes the current socket, sleeps for RECONNECT_TIMEOUT microseconds,
 * then inspects the flags of interface ptv->iface.
 */
517 static TmEcode AFXDPTryReopen(AFXDPThreadVars *ptv)
519 AFXDPCloseSocket(ptv);
520 usleep(RECONNECT_TIMEOUT);
/* -1 from GetIfaceFlags() is handled as "flags unavailable". */
522 int if_flags = GetIfaceFlags(ptv->iface);
523 if (if_flags == -1) {
524 SCLogDebug(
"Couldn't get flags for interface '%s'", ptv->iface);
/*
 * NOTE(review): this branch is taken only when BOTH IFF_UP and IFF_RUNNING
 * are clear; an interface with just one of them set is not reported as
 * down -- confirm that is intended.
 */
526 }
else if ((if_flags & (IFF_UP | IFF_RUNNING)) == 0) {
527 SCLogDebug(
"Interface '%s' is down", ptv->iface);
535 SCLogInfo(
"Interface '%s' is back", ptv->iface);
/*
 * Packet release hook for AF_XDP packets.
 *
 * Writes to the fill-ring slot p->afxdp_v.fq_idx of the ring whose pointer
 * is kept in p->afxdp_v.fq (cast to struct xsk_ring_prod *).
 */
548 static void AFXDPReleasePacket(
Packet *p)
550 *xsk_ring_prod__fill_addr((
struct xsk_ring_prod *)p->afxdp_v.fq, p->afxdp_v.fq_idx) =
/*
 * Dumps the counters at most once per distinct time() value.
 *
 * ptv:       per-thread AF_XDP state passed on to AFXDPDumpCounters().
 * last_dump: in/out; time of the previous dump, updated when a dump happens.
 */
556 static inline int DumpStatsEverySecond(AFXDPThreadVars *ptv, time_t *last_dump)
558 int stats_dumped = 0;
559 time_t current_time = time(NULL);
/* time() has one-second resolution, so this fires when the second has changed. */
561 if (current_time != *last_dump) {
562 AFXDPDumpCounters(ptv);
563 *last_dump = current_time;
/*
 * Issues a zero-length, non-blocking recvfrom() on the XSK socket.
 *
 * data: the thread's AFXDPThreadVars, passed as void *.
 */
572 static inline ssize_t WakeupSocket(
void *data)
575 AFXDPThreadVars *ptv = (AFXDPThreadVars *)data;
/* The call is made when busy polling is enabled or the fill ring reports it needs a wakeup. */
578 if (ptv->xsk.enable_busy_poll || xsk_ring_prod__needs_wakeup(&ptv->umem.fq)) {
579 res = recvfrom(xsk_socket__fd(ptv->xsk.xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
600 if (initdata == NULL) {
605 AFXDPThreadVars *ptv =
SCMalloc(
sizeof(AFXDPThreadVars));
610 memset(ptv, 0,
sizeof(AFXDPThreadVars));
616 ptv->ifindex = if_nametoindex(ptv->iface);
619 if (ptv->livedev == NULL) {
625 ptv->promisc = afxdpconfig->
promisc;
626 if (ptv->promisc != 0) {
628 if (SetIfaceFlags(ptv->iface, IFF_PROMISC | IFF_UP) != 0) {
629 SCLogError(
"Failed to switch interface (%s) to promiscuous, error %s", ptv->iface,
636 ptv->threads = afxdpconfig->
threads;
639 ptv->xsk.cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
640 ptv->xsk.cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
641 ptv->xsk.cfg.xdp_flags = afxdpconfig->
mode;
642 ptv->xsk.cfg.bind_flags = afxdpconfig->
bind_flags;
645 ptv->umem.cfg.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2;
646 ptv->umem.cfg.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
647 ptv->umem.cfg.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
648 ptv->umem.cfg.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
652 if (ptv->umem.cfg.flags == XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
653 ptv->umem.mmap_alignment_flag = MAP_HUGETLB;
671 ptv->capture_afxdp_acquire_pkt_failed =
681 ReceiveAFXDPThreadDeinit(
tv, ptv);
698 time_t last_dump = 0;
700 uint32_t idx_rx = 0, idx_fq = 0, rcvd;
702 AFXDPThreadVars *ptv = (AFXDPThreadVars *)data;
707 AFXDPAllThreadsRunning(ptv);
716 if (
unlikely(ptv->afxdp_state == AFXDP_STATE_DOWN)) {
718 usleep(RECONNECT_TIMEOUT);
722 r = AFXDPTryReopen(ptv);
728 AFXDPDumpCounters(ptv);
735 if (!ptv->xsk.enable_busy_poll) {
736 StatsIncr(ptv->tv, ptv->capture_afxdp_poll);
743 StatsIncr(ptv->tv, ptv->capture_afxdp_poll_timeout);
745 StatsIncr(ptv->tv, ptv->capture_afxdp_poll_failed);
747 AFXDPSwitchState(ptv, AFXDP_STATE_DOWN);
750 DumpStatsEverySecond(ptv, &last_dump);
755 rcvd = xsk_ring_cons__peek(&ptv->xsk.rx, ptv->xsk.busy_poll_budget, &idx_rx);
757 StatsIncr(ptv->tv, ptv->capture_afxdp_empty_reads);
758 ssize_t ret = WakeupSocket(ptv);
761 AFXDPSwitchState(ptv, AFXDP_STATE_DOWN);
763 DumpStatsEverySecond(ptv, &last_dump);
767 uint32_t res = xsk_ring_prod__reserve(&ptv->umem.fq, rcvd, &idx_fq);
768 while (res != rcvd) {
769 StatsIncr(ptv->tv, ptv->capture_afxdp_failed_reads);
770 ssize_t ret = WakeupSocket(ptv);
773 AFXDPSwitchState(ptv, AFXDP_STATE_DOWN);
776 res = xsk_ring_prod__reserve(&ptv->umem.fq, rcvd, &idx_fq);
779 gettimeofday(&
ts, NULL);
781 for (uint32_t i = 0; i < rcvd; i++) {
784 StatsIncr(ptv->tv, ptv->capture_afxdp_acquire_pkt_failed);
796 uint64_t addr = xsk_ring_cons__rx_desc(&ptv->xsk.rx, idx_rx)->addr;
797 uint32_t
len = xsk_ring_cons__rx_desc(&ptv->xsk.rx, idx_rx++)->len;
798 uint64_t orig = xsk_umem__extract_addr(addr);
799 addr = xsk_umem__add_offset_to_addr(addr);
801 uint8_t *pkt_data = xsk_umem__get_data(ptv->umem.buf, addr);
805 p->afxdp_v.fq_idx = idx_fq++;
806 p->afxdp_v.orig = orig;
807 p->afxdp_v.fq = &ptv->umem.fq;
811 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) !=
TM_ECODE_OK) {
817 xsk_ring_prod__submit(&ptv->umem.fq, rcvd);
818 xsk_ring_cons__release(&ptv->xsk.rx, rcvd);
821 DumpStatsEverySecond(ptv, &last_dump);
834 AFXDPThreadVars *ptv = (AFXDPThreadVars *)data;
837 xsk_socket__delete(ptv->xsk.xsk);
841 if (ptv->umem.umem) {
842 xsk_umem__delete(ptv->umem.umem);
843 ptv->umem.umem = NULL;
845 munmap(ptv->umem.buf, MEM_BYTES);
856 static void ReceiveAFXDPThreadExitStats(
ThreadVars *
tv,
void *data)
859 AFXDPThreadVars *ptv = (AFXDPThreadVars *)data;
861 AFXDPDumpCounters(ptv);
863 SCLogPerf(
"(%s) Kernel: Packets %" PRIu64
", bytes %" PRIu64
", dropped %" PRIu64
"",
tv->
name,