80 FatalError(
"Error creating thread %s: you do not have "
81 "support for DPDK enabled, on Linux host please recompile "
/* Wall-clock reference point for timestamps. The code in this file fills it
 * with gettimeofday(), subtracts the TSC-derived number of seconds from
 * tv_sec, and later passes it to DPDKSetTimevalReal() as the base value. */
95 static struct timeval machine_start_time = { 0, 0 };
/* Per-thread state of a DPDK capture thread. This excerpt shows only some of
 * the members; the rest of the definition is not visible here. */
100 typedef struct DPDKThreadVars_ {
/* Stats counter identifiers; the ones visible in DPDKDumpCounters() are
 * passed as the counter argument of StatsSetUI64(). */
108 uint16_t capture_dpdk_packets;
109 uint16_t capture_dpdk_rx_errs;
110 uint16_t capture_dpdk_imissed;
111 uint16_t capture_dpdk_rx_no_mbufs;
112 uint16_t capture_dpdk_ierrors;
113 uint16_t capture_dpdk_tx_errs;
/* Port id copied into every packet as p->dpdk_v.out_port_id. */
118 uint16_t out_port_id;
/* Socket id taken from the configuration; compared against the thread's
 * NUMA node and SOCKET_ID_ANY during thread initialisation. */
126 int32_t port_socket_id;
/* Mempool pointer taken over from the configuration at thread init. */
127 struct rte_mempool *pkt_mempool;
/* Destination array for rte_eth_rx_burst(), BURST_SIZE entries. */
128 struct rte_mbuf *received_mbufs[BURST_SIZE];
/* Forward declarations of file-local functions that are defined later in
 * this file. */
132 static void ReceiveDPDKThreadExitStats(
ThreadVars *,
void *);
/* TSC cycle count conversions. */
140 static uint64_t CyclesToMicroseconds(uint64_t cycles);
141 static uint64_t CyclesToSeconds(uint64_t cycles);
/* Frees mbufs of mbuf_array; see the definition for how mbuf_cnt and offset
 * are interpreted. */
142 static void DPDKFreeMbufArray(
struct rte_mbuf **mbuf_array, uint16_t mbuf_cnt, uint16_t
offset);
143 static uint64_t DPDKGetSeconds(
void);
/*
 * Free a contiguous run of mbufs held in an array.
 *
 * mbuf_array  array of mbuf pointers
 * mbuf_cnt    number of mbufs to free
 * offset      index of the first mbuf to free
 *
 * Frees mbuf_array[offset] .. mbuf_array[offset + mbuf_cnt - 1]; nothing is
 * freed when mbuf_cnt is 0.
 */
static void DPDKFreeMbufArray(struct rte_mbuf **mbuf_array, uint16_t mbuf_cnt, uint16_t offset)
{
    /* mbuf_cnt is a count relative to offset, not an absolute end index. The
     * receive loop calls this with (nb_rx - i - 1, i + 1); bounding the loop
     * by mbuf_cnt alone would stop early (or never run) and leak the tail of
     * the burst. The sum is computed in 32 bits so it cannot wrap. */
    const uint32_t end = (uint32_t)offset + (uint32_t)mbuf_cnt;

    for (uint32_t i = offset; i < end; i++) {
        rte_pktmbuf_free(mbuf_array[i]);
    }
}
/* Convert a TSC cycle count into microseconds, based on the TSC frequency
 * reported by rte_get_tsc_hz(). */
152 static uint64_t CyclesToMicroseconds(
const uint64_t cycles)
/* Integer division: any remainder of the frequency below 1 MHz is dropped. */
154 const uint64_t ticks_per_us = rte_get_tsc_hz() / 1000000;
/* A zero divisor must not reach the division below; what this branch does is
 * not shown in this excerpt. */
155 if (ticks_per_us == 0) {
158 return cycles / ticks_per_us;
/* Convert a TSC cycle count into whole seconds, based on the TSC frequency
 * reported by rte_get_tsc_hz(). */
161 static uint64_t CyclesToSeconds(
const uint64_t cycles)
163 const uint64_t ticks_per_s = rte_get_tsc_hz();
/* A zero divisor must not reach the division below; what this branch does is
 * not shown in this excerpt. */
164 if (ticks_per_s == 0) {
/* Integer division: fractions of a second are truncated. */
167 return cycles / ticks_per_s;
/*
 * Add the time represented by a TSC cycle count to a timeval.
 *
 * cycles   TSC cycle count to add
 * orig_tv  base time (read only by this function)
 * new_tv   receives orig_tv plus the converted cycle count, with tv_usec
 *          normalised to the range 0..999999
 */
static void CyclesAddToTimeval(
        const uint64_t cycles, struct timeval *orig_tv, struct timeval *new_tv)
{
    /* Sum the microsecond parts first, then split the total into the whole
     * seconds that carry over and the microseconds that remain. */
    const uint64_t total_usec = CyclesToMicroseconds(cycles) + orig_tv->tv_usec;
    const uint64_t carry_sec = total_usec / 1000000;
    const uint64_t rest_usec = total_usec % 1000000;

    new_tv->tv_sec = orig_tv->tv_sec + carry_sec;
    new_tv->tv_usec = rest_usec;
}
/* Body excerpt; the enclosing function header is not shown. Takes the current
 * wall-clock time and subtracts the number of seconds derived from the TSC.
 * Only tv_sec is adjusted; tv_usec keeps the value set by gettimeofday(). */
180 gettimeofday(&machine_start_time, NULL);
181 machine_start_time.tv_sec -= DPDKGetSeconds();
/* Compute a timestamp from a base timeval plus the current TSC reading.
 * How real_tv becomes the SCTime_t return value is not shown in this
 * excerpt. */
190 static SCTime_t DPDKSetTimevalReal(
struct timeval *machine_start_tv)
192 struct timeval real_tv;
/* real_tv = *machine_start_tv + time represented by the current TSC count. */
193 CyclesAddToTimeval(rte_get_tsc_cycles(), machine_start_tv, &real_tv);
/*
 * Return the current TSC cycle counter converted to whole seconds.
 */
static uint64_t DPDKGetSeconds(void)
{
    const uint64_t tsc_now = rte_get_tsc_cycles();

    return CyclesToSeconds(tsc_now);
}
/* Driver-specific actions; called from thread initialisation after
 * rte_eth_dev_start() and rte_eth_dev_info_get() on the port. */
203 static void DevicePostStartPMDSpecificActions(DPDKThreadVars *ptv,
const char *driver_name)
/* For a bonding device, continue with the driver name returned by
 * BondingDeviceDriverGet() for this port. */
205 if (strcmp(driver_name,
"net_bonding") == 0) {
206 driver_name = BondingDeviceDriverGet(ptv->port_id);
/* i40e: set up RSS through i40eDeviceSetRSS(), passing the thread count. */
211 if (strcmp(driver_name,
"net_i40e") == 0)
212 i40eDeviceSetRSS(ptv->port_id, ptv->threads);
/* Driver-specific actions; called from thread deinitialisation with the
 * driver name obtained from rte_eth_dev_info_get(). */
215 static void DevicePreClosePMDSpecificActions(DPDKThreadVars *ptv,
const char *driver_name)
/* For a bonding device, continue with the driver name returned by
 * BondingDeviceDriverGet() for this port. */
217 if (strcmp(driver_name,
"net_bonding") == 0) {
218 driver_name = BondingDeviceDriverGet(ptv->port_id);
221 if (strcmp(driver_name,
"net_i40e") == 0) {
/* The rte_flow flush is compiled only for DPDK versions above 20.0.0.0. */
222 #if RTE_VERSION > RTE_VERSION_NUM(20, 0, 0, 0)
224 struct rte_flow_error flush_error = { 0 };
225 int32_t retval = rte_flow_flush(ptv->port_id, &flush_error);
/* Failure report: the return code is negated for rte_strerror() and the
 * message stored in flush_error is appended. */
227 SCLogError(
"%s: unable to flush rte_flow rules: %s Flush error msg: %s",
228 ptv->livedev->dev, rte_strerror(-retval), flush_error.message);
/* Determine the NUMA node of the calling thread. The declarations of cpu and
 * node and the return statement are not shown in this excerpt. */
238 static int GetNumaNode(
void)
/* Linux: map the CPU the thread is currently running on to its NUMA node. */
243 #if defined(__linux__)
244 cpu = sched_getcpu();
245 node = numa_node_of_cpu(cpu);
/* Other platforms: only a warning is logged. */
247 SCLogWarning(
"NUMA node retrieval is not supported on this OS.");
/* Publish capture statistics for a thread through StatsSetUI64().
 * This is a listing excerpt: several statements, including the openings of
 * the calls whose argument lines appear at 299, 301, 303 and 309, are not
 * shown. */
285 static inline void DPDKDumpCounters(DPDKThreadVars *ptv)
/* Port-wide statistics are read by the thread that serves queue 0. */
290 if (ptv->queue_id == 0) {
291 struct rte_eth_stats eth_stats;
/* Pass the address of the local stats structure; the argument had been
 * corrupted by a character-encoding error ("&eth" turned into one glyph). */
292 int retval = rte_eth_stats_get(ptv->port_id, &eth_stats);
294 SCLogError(
"%s: failed to get stats: %s", ptv->livedev->dev, rte_strerror(-retval));
/* The sums below add imissed, ierrors and rx_nombuf to a packet count or to
 * each other. */
299 ptv->pkts + eth_stats.imissed + eth_stats.ierrors + eth_stats.rx_nombuf);
301 eth_stats.ipackets + eth_stats.imissed + eth_stats.ierrors + eth_stats.rx_nombuf);
303 eth_stats.imissed + eth_stats.ierrors + eth_stats.rx_nombuf);
304 StatsSetUI64(ptv->tv, ptv->capture_dpdk_imissed, eth_stats.imissed);
305 StatsSetUI64(ptv->tv, ptv->capture_dpdk_rx_no_mbufs, eth_stats.rx_nombuf);
306 StatsSetUI64(ptv->tv, ptv->capture_dpdk_ierrors, eth_stats.ierrors);
307 StatsSetUI64(ptv->tv, ptv->capture_dpdk_tx_errs, eth_stats.oerrors);
309 ptv->livedev->drop, eth_stats.imissed + eth_stats.ierrors + eth_stats.rx_nombuf);
/* This update uses only the thread's own packet count. */
311 StatsSetUI64(ptv->tv, ptv->capture_dpdk_packets, ptv->pkts);
/* Release the mbuf attached to a packet: it is either handed to
 * rte_eth_tx_burst() on the packet's out port/queue or freed. The conditions
 * that select between these paths are not shown in this excerpt. */
315 static void DPDKReleasePacket(
Packet *p)
/* Variant compiled when one of the i40e, ixgbe or ice PMDs is enabled: the
 * return value of rte_eth_tx_burst() is not used on line 330. */
324 #
if defined(RTE_LIBRTE_I40E_PMD) || defined(RTE_LIBRTE_IXGBE_PMD) || defined(RTE_LIBRTE_ICE_PMD)
330 rte_eth_tx_burst(p->dpdk_v.out_port_id, p->dpdk_v.out_queue_id, &p->dpdk_v.mbuf, 1);
/* Variant that keeps the rte_eth_tx_burst() result in retval. */
337 retval = rte_eth_tx_burst(
338 p->dpdk_v.out_port_id, p->dpdk_v.out_queue_id, &p->dpdk_v.mbuf, 1);
340 SCLogDebug(
"Unable to transmit the packet on port %u queue %u",
341 p->dpdk_v.out_port_id, p->dpdk_v.out_queue_id);
/* Free the mbuf and clear the pointer so it cannot be reused. */
342 rte_pktmbuf_free(p->dpdk_v.mbuf);
343 p->dpdk_v.mbuf = NULL;
/* Second free path, again followed by clearing the pointer. */
347 rte_pktmbuf_free(p->dpdk_v.mbuf);
348 p->dpdk_v.mbuf = NULL;
/* Excerpt from the body of the packet receive function. The function header,
 * several declarations and a number of statements are not shown. */
362 time_t last_dump = 0;
/* Ensures the segmented-mbuf warning below is issued only once. */
364 bool segmented_mbufs_warned = 0;
365 SCTime_t t = DPDKSetTimevalReal(&machine_start_time);
368 DPDKThreadVars *ptv = (DPDKThreadVars *)data;
/* Clear the basic and the extended statistics of the port. */
378 rte_eth_stats_reset(ptv->port_id);
379 rte_eth_xstats_reset(ptv->port_id);
/* The thread serving queue 0 stops the receive port; the condition guarding
 * the stop of the output port is not shown in this excerpt. */
383 if (ptv->queue_id == 0) {
384 rte_eth_dev_stop(ptv->port_id);
386 rte_eth_dev_stop(ptv->out_port_id);
389 DPDKDumpCounters(ptv);
/* Fetch up to BURST_SIZE packets from this thread's RX queue. */
393 nb_rx = rte_eth_rx_burst(ptv->port_id, ptv->queue_id, ptv->received_mbufs, BURST_SIZE);
395 t = DPDKSetTimevalReal(&machine_start_time);
/* Call the capture timeout handler once msecs has advanced by more than 100
 * since the previous call. */
397 if (msecs > last_timeout_msec + 100) {
398 TmThreadsCaptureHandleTimeout(
tv, NULL);
399 last_timeout_msec = msecs;
404 ptv->pkts += (uint64_t)nb_rx;
405 for (uint16_t i = 0; i < nb_rx; i++) {
/* Per-packet setup: timestamp, the mbuf itself, and the copy mode and TX
 * port/queue the packet carries with it. */
416 p->
ts = DPDKSetTimevalReal(&machine_start_time);
417 p->dpdk_v.mbuf = ptv->received_mbufs[i];
419 p->dpdk_v.copy_mode = ptv->copy_mode;
420 p->dpdk_v.out_port_id = ptv->out_port_id;
421 p->dpdk_v.out_queue_id = ptv->queue_id;
/* Evaluate the RX checksum offload flags of the mbuf. */
427 uint64_t ol_flags = ptv->received_mbufs[i]->ol_flags;
428 if ((ol_flags & RTE_MBUF_F_RX_IP_CKSUM_MASK) == RTE_MBUF_F_RX_IP_CKSUM_GOOD &&
429 (ol_flags & RTE_MBUF_F_RX_L4_CKSUM_MASK) == RTE_MBUF_F_RX_L4_CKSUM_GOOD) {
430 SCLogDebug(
"HW detected GOOD IP and L4 chsum, ignoring validation");
433 if ((ol_flags & RTE_MBUF_F_RX_IP_CKSUM_MASK) == RTE_MBUF_F_RX_IP_CKSUM_BAD) {
438 if ((ol_flags & RTE_MBUF_F_RX_L4_CKSUM_MASK) == RTE_MBUF_F_RX_L4_CKSUM_BAD) {
/* Non-contiguous mbuf: warn once, with a hint that depends on the EAL
 * process type. */
445 if (!rte_pktmbuf_is_contiguous(p->dpdk_v.mbuf) && !segmented_mbufs_warned) {
446 char warn_s[] =
"Segmented mbufs detected! Redmine Ticket #6012 "
447 "Check your configuration or report the issue";
448 enum rte_proc_type_t eal_t = rte_eal_process_type();
449 if (eal_t == RTE_PROC_SECONDARY) {
451 "try to increase mbuf size in your primary application",
453 }
else if (eal_t == RTE_PROC_PRIMARY) {
455 "try to increase MTU in your suricata.yaml",
459 segmented_mbufs_warned = 1;
/* Hand the mbuf's data pointer and its packet length to PacketSetData(). */
462 PacketSetData(p, rte_pktmbuf_mtod(p->dpdk_v.mbuf, uint8_t *),
463 rte_pktmbuf_pkt_len(p->dpdk_v.mbuf));
/* If processing fails, release the mbufs that follow index i. The arguments
 * are the array, the number of remaining mbufs, and the index of the first
 * remaining mbuf. */
464 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) !=
TM_ECODE_OK) {
466 DPDKFreeMbufArray(ptv->received_mbufs, nb_rx - i - 1, i + 1);
/* Dump counters whenever the value returned by DPDKGetSeconds() changes. */
472 current_time = DPDKGetSeconds();
473 if (current_time != last_dump) {
474 DPDKDumpCounters(ptv);
475 last_dump = current_time;
/* Excerpt from the body of the thread initialisation function. The function
 * header, the declarations of dpdk_config and queue_id, and the error-path
 * statements are not shown. */
494 int retval, thread_numa;
495 DPDKThreadVars *ptv = NULL;
498 if (initdata == NULL) {
499 SCLogError(
"DPDK configuration is NULL in thread initialization");
/* Zero-initialised per-thread state. */
503 ptv =
SCCalloc(1,
sizeof(DPDKThreadVars));
/* Copy the settings this thread needs out of the shared configuration. */
521 ptv->copy_mode = dpdk_config->copy_mode;
522 ptv->checksum_mode = dpdk_config->checksum_mode;
524 ptv->threads = dpdk_config->threads;
525 ptv->port_id = dpdk_config->port_id;
526 ptv->out_port_id = dpdk_config->out_port_id;
527 ptv->port_socket_id = dpdk_config->socket_id;
/* Take over the mempool pointer and clear it in the configuration. */
529 ptv->pkt_mempool = dpdk_config->pkt_mempool;
530 dpdk_config->pkt_mempool = NULL;
/* Log when the thread's NUMA node is known, the NIC's socket is known, and
 * the two differ. */
532 thread_numa = GetNumaNode();
533 if (thread_numa >= 0 && ptv->port_socket_id != SOCKET_ID_ANY &&
534 thread_numa != ptv->port_socket_id) {
536 SCLogPerf(
"%s: NIC is on NUMA %d, thread on NUMA %d", dpdk_config->iface,
537 ptv->port_socket_id, thread_numa);
541 ptv->queue_id = queue_id;
/* Only the thread with the highest queue id starts the device and runs the
 * driver-specific post-start actions. */
544 if (queue_id == dpdk_config->threads - 1) {
545 retval = rte_eth_dev_start(ptv->port_id);
547 SCLogError(
"%s: error (%s) during device startup", dpdk_config->iface,
548 rte_strerror(-retval));
552 struct rte_eth_dev_info dev_info;
553 retval = rte_eth_dev_info_get(ptv->port_id, &dev_info);
555 SCLogError(
"%s: error (%s) when getting device info", dpdk_config->iface,
556 rte_strerror(-retval));
561 DevicePostStartPMDSpecificActions(ptv, dev_info.driver_name);
/* Summarise NUMA placement: either the number of threads counted in
 * inconsitent_numa_cnt, or the fact that the NIC's node is unknown. */
563 uint16_t inconsistent_numa_cnt =
SC_ATOMIC_GET(dpdk_config->inconsitent_numa_cnt);
564 if (inconsistent_numa_cnt > 0 && ptv->port_socket_id != SOCKET_ID_ANY) {
565 SCLogWarning(
"%s: NIC is on NUMA %d, %u threads on different NUMA node(s)",
566 dpdk_config->iface, ptv->port_socket_id, inconsistent_numa_cnt);
567 }
else if (ptv->port_socket_id == SOCKET_ID_ANY) {
569 "%s: unable to determine NIC's NUMA node, degraded performance can be expected",
/* The configuration's DerefFunc is invoked here, and again below behind a
 * NULL check; the control flow separating the two calls is not shown. */
575 dpdk_config->DerefFunc(dpdk_config);
579 if (dpdk_config != NULL)
580 dpdk_config->DerefFunc(dpdk_config);
/* Log the extended statistics (xstats) of a port whose value is greater than
 * zero. Failures are reported through FatalError(). This is a listing
 * excerpt: the allocation of xstats and the cleanup are not shown. */
586 static void PrintDPDKPortXstats(uint32_t port_id,
const char *port_name)
588 struct rte_eth_xstat *xstats;
589 struct rte_eth_xstat_name *xstats_names;
/* First call with no buffer; the error text below shows the result is used
 * as the number of xstats. */
591 int32_t
len = rte_eth_xstats_get(port_id, NULL, 0);
593 FatalError(
"Error (%s) getting count of rte_eth_xstats failed on port %s",
594 rte_strerror(-
len), port_name);
598 FatalError(
"Failed to allocate memory for the rte_eth_xstat structure");
/* Second call fills the values; a negative result or more entries than len
 * is treated as fatal. */
600 int32_t ret = rte_eth_xstats_get(port_id, xstats,
len);
601 if (ret < 0 || ret >
len) {
603 FatalError(
"Error (%s) getting rte_eth_xstats failed on port %s", rte_strerror(-ret),
/* Zero-initialised array for the names that belong to the values. */
606 xstats_names =
SCCalloc(
len,
sizeof(*xstats_names));
607 if (xstats_names == NULL) {
609 FatalError(
"Failed to allocate memory for the rte_eth_xstat_name array");
611 ret = rte_eth_xstats_get_names(port_id, xstats_names,
len);
612 if (ret < 0 || ret >
len) {
615 FatalError(
"Error (%s) getting names of rte_eth_xstats failed on port %s",
616 rte_strerror(-ret), port_name);
/* Entries with a zero value are skipped. */
618 for (int32_t i = 0; i <
len; i++) {
619 if (xstats[i].value > 0)
620 SCLogPerf(
"Port %u (%s) - %s: %" PRIu64, port_id, port_name, xstats_names[i].name,
/* Print final statistics for a capture thread.
 *   tv    thread variables (not referenced in the lines shown here)
 *   data  the thread's DPDKThreadVars, passed as void *
 * This is a listing excerpt: the declaration of retval and the error check
 * around the SCLogError() call are not shown. */
633 static void ReceiveDPDKThreadExitStats(
ThreadVars *
tv,
void *data)
637 DPDKThreadVars *ptv = (DPDKThreadVars *)data;
/* Port-wide totals are printed by the thread that serves queue 0. */
639 if (ptv->queue_id == 0) {
640 struct rte_eth_stats eth_stats;
641 PrintDPDKPortXstats(ptv->port_id, ptv->livedev->dev);
/* Pass the address of the local stats structure; the argument had been
 * corrupted by a character-encoding error ("&eth" turned into one glyph). */
642 retval = rte_eth_stats_get(ptv->port_id, &eth_stats);
/* rte_strerror() is used like in every other DPDK error report of this file;
 * it also covers DPDK-specific error codes. */
644 SCLogError(
"%s: failed to get stats (%s)", ptv->livedev->dev, rte_strerror(-retval));
647 SCLogPerf(
"%s: total RX stats: packets %" PRIu64
" bytes: %" PRIu64
" missed: %" PRIu64
648 " errors: %" PRIu64
" nombufs: %" PRIu64,
649 ptv->livedev->dev, eth_stats.ipackets, eth_stats.ibytes, eth_stats.imissed,
650 eth_stats.ierrors, eth_stats.rx_nombuf);
652 SCLogPerf(
"%s: total TX stats: packets %" PRIu64
" bytes: %" PRIu64
" errors: %" PRIu64,
653 ptv->livedev->dev, eth_stats.opackets, eth_stats.obytes, eth_stats.oerrors);
/* Update the counters once more. */
656 DPDKDumpCounters(ptv);
/* Excerpt from the body of the thread deinitialisation function. The function
 * header and the statements between the visible lines are not shown. */
668 DPDKThreadVars *ptv = (DPDKThreadVars *)data;
/* The thread serving queue 0 looks up the driver name and runs the
 * driver-specific pre-close actions. */
670 if (ptv->queue_id == 0) {
671 struct rte_eth_dev_info dev_info;
672 int retval = rte_eth_dev_info_get(ptv->port_id, &dev_info);
674 SCLogError(
"%s: error (%s) when getting device info", ptv->livedev->dev,
675 rte_strerror(-retval));
679 DevicePreClosePMDSpecificActions(ptv, dev_info.driver_name);
/* Drop this thread's mempool pointer. */
682 ptv->pkt_mempool = NULL;