suricata
runmode-dpdk.c
Go to the documentation of this file.
1 /* Copyright (C) 2021 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \ingroup dpdk
20  *
21  * @{
22  */
23 
24 /**
25  * \file
26  *
27  * \author Lukas Sismis <lukas.sismis@gmail.com>
28  *
29  * DPDK runmode
30  *
31  */
32 
33 #include "suricata-common.h"
34 #include "runmodes.h"
35 #include "runmode-dpdk.h"
36 #include "decode.h"
37 #include "source-dpdk.h"
38 #include "util-runmodes.h"
39 #include "util-byte.h"
40 #include "util-cpu.h"
41 #include "util-debug.h"
42 #include "util-device.h"
43 #include "util-dpdk.h"
44 #include "util-dpdk-i40e.h"
45 #include "util-dpdk-ice.h"
46 #include "util-dpdk-ixgbe.h"
47 #include "util-dpdk-bonding.h"
48 #include "util-time.h"
49 #include "util-conf.h"
50 #include "suricata.h"
51 #include "util-affinity.h"
52 
53 #ifdef HAVE_DPDK
54 
// Length in bytes of the RSS hash key defined below.
55 #define RSS_HKEY_LEN 40
56 // General purpose RSS key for symmetric bidirectional flow distribution
// The initializer lists 40 bytes: the pair 0x6D, 0x5A repeated 20 times.
57 uint8_t rss_hkey[] = { 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D,
58  0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D,
59  0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A };
60 
61 // Rounds x up to the nearest multiple of y (integer arithmetic, y must be non-zero)
62 #define ROUNDUP(x, y) ((((x) + ((y)-1)) / (y)) * (y))
63 
64 /* Maximum DPDK EAL parameters count. */
65 #define EAL_ARGS 48
66 
// Growable-by-append argument vector used to build the EAL command line.
67 struct Arguments {
// number of pointer slots allocated in argv (set by ArgumentsInit)
68  uint16_t capacity;
// array of heap-allocated argument strings (freed by ArgumentsCleanup)
69  char **argv;
// number of slots of argv currently in use
70  uint16_t argc;
71 };
72 
// Helpers allocating individual argument strings.
73 static char *AllocArgument(size_t arg_len);
74 static char *AllocAndSetArgument(const char *arg);
75 static char *AllocAndSetOption(const char *arg);
76 
// Helpers managing the argument vector handed to rte_eal_init() by InitEal().
77 static void ArgumentsInit(struct Arguments *args, unsigned capacity);
78 static void ArgumentsCleanup(struct Arguments *args);
79 static void ArgumentsAdd(struct Arguments *args, char *value);
80 static void ArgumentsAddOptionAndArgument(struct Arguments *args, const char *opt, const char *arg);
81 static void InitEal(void);
82 
// Per-interface configuration setters; each one validates and stores a single setting.
83 static void ConfigSetIface(DPDKIfaceConfig *iconf, const char *entry_str);
84 static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str);
85 static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues);
86 static int ConfigSetTxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues);
87 static int ConfigSetMempoolSize(DPDKIfaceConfig *iconf, intmax_t entry_int);
88 static int ConfigSetMempoolCacheSize(DPDKIfaceConfig *iconf, const char *entry_str);
89 static int ConfigSetRxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int);
90 static int ConfigSetTxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int);
91 static int ConfigSetMtu(DPDKIfaceConfig *iconf, intmax_t entry_int);
92 static bool ConfigSetPromiscuousMode(DPDKIfaceConfig *iconf, int entry_bool);
93 static bool ConfigSetMulticast(DPDKIfaceConfig *iconf, int entry_bool);
94 static int ConfigSetChecksumChecks(DPDKIfaceConfig *iconf, int entry_bool);
95 static int ConfigSetChecksumOffload(DPDKIfaceConfig *iconf, int entry_bool);
96 static int ConfigSetCopyIface(DPDKIfaceConfig *iconf, const char *entry_str);
97 static int ConfigSetCopyMode(DPDKIfaceConfig *iconf, const char *entry_str);
98 static int ConfigSetCopyIfaceSettings(DPDKIfaceConfig *iconf, const char *iface, const char *mode);
// Allocation, loading and parsing of a whole interface configuration.
99 static void ConfigInit(DPDKIfaceConfig **iconf);
100 static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface);
101 static DPDKIfaceConfig *ConfigParse(const char *iface);
102 
// Device (port) level configuration routines.
103 static void DeviceInitPortConf(const DPDKIfaceConfig *iconf,
104  const struct rte_eth_dev_info *dev_info, struct rte_eth_conf *port_conf);
105 static int DeviceConfigureQueues(DPDKIfaceConfig *iconf, const struct rte_eth_dev_info *dev_info,
106  const struct rte_eth_conf *port_conf);
107 static int DeviceValidateOutIfaceConfig(DPDKIfaceConfig *iconf);
108 static int DeviceConfigureIPS(DPDKIfaceConfig *iconf);
109 static int DeviceConfigure(DPDKIfaceConfig *iconf);
110 static void *ParseDpdkConfigAndConfigureDevice(const char *iface);
// Reference-count release callback installed into DPDKIfaceConfig::DerefFunc by ConfigInit().
111 static void DPDKDerefConfig(void *conf);
112 
// Fallback values applied by ConfigLoad() when a setting is absent from the configuration.
// "auto" values are resolved at load time by the corresponding ConfigSet* function.
113 #define DPDK_CONFIG_DEFAULT_THREADS "auto"
114 #define DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE 65535
115 #define DPDK_CONFIG_DEFAULT_MEMPOOL_CACHE_SIZE "auto"
116 #define DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS 1024
117 #define DPDK_CONFIG_DEFAULT_TX_DESCRIPTORS 1024
118 #define DPDK_CONFIG_DEFAULT_RSS_HASH_FUNCTIONS RTE_ETH_RSS_IP
119 #define DPDK_CONFIG_DEFAULT_MTU 1500
120 #define DPDK_CONFIG_DEFAULT_PROMISCUOUS_MODE 1
121 #define DPDK_CONFIG_DEFAULT_MULTICAST_MODE 1
122 #define DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION 1
123 #define DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION_OFFLOAD 1
// NOTE(review): the two copy defaults below are not used as lookup fallbacks in the visible
// part of ConfigLoad(); only ConfigSetCopyMode() references DPDK_CONFIG_DEFAULT_COPY_MODE.
124 #define DPDK_CONFIG_DEFAULT_COPY_MODE "none"
125 #define DPDK_CONFIG_DEFAULT_COPY_INTERFACE "none"
126 
// Names of the per-interface configuration keys. ConfigLoad() passes these members as the
// key argument of its ConfGetChildValue*WithDefault() lookups.
127 DPDKIfaceConfigAttributes dpdk_yaml = {
128  .threads = "threads",
129  .promisc = "promisc",
130  .multicast = "multicast",
131  .checksum_checks = "checksum-checks",
132  .checksum_checks_offload = "checksum-checks-offload",
133  .mtu = "mtu",
134  .rss_hf = "rss-hash-functions",
135  .mempool_size = "mempool-size",
136  .mempool_cache_size = "mempool-cache-size",
137  .rx_descriptors = "rx-descriptors",
138  .tx_descriptors = "tx-descriptors",
139  .copy_mode = "copy-mode",
140  .copy_iface = "copy-iface",
141 };
142 
/**
 * Finds the greatest divisor of num that is not larger than max_num.
 *
 * The search runs entirely in unsigned arithmetic so that bounds above
 * INT_MAX are handled instead of silently skipping the loop.
 *
 * @param num Number to find a divisor of
 * @param max_num Inclusive upper bound for the returned divisor
 * @return the greatest divisor of num in the range [2, max_num], or 1 when
 *         there is none (this includes max_num < 2)
 */
static int GreatestDivisorUpTo(uint32_t num, uint32_t max_num)
{
    uint32_t start = max_num;

    // a non-zero number has no divisor greater than itself, skip the useless range
    if (num != 0 && start > num) {
        start = num;
    }
    // the result is returned as int, keep it representable
    if (start > (uint32_t)INT32_MAX) {
        start = (uint32_t)INT32_MAX;
    }

    for (uint32_t i = start; i >= 2; i--) {
        if (num % i == 0) {
            return (int)i;
        }
    }
    return 1;
}
152 
/**
 * Allocates a zero-filled character buffer for a string of the given length.
 * One extra byte is reserved for the terminating null character.
 * A failed allocation is reported through FatalError().
 * @param arg_len Length of the string (without the null character)
 * @return pointer to the allocated buffer
 */
static char *AllocArgument(size_t arg_len)
{
    SCEnter();
    const size_t buf_len = arg_len + 1; // null character
    char *buf = (char *)SCCalloc(buf_len, sizeof(char));
    if (buf == NULL)
        FatalError("Could not allocate memory for an argument");

    SCReturnPtr(buf, "char *");
}
165 
/**
 * Creates a heap-allocated copy of the given string.
 * A NULL input is reported through FatalError().
 * @param arg String to copy into the newly allocated space
 * @return pointer to the newly allocated copy
 */
static char *AllocAndSetArgument(const char *arg)
{
    SCEnter();
    if (arg == NULL)
        FatalError("Passed argument is NULL in DPDK config initialization");

    const size_t len = strlen(arg);
    char *copy = AllocArgument(len);

    // len + 1 so that the whole string including the terminator fits
    strlcpy(copy, arg, len + 1);
    SCReturnPtr(copy, "char *");
}
184 
/**
 * Builds a heap-allocated command line option out of an option name.
 * Names longer than one character get the "--" prefix, other names get "-".
 * A NULL input is reported through FatalError().
 * @param arg Option name without any dash prefix
 * @return pointer to the newly allocated option string
 */
static char *AllocAndSetOption(const char *arg)
{
    SCEnter();
    if (arg == NULL)
        FatalError("Passed option is NULL in DPDK config initialization");

    const size_t name_len = strlen(arg);
    const char *prefix = (name_len > 1) ? "--" : "-";
    const size_t prefix_len = strlen(prefix);
    const size_t total_len = prefix_len + name_len;

    char *option = AllocArgument(total_len);
    strlcpy(option, prefix, prefix_len + 1);
    strlcat(option, arg, total_len + 1);
    SCReturnPtr(option, "char *");
}
202 
203 static void ArgumentsInit(struct Arguments *args, unsigned capacity)
204 {
205  SCEnter();
206  args->argv = SCCalloc(capacity, sizeof(*args->argv)); // alloc array of pointers
207  if (args->argv == NULL)
208  FatalError("Could not allocate memory for Arguments structure");
209 
210  args->capacity = capacity;
211  args->argc = 0;
212  SCReturn;
213 }
214 
215 static void ArgumentsCleanup(struct Arguments *args)
216 {
217  SCEnter();
218  for (int i = 0; i < args->argc; i++) {
219  if (args->argv[i] != NULL) {
220  SCFree(args->argv[i]);
221  args->argv[i] = NULL;
222  }
223  }
224 
225  SCFree(args->argv);
226  args->argv = NULL;
227  args->argc = 0;
228  args->capacity = 0;
229 }
230 
231 static void ArgumentsAdd(struct Arguments *args, char *value)
232 {
233  SCEnter();
234  if (args->argc + 1 > args->capacity)
235  FatalError("No capacity for more arguments (Max: %" PRIu32 ")", EAL_ARGS);
236 
237  args->argv[args->argc++] = value;
238  SCReturn;
239 }
240 
241 static void ArgumentsAddOptionAndArgument(struct Arguments *args, const char *opt, const char *arg)
242 {
243  SCEnter();
244  char *option;
245  char *argument;
246 
247  option = AllocAndSetOption(opt);
248  ArgumentsAdd(args, option);
249 
250  // Empty argument could mean option only (e.g. --no-huge)
251  if (arg == NULL || arg[0] == '\0')
252  SCReturn;
253 
254  argument = AllocAndSetArgument(arg);
255  ArgumentsAdd(args, argument);
256  SCReturn;
257 }
258 
/**
 * Builds an argv-style vector from the "dpdk.eal-params" configuration node and
 * passes it to rte_eal_init(). A missing node, a failed allocation or a negative
 * rte_eal_init() result is reported through FatalError().
 */
259 static void InitEal(void)
260 {
261  SCEnter();
262  int retval;
263  ConfNode *param;
264  const ConfNode *eal_params = ConfGetNode("dpdk.eal-params");
265  struct Arguments args;
266  char **eal_argv;
267 
268  if (eal_params == NULL) {
269  FatalError("DPDK EAL parameters not found in the config");
270  }
271 
// the first argument is the program name, as in a regular argv
272  ArgumentsInit(&args, EAL_ARGS);
273  ArgumentsAdd(&args, AllocAndSetArgument("suricata"));
274 
275  TAILQ_FOREACH (param, &eal_params->head, next) {
// a sequence node repeats the same option once for every listed value
276  if (ConfNodeIsSequence(param)) {
277  const char *key = param->name;
278  ConfNode *val;
279  TAILQ_FOREACH (val, &param->head, next) {
280  ArgumentsAddOptionAndArgument(&args, key, (const char *)val->val);
281  }
282  continue;
283  }
284  ArgumentsAddOptionAndArgument(&args, param->name, param->val);
285  }
286 
287  // creating a shallow copy for cleanup because rte_eal_init changes array contents
288  eal_argv = SCCalloc(args.argc, sizeof(*args.argv));
289  if (eal_argv == NULL) {
290  FatalError("Failed to allocate memory for the array of DPDK EAL arguments");
291  }
292  memcpy(eal_argv, args.argv, args.argc * sizeof(*args.argv));
293 
294  rte_log_set_global_level(RTE_LOG_WARNING);
295  retval = rte_eal_init(args.argc, eal_argv);
296 
// the strings are owned by args; eal_argv only holds copies of the pointers
297  ArgumentsCleanup(&args);
298  SCFree(eal_argv);
299 
// NOTE(review): the DPDK documentation describes rte_eal_init() as returning -1 and
// storing the cause in rte_errno; if so, rte_strerror(-retval) always describes errno 1
// and rte_errno should be printed instead - confirm against the DPDK API reference.
300  if (retval < 0) { // retval bound to the result of rte_eal_init
301  FatalError("DPDK EAL initialization error: %s", rte_strerror(-retval));
302  }
// NOTE(review): listing line 303 is absent from this view - verify against the full file.
304 }
305 
306 static void DPDKDerefConfig(void *conf)
307 {
308  SCEnter();
309  DPDKIfaceConfig *iconf = (DPDKIfaceConfig *)conf;
310 
311  if (SC_ATOMIC_SUB(iconf->ref, 1) == 1) {
312  if (iconf->pkt_mempool != NULL) {
313  rte_mempool_free(iconf->pkt_mempool);
314  }
315 
316  SCFree(iconf);
317  }
318  SCReturn;
319 }
320 
321 static void ConfigInit(DPDKIfaceConfig **iconf)
322 {
323  SCEnter();
324  DPDKIfaceConfig *ptr = NULL;
325  ptr = SCCalloc(1, sizeof(DPDKIfaceConfig));
326  if (ptr == NULL)
327  FatalError("Could not allocate memory for DPDKIfaceConfig");
328 
329  ptr->pkt_mempool = NULL;
330  ptr->out_port_id = -1; // make sure no port is set
331  SC_ATOMIC_INIT(ptr->ref);
332  (void)SC_ATOMIC_ADD(ptr->ref, 1);
333  ptr->DerefFunc = DPDKDerefConfig;
334  ptr->flags = 0;
335 
336  *iconf = ptr;
337  SCReturn;
338 }
339 
340 static void ConfigSetIface(DPDKIfaceConfig *iconf, const char *entry_str)
341 {
342  SCEnter();
343  int retval;
344 
345  if (entry_str == NULL || entry_str[0] == '\0')
346  FatalError("Interface name in DPDK config is NULL or empty");
347 
348  retval = rte_eth_dev_get_port_by_name(entry_str, &iconf->port_id);
349  if (retval < 0)
350  FatalError("Interface \"%s\": %s", entry_str, rte_strerror(-retval));
351 
352  strlcpy(iconf->iface, entry_str, sizeof(iconf->iface));
353  SCReturn;
354 }
355 
/**
 * Determines the number of threads of an interface.
 * In the "single" runmode the count is always 1. The value "auto" divides the
 * worker CPUs evenly among the live devices, any other value is parsed as a
 * decimal number.
 * @param iconf Interface configuration to fill in
 * @param entry_str Configured value ("auto" or a number)
 * @return 0 on success, -EINVAL or -ERANGE on failure
 */
356 static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str)
357 {
358  SCEnter();
// leftover CPUs of the "auto" division, kept across calls; -1 means not computed yet
359  static int32_t remaining_auto_cpus = -1;
// NOTE(review): listing line 360 (the condition opening this block) is absent from this
// view - verify against the full file.
361  SCLogError("DPDK runmode requires configured thread affinity");
362  SCReturnInt(-EINVAL);
363  }
364 
365  ThreadsAffinityType *wtaf = GetAffinityTypeFromName("worker-cpu-set");
366  if (wtaf == NULL) {
367  SCLogError("Specify worker-cpu-set list in the threading section");
368  SCReturnInt(-EINVAL);
369  }
370  ThreadsAffinityType *mtaf = GetAffinityTypeFromName("management-cpu-set");
371  if (mtaf == NULL) {
372  SCLogError("Specify management-cpu-set list in the threading section");
373  SCReturnInt(-EINVAL);
374  }
375  uint32_t sched_cpus = UtilAffinityGetAffinedCPUNum(wtaf);
// when the worker set covers every online CPU, the management CPUs are taken out of it
376  if (sched_cpus == UtilCpuGetNumProcessorsOnline()) {
377  SCLogWarning(
378  "\"all\" specified in worker CPU cores affinity, excluding management threads");
379  UtilAffinityCpusExclude(wtaf, mtaf);
380  sched_cpus = UtilAffinityGetAffinedCPUNum(wtaf);
381  }
382 
383  if (sched_cpus == 0) {
384  SCLogError("No worker CPU cores with configured affinity were configured");
385  SCReturnInt(-EINVAL);
386  } else if (UtilAffinityCpusOverlap(wtaf, mtaf) != 0) {
// an overlap only produces a warning, processing continues
387  SCLogWarning("Worker threads should not overlap with management threads in the CPU core "
388  "affinity configuration");
389  }
390 
391  const char *active_runmode = RunmodeGetActive();
392  if (active_runmode && !strcmp("single", active_runmode)) {
393  iconf->threads = 1;
394  SCReturnInt(0);
395  }
396 
397  if (entry_str == NULL) {
398  SCLogError("Number of threads for interface \"%s\" not specified", iconf->iface);
399  SCReturnInt(-EINVAL);
400  }
401 
402  if (strcmp(entry_str, "auto") == 0) {
// NOTE(review): the cast applies to sched_cpus only, not to the quotient; the division
// assumes LiveGetDeviceCount() is non-zero - confirm.
403  iconf->threads = (uint16_t)sched_cpus / LiveGetDeviceCount();
404  if (iconf->threads == 0) {
405  SCLogError("Not enough worker CPU cores with affinity were configured");
406  SCReturnInt(-ERANGE);
407  }
408 
// the remainder of the division is handed out one extra thread per interface
409  if (remaining_auto_cpus > 0) {
410  iconf->threads++;
411  remaining_auto_cpus--;
412  } else if (remaining_auto_cpus == -1) {
413  remaining_auto_cpus = (int32_t)sched_cpus % LiveGetDeviceCount();
414  if (remaining_auto_cpus > 0) {
415  iconf->threads++;
416  remaining_auto_cpus--;
417  }
418  }
419  SCLogConfig("%s: auto-assigned %u threads", iconf->iface, iconf->threads);
420  SCReturnInt(0);
421  }
422 
423  if (StringParseInt32(&iconf->threads, 10, 0, entry_str) < 0) {
424  SCLogError("Threads entry for interface %s contain non-numerical characters - \"%s\"",
425  iconf->iface, entry_str);
426  SCReturnInt(-EINVAL);
427  }
428 
429  if (iconf->threads <= 0) {
430  SCLogError("%s: positive number of threads required", iconf->iface);
431  SCReturnInt(-ERANGE);
432  }
433 
434  SCReturnInt(0);
435 }
436 
437 static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues)
438 {
439  SCEnter();
440  iconf->nb_rx_queues = nb_queues;
441  if (iconf->nb_rx_queues < 1) {
442  SCLogError("%s: positive number of RX queues is required", iconf->iface);
443  SCReturnInt(-ERANGE);
444  }
445 
446  SCReturnInt(0);
447 }
448 
449 static int ConfigSetTxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues)
450 {
451  SCEnter();
452  iconf->nb_tx_queues = nb_queues;
453  if (iconf->nb_tx_queues < 1) {
454  SCLogError("%s: positive number of TX queues is required", iconf->iface);
455  SCReturnInt(-ERANGE);
456  }
457 
458  SCReturnInt(0);
459 }
460 
461 static int ConfigSetMempoolSize(DPDKIfaceConfig *iconf, intmax_t entry_int)
462 {
463  SCEnter();
464  if (entry_int <= 0) {
465  SCLogError("%s: positive memory pool size is required", iconf->iface);
466  SCReturnInt(-ERANGE);
467  }
468 
469  iconf->mempool_size = entry_int;
470  SCReturnInt(0);
471 }
472 
473 static int ConfigSetMempoolCacheSize(DPDKIfaceConfig *iconf, const char *entry_str)
474 {
475  SCEnter();
476  if (entry_str == NULL || entry_str[0] == '\0' || strcmp(entry_str, "auto") == 0) {
477  // calculate the mempool size based on the mempool size (it needs to be already filled in)
478  // It is advised to have mempool cache size lower or equal to:
479  // RTE_MEMPOOL_CACHE_MAX_SIZE (by default 512) and "mempool-size / 1.5"
480  // and at the same time "mempool-size modulo cache_size == 0".
481  if (iconf->mempool_size == 0) {
482  SCLogError("%s: cannot calculate mempool cache size of a mempool with size %d",
483  iconf->iface, iconf->mempool_size);
484  SCReturnInt(-EINVAL);
485  }
486 
487  uint32_t max_cache_size = MAX(RTE_MEMPOOL_CACHE_MAX_SIZE, iconf->mempool_size / 1.5);
488  iconf->mempool_cache_size = GreatestDivisorUpTo(iconf->mempool_size, max_cache_size);
489  SCReturnInt(0);
490  }
491 
492  if (StringParseUint32(&iconf->mempool_cache_size, 10, 0, entry_str) < 0) {
493  SCLogError("%s: mempool cache size entry contain non-numerical characters - \"%s\"",
494  iconf->iface, entry_str);
495  SCReturnInt(-EINVAL);
496  }
497 
498  if (iconf->mempool_cache_size <= 0 || iconf->mempool_cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE) {
499  SCLogError("%s: mempool cache size requires a positive number smaller than %" PRIu32,
500  iconf->iface, RTE_MEMPOOL_CACHE_MAX_SIZE);
501  SCReturnInt(-ERANGE);
502  }
503 
504  SCReturnInt(0);
505 }
506 
507 static int ConfigSetRxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int)
508 {
509  SCEnter();
510  if (entry_int <= 0) {
511  SCLogError("%s: positive number of RX descriptors is required", iconf->iface);
512  SCReturnInt(-ERANGE);
513  }
514 
515  iconf->nb_rx_desc = entry_int;
516  SCReturnInt(0);
517 }
518 
519 static int ConfigSetTxDescriptors(DPDKIfaceConfig *iconf, intmax_t entry_int)
520 {
521  SCEnter();
522  if (entry_int <= 0) {
523  SCLogError("%s: positive number of TX descriptors is required", iconf->iface);
524  SCReturnInt(-ERANGE);
525  }
526 
527  iconf->nb_tx_desc = entry_int;
528  SCReturnInt(0);
529 }
530 
531 static int ConfigSetRSSHashFunctions(DPDKIfaceConfig *iconf, const char *entry_str)
532 {
533  SCEnter();
534  if (entry_str == NULL || entry_str[0] == '\0' || strcmp(entry_str, "auto") == 0) {
535  iconf->rss_hf = DPDK_CONFIG_DEFAULT_RSS_HASH_FUNCTIONS;
536  SCReturnInt(0);
537  }
538 
539  if (StringParseUint64(&iconf->rss_hf, 0, 0, entry_str) < 0) {
540  SCLogError("%s: RSS hash functions entry contain non-numerical characters - \"%s\"",
541  iconf->iface, entry_str);
542  SCReturnInt(-EINVAL);
543  }
544 
545  SCReturnInt(0);
546 }
547 
548 static int ConfigSetMtu(DPDKIfaceConfig *iconf, intmax_t entry_int)
549 {
550  SCEnter();
551  if (entry_int < RTE_ETHER_MIN_MTU || entry_int > RTE_ETHER_MAX_JUMBO_FRAME_LEN) {
552  SCLogError("%s: MTU size can only be between %" PRIu32 " and %" PRIu32, iconf->iface,
553  RTE_ETHER_MIN_MTU, RTE_ETHER_MAX_JUMBO_FRAME_LEN);
554  SCReturnInt(-ERANGE);
555  }
556 
557  iconf->mtu = entry_int;
558  SCReturnInt(0);
559 }
560 
561 static bool ConfigSetPromiscuousMode(DPDKIfaceConfig *iconf, int entry_bool)
562 {
563  SCEnter();
564  if (entry_bool)
565  iconf->flags |= DPDK_PROMISC;
566 
567  SCReturnBool(true);
568 }
569 
570 static bool ConfigSetMulticast(DPDKIfaceConfig *iconf, int entry_bool)
571 {
572  SCEnter();
573  if (entry_bool)
574  iconf->flags |= DPDK_MULTICAST; // enable
575 
576  SCReturnBool(true);
577 }
578 
579 static int ConfigSetChecksumChecks(DPDKIfaceConfig *iconf, int entry_bool)
580 {
581  SCEnter();
582  if (entry_bool)
583  iconf->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
584 
585  SCReturnInt(0);
586 }
587 
588 static int ConfigSetChecksumOffload(DPDKIfaceConfig *iconf, int entry_bool)
589 {
590  SCEnter();
591  if (entry_bool)
592  iconf->flags |= DPDK_RX_CHECKSUM_OFFLOAD;
593 
594  SCReturnInt(0);
595 }
596 
597 static int ConfigSetCopyIface(DPDKIfaceConfig *iconf, const char *entry_str)
598 {
599  SCEnter();
600  int retval;
601 
602  if (entry_str == NULL || entry_str[0] == '\0' || strcmp(entry_str, "none") == 0) {
603  iconf->out_iface = NULL;
604  SCReturnInt(0);
605  }
606 
607  retval = rte_eth_dev_get_port_by_name(entry_str, &iconf->out_port_id);
608  if (retval < 0) {
609  SCLogError("%s: name of the copy interface (%s) is invalid (err %s)", iconf->iface,
610  entry_str, rte_strerror(-retval));
611  SCReturnInt(retval);
612  }
613 
614  iconf->out_iface = entry_str;
615  SCReturnInt(0);
616 }
617 
618 static int ConfigSetCopyMode(DPDKIfaceConfig *iconf, const char *entry_str)
619 {
620  SCEnter();
621  if (entry_str == NULL) {
622  SCLogWarning("%s: no copy mode specified, changing to %s ", iconf->iface,
623  DPDK_CONFIG_DEFAULT_COPY_MODE);
624  entry_str = DPDK_CONFIG_DEFAULT_COPY_MODE;
625  }
626 
627  if (strcmp(entry_str, "none") != 0 && strcmp(entry_str, "tap") != 0 &&
628  strcmp(entry_str, "ips") != 0) {
629  SCLogWarning("%s: copy mode \"%s\" is not one of the possible values (none|tap|ips). "
630  "Changing to %s",
631  entry_str, iconf->iface, DPDK_CONFIG_DEFAULT_COPY_MODE);
632  entry_str = DPDK_CONFIG_DEFAULT_COPY_MODE;
633  }
634 
635  if (strcmp(entry_str, "none") == 0) {
636  iconf->copy_mode = DPDK_COPY_MODE_NONE;
637  } else if (strcmp(entry_str, "tap") == 0) {
638  iconf->copy_mode = DPDK_COPY_MODE_TAP;
639  } else if (strcmp(entry_str, "ips") == 0) {
640  iconf->copy_mode = DPDK_COPY_MODE_IPS;
641  }
642 
643  SCReturnInt(0);
644 }
645 
646 static int ConfigSetCopyIfaceSettings(DPDKIfaceConfig *iconf, const char *iface, const char *mode)
647 {
648  SCEnter();
649  int retval;
650 
651  retval = ConfigSetCopyIface(iconf, iface);
652  if (retval < 0)
653  SCReturnInt(retval);
654 
655  retval = ConfigSetCopyMode(iconf, mode);
656  if (retval < 0)
657  SCReturnInt(retval);
658 
659  if (iconf->copy_mode == DPDK_COPY_MODE_NONE) {
660  if (iconf->out_iface != NULL)
661  iconf->out_iface = NULL;
662  SCReturnInt(0);
663  }
664 
665  if (iconf->out_iface == NULL || strlen(iconf->out_iface) <= 0) {
666  SCLogError("%s: copy mode enabled but interface not set", iconf->iface);
667  SCReturnInt(-EINVAL);
668  }
669 
670  SCReturnInt(0);
671 }
672 
/**
 * Loads the configuration of one interface from the "dpdk.interfaces" section.
 * Every setting is looked up in the interface node with a fallback to the
 * default node; when the lookup does not return 1, the matching
 * DPDK_CONFIG_DEFAULT_* value is applied instead (the copy settings are the exception).
 * @param iconf Interface configuration to fill in
 * @param iface Name of the interface
 * @return 0 on success, negative value on failure
 */
// NOTE(review): listing lines 708, 715, 722, 729, 748, 755, 762, 769 and 783 (the
// "retval = ...(" openers of the lookups) are absent from this view - verify against the full file.
673 static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface)
674 {
675  SCEnter();
676  int retval;
677  ConfNode *if_root;
678  ConfNode *if_default;
679  const char *entry_str = NULL;
680  intmax_t entry_int = 0;
681  int entry_bool = 0;
682  const char *copy_iface_str = NULL;
683  const char *copy_mode_str = NULL;
684 
685  ConfigSetIface(iconf, iface);
686 
687  retval = ConfSetRootAndDefaultNodes("dpdk.interfaces", iconf->iface, &if_root, &if_default);
688  if (retval < 0) {
689  FatalError("failed to find DPDK configuration for the interface %s", iconf->iface);
690  }
691 
692  retval = ConfGetChildValueWithDefault(if_root, if_default, dpdk_yaml.threads, &entry_str) != 1
693  ? ConfigSetThreads(iconf, DPDK_CONFIG_DEFAULT_THREADS)
694  : ConfigSetThreads(iconf, entry_str);
695  if (retval < 0)
696  SCReturnInt(retval);
697 
698  // currently only mapping "1 thread == 1 RX (and 1 TX queue in IPS mode)" is supported
699  retval = ConfigSetRxQueues(iconf, (uint16_t)iconf->threads);
700  if (retval < 0)
701  SCReturnInt(retval);
702 
703  // currently only mapping "1 thread == 1 RX (and 1 TX queue in IPS mode)" is supported
704  retval = ConfigSetTxQueues(iconf, (uint16_t)iconf->threads);
705  if (retval < 0)
706  SCReturnInt(retval);
707 
709  if_root, if_default, dpdk_yaml.mempool_size, &entry_int) != 1
710  ? ConfigSetMempoolSize(iconf, DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE)
711  : ConfigSetMempoolSize(iconf, entry_int);
712  if (retval < 0)
713  SCReturnInt(retval);
714 
// must run after the mempool size is set, the "auto" cache size is derived from it
716  if_root, if_default, dpdk_yaml.mempool_cache_size, &entry_str) != 1
717  ? ConfigSetMempoolCacheSize(iconf, DPDK_CONFIG_DEFAULT_MEMPOOL_CACHE_SIZE)
718  : ConfigSetMempoolCacheSize(iconf, entry_str);
719  if (retval < 0)
720  SCReturnInt(retval);
721 
723  if_root, if_default, dpdk_yaml.rx_descriptors, &entry_int) != 1
724  ? ConfigSetRxDescriptors(iconf, DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS)
725  : ConfigSetRxDescriptors(iconf, entry_int);
726  if (retval < 0)
727  SCReturnInt(retval);
728 
730  if_root, if_default, dpdk_yaml.tx_descriptors, &entry_int) != 1
731  ? ConfigSetTxDescriptors(iconf, DPDK_CONFIG_DEFAULT_TX_DESCRIPTORS)
732  : ConfigSetTxDescriptors(iconf, entry_int);
733  if (retval < 0)
734  SCReturnInt(retval);
735 
736  retval = ConfGetChildValueIntWithDefault(if_root, if_default, dpdk_yaml.mtu, &entry_int) != 1
737  ? ConfigSetMtu(iconf, DPDK_CONFIG_DEFAULT_MTU)
738  : ConfigSetMtu(iconf, entry_int);
739  if (retval < 0)
740  SCReturnInt(retval);
741 
// a NULL entry makes ConfigSetRSSHashFunctions() pick the default hash functions
742  retval = ConfGetChildValueWithDefault(if_root, if_default, dpdk_yaml.rss_hf, &entry_str) != 1
743  ? ConfigSetRSSHashFunctions(iconf, NULL)
744  : ConfigSetRSSHashFunctions(iconf, entry_str);
745  if (retval < 0)
746  SCReturnInt(retval);
747 
// the promiscuous and multicast setters return bool, hence the comparison with true
749  if_root, if_default, dpdk_yaml.promisc, &entry_bool) != 1
750  ? ConfigSetPromiscuousMode(iconf, DPDK_CONFIG_DEFAULT_PROMISCUOUS_MODE)
751  : ConfigSetPromiscuousMode(iconf, entry_bool);
752  if (retval != true)
753  SCReturnInt(-EINVAL);
754 
756  if_root, if_default, dpdk_yaml.multicast, &entry_bool) != 1
757  ? ConfigSetMulticast(iconf, DPDK_CONFIG_DEFAULT_MULTICAST_MODE)
758  : ConfigSetMulticast(iconf, entry_bool);
759  if (retval != true)
760  SCReturnInt(-EINVAL);
761 
763  if_root, if_default, dpdk_yaml.checksum_checks, &entry_bool) != 1
764  ? ConfigSetChecksumChecks(iconf, DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION)
765  : ConfigSetChecksumChecks(iconf, entry_bool);
766  if (retval < 0)
767  SCReturnInt(retval);
768 
770  if_root, if_default, dpdk_yaml.checksum_checks_offload, &entry_bool) != 1
771  ? ConfigSetChecksumOffload(
772  iconf, DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION_OFFLOAD)
773  : ConfigSetChecksumOffload(iconf, entry_bool);
774  if (retval < 0)
775  SCReturnInt(retval);
776 
// NOTE(review): unlike the settings above, a missing copy-mode/copy-iface entry fails the
// load with -ENOENT instead of falling back to a default; the following "retval < 0"
// checks cannot trigger because every value other than 1 has already returned.
777  retval = ConfGetChildValueWithDefault(if_root, if_default, dpdk_yaml.copy_mode, &copy_mode_str);
778  if (retval != 1)
779  SCReturnInt(-ENOENT);
780  if (retval < 0)
781  SCReturnInt(retval);
782 
784  if_root, if_default, dpdk_yaml.copy_iface, &copy_iface_str);
785  if (retval != 1)
786  SCReturnInt(-ENOENT);
787  if (retval < 0)
788  SCReturnInt(retval);
789 
790  retval = ConfigSetCopyIfaceSettings(iconf, copy_iface_str, copy_mode_str);
791  if (retval < 0)
792  SCReturnInt(retval);
793 
794  SCReturnInt(0);
795 }
796 
797 static int32_t ConfigValidateThreads(uint16_t iface_threads)
798 {
799  static uint32_t total_cpus = 0;
800  total_cpus += iface_threads;
801  ThreadsAffinityType *wtaf = GetAffinityTypeFromName("worker-cpu-set");
802  if (wtaf == NULL) {
803  SCLogError("Specify worker-cpu-set list in the threading section");
804  return -1;
805  }
806  if (total_cpus > UtilAffinityGetAffinedCPUNum(wtaf)) {
807  SCLogError("Interfaces requested more cores than configured in the threading section "
808  "(requested %d configured %d",
809  total_cpus, UtilAffinityGetAffinedCPUNum(wtaf));
810  return -1;
811  }
812 
813  return 0;
814 }
815 
816 static DPDKIfaceConfig *ConfigParse(const char *iface)
817 {
818  SCEnter();
819  int retval;
820  DPDKIfaceConfig *iconf = NULL;
821  if (iface == NULL)
822  FatalError("DPDK interface is NULL");
823 
824  ConfigInit(&iconf);
825  retval = ConfigLoad(iconf, iface);
826  if (retval < 0 || ConfigValidateThreads(iconf->threads) != 0) {
827  iconf->DerefFunc(iconf);
828  SCReturnPtr(NULL, "void *");
829  }
830 
831  SCReturnPtr(iconf, "DPDKIfaceConfig *");
832 }
833 
834 static void DeviceSetPMDSpecificRSS(struct rte_eth_rss_conf *rss_conf, const char *driver_name)
835 {
836  // RSS is configured in a specific way for a driver i40e and DPDK version <= 19.xx
837  if (strcmp(driver_name, "net_i40e") == 0)
838  i40eDeviceSetRSSConf(rss_conf);
839  if (strcmp(driver_name, "net_ice") == 0)
840  iceDeviceSetRSSHashFunction(&rss_conf->rss_hf);
841  if (strcmp(driver_name, "net_ixgbe") == 0)
842  ixgbeDeviceSetRSSHashFunction(&rss_conf->rss_hf);
843  if (strcmp(driver_name, "net_e1000_igb") == 0)
844  rss_conf->rss_hf = (RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_IPV6_EX);
845 }
846 
/**
 * Finds the position of the lowest set bit.
 * @param bits Value to inspect
 * @return zero-based position of the lowest set bit, -1 if no bit is set
 */
static int GetFirstSetBitPosition(uint64_t bits)
{
    uint64_t pos = 0;
    while (pos < 64) {
        if ((bits & BIT_U64(pos)) != 0) {
            return (int)pos;
        }
        pos++;
    }
    return -1;
}
856 
857 static void DumpRSSFlags(const uint64_t requested, const uint64_t actual)
858 {
859  SCLogConfig("REQUESTED (groups):");
860 
861  SCLogConfig(
862  "RTE_ETH_RSS_IP %sset", ((requested & RTE_ETH_RSS_IP) == RTE_ETH_RSS_IP) ? "" : "NOT ");
863  SCLogConfig("RTE_ETH_RSS_TCP %sset",
864  ((requested & RTE_ETH_RSS_TCP) == RTE_ETH_RSS_TCP) ? "" : "NOT ");
865  SCLogConfig("RTE_ETH_RSS_UDP %sset",
866  ((requested & RTE_ETH_RSS_UDP) == RTE_ETH_RSS_UDP) ? "" : "NOT ");
867  SCLogConfig("RTE_ETH_RSS_SCTP %sset",
868  ((requested & RTE_ETH_RSS_SCTP) == RTE_ETH_RSS_SCTP) ? "" : "NOT ");
869  SCLogConfig("RTE_ETH_RSS_TUNNEL %sset",
870  ((requested & RTE_ETH_RSS_TUNNEL) == RTE_ETH_RSS_TUNNEL) ? "" : "NOT ");
871 
872  SCLogConfig("REQUESTED (individual):");
873  SCLogConfig("RTE_ETH_RSS_IPV4 (Bit position: %d) %sset",
874  GetFirstSetBitPosition(RTE_ETH_RSS_IPV4), (requested & RTE_ETH_RSS_IPV4) ? "" : "NOT ");
875  SCLogConfig("RTE_ETH_RSS_FRAG_IPV4 (Bit position: %d) %sset",
876  GetFirstSetBitPosition(RTE_ETH_RSS_FRAG_IPV4),
877  (requested & RTE_ETH_RSS_FRAG_IPV4) ? "" : "NOT ");
878  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_TCP (Bit position: %d) %sset",
879  GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV4_TCP),
880  (requested & RTE_ETH_RSS_NONFRAG_IPV4_TCP) ? "" : "NOT ");
881  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_UDP (Bit position: %d) %sset",
882  GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV4_UDP),
883  (requested & RTE_ETH_RSS_NONFRAG_IPV4_UDP) ? "" : "NOT ");
884  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_SCTP (Bit position: %d) %sset",
885  GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV4_SCTP),
886  (requested & RTE_ETH_RSS_NONFRAG_IPV4_SCTP) ? "" : "NOT ");
887  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_OTHER (Bit position: %d) %sset",
888  GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV4_OTHER),
889  (requested & RTE_ETH_RSS_NONFRAG_IPV4_OTHER) ? "" : "NOT ");
890  SCLogConfig("RTE_ETH_RSS_IPV6 (Bit position: %d) %sset",
891  GetFirstSetBitPosition(RTE_ETH_RSS_IPV6), (requested & RTE_ETH_RSS_IPV6) ? "" : "NOT ");
892  SCLogConfig("RTE_ETH_RSS_FRAG_IPV6 (Bit position: %d) %sset",
893  GetFirstSetBitPosition(RTE_ETH_RSS_FRAG_IPV6),
894  (requested & RTE_ETH_RSS_FRAG_IPV6) ? "" : "NOT ");
895  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_TCP (Bit position: %d) %sset",
896  GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV6_TCP),
897  (requested & RTE_ETH_RSS_NONFRAG_IPV6_TCP) ? "" : "NOT ");
898  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_UDP (Bit position: %d) %sset",
899  GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV6_UDP),
900  (requested & RTE_ETH_RSS_NONFRAG_IPV6_UDP) ? "" : "NOT ");
901  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_SCTP (Bit position: %d) %sset",
902  GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV6_SCTP),
903  (requested & RTE_ETH_RSS_NONFRAG_IPV6_SCTP) ? "" : "NOT ");
904  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_OTHER (Bit position: %d) %sset",
905  GetFirstSetBitPosition(RTE_ETH_RSS_NONFRAG_IPV6_OTHER),
906  (requested & RTE_ETH_RSS_NONFRAG_IPV6_OTHER) ? "" : "NOT ");
907 
908  SCLogConfig("RTE_ETH_RSS_L2_PAYLOAD (Bit position: %d) %sset",
909  GetFirstSetBitPosition(RTE_ETH_RSS_L2_PAYLOAD),
910  (requested & RTE_ETH_RSS_L2_PAYLOAD) ? "" : "NOT ");
911  SCLogConfig("RTE_ETH_RSS_IPV6_EX (Bit position: %d) %sset",
912  GetFirstSetBitPosition(RTE_ETH_RSS_IPV6_EX),
913  (requested & RTE_ETH_RSS_IPV6_EX) ? "" : "NOT ");
914  SCLogConfig("RTE_ETH_RSS_IPV6_TCP_EX (Bit position: %d) %sset",
915  GetFirstSetBitPosition(RTE_ETH_RSS_IPV6_TCP_EX),
916  (requested & RTE_ETH_RSS_IPV6_TCP_EX) ? "" : "NOT ");
917  SCLogConfig("RTE_ETH_RSS_IPV6_UDP_EX (Bit position: %d) %sset",
918  GetFirstSetBitPosition(RTE_ETH_RSS_IPV6_UDP_EX),
919  (requested & RTE_ETH_RSS_IPV6_UDP_EX) ? "" : "NOT ");
920 
921  SCLogConfig("RTE_ETH_RSS_PORT (Bit position: %d) %sset",
922  GetFirstSetBitPosition(RTE_ETH_RSS_PORT), (requested & RTE_ETH_RSS_PORT) ? "" : "NOT ");
923  SCLogConfig("RTE_ETH_RSS_VXLAN (Bit position: %d) %sset",
924  GetFirstSetBitPosition(RTE_ETH_RSS_VXLAN),
925  (requested & RTE_ETH_RSS_VXLAN) ? "" : "NOT ");
926  SCLogConfig("RTE_ETH_RSS_NVGRE (Bit position: %d) %sset",
927  GetFirstSetBitPosition(RTE_ETH_RSS_NVGRE),
928  (requested & RTE_ETH_RSS_NVGRE) ? "" : "NOT ");
929  SCLogConfig("RTE_ETH_RSS_GTPU (Bit position: %d) %sset",
930  GetFirstSetBitPosition(RTE_ETH_RSS_GTPU), (requested & RTE_ETH_RSS_GTPU) ? "" : "NOT ");
931 
932  SCLogConfig("RTE_ETH_RSS_L3_SRC_ONLY (Bit position: %d) %sset",
933  GetFirstSetBitPosition(RTE_ETH_RSS_L3_SRC_ONLY),
934  (requested & RTE_ETH_RSS_L3_SRC_ONLY) ? "" : "NOT ");
935  SCLogConfig("RTE_ETH_RSS_L3_DST_ONLY (Bit position: %d) %sset",
936  GetFirstSetBitPosition(RTE_ETH_RSS_L3_DST_ONLY),
937  (requested & RTE_ETH_RSS_L3_DST_ONLY) ? "" : "NOT ");
938  SCLogConfig("RTE_ETH_RSS_L4_SRC_ONLY (Bit position: %d) %sset",
939  GetFirstSetBitPosition(RTE_ETH_RSS_L4_SRC_ONLY),
940  (requested & RTE_ETH_RSS_L4_SRC_ONLY) ? "" : "NOT ");
941  SCLogConfig("RTE_ETH_RSS_L4_DST_ONLY (Bit position: %d) %sset",
942  GetFirstSetBitPosition(RTE_ETH_RSS_L4_DST_ONLY),
943  (requested & RTE_ETH_RSS_L4_DST_ONLY) ? "" : "NOT ");
944  SCLogConfig("ACTUAL (group):");
945  SCLogConfig(
946  "RTE_ETH_RSS_IP %sset", ((actual & RTE_ETH_RSS_IP) == RTE_ETH_RSS_IP) ? "" : "NOT ");
947  SCLogConfig(
948  "RTE_ETH_RSS_TCP %sset", ((actual & RTE_ETH_RSS_TCP) == RTE_ETH_RSS_TCP) ? "" : "NOT ");
949  SCLogConfig(
950  "RTE_ETH_RSS_UDP %sset", ((actual & RTE_ETH_RSS_UDP) == RTE_ETH_RSS_UDP) ? "" : "NOT ");
951  SCLogConfig("RTE_ETH_RSS_SCTP %sset",
952  ((actual & RTE_ETH_RSS_SCTP) == RTE_ETH_RSS_SCTP) ? "" : "NOT ");
953  SCLogConfig("RTE_ETH_RSS_TUNNEL %sset",
954  ((actual & RTE_ETH_RSS_TUNNEL) == RTE_ETH_RSS_TUNNEL) ? "" : "NOT ");
955 
956  SCLogConfig("ACTUAL (individual flags):");
957  SCLogConfig("RTE_ETH_RSS_IPV4 %sset", (actual & RTE_ETH_RSS_IPV4) ? "" : "NOT ");
958  SCLogConfig("RTE_ETH_RSS_FRAG_IPV4 %sset", (actual & RTE_ETH_RSS_FRAG_IPV4) ? "" : "NOT ");
959  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_TCP %sset",
960  (actual & RTE_ETH_RSS_NONFRAG_IPV4_TCP) ? "" : "NOT ");
961  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_UDP %sset",
962  (actual & RTE_ETH_RSS_NONFRAG_IPV4_UDP) ? "" : "NOT ");
963  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_SCTP %sset",
964  (actual & RTE_ETH_RSS_NONFRAG_IPV4_SCTP) ? "" : "NOT ");
965  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV4_OTHER %sset",
966  (actual & RTE_ETH_RSS_NONFRAG_IPV4_OTHER) ? "" : "NOT ");
967  SCLogConfig("RTE_ETH_RSS_IPV6 %sset", (actual & RTE_ETH_RSS_IPV6) ? "" : "NOT ");
968  SCLogConfig("RTE_ETH_RSS_FRAG_IPV6 %sset", (actual & RTE_ETH_RSS_FRAG_IPV6) ? "" : "NOT ");
969  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_TCP %sset",
970  (actual & RTE_ETH_RSS_NONFRAG_IPV6_TCP) ? "" : "NOT ");
971  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_UDP %sset",
972  (actual & RTE_ETH_RSS_NONFRAG_IPV6_UDP) ? "" : "NOT ");
973  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_SCTP %sset",
974  (actual & RTE_ETH_RSS_NONFRAG_IPV6_SCTP) ? "" : "NOT ");
975  SCLogConfig("RTE_ETH_RSS_NONFRAG_IPV6_OTHER %sset",
976  (actual & RTE_ETH_RSS_NONFRAG_IPV6_OTHER) ? "" : "NOT ");
977 
978  SCLogConfig("RTE_ETH_RSS_L2_PAYLOAD %sset", (actual & RTE_ETH_RSS_L2_PAYLOAD) ? "" : "NOT ");
979  SCLogConfig("RTE_ETH_RSS_IPV6_EX %sset", (actual & RTE_ETH_RSS_IPV6_EX) ? "" : "NOT ");
980  SCLogConfig("RTE_ETH_RSS_IPV6_TCP_EX %sset", (actual & RTE_ETH_RSS_IPV6_TCP_EX) ? "" : "NOT ");
981  SCLogConfig("RTE_ETH_RSS_IPV6_UDP_EX %sset", (actual & RTE_ETH_RSS_IPV6_UDP_EX) ? "" : "NOT ");
982 
983  SCLogConfig("RTE_ETH_RSS_PORT %sset", (actual & RTE_ETH_RSS_PORT) ? "" : "NOT ");
984  SCLogConfig("RTE_ETH_RSS_VXLAN %sset", (actual & RTE_ETH_RSS_VXLAN) ? "" : "NOT ");
985  SCLogConfig("RTE_ETH_RSS_NVGRE %sset", (actual & RTE_ETH_RSS_NVGRE) ? "" : "NOT ");
986  SCLogConfig("RTE_ETH_RSS_GTPU %sset", (actual & RTE_ETH_RSS_GTPU) ? "" : "NOT ");
987 
988  SCLogConfig("RTE_ETH_RSS_L3_SRC_ONLY %sset", (actual & RTE_ETH_RSS_L3_SRC_ONLY) ? "" : "NOT ");
989  SCLogConfig("RTE_ETH_RSS_L3_DST_ONLY %sset", (actual & RTE_ETH_RSS_L3_DST_ONLY) ? "" : "NOT ");
990  SCLogConfig("RTE_ETH_RSS_L4_SRC_ONLY %sset", (actual & RTE_ETH_RSS_L4_SRC_ONLY) ? "" : "NOT ");
991  SCLogConfig("RTE_ETH_RSS_L4_DST_ONLY %sset", (actual & RTE_ETH_RSS_L4_DST_ONLY) ? "" : "NOT ");
992 }
993 
994 static void DumpRXOffloadCapabilities(const uint64_t rx_offld_capa)
995 {
996  SCLogConfig("RTE_ETH_RX_OFFLOAD_VLAN_STRIP - %savailable",
997  rx_offld_capa & RTE_ETH_RX_OFFLOAD_VLAN_STRIP ? "" : "NOT ");
998  SCLogConfig("RTE_ETH_RX_OFFLOAD_IPV4_CKSUM - %savailable",
999  rx_offld_capa & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM ? "" : "NOT ");
1000  SCLogConfig("RTE_ETH_RX_OFFLOAD_UDP_CKSUM - %savailable",
1001  rx_offld_capa & RTE_ETH_RX_OFFLOAD_UDP_CKSUM ? "" : "NOT ");
1002  SCLogConfig("RTE_ETH_RX_OFFLOAD_TCP_CKSUM - %savailable",
1003  rx_offld_capa & RTE_ETH_RX_OFFLOAD_TCP_CKSUM ? "" : "NOT ");
1004  SCLogConfig("RTE_ETH_RX_OFFLOAD_TCP_LRO - %savailable",
1005  rx_offld_capa & RTE_ETH_RX_OFFLOAD_TCP_LRO ? "" : "NOT ");
1006  SCLogConfig("RTE_ETH_RX_OFFLOAD_QINQ_STRIP - %savailable",
1007  rx_offld_capa & RTE_ETH_RX_OFFLOAD_QINQ_STRIP ? "" : "NOT ");
1008  SCLogConfig("RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM - %savailable",
1009  rx_offld_capa & RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM ? "" : "NOT ");
1010  SCLogConfig("RTE_ETH_RX_OFFLOAD_MACSEC_STRIP - %savailable",
1011  rx_offld_capa & RTE_ETH_RX_OFFLOAD_MACSEC_STRIP ? "" : "NOT ");
1012 #if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
1013  SCLogConfig("RTE_ETH_RX_OFFLOAD_HEADER_SPLIT - %savailable",
1014  rx_offld_capa & RTE_ETH_RX_OFFLOAD_HEADER_SPLIT ? "" : "NOT ");
1015 #endif
1016  SCLogConfig("RTE_ETH_RX_OFFLOAD_VLAN_FILTER - %savailable",
1017  rx_offld_capa & RTE_ETH_RX_OFFLOAD_VLAN_FILTER ? "" : "NOT ");
1018  SCLogConfig("RTE_ETH_RX_OFFLOAD_VLAN_EXTEND - %savailable",
1019  rx_offld_capa & RTE_ETH_RX_OFFLOAD_VLAN_EXTEND ? "" : "NOT ");
1020  SCLogConfig("RTE_ETH_RX_OFFLOAD_SCATTER - %savailable",
1021  rx_offld_capa & RTE_ETH_RX_OFFLOAD_SCATTER ? "" : "NOT ");
1022  SCLogConfig("RTE_ETH_RX_OFFLOAD_TIMESTAMP - %savailable",
1023  rx_offld_capa & RTE_ETH_RX_OFFLOAD_TIMESTAMP ? "" : "NOT ");
1024  SCLogConfig("RTE_ETH_RX_OFFLOAD_SECURITY - %savailable",
1025  rx_offld_capa & RTE_ETH_RX_OFFLOAD_SECURITY ? "" : "NOT ");
1026  SCLogConfig("RTE_ETH_RX_OFFLOAD_KEEP_CRC - %savailable",
1027  rx_offld_capa & RTE_ETH_RX_OFFLOAD_KEEP_CRC ? "" : "NOT ");
1028  SCLogConfig("RTE_ETH_RX_OFFLOAD_SCTP_CKSUM - %savailable",
1029  rx_offld_capa & RTE_ETH_RX_OFFLOAD_SCTP_CKSUM ? "" : "NOT ");
1030  SCLogConfig("RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM - %savailable",
1031  rx_offld_capa & RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM ? "" : "NOT ");
1032  SCLogConfig("RTE_ETH_RX_OFFLOAD_RSS_HASH - %savailable",
1033  rx_offld_capa & RTE_ETH_RX_OFFLOAD_RSS_HASH ? "" : "NOT ");
1034 #if RTE_VERSION >= RTE_VERSION_NUM(20, 11, 0, 0)
1035  SCLogConfig("RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT - %savailable",
1036  rx_offld_capa & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT ? "" : "NOT ");
1037 #endif
1038 }
1039 
1040 static int DeviceValidateMTU(const DPDKIfaceConfig *iconf, const struct rte_eth_dev_info *dev_info)
1041 {
1042  if (iconf->mtu > dev_info->max_mtu || iconf->mtu < dev_info->min_mtu) {
1043  SCLogError("%s: MTU out of bounds. "
1044  "Min MTU: %" PRIu16 " Max MTU: %" PRIu16,
1045  iconf->iface, dev_info->min_mtu, dev_info->max_mtu);
1046  SCReturnInt(-ERANGE);
1047  }
1048 
1049 #if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
1050  // check if jumbo frames are set and are available
1051  if (iconf->mtu > RTE_ETHER_MAX_LEN &&
1052  !(dev_info->rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)) {
1053  SCLogError("%s: jumbo frames not supported, set MTU to 1500", iconf->iface);
1054  SCReturnInt(-EINVAL);
1055  }
1056 #endif
1057 
1058  SCReturnInt(0);
1059 }
1060 
1061 static void DeviceSetMTU(struct rte_eth_conf *port_conf, uint16_t mtu)
1062 {
1063 #if RTE_VERSION >= RTE_VERSION_NUM(21, 11, 0, 0)
1064  port_conf->rxmode.mtu = mtu;
1065 #else
1066  port_conf->rxmode.max_rx_pkt_len = mtu;
1067  if (mtu > RTE_ETHER_MAX_LEN) {
1068  port_conf->rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
1069  }
1070 #endif
1071 }
1072 
1073 /**
1074  * \param port_id - queried port
1075  * \param socket_id - socket ID of the queried port
1076  * \return non-negative number on success, negative on failure (errno)
1077  */
1078 static int32_t DeviceSetSocketID(uint16_t port_id, int32_t *socket_id)
1079 {
1080  rte_errno = 0;
1081  int retval = rte_eth_dev_socket_id(port_id);
1082  *socket_id = retval;
1083 
1084 #if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0) // DPDK API changed since 22.11
1085  retval = -rte_errno;
1086 #else
1087  if (retval == SOCKET_ID_ANY)
1088  retval = 0; // DPDK couldn't determine socket ID of a port
1089 #endif
1090 
1091  return retval;
1092 }
1093 
1094 static void DeviceInitPortConf(const DPDKIfaceConfig *iconf,
1095  const struct rte_eth_dev_info *dev_info, struct rte_eth_conf *port_conf)
1096 {
1097  DumpRXOffloadCapabilities(dev_info->rx_offload_capa);
1098  *port_conf = (struct rte_eth_conf){
1099  .rxmode = {
1100  .mq_mode = RTE_ETH_MQ_RX_NONE,
1101  .offloads = 0, // turn every offload off to prevent any packet modification
1102  },
1103  .txmode = {
1104  .mq_mode = RTE_ETH_MQ_TX_NONE,
1105  .offloads = 0,
1106  },
1107  };
1108 
1109  // configure RX offloads
1110  if (dev_info->rx_offload_capa & RTE_ETH_RX_OFFLOAD_RSS_HASH) {
1111  if (iconf->nb_rx_queues > 1) {
1112  SCLogConfig("%s: RSS enabled for %d queues", iconf->iface, iconf->nb_rx_queues);
1113  port_conf->rx_adv_conf.rss_conf = (struct rte_eth_rss_conf){
1114  .rss_key = rss_hkey,
1115  .rss_key_len = RSS_HKEY_LEN,
1116  .rss_hf = iconf->rss_hf,
1117  };
1118 
1119  const char *dev_driver = dev_info->driver_name;
1120  if (strcmp(dev_info->driver_name, "net_bonding") == 0) {
1121  dev_driver = BondingDeviceDriverGet(iconf->port_id);
1122  }
1123 
1124  DeviceSetPMDSpecificRSS(&port_conf->rx_adv_conf.rss_conf, dev_driver);
1125 
1126  uint64_t rss_hf_tmp =
1127  port_conf->rx_adv_conf.rss_conf.rss_hf & dev_info->flow_type_rss_offloads;
1128  if (port_conf->rx_adv_conf.rss_conf.rss_hf != rss_hf_tmp) {
1129  DumpRSSFlags(port_conf->rx_adv_conf.rss_conf.rss_hf, rss_hf_tmp);
1130 
1131  SCLogWarning("%s: modified RSS hash function based on hardware support: "
1132  "requested:%#" PRIx64 ", configured:%#" PRIx64,
1133  iconf->iface, port_conf->rx_adv_conf.rss_conf.rss_hf, rss_hf_tmp);
1134  port_conf->rx_adv_conf.rss_conf.rss_hf = rss_hf_tmp;
1135  }
1136  port_conf->rxmode.mq_mode = RTE_ETH_MQ_RX_RSS;
1137  } else {
1138  SCLogConfig("%s: RSS not enabled", iconf->iface);
1139  port_conf->rx_adv_conf.rss_conf.rss_key = NULL;
1140  port_conf->rx_adv_conf.rss_conf.rss_hf = 0;
1141  }
1142  } else {
1143  SCLogConfig("%s: RSS not supported", iconf->iface);
1144  }
1145 
1146  if (iconf->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
1147  SCLogConfig("%s: checksum validation disabled", iconf->iface);
1148  } else if ((dev_info->rx_offload_capa & RTE_ETH_RX_OFFLOAD_CHECKSUM) ==
1149  RTE_ETH_RX_OFFLOAD_CHECKSUM) { // multibit comparison to make sure all bits are set
1150  if (iconf->checksum_mode == CHECKSUM_VALIDATION_ENABLE &&
1151  iconf->flags & DPDK_RX_CHECKSUM_OFFLOAD) {
1152  SCLogConfig("%s: IP, TCP and UDP checksum validation offloaded", iconf->iface);
1153  port_conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_CHECKSUM;
1154  } else if (iconf->checksum_mode == CHECKSUM_VALIDATION_ENABLE &&
1155  !(iconf->flags & DPDK_RX_CHECKSUM_OFFLOAD)) {
1156  SCLogConfig("%s: checksum validation enabled (but can be offloaded)", iconf->iface);
1157  }
1158  }
1159 
1160  DeviceSetMTU(port_conf, iconf->mtu);
1161 
1162  if (dev_info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
1163  port_conf->txmode.offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
1164  }
1165 }
1166 
1167 static int DeviceConfigureQueues(DPDKIfaceConfig *iconf, const struct rte_eth_dev_info *dev_info,
1168  const struct rte_eth_conf *port_conf)
1169 {
1170  SCEnter();
1171  int retval;
1172  uint16_t mtu_size;
1173  uint16_t mbuf_size;
1174  struct rte_eth_rxconf rxq_conf;
1175  struct rte_eth_txconf txq_conf;
1176 
1177  char mempool_name[64];
1178  snprintf(mempool_name, 64, "mempool_%.20s", iconf->iface);
1179  // +4 for VLAN header
1180  mtu_size = iconf->mtu + RTE_ETHER_CRC_LEN + RTE_ETHER_HDR_LEN + 4;
1181  mbuf_size = ROUNDUP(mtu_size, 1024) + RTE_PKTMBUF_HEADROOM;
1182  SCLogConfig("%s: creating packet mbuf pool %s of size %d, cache size %d, mbuf size %d",
1183  iconf->iface, mempool_name, iconf->mempool_size, iconf->mempool_cache_size, mbuf_size);
1184 
1185  iconf->pkt_mempool = rte_pktmbuf_pool_create(mempool_name, iconf->mempool_size,
1186  iconf->mempool_cache_size, 0, mbuf_size, (int)iconf->socket_id);
1187  if (iconf->pkt_mempool == NULL) {
1188  retval = -rte_errno;
1189  SCLogError("%s: rte_pktmbuf_pool_create failed with code %d (mempool: %s) - %s",
1190  iconf->iface, rte_errno, mempool_name, rte_strerror(rte_errno));
1191  SCReturnInt(retval);
1192  }
1193 
1194  for (uint16_t queue_id = 0; queue_id < iconf->nb_rx_queues; queue_id++) {
1195  rxq_conf = dev_info->default_rxconf;
1196  rxq_conf.offloads = port_conf->rxmode.offloads;
1197  rxq_conf.rx_thresh.hthresh = 0;
1198  rxq_conf.rx_thresh.pthresh = 0;
1199  rxq_conf.rx_thresh.wthresh = 0;
1200  rxq_conf.rx_free_thresh = 0;
1201  rxq_conf.rx_drop_en = 0;
1202  SCLogConfig("%s: rx queue setup: queue:%d port:%d rx_desc:%d tx_desc:%d rx: hthresh: %d "
1203  "pthresh %d wthresh %d free_thresh %d drop_en %d offloads %lu",
1204  iconf->iface, queue_id, iconf->port_id, iconf->nb_rx_desc, iconf->nb_tx_desc,
1205  rxq_conf.rx_thresh.hthresh, rxq_conf.rx_thresh.pthresh, rxq_conf.rx_thresh.wthresh,
1206  rxq_conf.rx_free_thresh, rxq_conf.rx_drop_en, rxq_conf.offloads);
1207 
1208  retval = rte_eth_rx_queue_setup(iconf->port_id, queue_id, iconf->nb_rx_desc,
1209  iconf->socket_id, &rxq_conf, iconf->pkt_mempool);
1210  if (retval < 0) {
1211  rte_mempool_free(iconf->pkt_mempool);
1212  SCLogError(
1213  "%s: rte_eth_rx_queue_setup failed with code %d for device queue %u of port %u",
1214  iconf->iface, retval, queue_id, iconf->port_id);
1215  SCReturnInt(retval);
1216  }
1217  }
1218 
1219  for (uint16_t queue_id = 0; queue_id < iconf->nb_tx_queues; queue_id++) {
1220  txq_conf = dev_info->default_txconf;
1221  txq_conf.offloads = port_conf->txmode.offloads;
1222  SCLogConfig("%s: tx queue setup: queue:%d port:%d", iconf->iface, queue_id, iconf->port_id);
1223  retval = rte_eth_tx_queue_setup(
1224  iconf->port_id, queue_id, iconf->nb_tx_desc, iconf->socket_id, &txq_conf);
1225  if (retval < 0) {
1226  rte_mempool_free(iconf->pkt_mempool);
1227  SCLogError(
1228  "%s: rte_eth_tx_queue_setup failed with code %d for device queue %u of port %u",
1229  iconf->iface, retval, queue_id, iconf->port_id);
1230  SCReturnInt(retval);
1231  }
1232  }
1233 
1234  SCReturnInt(0);
1235 }
1236 
1237 static int DeviceValidateOutIfaceConfig(DPDKIfaceConfig *iconf)
1238 {
1239  SCEnter();
1240  int retval;
1241  DPDKIfaceConfig *out_iconf = NULL;
1242  ConfigInit(&out_iconf);
1243  if (out_iconf == NULL) {
1244  FatalError("Copy interface of the interface \"%s\" is NULL", iconf->iface);
1245  }
1246 
1247  retval = ConfigLoad(out_iconf, iconf->out_iface);
1248  if (retval < 0) {
1249  SCLogError("%s: fail to load config of interface", iconf->out_iface);
1250  out_iconf->DerefFunc(out_iconf);
1251  SCReturnInt(-EINVAL);
1252  }
1253 
1254  if (iconf->nb_rx_queues != out_iconf->nb_tx_queues) {
1255  // the other direction is validated when the copy interface is configured
1256  SCLogError("%s: configured %d RX queues but copy interface %s has %d TX queues"
1257  " - number of queues must be equal",
1258  iconf->iface, iconf->nb_rx_queues, out_iconf->iface, out_iconf->nb_tx_queues);
1259  out_iconf->DerefFunc(out_iconf);
1260  SCReturnInt(-EINVAL);
1261  } else if (iconf->mtu != out_iconf->mtu) {
1262  SCLogError("%s: configured MTU of %d but copy interface %s has MTU set to %d"
1263  " - MTU must be equal",
1264  iconf->iface, iconf->mtu, out_iconf->iface, out_iconf->mtu);
1265  out_iconf->DerefFunc(out_iconf);
1266  SCReturnInt(-EINVAL);
1267  } else if (iconf->copy_mode != out_iconf->copy_mode) {
1268  SCLogError("%s: copy modes of interfaces %s and %s are not equal", iconf->iface,
1269  iconf->iface, out_iconf->iface);
1270  out_iconf->DerefFunc(out_iconf);
1271  SCReturnInt(-EINVAL);
1272  } else if (strcmp(iconf->iface, out_iconf->out_iface) != 0) {
1273  // check if the other iface has the current iface set as a copy iface
1274  SCLogError("%s: copy interface of %s is not set to %s", iconf->iface, out_iconf->iface,
1275  iconf->iface);
1276  out_iconf->DerefFunc(out_iconf);
1277  SCReturnInt(-EINVAL);
1278  }
1279 
1280  out_iconf->DerefFunc(out_iconf);
1281  SCReturnInt(0);
1282 }
1283 
1284 static int DeviceConfigureIPS(DPDKIfaceConfig *iconf)
1285 {
1286  SCEnter();
1287  int retval;
1288 
1289  if (iconf->out_iface != NULL) {
1290  retval = rte_eth_dev_get_port_by_name(iconf->out_iface, &iconf->out_port_id);
1291  if (retval != 0) {
1292  SCLogError("%s: failed to obtain out iface %s port id (err=%d)", iconf->iface,
1293  iconf->out_iface, retval);
1294  SCReturnInt(retval);
1295  }
1296 
1297  int32_t out_port_socket_id;
1298  retval = DeviceSetSocketID(iconf->port_id, &out_port_socket_id);
1299  if (retval < 0) {
1300  SCLogError("%s: invalid socket id (err=%d)", iconf->out_iface, retval);
1301  SCReturnInt(retval);
1302  }
1303 
1304  if (iconf->socket_id != out_port_socket_id) {
1305  SCLogWarning("%s: out iface %s is not on the same NUMA node", iconf->iface,
1306  iconf->out_iface);
1307  }
1308 
1309  retval = DeviceValidateOutIfaceConfig(iconf);
1310  if (retval != 0) {
1311  // Error will be written out by the validation function
1312  SCReturnInt(retval);
1313  }
1314 
1315  if (iconf->copy_mode == DPDK_COPY_MODE_IPS)
1316  SCLogInfo("%s: DPDK IPS mode activated: %s->%s", iconf->iface, iconf->iface,
1317  iconf->out_iface);
1318  else if (iconf->copy_mode == DPDK_COPY_MODE_TAP)
1319  SCLogInfo("%s: DPDK TAP mode activated: %s->%s", iconf->iface, iconf->iface,
1320  iconf->out_iface);
1321  }
1322  SCReturnInt(0);
1323 }
1324 
1325 /**
1326  * Function verifies changes in e.g. device info after configuration has
1327  * happened. Sometimes (e.g. DPDK Bond PMD with Intel NICs i40e/ixgbe) change
1328  * device info only after the device configuration.
1329  * @param iconf
1330  * @param dev_info
1331  * @return 0 on success, -EAGAIN when reconfiguration is needed, <0 on failure
1332  */
1333 static int32_t DeviceVerifyPostConfigure(
1334  const DPDKIfaceConfig *iconf, const struct rte_eth_dev_info *dev_info)
1335 {
1336  struct rte_eth_dev_info post_conf_dev_info = { 0 };
1337  int32_t ret = rte_eth_dev_info_get(iconf->port_id, &post_conf_dev_info);
1338  if (ret < 0) {
1339  SCLogError("%s: getting device info failed (err: %s)", iconf->iface, rte_strerror(-ret));
1340  SCReturnInt(ret);
1341  }
1342 
1343  if (dev_info->flow_type_rss_offloads != post_conf_dev_info.flow_type_rss_offloads ||
1344  dev_info->rx_offload_capa != post_conf_dev_info.rx_offload_capa ||
1345  dev_info->tx_offload_capa != post_conf_dev_info.tx_offload_capa ||
1346  dev_info->max_rx_queues != post_conf_dev_info.max_rx_queues ||
1347  dev_info->max_tx_queues != post_conf_dev_info.max_tx_queues ||
1348  dev_info->max_mtu != post_conf_dev_info.max_mtu) {
1349  SCLogWarning("%s: device information severely changed after configuration, reconfiguring",
1350  iconf->iface);
1351  return -EAGAIN;
1352  }
1353 
1354  if (strcmp(dev_info->driver_name, "net_bonding") == 0) {
1355  ret = BondingAllDevicesSameDriver(iconf->port_id);
1356  if (ret < 0) {
1357  SCLogError("%s: bond port uses port with different DPDK drivers", iconf->iface);
1358  SCReturnInt(ret);
1359  }
1360  }
1361 
1362  return 0;
1363 }
1364 
1365 static int DeviceConfigure(DPDKIfaceConfig *iconf)
1366 {
1367  SCEnter();
1368  int32_t retval = rte_eth_dev_get_port_by_name(iconf->iface, &(iconf->port_id));
1369  if (retval < 0) {
1370  SCLogError("%s: getting port id failed (err: %s)", iconf->iface, rte_strerror(-retval));
1371  SCReturnInt(retval);
1372  }
1373 
1374  if (!rte_eth_dev_is_valid_port(iconf->port_id)) {
1375  SCLogError("%s: specified port %d is invalid", iconf->iface, iconf->port_id);
1376  SCReturnInt(retval);
1377  }
1378 
1379  retval = DeviceSetSocketID(iconf->port_id, &iconf->socket_id);
1380  if (retval < 0) {
1381  SCLogError("%s: invalid socket id (err: %s)", iconf->iface, rte_strerror(-retval));
1382  SCReturnInt(retval);
1383  }
1384 
1385  struct rte_eth_dev_info dev_info = { 0 };
1386  retval = rte_eth_dev_info_get(iconf->port_id, &dev_info);
1387  if (retval < 0) {
1388  SCLogError("%s: getting device info failed (err: %s)", iconf->iface, rte_strerror(-retval));
1389  SCReturnInt(retval);
1390  }
1391 
1392  if (iconf->nb_rx_queues > dev_info.max_rx_queues) {
1393  SCLogError("%s: configured RX queues %u is higher than device maximum (%" PRIu16 ")",
1394  iconf->iface, iconf->nb_rx_queues, dev_info.max_rx_queues);
1395  SCReturnInt(-ERANGE);
1396  }
1397 
1398  if (iconf->nb_tx_queues > dev_info.max_tx_queues) {
1399  SCLogError("%s: configured TX queues %u is higher than device maximum (%" PRIu16 ")",
1400  iconf->iface, iconf->nb_tx_queues, dev_info.max_tx_queues);
1401  SCReturnInt(-ERANGE);
1402  }
1403 
1404  retval = DeviceValidateMTU(iconf, &dev_info);
1405  if (retval < 0)
1406  return retval;
1407 
1408  struct rte_eth_conf port_conf = { 0 };
1409  DeviceInitPortConf(iconf, &dev_info, &port_conf);
1410  if (port_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM) {
1411  // Suricata does not need recalc checksums now
1412  iconf->checksum_mode = CHECKSUM_VALIDATION_OFFLOAD;
1413  }
1414 
1415  retval = rte_eth_dev_configure(
1416  iconf->port_id, iconf->nb_rx_queues, iconf->nb_tx_queues, &port_conf);
1417  if (retval < 0) {
1418  SCLogError("%s: failed to configure the device (port %u, err %s)", iconf->iface,
1419  iconf->port_id, rte_strerror(-retval));
1420  SCReturnInt(retval);
1421  }
1422 
1423  retval = DeviceVerifyPostConfigure(iconf, &dev_info);
1424  if (retval < 0)
1425  return retval;
1426 
1427  retval = rte_eth_dev_adjust_nb_rx_tx_desc(
1428  iconf->port_id, &iconf->nb_rx_desc, &iconf->nb_tx_desc);
1429  if (retval != 0) {
1430  SCLogError("%s: failed to adjust device queue descriptors (port %u, err %d)", iconf->iface,
1431  iconf->port_id, retval);
1432  SCReturnInt(retval);
1433  }
1434 
1435  retval = iconf->flags & DPDK_MULTICAST ? rte_eth_allmulticast_enable(iconf->port_id)
1436  : rte_eth_allmulticast_disable(iconf->port_id);
1437  if (retval == -ENOTSUP) {
1438  retval = rte_eth_allmulticast_get(iconf->port_id);
1439  // when multicast is enabled but set to disable or vice versa
1440  if ((retval == 1 && !(iconf->flags & DPDK_MULTICAST)) ||
1441  (retval == 0 && (iconf->flags & DPDK_MULTICAST))) {
1442  SCLogError("%s: Allmulticast setting of port (%" PRIu16
1443  ") can not be configured. Set it to %s",
1444  iconf->iface, iconf->port_id, retval == 1 ? "true" : "false");
1445  } else if (retval < 0) {
1446  SCLogError("%s: failed to get multicast mode (port %u, err %d)", iconf->iface,
1447  iconf->port_id, retval);
1448  SCReturnInt(retval);
1449  }
1450  } else if (retval < 0) {
1451  SCLogError("%s: error when changing multicast setting (port %u err %d)", iconf->iface,
1452  iconf->port_id, retval);
1453  SCReturnInt(retval);
1454  }
1455 
1456  retval = iconf->flags & DPDK_PROMISC ? rte_eth_promiscuous_enable(iconf->port_id)
1457  : rte_eth_promiscuous_disable(iconf->port_id);
1458  if (retval == -ENOTSUP) {
1459  retval = rte_eth_promiscuous_get(iconf->port_id);
1460  if ((retval == 1 && !(iconf->flags & DPDK_PROMISC)) ||
1461  (retval == 0 && (iconf->flags & DPDK_PROMISC))) {
1462  SCLogError("%s: promiscuous setting of port (%" PRIu16
1463  ") can not be configured. Set it to %s",
1464  iconf->iface, iconf->port_id, retval == 1 ? "true" : "false");
1466  } else if (retval < 0) {
1467  SCLogError("%s: failed to get promiscuous mode (port %u, err=%d)", iconf->iface,
1468  iconf->port_id, retval);
1469  SCReturnInt(retval);
1470  }
1471  } else if (retval < 0) {
1472  SCLogError("%s: error when changing promiscuous setting (port %u, err %d)", iconf->iface,
1473  iconf->port_id, retval);
1475  }
1476 
1477  // set maximum transmission unit
1478  SCLogConfig("%s: setting MTU to %d", iconf->iface, iconf->mtu);
1479  retval = rte_eth_dev_set_mtu(iconf->port_id, iconf->mtu);
1480  if (retval == -ENOTSUP) {
1481  SCLogWarning("%s: changing MTU on port %u is not supported, ignoring the setting",
1482  iconf->iface, iconf->port_id);
1483  // if it is not possible to set the MTU, retrieve it
1484  retval = rte_eth_dev_get_mtu(iconf->port_id, &iconf->mtu);
1485  if (retval < 0) {
1486  SCLogError("%s: failed to retrieve MTU (port %u, err %d)", iconf->iface, iconf->port_id,
1487  retval);
1488  SCReturnInt(retval);
1489  }
1490  } else if (retval < 0) {
1491  SCLogError("%s: failed to set MTU to %u (port %u, err %d)", iconf->iface, iconf->mtu,
1492  iconf->port_id, retval);
1493  SCReturnInt(retval);
1494  }
1495 
1496  retval = DeviceConfigureQueues(iconf, &dev_info, &port_conf);
1497  if (retval < 0) {
1498  SCReturnInt(retval);
1499  }
1500 
1501  retval = DeviceConfigureIPS(iconf);
1502  if (retval < 0) {
1503  SCReturnInt(retval);
1504  }
1505 
1506  SCReturnInt(0);
1507 }
1508 
1509 static void *ParseDpdkConfigAndConfigureDevice(const char *iface)
1510 {
1511  int retval;
1512  DPDKIfaceConfig *iconf = ConfigParse(iface);
1513  if (iconf == NULL) {
1514  FatalError("DPDK configuration could not be parsed");
1515  }
1516 
1517  retval = DeviceConfigure(iconf);
1518  if (retval == -EAGAIN) {
1519  // for e.g. bonding PMD it needs to be reconfigured
1520  retval = DeviceConfigure(iconf);
1521  }
1522 
1523  if (retval < 0) { // handles both configure attempts
1524  iconf->DerefFunc(iconf);
1525  retval = rte_eal_cleanup();
1526  if (retval != 0)
1527  FatalError("EAL cleanup failed: %s", strerror(-retval));
1528 
1529  FatalError("%s: failed to configure", iface);
1530  }
1531 
1532  SC_ATOMIC_RESET(iconf->ref);
1533  (void)SC_ATOMIC_ADD(iconf->ref, iconf->threads);
1534  // This counter is increased by worker threads that individually pick queue IDs.
1535  SC_ATOMIC_RESET(iconf->queue_id);
1536  SC_ATOMIC_RESET(iconf->inconsitent_numa_cnt);
1537 
1538  // initialize LiveDev DPDK values
1539  LiveDevice *ldev_instance = LiveGetDevice(iface);
1540  if (ldev_instance == NULL) {
1541  FatalError("Device %s is not registered as a live device", iface);
1542  }
1543  ldev_instance->dpdk_vars.pkt_mp = iconf->pkt_mempool;
1544  return iconf;
1545 }
1546 
/**
 * \brief extract information from config file
 *
 * The returned structure will be freed by the thread init function.
 * It is thus necessary either to copy the structure before giving it
 * to a thread, or to reparse the file for each thread (and thus have
 * a new structure).
 *
 * After configuration is loaded, DPDK also configures the device according to the settings.
 *
 * \return a DPDKIfaceConfig corresponding to the interface name
 */
1559 
1560 static int DPDKConfigGetThreadsCount(void *conf)
1561 {
1562  if (conf == NULL)
1563  FatalError("Configuration file is NULL");
1564 
1565  DPDKIfaceConfig *dpdk_conf = (DPDKIfaceConfig *)conf;
1566  return dpdk_conf->threads;
1567 }
1568 
1569 #endif /* HAVE_DPDK */
1570 
1571 static int DPDKRunModeIsIPS(void)
1572 {
1573  /* Find initial node */
1574  const char dpdk_node_query[] = "dpdk.interfaces";
1575  ConfNode *dpdk_node = ConfGetNode(dpdk_node_query);
1576  if (dpdk_node == NULL) {
1577  FatalError("Unable to get %s configuration node", dpdk_node_query);
1578  }
1579 
1580  const char default_iface[] = "default";
1581  ConfNode *if_default = ConfNodeLookupKeyValue(dpdk_node, "interface", default_iface);
1582  int nlive = LiveGetDeviceCount();
1583  bool has_ips = false;
1584  bool has_ids = false;
1585  for (int ldev = 0; ldev < nlive; ldev++) {
1586  const char *live_dev = LiveGetDeviceName(ldev);
1587  if (live_dev == NULL)
1588  FatalError("Unable to get device id %d from LiveDevice list", ldev);
1589 
1590  ConfNode *if_root = ConfFindDeviceConfig(dpdk_node, live_dev);
1591  if (if_root == NULL) {
1592  if (if_default == NULL)
1593  FatalError("Unable to get %s or %s interface", live_dev, default_iface);
1594 
1595  if_root = if_default;
1596  }
1597 
1598  const char *copymodestr = NULL;
1599  if (ConfGetChildValueWithDefault(if_root, if_default, "copy-mode", &copymodestr) == 1) {
1600  if (strcmp(copymodestr, "ips") == 0) {
1601  has_ips = true;
1602  } else {
1603  has_ids = true;
1604  }
1605  } else {
1606  has_ids = true;
1607  }
1608 
1609  if (has_ids && has_ips) {
1610  FatalError("Copy-mode of interface %s mixes with the previously set copy-modes "
1611  "(only IDS/TAP and IPS copy-mode combinations are allowed in DPDK",
1612  live_dev);
1613  }
1614  }
1615 
1616  return has_ips;
1617 }
1618 
/**
 * \brief Switches the engine to IPS mode when DPDKRunModeIsIPS() reports IPS.
 */
static void DPDKRunModeEnableIPS(void)
{
    if (!DPDKRunModeIsIPS()) {
        return;
    }

    SCLogInfo("Setting IPS mode");
    EngineModeSetIPS();
}
1626 
/**
 * \brief Returns the name of the default DPDK runmode.
 *
 * \return the string "workers"
 */
const char *RunModeDpdkGetDefaultMode(void)
{
    static const char default_mode[] = "workers";
    return default_mode;
}
1631 
1633 {
1635  "Workers DPDK mode, each thread does all"
1636  " tasks from acquisition to logging",
1637  RunModeIdsDpdkWorkers, DPDKRunModeEnableIPS);
1638 }
1639 
/**
 * \brief Workers version of the DPDK processing.
 *
 * Start N threads with each thread doing all the work.
 * Without DPDK support compiled in (HAVE_DPDK undefined) nothing is set up.
 *
 * \return 0; a failure to start the runmode is fatal
 */
int RunModeIdsDpdkWorkers(void)
{
    SCEnter();
#ifdef HAVE_DPDK
    int ret;

    TimeModeSetLive();

    InitEal();
    ret = RunModeSetLiveCaptureWorkers(ParseDpdkConfigAndConfigureDevice, DPDKConfigGetThreadsCount,
            "ReceiveDPDK", "DecodeDPDK", thread_name_workers, NULL);
    if (ret != 0) {
        FatalError("Unable to start runmode");
    }

    SCLogDebug("RunModeIdsDpdkWorkers initialised");

#endif /* HAVE_DPDK */
    SCReturnInt(0);
}
1666 
1667 /**
1668  * @}
1669  */
thread_name_workers
const char * thread_name_workers
Definition: runmodes.c:81
DPDKIfaceConfigAttributes_::checksum_checks_offload
const char * checksum_checks_offload
Definition: runmode-dpdk.h:31
util-byte.h
DPDKIfaceConfigAttributes_::mempool_size
const char * mempool_size
Definition: runmode-dpdk.h:34
CHECKSUM_VALIDATION_OFFLOAD
@ CHECKSUM_VALIDATION_OFFLOAD
Definition: decode.h:50
RunModeSetLiveCaptureWorkers
int RunModeSetLiveCaptureWorkers(ConfigIfaceParserFunc ConfigParser, ConfigIfaceThreadsCountFunc ModThreadsCount, const char *recv_mod_name, const char *decode_mod_name, const char *thread_name, const char *live_dev)
Definition: util-runmodes.c:322
DPDKSetTimevalOfMachineStart
void DPDKSetTimevalOfMachineStart(void)
DPDKIfaceConfigAttributes_::promisc
const char * promisc
Definition: runmode-dpdk.h:28
SC_ATOMIC_INIT
#define SC_ATOMIC_INIT(name)
wrapper for initializing an atomic variable.
Definition: util-atomic.h:315
ConfNode_::val
char * val
Definition: conf.h:34
RUNMODE_DPDK
@ RUNMODE_DPDK
Definition: runmodes.h:40
DPDKIfaceConfigAttributes_::mempool_cache_size
const char * mempool_cache_size
Definition: runmode-dpdk.h:35
DPDK_COPY_MODE_IPS
@ DPDK_COPY_MODE_IPS
Definition: source-dpdk.h:33
ConfGetChildValueBoolWithDefault
int ConfGetChildValueBoolWithDefault(const ConfNode *base, const ConfNode *dflt, const char *name, int *val)
Definition: conf.c:514
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:269
next
struct HtpBodyChunk_ * next
Definition: app-layer-htp.h:0
ConfGetNode
ConfNode * ConfGetNode(const char *name)
Get a ConfNode by name.
Definition: conf.c:181
UtilAffinityGetAffinedCPUNum
uint16_t UtilAffinityGetAffinedCPUNum(ThreadsAffinityType *taf)
Definition: util-affinity.c:310
LiveDevice_
Definition: util-device.h:49
SC_ATOMIC_ADD
#define SC_ATOMIC_ADD(name, val)
add a value to our atomic variable
Definition: util-atomic.h:333
DPDKIfaceConfigAttributes_::copy_iface
const char * copy_iface
Definition: runmode-dpdk.h:39
RunModeIdsDpdkWorkers
int RunModeIdsDpdkWorkers(void)
Workers version of the DPDK processing.
Definition: runmode-dpdk.c:1646
util-runmodes.h
TAILQ_FOREACH
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:252
RunModeRegisterNewRunMode
void RunModeRegisterNewRunMode(enum RunModes runmode, const char *name, const char *description, int(*RunModeFunc)(void), void(*RunModeIsIPSEnabled)(void))
Registers a new runmode.
Definition: runmodes.c:491
ConfNodeLookupKeyValue
ConfNode * ConfNodeLookupKeyValue(const ConfNode *base, const char *key, const char *value)
Look up a key/value pair under a specific node.
Definition: conf.c:831
CHECKSUM_VALIDATION_DISABLE
@ CHECKSUM_VALIDATION_DISABLE
Definition: decode.h:45
RunmodeGetActive
char * RunmodeGetActive(void)
Definition: runmodes.c:217
StringParseInt32
int StringParseInt32(int32_t *res, int base, size_t len, const char *str)
Definition: util-byte.c:622
DPDKIfaceConfig_
Definition: source-dpdk.h:45
util-dpdk-ice.h
ConfGetChildValueIntWithDefault
int ConfGetChildValueIntWithDefault(const ConfNode *base, const ConfNode *dflt, const char *name, intmax_t *val)
Definition: conf.c:462
MAX
#define MAX(x, y)
Definition: suricata-common.h:390
TM_ECODE_FAILED
@ TM_ECODE_FAILED
Definition: tm-threads-common.h:85
strlcpy
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: util-strlcpyu.c:43
SCReturnBool
#define SCReturnBool(x)
Definition: util-debug.h:289
CHECKSUM_VALIDATION_ENABLE
@ CHECKSUM_VALIDATION_ENABLE
Definition: decode.h:46
RunModeDpdkGetDefaultMode
const char * RunModeDpdkGetDefaultMode(void)
Definition: runmode-dpdk.c:1627
decode.h
util-device.h
util-debug.h
DPDKIfaceConfigAttributes_::rx_descriptors
const char * rx_descriptors
Definition: runmode-dpdk.h:36
strlcat
size_t strlcat(char *, const char *src, size_t siz)
Definition: util-strlcatu.c:45
util-cpu.h
DPDKIfaceConfigAttributes_::checksum_checks
const char * checksum_checks
Definition: runmode-dpdk.h:30
LiveGetDevice
LiveDevice * LiveGetDevice(const char *name)
Get a pointer to the device with the given name.
Definition: util-device.c:248
DPDK_RX_CHECKSUM_OFFLOAD
#define DPDK_RX_CHECKSUM_OFFLOAD
Definition: source-dpdk.h:42
ConfFindDeviceConfig
ConfNode * ConfFindDeviceConfig(ConfNode *node, const char *iface)
Find the configuration node for a specific device.
Definition: util-conf.c:121
SCEnter
#define SCEnter(...)
Definition: util-debug.h:271
util-affinity.h
EngineModeSetIPS
void EngineModeSetIPS(void)
Definition: suricata.c:240
ConfGetChildValueWithDefault
int ConfGetChildValueWithDefault(const ConfNode *base, const ConfNode *dflt, const char *name, const char **vptr)
Definition: conf.c:378
StringParseUint32
int StringParseUint32(uint32_t *res, int base, size_t len, const char *str)
Definition: util-byte.c:313
util-time.h
SCLogWarning
#define SCLogWarning(...)
Macro used to log WARNING messages.
Definition: util-debug.h:249
threading_set_cpu_affinity
int threading_set_cpu_affinity
Definition: runmodes.c:75
DPDKIfaceConfigAttributes_
Definition: runmode-dpdk.h:26
SC_ATOMIC_SUB
#define SC_ATOMIC_SUB(name, val)
sub a value from our atomic variable
Definition: util-atomic.h:342
SCReturn
#define SCReturn
Definition: util-debug.h:273
SCReturnPtr
#define SCReturnPtr(x, type)
Definition: util-debug.h:287
BIT_U64
#define BIT_U64(n)
Definition: suricata-common.h:396
runmodes.h
SCLogInfo
#define SCLogInfo(...)
Macro used to log INFORMATIONAL messages.
Definition: util-debug.h:224
DPDKIfaceConfigAttributes_::threads
const char * threads
Definition: runmode-dpdk.h:27
TimeModeSetLive
void TimeModeSetLive(void)
Definition: util-time.c:99
DPDKIfaceConfigAttributes_::tx_descriptors
const char * tx_descriptors
Definition: runmode-dpdk.h:37
util-dpdk.h
util-conf.h
StringParseUint64
int StringParseUint64(uint64_t *res, int base, size_t len, const char *str)
Definition: util-byte.c:308
source-dpdk.h
suricata-common.h
LiveGetDeviceName
const char * LiveGetDeviceName(int number)
Get a pointer to the device name at idx.
Definition: util-device.c:184
DPDK_PROMISC
#define DPDK_PROMISC
Definition: source-dpdk.h:39
ConfNode_::name
char * name
Definition: conf.h:33
ConfNodeIsSequence
int ConfNodeIsSequence(const ConfNode *node)
Check whether a node is a sequence.
Definition: conf.c:946
FatalError
#define FatalError(...)
Definition: util-debug.h:502
SC_ATOMIC_RESET
#define SC_ATOMIC_RESET(name)
wrapper for reinitializing an atomic variable.
Definition: util-atomic.h:324
ThreadsAffinityType_
Definition: util-affinity.h:65
SCLogConfig
struct SCLogConfig_ SCLogConfig
Holds the config state used by the logging api.
SCLogError
#define SCLogError(...)
Macro used to log ERROR messages.
Definition: util-debug.h:261
SCFree
#define SCFree(p)
Definition: util-mem.h:61
ConfNode_
Definition: conf.h:32
util-dpdk-bonding.h
DPDKIfaceConfigAttributes_::multicast
const char * multicast
Definition: runmode-dpdk.h:29
RunModeDpdkRegister
void RunModeDpdkRegister(void)
Definition: runmode-dpdk.c:1632
util-dpdk-ixgbe.h
ConfSetRootAndDefaultNodes
int ConfSetRootAndDefaultNodes(const char *ifaces_node_name, const char *iface, ConfNode **if_root, ConfNode **if_default)
Finds and sets root and default node of the interface.
Definition: conf.c:983
util-dpdk-i40e.h
suricata.h
runmode-dpdk.h
DPDKIfaceConfigAttributes_::rss_hf
const char * rss_hf
Definition: runmode-dpdk.h:33
GetAffinityTypeFromName
ThreadsAffinityType * GetAffinityTypeFromName(const char *name)
find affinity by its name
Definition: util-affinity.c:68
LiveGetDeviceCount
int LiveGetDeviceCount(void)
Get the number of registered devices.
Definition: util-device.c:164
DPDKIfaceConfigAttributes_::mtu
const char * mtu
Definition: runmode-dpdk.h:32
DPDKIfaceConfigAttributes_::copy_mode
const char * copy_mode
Definition: runmode-dpdk.h:38
UtilCpuGetNumProcessorsOnline
uint16_t UtilCpuGetNumProcessorsOnline(void)
Get the number of cpus online in the system.
Definition: util-cpu.c:108
SCCalloc
#define SCCalloc(nm, sz)
Definition: util-mem.h:53
SCReturnInt
#define SCReturnInt(x)
Definition: util-debug.h:275
DPDK_COPY_MODE_TAP
@ DPDK_COPY_MODE_TAP
Definition: source-dpdk.h:33
DPDK_COPY_MODE_NONE
@ DPDK_COPY_MODE_NONE
Definition: source-dpdk.h:33
DPDK_MULTICAST
#define DPDK_MULTICAST
Definition: source-dpdk.h:40