suricata
util-ebpf.c
Go to the documentation of this file.
1 /* Copyright (C) 2018-2021 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \ingroup afppacket
20  *
21  * @{
22  */
23 
24 /**
25  * \file
26  *
27  * \author Eric Leblond <eric@regit.org>
28  *
29  * eBPF utility
30  *
31  */
32 
33 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34 #define SC_PCAP_DONT_INCLUDE_PCAP_H 1
35 
36 #include "suricata-common.h"
37 #include "flow-bypass.h"
38 
39 #ifdef HAVE_PACKET_EBPF
40 
41 #include <sys/time.h>
42 #include <sys/resource.h>
43 
44 #include "util-ebpf.h"
45 #include "util-affinity.h"
46 #include "util-cpu.h"
47 #include "util-device.h"
48 
49 #include "device-storage.h"
50 #include "flow-storage.h"
51 #include "flow.h"
52 #include "flow-hash.h"
53 #include "tm-threads.h"
54 
55 #include <bpf/libbpf.h>
56 #include <bpf/bpf.h>
57 #include <net/if.h>
58 #include "autoconf.h"
59 
60 #define BPF_MAP_MAX_COUNT 16
61 
62 #define BYPASSED_FLOW_TIMEOUT 60
63 
64 static LiveDevStorageId g_livedev_storage_id = { .id = -1 };
65 static FlowStorageId g_flow_storage_id = { .id = -1 };
66 
67 struct bpf_map_item {
68  char iface[IFNAMSIZ];
69  char * name;
70  int fd;
71  uint8_t to_unlink;
72 };
73 
74 struct bpf_maps_info {
75  struct bpf_map_item array[BPF_MAP_MAX_COUNT];
76  int last;
77 };
78 
79 typedef struct BypassedIfaceList_ {
80  LiveDevice *dev;
81  struct BypassedIfaceList_ *next;
82 } BypassedIfaceList;
83 
84 static void BpfMapsInfoFree(void *bpf)
85 {
86  struct bpf_maps_info *bpfinfo = (struct bpf_maps_info *)bpf;
87  int i;
88  for (i = 0; i < bpfinfo->last; i ++) {
89  if (bpfinfo->array[i].name) {
90  if (bpfinfo->array[i].to_unlink) {
91  char pinnedpath[PATH_MAX];
92  int ret = snprintf(pinnedpath, sizeof(pinnedpath),
93  "/sys/fs/bpf/suricata-%s-%s",
94  bpfinfo->array[i].iface,
95  bpfinfo->array[i].name);
96  if (ret > 0) {
97  /* Unlink the pinned entry */
98  ret = unlink(pinnedpath);
99  if (ret == -1) {
100  int error = errno;
102  "Unable to remove %s: %s (%d)",
103  pinnedpath,
104  strerror(error),
105  error);
106  }
107  } else {
108  SCLogWarning(SC_ERR_SPRINTF, "Unable to remove map %s",
109  bpfinfo->array[i].name);
110  }
111  }
112  SCFree(bpfinfo->array[i].name);
113  }
114  }
115  SCFree(bpfinfo);
116 }
117 
118 static void BypassedListFree(void *ifl)
119 {
120  BypassedIfaceList *mifl = (BypassedIfaceList *)ifl;
121  BypassedIfaceList *nifl;
122  while (mifl) {
123  nifl = mifl->next;
124  SCFree(mifl);
125  mifl = nifl;
126  }
127 }
128 
129 void EBPFDeleteKey(int fd, void *key)
130 {
131  int ret = bpf_map_delete_elem(fd, key);
132  if (ret < 0) {
134  "Unable to delete entry: %s (%d)",
135  strerror(errno),
136  errno);
137  }
138 }
139 
140 static struct bpf_maps_info *EBPFGetBpfMap(const char *iface)
141 {
142  LiveDevice *livedev = LiveGetDevice(iface);
143  if (livedev == NULL)
144  return NULL;
145  void *data = LiveDevGetStorageById(livedev, g_livedev_storage_id);
146 
147  return (struct bpf_maps_info *)data;
148 }
149 
150 /**
151  * Get file descriptor of a map in the scope of a interface
152  *
153  * \param iface the interface where the map need to be looked for
154  * \param name the name of the map
155  * \return the file descriptor or -1 in case of error
156  */
157 int EBPFGetMapFDByName(const char *iface, const char *name)
158 {
159  int i;
160 
161  if (iface == NULL || name == NULL)
162  return -1;
163  struct bpf_maps_info *bpf_maps = EBPFGetBpfMap(iface);
164  if (bpf_maps == NULL)
165  return -1;
166 
167  for (i = 0; i < BPF_MAP_MAX_COUNT; i++) {
168  if (!bpf_maps->array[i].name)
169  continue;
170  if (!strcmp(bpf_maps->array[i].name, name)) {
171  SCLogDebug("Got fd %d for eBPF map '%s'", bpf_maps->array[i].fd, name);
172  return bpf_maps->array[i].fd;
173  }
174  }
175 
176  return -1;
177 }
178 
179 static int EBPFLoadPinnedMapsFile(LiveDevice *livedev, const char *file)
180 {
181  char pinnedpath[1024];
182  snprintf(pinnedpath, sizeof(pinnedpath),
183  "/sys/fs/bpf/suricata-%s-%s",
184  livedev->dev,
185  file);
186 
187  return bpf_obj_get(pinnedpath);
188 }
189 
190 static int EBPFLoadPinnedMaps(LiveDevice *livedev, struct ebpf_timeout_config *config)
191 {
192  int fd_v4 = -1, fd_v6 = -1;
193 
194  /* First try to load the eBPF check map and return if found */
195  if (config->pinned_maps_name) {
196  int ret = EBPFLoadPinnedMapsFile(livedev, config->pinned_maps_name);
197  if (ret == 0) {
198  /* pinned maps found, let's just exit as XDP filter is in place */
199  return ret;
200  }
201  }
202 
203  if (config->mode == AFP_MODE_XDP_BYPASS) {
204  /* Get flow v4 table */
205  fd_v4 = EBPFLoadPinnedMapsFile(livedev, "flow_table_v4");
206  if (fd_v4 < 0) {
207  return fd_v4;
208  }
209 
210  /* Get flow v6 table */
211  fd_v6 = EBPFLoadPinnedMapsFile(livedev, "flow_table_v6");
212  if (fd_v6 < 0) {
214  "Found a flow_table_v4 map but no flow_table_v6 map");
215  return fd_v6;
216  }
217  }
218 
219  struct bpf_maps_info *bpf_map_data = SCCalloc(1, sizeof(*bpf_map_data));
220  if (bpf_map_data == NULL) {
221  SCLogError(SC_ERR_MEM_ALLOC, "Can't allocate bpf map array");
222  return -1;
223  }
224 
225  if (config->mode == AFP_MODE_XDP_BYPASS) {
226  bpf_map_data->array[0].fd = fd_v4;
227  bpf_map_data->array[0].name = SCStrdup("flow_table_v4");
228  if (bpf_map_data->array[0].name == NULL) {
229  goto alloc_error;
230  }
231  bpf_map_data->array[1].fd = fd_v6;
232  bpf_map_data->array[1].name = SCStrdup("flow_table_v6");
233  if (bpf_map_data->array[1].name == NULL) {
234  goto alloc_error;
235  }
236  bpf_map_data->last = 2;
237  } else {
238  bpf_map_data->last = 0;
239  }
240 
241  /* Load other known maps: cpu_map, cpus_available, tx_peer, tx_peer_int */
242  int fd = EBPFLoadPinnedMapsFile(livedev, "cpu_map");
243  if (fd >= 0) {
244  bpf_map_data->array[bpf_map_data->last].fd = fd;
245  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("cpu_map");
246  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
247  goto alloc_error;
248  }
249  bpf_map_data->last++;
250  }
251  fd = EBPFLoadPinnedMapsFile(livedev, "cpus_available");
252  if (fd >= 0) {
253  bpf_map_data->array[bpf_map_data->last].fd = fd;
254  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("cpus_available");
255  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
256  goto alloc_error;
257  }
258  bpf_map_data->last++;
259  }
260  fd = EBPFLoadPinnedMapsFile(livedev, "tx_peer");
261  if (fd >= 0) {
262  bpf_map_data->array[bpf_map_data->last].fd = fd;
263  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("tx_peer");
264  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
265  goto alloc_error;
266  }
267  bpf_map_data->last++;
268  }
269  fd = EBPFLoadPinnedMapsFile(livedev, "tx_peer_int");
270  if (fd >= 0) {
271  bpf_map_data->array[bpf_map_data->last].fd = fd;
272  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("tx_peer_int");
273  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
274  goto alloc_error;
275  }
276  bpf_map_data->last++;
277  }
278 
279  /* Attach the bpf_maps_info to the LiveDevice via the device storage */
280  LiveDevSetStorageById(livedev, g_livedev_storage_id, bpf_map_data);
281  /* Declare that device will use bypass stats */
282  LiveDevUseBypass(livedev);
283 
284  return 0;
285 
286 alloc_error:
287  for (int i = 0; i < bpf_map_data->last; i++) {
288  SCFree(bpf_map_data->array[i].name);
289  }
290  bpf_map_data->last = 0;
291  SCLogError(SC_ERR_MEM_ALLOC, "Can't allocate bpf map name");
292  return -1;
293 }
294 
295 /**
296  * Load a section of an eBPF file
297  *
298  * This function loads a section inside an eBPF and return
299  * via the parameter val the file descriptor that will be used to
300  * inject the eBPF code into the kernel via a syscall.
301  *
302  * \param path the path of the eBPF file to load
303  * \param section the section in the eBPF file to load
304  * \param val a pointer to an integer that will be the file desc
305  * \return -1 in case of error, 0 in case of success, 1 if pinned maps is loaded
306  */
307 int EBPFLoadFile(const char *iface, const char *path, const char * section,
308  int *val, struct ebpf_timeout_config *config)
309 {
310  int err, pfd;
311  bool found = false;
312  struct bpf_object *bpfobj = NULL;
313  struct bpf_program *bpfprog = NULL;
314  struct bpf_map *map = NULL;
315 
316  if (iface == NULL)
317  return -1;
318  LiveDevice *livedev = LiveGetDevice(iface);
319  if (livedev == NULL)
320  return -1;
321 
322  if (config->flags & EBPF_XDP_CODE && config->flags & EBPF_PINNED_MAPS) {
323  /* We try to get our flow table maps and if we have them we can simply return */
324  if (EBPFLoadPinnedMaps(livedev, config) == 0) {
325  SCLogInfo("Loaded pinned maps, will use already loaded eBPF filter");
326  return 1;
327  }
328  }
329 
330  if (! path) {
331  SCLogError(SC_ERR_INVALID_VALUE, "No file defined to load eBPF from");
332  return -1;
333  }
334 
335  /* Sending the eBPF code to the kernel requires a large amount of
336  * locked memory so we set it to unlimited to avoid a ENOPERM error */
337  struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
338  if (setrlimit(RLIMIT_MEMLOCK, &r) != 0) {
339  SCLogError(SC_ERR_MEM_ALLOC, "Unable to lock memory: %s (%d)",
340  strerror(errno), errno);
341  return -1;
342  }
343 
344  /* Open the eBPF file and parse it */
345  bpfobj = bpf_object__open(path);
346  long error = libbpf_get_error(bpfobj);
347  if (error) {
348  char err_buf[128];
349  libbpf_strerror(error, err_buf,
350  sizeof(err_buf));
352  "Unable to load eBPF objects in '%s': %s",
353  path, err_buf);
354  return -1;
355  }
356 
357  if (config->flags & EBPF_XDP_HW_MODE) {
358  unsigned int ifindex = if_nametoindex(iface);
359  bpf_object__for_each_program(bpfprog, bpfobj) {
360  bpf_program__set_ifindex(bpfprog, ifindex);
361  }
362  bpf_map__for_each(map, bpfobj) {
363  bpf_map__set_ifindex(map, ifindex);
364  }
365  }
366 
367  /* Let's check that our section is here */
368  bpf_object__for_each_program(bpfprog, bpfobj) {
369 #ifdef HAVE_BPF_PROGRAM__SECTION_NAME
370  const char *title = bpf_program__section_name(bpfprog);
371 #else
372  const char *title = bpf_program__title(bpfprog, 0);
373 #endif
374  if (!strcmp(title, section)) {
375  if (config->flags & EBPF_SOCKET_FILTER) {
376  bpf_program__set_socket_filter(bpfprog);
377  } else {
378  bpf_program__set_xdp(bpfprog);
379  }
380  found = true;
381  break;
382  }
383  }
384 
385  if (found == false) {
387  "No section '%s' in '%s' file. Will not be able to use the file",
388  section,
389  path);
390  return -1;
391  }
392 
393  err = bpf_object__load(bpfobj);
394  if (err < 0) {
395  if (err == -EPERM) {
397  "Permission issue when loading eBPF object"
398  " (check libbpf error on stdout)");
399  } else {
400  char buf[129];
401  libbpf_strerror(err, buf, sizeof(buf));
403  "Unable to load eBPF object: %s (%d)",
404  buf,
405  err);
406  }
407  return -1;
408  }
409 
410  /* Kernel and userspace are sharing data via map. Userspace access to the
411  * map via a file descriptor. So we need to store the map to fd info. For
412  * that we use bpf_maps_info:: */
413  struct bpf_maps_info *bpf_map_data = SCCalloc(1, sizeof(*bpf_map_data));
414  if (bpf_map_data == NULL) {
415  SCLogError(SC_ERR_MEM_ALLOC, "Can't allocate bpf map array");
416  return -1;
417  }
418 
419  /* Store the maps in bpf_maps_info:: */
420  bpf_map__for_each(map, bpfobj) {
421  if (bpf_map_data->last == BPF_MAP_MAX_COUNT) {
422  SCLogError(SC_ERR_NOT_SUPPORTED, "Too many BPF maps in eBPF files");
423  break;
424  }
425  SCLogDebug("Got a map '%s' with fd '%d'", bpf_map__name(map), bpf_map__fd(map));
426  bpf_map_data->array[bpf_map_data->last].fd = bpf_map__fd(map);
427  bpf_map_data->array[bpf_map_data->last].name = SCStrdup(bpf_map__name(map));
428  snprintf(bpf_map_data->array[bpf_map_data->last].iface, IFNAMSIZ,
429  "%s", iface);
430  if (!bpf_map_data->array[bpf_map_data->last].name) {
431  SCLogError(SC_ERR_MEM_ALLOC, "Unable to duplicate map name");
432  BpfMapsInfoFree(bpf_map_data);
433  return -1;
434  }
435  bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
436  if (config->flags & EBPF_PINNED_MAPS) {
437  SCLogConfig("Pinning: %d to %s", bpf_map_data->array[bpf_map_data->last].fd,
438  bpf_map_data->array[bpf_map_data->last].name);
439  char buf[1024];
440  snprintf(buf, sizeof(buf), "/sys/fs/bpf/suricata-%s-%s", iface,
441  bpf_map_data->array[bpf_map_data->last].name);
442  int ret = bpf_obj_pin(bpf_map_data->array[bpf_map_data->last].fd, buf);
443  if (ret != 0) {
444  SCLogWarning(SC_ERR_AFP_CREATE, "Can not pin: %s", strerror(errno));
445  }
446  /* Don't unlink pinned maps in XDP mode to avoid a state reset */
447  if (config->flags & EBPF_XDP_CODE) {
448  bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
449  } else {
450  bpf_map_data->array[bpf_map_data->last].to_unlink = 1;
451  }
452  }
453  bpf_map_data->last++;
454  }
455 
456  /* Attach the bpf_maps_info to the LiveDevice via the device storage */
457  LiveDevSetStorageById(livedev, g_livedev_storage_id, bpf_map_data);
458  LiveDevUseBypass(livedev);
459 
460  /* Finally we get the file descriptor for our eBPF program. We will use
461  * the fd to attach the program to the socket (eBPF case) or to the device
462  * (XDP case). */
463  pfd = bpf_program__fd(bpfprog);
464  if (pfd == -1) {
466  "Unable to find %s section", section);
467  return -1;
468  }
469 
470  SCLogInfo("Successfully loaded eBPF file '%s' on '%s'", path, iface);
471  *val = pfd;
472  return 0;
473 }
474 
/**
 * Attach a XDP program identified by its file descriptor to a device
 *
 * When Suricata is built without HAVE_PACKET_XDP the function does
 * nothing and returns 0.
 *
 * \param iface the name of interface
 * \param fd the eBPF/XDP program file descriptor
 * \param flags a flag to pass to attach function mostly used to set XDP mode
 * \return -1 in case of error, 0 if success
 */
int EBPFSetupXDP(const char *iface, int fd, uint8_t flags)
{
#ifdef HAVE_PACKET_XDP
    unsigned int ifindex = if_nametoindex(iface);
    if (ifindex == 0) {
        SCLogError(SC_ERR_INVALID_VALUE,
                "Unknown interface '%s'", iface);
        return -1;
    }
    int err = bpf_set_link_xdp_fd(ifindex, fd, flags);
    if (err != 0) {
        char buf[129];
        libbpf_strerror(err, buf, sizeof(buf));
        SCLogError(SC_ERR_INVALID_VALUE, "Unable to set XDP on '%s': %s (%d)",
                iface, buf, err);
        return -1;
    }
#endif
    return 0;
}
503 
504 /**
505  * Create a Flow in the table for a Flowkey
506  *
507  * \return false (this create function never returns true)
508  */
509 static bool EBPFCreateFlowForKey(struct flows_stats *flowstats, LiveDevice *dev, void *key,
510  size_t skey, FlowKey *flow_key, struct timespec *ctime,
511  uint64_t pkts_cnt, uint64_t bytes_cnt,
512  int mapfd, int cpus_count)
513 {
514  Flow *f = NULL;
515  uint32_t hash = FlowKeyGetHash(flow_key);
516 
517  f = FlowGetFromFlowKey(flow_key, ctime, hash);
518  if (f == NULL)
519  return false;
520 
521  /* set accounting, we can't know the direction, so let's just start to
522  * server then if we already have something in to server to client. We need
523  * these numbers as we will use it to see if we have new traffic coming
524  * on the flow */
526  if (fc == NULL) {
527  fc = SCCalloc(sizeof(FlowBypassInfo), 1);
528  if (fc) {
529  FlowUpdateState(f, FLOW_STATE_CAPTURE_BYPASSED);
531  fc->BypassUpdate = EBPFBypassUpdate;
532  fc->BypassFree = EBPFBypassFree;
533  fc->todstpktcnt = pkts_cnt;
534  fc->todstbytecnt = bytes_cnt;
535  f->livedev = dev;
536  EBPFBypassData *eb = SCCalloc(1, sizeof(EBPFBypassData));
537  if (eb == NULL) {
538  SCFree(fc);
539  FLOWLOCK_UNLOCK(f);
540  return false;
541  }
542  void *mkey = SCCalloc(1, skey);
543  if (mkey == NULL) {
544  SCFree(fc);
545  SCFree(eb);
546  FLOWLOCK_UNLOCK(f);
547  return false;
548  }
549  memcpy(mkey, key, skey);
550  eb->key[0] = mkey;
551  eb->mapfd = mapfd;
552  eb->cpus_count = cpus_count;
553  fc->bypass_data = eb;
554  flowstats->count++;
555  } else {
556  FLOWLOCK_UNLOCK(f);
557  return false;
558  }
559  } else {
560  EBPFBypassData *eb = (EBPFBypassData *) fc->bypass_data;
561  if (eb == NULL) {
562  FLOWLOCK_UNLOCK(f);
563  return false;
564  }
565  /* if both keys are here, then it is a flow bypassed by this
566  * instance so we ignore it */
567  if (eb->key[0] && eb->key[1]) {
568  FLOWLOCK_UNLOCK(f);
569  return false;
570  }
571  fc->tosrcpktcnt = pkts_cnt;
572  fc->tosrcbytecnt = bytes_cnt;
573  void *mkey = SCCalloc(1, skey);
574  if (mkey == NULL) {
575  FLOWLOCK_UNLOCK(f);
576  return false;
577  }
578  memcpy(mkey, key, skey);
579  eb->key[1] = mkey;
580  }
581  f->livedev = dev;
582  FLOWLOCK_UNLOCK(f);
583  return false;
584 }
585 
586 void EBPFBypassFree(void *data)
587 {
588  EBPFBypassData *eb = (EBPFBypassData *)data;
589  if (eb == NULL)
590  return;
591  SCFree(eb->key[0]);
592  if (eb->key[1]) {
593  SCFree(eb->key[1]);
594  }
595  SCFree(eb);
596  return;
597 }
598 
599 /**
600  *
601  * Compare eBPF half flow to Flow
602  *
603  * \return true if entries have activity, false if not
604  */
605 
606 static bool EBPFBypassCheckHalfFlow(Flow *f, FlowBypassInfo *fc,
607  EBPFBypassData *eb, void *key,
608  int index)
609 {
610  int i;
611  uint64_t pkts_cnt = 0;
612  uint64_t bytes_cnt = 0;
613  /* We use a per CPU structure so we will get a array of values. But if nr_cpus
614  * is 1 then we have a global hash. */
615  BPF_DECLARE_PERCPU(struct pair, values_array, eb->cpus_count);
616  memset(values_array, 0, sizeof(values_array));
617  int res = bpf_map_lookup_elem(eb->mapfd, key, values_array);
618  if (res < 0) {
619  SCLogDebug("errno: (%d) %s", errno, strerror(errno));
620  return false;
621  }
622  for (i = 0; i < eb->cpus_count; i++) {
623  /* let's start accumulating value so we can compute the counters */
624  SCLogDebug("%d: Adding pkts %lu bytes %lu", i,
625  BPF_PERCPU(values_array, i).packets,
626  BPF_PERCPU(values_array, i).bytes);
627  pkts_cnt += BPF_PERCPU(values_array, i).packets;
628  bytes_cnt += BPF_PERCPU(values_array, i).bytes;
629  }
630  if (index == 0) {
631  if (pkts_cnt != fc->todstpktcnt) {
632  fc->todstpktcnt = pkts_cnt;
633  fc->todstbytecnt = bytes_cnt;
634  return true;
635  }
636  } else {
637  if (pkts_cnt != fc->tosrcpktcnt) {
638  fc->tosrcpktcnt = pkts_cnt;
639  fc->tosrcbytecnt = bytes_cnt;
640  return true;
641  }
642  }
643 
644  return false;
645 }
646 
647 /** Check both half flows for update
648  *
649  * Update lastts in the flow and do accounting
650  *
651  * */
652 bool EBPFBypassUpdate(Flow *f, void *data, time_t tsec)
653 {
654  EBPFBypassData *eb = (EBPFBypassData *)data;
655  if (eb == NULL) {
656  return false;
657  }
659  if (fc == NULL) {
660  return false;
661  }
662  bool activity = EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[0], 0);
663  activity |= EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[1], 1);
664  if (!activity) {
665  SCLogDebug("Delete entry: %u (%ld)", FLOW_IS_IPV6(f), FlowGetId(f));
666  /* delete the entries if no time update */
667  EBPFDeleteKey(eb->mapfd, eb->key[0]);
668  EBPFDeleteKey(eb->mapfd, eb->key[1]);
669  SCLogDebug("Done delete entry: %u", FLOW_IS_IPV6(f));
670  } else {
671  f->lastts.tv_sec = tsec;
672  return true;
673  }
674  return false;
675 }
676 
677 typedef bool (*OpFlowForKey)(struct flows_stats * flowstats, LiveDevice*dev, void *key,
678  size_t skey, FlowKey *flow_key, struct timespec *ctime,
679  uint64_t pkts_cnt, uint64_t bytes_cnt,
680  int mapfd, int cpus_count);
681 
682 /**
683  * Bypassed flows iterator for IPv4
684  *
685  * This function iterates on all the flows of the IPv4 table
686  * running a callback function on each flow.
687  */
688 static int EBPFForEachFlowV4Table(ThreadVars *th_v, LiveDevice *dev, const char *name,
689  struct timespec *ctime,
690  struct ebpf_timeout_config *tcfg,
691  OpFlowForKey EBPFOpFlowForKey
692  )
693 {
694  struct flows_stats flowstats = { 0, 0, 0};
695  int mapfd = EBPFGetMapFDByName(dev->dev, name);
696  if (mapfd == -1)
697  return -1;
698 
699  struct flowv4_keys key = {}, next_key;
700  int found = 0;
701  unsigned int i;
702  uint64_t hash_cnt = 0;
703 
704  if (tcfg->cpus_count == 0) {
705  return 0;
706  }
707 
708  bool dead_flow = false;
709  while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
710  uint64_t bytes_cnt = 0;
711  uint64_t pkts_cnt = 0;
712  hash_cnt++;
713  if (dead_flow) {
714  EBPFDeleteKey(mapfd, &key);
715  dead_flow = false;
716  }
717  /* We use a per CPU structure so we will get a array of values. But if nr_cpus
718  * is 1 then we have a global hash. */
719  BPF_DECLARE_PERCPU(struct pair, values_array, tcfg->cpus_count);
720  memset(values_array, 0, sizeof(values_array));
721  int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
722  if (res < 0) {
723  SCLogDebug("no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
724  SCLogDebug("errno: (%d) %s", errno, strerror(errno));
725  key = next_key;
726  continue;
727  }
728  for (i = 0; i < tcfg->cpus_count; i++) {
729  /* let's start accumulating value so we can compute the counters */
730  SCLogDebug("%d: Adding pkts %lu bytes %lu", i,
731  BPF_PERCPU(values_array, i).packets,
732  BPF_PERCPU(values_array, i).bytes);
733  pkts_cnt += BPF_PERCPU(values_array, i).packets;
734  bytes_cnt += BPF_PERCPU(values_array, i).bytes;
735  }
736  /* Get the corresponding Flow in the Flow table to compare and update
737  * its counters and lastseen if needed */
738  FlowKey flow_key;
739  if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
740  flow_key.sp = ntohs(next_key.port16[0]);
741  flow_key.dp = ntohs(next_key.port16[1]);
742  flow_key.src.addr_data32[0] = next_key.src;
743  flow_key.dst.addr_data32[0] = next_key.dst;
744  } else {
745  flow_key.sp = next_key.port16[0];
746  flow_key.dp = next_key.port16[1];
747  flow_key.src.addr_data32[0] = ntohl(next_key.src);
748  flow_key.dst.addr_data32[0] = ntohl(next_key.dst);
749  }
750  flow_key.src.family = AF_INET;
751  flow_key.src.addr_data32[1] = 0;
752  flow_key.src.addr_data32[2] = 0;
753  flow_key.src.addr_data32[3] = 0;
754  flow_key.dst.family = AF_INET;
755  flow_key.dst.addr_data32[1] = 0;
756  flow_key.dst.addr_data32[2] = 0;
757  flow_key.dst.addr_data32[3] = 0;
758  flow_key.vlan_id[0] = next_key.vlan0;
759  flow_key.vlan_id[1] = next_key.vlan1;
760  if (next_key.ip_proto == 1) {
761  flow_key.proto = IPPROTO_TCP;
762  } else {
763  flow_key.proto = IPPROTO_UDP;
764  }
765  flow_key.recursion_level = 0;
766  dead_flow = EBPFOpFlowForKey(&flowstats, dev, &next_key, sizeof(next_key), &flow_key,
767  ctime, pkts_cnt, bytes_cnt,
768  mapfd, tcfg->cpus_count);
769  if (dead_flow) {
770  found = 1;
771  }
772 
773  if (TmThreadsCheckFlag(th_v, THV_KILL)) {
774  return 0;
775  }
776 
777  key = next_key;
778  }
779  if (dead_flow) {
780  EBPFDeleteKey(mapfd, &key);
781  found = 1;
782  }
783  SC_ATOMIC_ADD(dev->bypassed, flowstats.packets);
784 
785  LiveDevAddBypassStats(dev, flowstats.count, AF_INET);
786  SCLogInfo("IPv4 bypassed flow table size: %" PRIu64, hash_cnt);
787 
788  return found;
789 }
790 
791 /**
792  * Bypassed flows iterator for IPv6
793  *
794  * This function iterates on all the flows of the IPv4 table
795  * running a callback function on each flow.
796  */
797 static int EBPFForEachFlowV6Table(ThreadVars *th_v,
798  LiveDevice *dev, const char *name,
799  struct timespec *ctime,
800  struct ebpf_timeout_config *tcfg,
801  OpFlowForKey EBPFOpFlowForKey
802  )
803 {
804  struct flows_stats flowstats = { 0, 0, 0};
805  int mapfd = EBPFGetMapFDByName(dev->dev, name);
806  if (mapfd == -1)
807  return -1;
808 
809  struct flowv6_keys key = {}, next_key;
810  int found = 0;
811  unsigned int i;
812  uint64_t hash_cnt = 0;
813 
814  if (tcfg->cpus_count == 0) {
815  SCLogWarning(SC_ERR_INVALID_VALUE, "CPU count should not be 0");
816  return 0;
817  }
818 
819  uint64_t pkts_cnt = 0;
820  while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
821  uint64_t bytes_cnt = 0;
822  hash_cnt++;
823  if (pkts_cnt > 0) {
824  EBPFDeleteKey(mapfd, &key);
825  }
826  pkts_cnt = 0;
827  /* We use a per CPU structure so we will get a array of values. But if nr_cpus
828  * is 1 then we have a global hash. */
829  BPF_DECLARE_PERCPU(struct pair, values_array, tcfg->cpus_count);
830  memset(values_array, 0, sizeof(values_array));
831  int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
832  if (res < 0) {
833  SCLogDebug("no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
834  key = next_key;
835  continue;
836  }
837  for (i = 0; i < tcfg->cpus_count; i++) {
838  /* let's start accumulating value so we can compute the counters */
839  SCLogDebug("%d: Adding pkts %lu bytes %lu", i,
840  BPF_PERCPU(values_array, i).packets,
841  BPF_PERCPU(values_array, i).bytes);
842  pkts_cnt += BPF_PERCPU(values_array, i).packets;
843  bytes_cnt += BPF_PERCPU(values_array, i).bytes;
844  }
845  /* Get the corresponding Flow in the Flow table to compare and update
846  * its counters and lastseen if needed */
847  FlowKey flow_key;
848  if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
849  flow_key.sp = ntohs(next_key.port16[0]);
850  flow_key.dp = ntohs(next_key.port16[1]);
851  flow_key.src.family = AF_INET6;
852  flow_key.src.addr_data32[0] = next_key.src[0];
853  flow_key.src.addr_data32[1] = next_key.src[1];
854  flow_key.src.addr_data32[2] = next_key.src[2];
855  flow_key.src.addr_data32[3] = next_key.src[3];
856  flow_key.dst.family = AF_INET6;
857  flow_key.dst.addr_data32[0] = next_key.dst[0];
858  flow_key.dst.addr_data32[1] = next_key.dst[1];
859  flow_key.dst.addr_data32[2] = next_key.dst[2];
860  flow_key.dst.addr_data32[3] = next_key.dst[3];
861  } else {
862  flow_key.sp = next_key.port16[0];
863  flow_key.dp = next_key.port16[1];
864  flow_key.src.family = AF_INET6;
865  flow_key.src.addr_data32[0] = ntohl(next_key.src[0]);
866  flow_key.src.addr_data32[1] = ntohl(next_key.src[1]);
867  flow_key.src.addr_data32[2] = ntohl(next_key.src[2]);
868  flow_key.src.addr_data32[3] = ntohl(next_key.src[3]);
869  flow_key.dst.family = AF_INET6;
870  flow_key.dst.addr_data32[0] = ntohl(next_key.dst[0]);
871  flow_key.dst.addr_data32[1] = ntohl(next_key.dst[1]);
872  flow_key.dst.addr_data32[2] = ntohl(next_key.dst[2]);
873  flow_key.dst.addr_data32[3] = ntohl(next_key.dst[3]);
874  }
875  flow_key.vlan_id[0] = next_key.vlan0;
876  flow_key.vlan_id[1] = next_key.vlan1;
877  if (next_key.ip_proto == 1) {
878  flow_key.proto = IPPROTO_TCP;
879  } else {
880  flow_key.proto = IPPROTO_UDP;
881  }
882  flow_key.recursion_level = 0;
883  pkts_cnt = EBPFOpFlowForKey(&flowstats, dev, &next_key, sizeof(next_key), &flow_key,
884  ctime, pkts_cnt, bytes_cnt,
885  mapfd, tcfg->cpus_count);
886  if (pkts_cnt > 0) {
887  found = 1;
888  }
889 
890  if (TmThreadsCheckFlag(th_v, THV_KILL)) {
891  return 0;
892  }
893 
894  key = next_key;
895  }
896  if (pkts_cnt > 0) {
897  EBPFDeleteKey(mapfd, &key);
898  found = 1;
899  }
900  SC_ATOMIC_ADD(dev->bypassed, flowstats.packets);
901 
902  LiveDevAddBypassStats(dev, flowstats.count, AF_INET6);
903  SCLogInfo("IPv6 bypassed flow table size: %" PRIu64, hash_cnt);
904  return found;
905 }
906 
907 
908 int EBPFCheckBypassedFlowCreate(ThreadVars *th_v, struct timespec *curtime, void *data)
909 {
910  LiveDevice *ldev = NULL, *ndev;
911  struct ebpf_timeout_config *cfg = (struct ebpf_timeout_config *)data;
912  while(LiveDeviceForEach(&ldev, &ndev)) {
913  EBPFForEachFlowV4Table(th_v, ldev, "flow_table_v4",
914  curtime,
915  cfg, EBPFCreateFlowForKey);
916  EBPFForEachFlowV6Table(th_v, ldev, "flow_table_v6",
917  curtime,
918  cfg, EBPFCreateFlowForKey);
919  }
920 
921  return 0;
922 }
923 
924 void EBPFRegisterExtension(void)
925 {
926  g_livedev_storage_id = LiveDevStorageRegister("bpfmap", sizeof(void *), NULL, BpfMapsInfoFree);
927  g_flow_storage_id = FlowStorageRegister("bypassedlist", sizeof(void *), NULL, BypassedListFree);
928 }
929 
930 
931 #ifdef HAVE_PACKET_XDP
932 
933 static uint32_t g_redirect_iface_cpu_counter = 0;
934 
935 static int EBPFAddCPUToMap(const char *iface, uint32_t i)
936 {
937  int cpumap = EBPFGetMapFDByName(iface, "cpu_map");
938  uint32_t queue_size = 4096;
939  int ret;
940 
941  if (cpumap < 0) {
942  SCLogError(SC_ERR_AFP_CREATE, "Can't find cpu_map");
943  return -1;
944  }
945  ret = bpf_map_update_elem(cpumap, &i, &queue_size, 0);
946  if (ret) {
947  SCLogError(SC_ERR_AFP_CREATE, "Create CPU entry failed (err:%d)", ret);
948  return -1;
949  }
950  int cpus_available = EBPFGetMapFDByName(iface, "cpus_available");
951  if (cpus_available < 0) {
952  SCLogError(SC_ERR_AFP_CREATE, "Can't find cpus_available map");
953  return -1;
954  }
955 
956  ret = bpf_map_update_elem(cpus_available, &g_redirect_iface_cpu_counter, &i, 0);
957  if (ret) {
958  SCLogError(SC_ERR_AFP_CREATE, "Create CPU entry failed (err:%d)", ret);
959  return -1;
960  }
961  return 0;
962 }
963 
964 static void EBPFRedirectMapAddCPU(int i, void *data)
965 {
966  if (EBPFAddCPUToMap(data, i) < 0) {
968  "Unable to add CPU %d to set", i);
969  } else {
970  g_redirect_iface_cpu_counter++;
971  }
972 }
973 
974 void EBPFBuildCPUSet(ConfNode *node, char *iface)
975 {
976  uint32_t key0 = 0;
977  int mapfd = EBPFGetMapFDByName(iface, "cpus_count");
978  if (mapfd < 0) {
980  "Unable to find 'cpus_count' map");
981  return;
982  }
983  g_redirect_iface_cpu_counter = 0;
984  if (node == NULL) {
985  bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
986  BPF_ANY);
987  return;
988  }
989  BuildCpusetWithCallback("xdp-cpu-redirect", node,
990  EBPFRedirectMapAddCPU,
991  iface);
992  bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
993  BPF_ANY);
994 }
995 
996 /**
997  * Setup peer interface in XDP system
998  *
999  * Ths function set up the peer interface in the XDP maps used by the
1000  * bypass filter. The first map tx_peer has type device map and is
1001  * used to store the peer. The second map tx_peer_int is used by the
1002  * code to check if we have a peer defined for this interface.
1003  *
1004  * As the map are per device we just need maps with one single element.
1005  * In both case, we use the key 0 to enter element so XDP kernel code
1006  * is using the same key.
1007  */
1008 int EBPFSetPeerIface(const char *iface, const char *out_iface)
1009 {
1010  int mapfd = EBPFGetMapFDByName(iface, "tx_peer");
1011  if (mapfd < 0) {
1013  "Unable to find 'tx_peer' map");
1014  return -1;
1015  }
1016  int intmapfd = EBPFGetMapFDByName(iface, "tx_peer_int");
1017  if (intmapfd < 0) {
1019  "Unable to find 'tx_peer_int' map");
1020  return -1;
1021  }
1022 
1023  int key0 = 0;
1024  unsigned int peer_index = if_nametoindex(out_iface);
1025  if (peer_index == 0) {
1026  SCLogError(SC_ERR_INVALID_VALUE, "No iface '%s'", out_iface);
1027  return -1;
1028  }
1029  int ret = bpf_map_update_elem(mapfd, &key0, &peer_index, BPF_ANY);
1030  if (ret) {
1031  SCLogError(SC_ERR_AFP_CREATE, "Create peer entry failed (err:%d)", ret);
1032  return -1;
1033  }
1034  ret = bpf_map_update_elem(intmapfd, &key0, &peer_index, BPF_ANY);
1035  if (ret) {
1036  SCLogError(SC_ERR_AFP_CREATE, "Create peer entry failed (err:%d)", ret);
1037  return -1;
1038  }
1039  return 0;
1040 }
1041 
1042 /**
1043  * Bypass the flow on all ifaces it is seen on. This is used
1044  * in IPS mode.
1045  */
1046 
1047 int EBPFUpdateFlow(Flow *f, Packet *p, void *data)
1048 {
1049  BypassedIfaceList *ifl = (BypassedIfaceList *)FlowGetStorageById(f, g_flow_storage_id);
1050  if (ifl == NULL) {
1051  ifl = SCCalloc(1, sizeof(*ifl));
1052  if (ifl == NULL) {
1053  return 0;
1054  }
1055  ifl->dev = p->livedev;
1056  FlowSetStorageById(f, g_flow_storage_id, ifl);
1057  return 1;
1058  }
1059  /* Look for packet iface in the list */
1060  BypassedIfaceList *ldev = ifl;
1061  while (ldev) {
1062  if (p->livedev == ldev->dev) {
1063  return 1;
1064  }
1065  ldev = ldev->next;
1066  }
1067  /* Call bypass function if ever not in the list */
1068  p->BypassPacketsFlow(p);
1069 
1070  /* Add iface to the list */
1071  BypassedIfaceList *nifl = SCCalloc(1, sizeof(*nifl));
1072  if (nifl == NULL) {
1073  return 0;
1074  }
1075  nifl->dev = p->livedev;
1076  nifl->next = ifl;
1077  FlowSetStorageById(f, g_flow_storage_id, nifl);
1078  return 1;
1079 }
1080 
1081 #endif /* HAVE_PACKET_XDP */
1082 
1083 #endif
FlowStorageId
Definition: flow-storage.h:31
tm-threads.h
flow-bypass.h
FLOW_IS_IPV6
#define FLOW_IS_IPV6(f)
Definition: flow.h:158
SC_ERR_INVALID_VALUE
@ SC_ERR_INVALID_VALUE
Definition: util-error.h:160
LiveDevStorageId_
Definition: device-storage.h:31
FlowKey_::src
Address src
Definition: flow.h:308
GetFlowBypassInfoID
FlowStorageId GetFlowBypassInfoID(void)
Definition: flow-util.c:221
FlowBypassInfo_
Definition: flow.h:534
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:296
next
struct HtpBodyChunk_ * next
Definition: app-layer-htp.h:0
flows_stats::count
uint64_t count
Definition: flow-bypass.h:28
SC_ERR_NOT_SUPPORTED
@ SC_ERR_NOT_SUPPORTED
Definition: util-error.h:257
FlowKeyGetHash
uint32_t FlowKeyGetHash(FlowKey *fk)
Definition: flow-hash.c:227
BuildCpusetWithCallback
void BuildCpusetWithCallback(const char *name, ConfNode *node, void(*Callback)(int i, void *data), void *data)
Definition: util-affinity.c:99
Flow_
Flow data structure.
Definition: flow.h:353
LiveDevice_
Definition: util-device.h:39
SC_ATOMIC_ADD
#define SC_ATOMIC_ADD(name, val)
add a value to our atomic variable
Definition: util-atomic.h:333
flow-hash.h
FlowBypassInfo_::tosrcbytecnt
uint64_t tosrcbytecnt
Definition: flow.h:539
LiveDeviceForEach
LiveDevice * LiveDeviceForEach(LiveDevice **ldev, LiveDevice **ndev)
Definition: util-device.c:448
device-storage.h
Packet_::BypassPacketsFlow
int(* BypassPacketsFlow)(struct Packet_ *)
Definition: decode.h:518
FLOWLOCK_UNLOCK
#define FLOWLOCK_UNLOCK(fb)
Definition: flow.h:268
LiveDevGetStorageById
void * LiveDevGetStorageById(LiveDevice *d, LiveDevStorageId id)
Get a value from a given LiveDevice storage.
Definition: device-storage.c:88
LiveDevStorageRegister
LiveDevStorageId LiveDevStorageRegister(const char *name, const unsigned int size, void *(*Alloc)(unsigned int), void(*Free)(void *))
Register a LiveDevice storage.
Definition: device-storage.c:59
util-device.h
FlowBypassInfo_::todstbytecnt
uint64_t todstbytecnt
Definition: flow.h:541
FlowBypassInfo_::BypassUpdate
bool(* BypassUpdate)(Flow *f, void *data, time_t tsec)
Definition: flow.h:535
util-cpu.h
FlowBypassInfo_::BypassFree
void(* BypassFree)(void *data)
Definition: flow.h:536
LiveGetDevice
LiveDevice * LiveGetDevice(const char *name)
Get a pointer to the device at idx.
Definition: util-device.c:279
res
PoolThreadReserved res
Definition: stream-tcp-private.h:0
FlowKey_::recursion_level
uint8_t recursion_level
Definition: flow.h:311
util-ebpf.h
ThreadVars_
Per thread variable structure.
Definition: threadvars.h:56
util-affinity.h
THV_KILL
#define THV_KILL
Definition: threadvars.h:39
FlowSetStorageById
int FlowSetStorageById(Flow *f, FlowStorageId id, void *ptr)
Definition: flow-storage.c:44
FlowBypassInfo_::todstpktcnt
uint64_t todstpktcnt
Definition: flow.h:540
FlowStorageRegister
FlowStorageId FlowStorageRegister(const char *name, const unsigned int size, void *(*Alloc)(unsigned int), void(*Free)(void *))
Definition: flow-storage.c:65
LiveDevice_::dev
char * dev
Definition: util-device.h:40
FlowBypassInfo_::bypass_data
void * bypass_data
Definition: flow.h:537
SC_ERR_INVALID_ARGUMENT
@ SC_ERR_INVALID_ARGUMENT
Definition: util-error.h:43
SC_ERR_SYSCALL
@ SC_ERR_SYSCALL
Definition: util-error.h:80
FlowKey_::sp
Port sp
Definition: flow.h:309
Packet_
Definition: decode.h:433
SC_ERR_SPRINTF
@ SC_ERR_SPRINTF
Definition: util-error.h:42
FlowKey_::vlan_id
uint16_t vlan_id[2]
Definition: flow.h:312
Packet_::livedev
struct LiveDevice_ * livedev
Definition: decode.h:586
LiveDevUseBypass
int LiveDevUseBypass(LiveDevice *dev)
Definition: util-device.c:502
LiveDevAddBypassStats
void LiveDevAddBypassStats(LiveDevice *dev, uint64_t cnt, int family)
Definition: util-device.c:543
FlowGetStorageById
void * FlowGetStorageById(Flow *f, FlowStorageId id)
Definition: flow-storage.c:39
FlowBypassInfo_::tosrcpktcnt
uint64_t tosrcpktcnt
Definition: flow.h:538
SCLogInfo
#define SCLogInfo(...)
Macro used to log INFORMATIONAL messages.
Definition: util-debug.h:215
FlowUpdateState
void FlowUpdateState(Flow *f, const enum FlowState s)
Definition: flow.c:1162
Flow_::lastts
struct timeval lastts
Definition: flow.h:414
flow-storage.h
flags
uint8_t flags
Definition: decode-gre.h:0
suricata-common.h
FlowKey_::dst
Address dst
Definition: flow.h:308
SCLogError
#define SCLogError(err_code,...)
Macro used to log ERROR messages.
Definition: util-debug.h:255
SCStrdup
#define SCStrdup(s)
Definition: util-mem.h:56
SC_ERR_AFP_CREATE
@ SC_ERR_AFP_CREATE
Definition: util-error.h:222
Flow_::livedev
struct LiveDevice_ * livedev
Definition: flow.h:406
SCLogConfig
struct SCLogConfig_ SCLogConfig
Holds the config state used by the logging api.
FlowKey_::proto
uint8_t proto
Definition: flow.h:310
SCLogWarning
#define SCLogWarning(err_code,...)
Macro used to log WARNING messages.
Definition: util-debug.h:242
SCFree
#define SCFree(p)
Definition: util-mem.h:61
ConfNode_
Definition: conf.h:32
FlowGetFromFlowKey
Flow * FlowGetFromFlowKey(FlowKey *key, struct timespec *ttime, const uint32_t hash)
Get or create a Flow using a FlowKey.
Definition: flow-hash.c:881
bpf_program
Definition: source-af-packet.c:79
FlowStorageId::id
int id
Definition: flow-storage.h:32
LiveDevSetStorageById
int LiveDevSetStorageById(LiveDevice *d, LiveDevStorageId id, void *ptr)
Store a pointer in a given LiveDevice storage.
Definition: device-storage.c:75
FlowKey_
Definition: flow.h:307
SC_ERR_MEM_ALLOC
@ SC_ERR_MEM_ALLOC
Definition: util-error.h:31
Address_::family
char family
Definition: decode.h:114
flow.h
TmThreadsCheckFlag
int TmThreadsCheckFlag(ThreadVars *tv, uint32_t flag)
Check if a thread flag is set.
Definition: tm-threads.c:90
SCCalloc
#define SCCalloc(nm, sz)
Definition: util-mem.h:53
LiveDevStorageId_::id
int id
Definition: device-storage.h:32
FlowKey_::dp
Port dp
Definition: flow.h:309
flows_stats::packets
uint64_t packets
Definition: flow-bypass.h:29
flows_stats
Definition: flow-bypass.h:27