util-ebpf.c
1 /* Copyright (C) 2018-2021 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \ingroup afppacket
20  *
21  * @{
22  */
23 
24 /**
25  * \file
26  *
27  * \author Eric Leblond <eric@regit.org>
28  *
29  * eBPF utility
30  *
31  */
32 
33 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34 #define SC_PCAP_DONT_INCLUDE_PCAP_H 1
35 
36 #include "suricata-common.h"
37 #include "flow-bypass.h"
38 
39 #ifdef HAVE_PACKET_EBPF
40 
41 #include <sys/time.h>
42 #include <sys/resource.h>
43 
44 #include "util-ebpf.h"
45 #include "util-affinity.h"
46 #include "util-cpu.h"
47 #include "util-device.h"
48 
49 #include "device-storage.h"
50 #include "flow-storage.h"
51 #include "flow.h"
52 #include "flow-hash.h"
53 #include "tm-threads.h"
54 
55 #include <bpf/libbpf.h>
56 #include <bpf/bpf.h>
57 #include <net/if.h>
58 #include "autoconf.h"
59 
60 #define BPF_MAP_MAX_COUNT 16
61 
62 #define BYPASSED_FLOW_TIMEOUT 60
63 
64 static LiveDevStorageId g_livedev_storage_id = { .id = -1 };
65 static FlowStorageId g_flow_storage_id = { .id = -1 };
66 
67 struct bpf_map_item {
68  char iface[IFNAMSIZ];
69  char * name;
70  int fd;
71  uint8_t to_unlink;
72 };
73 
74 struct bpf_maps_info {
75  struct bpf_map_item array[BPF_MAP_MAX_COUNT];
76  int last;
77 };
78 
79 typedef struct BypassedIfaceList_ {
80  LiveDevice *dev;
81  struct BypassedIfaceList_ *next;
82 } BypassedIfaceList;
83 
84 static void BpfMapsInfoFree(void *bpf)
85 {
86  struct bpf_maps_info *bpfinfo = (struct bpf_maps_info *)bpf;
87  int i;
88  for (i = 0; i < bpfinfo->last; i ++) {
89  if (bpfinfo->array[i].name) {
90  if (bpfinfo->array[i].to_unlink) {
91  char pinnedpath[PATH_MAX];
92  int ret = snprintf(pinnedpath, sizeof(pinnedpath),
93  "/sys/fs/bpf/suricata-%s-%s",
94  bpfinfo->array[i].iface,
95  bpfinfo->array[i].name);
96  if (ret > 0) {
97  /* Unlink the pinned entry */
98  ret = unlink(pinnedpath);
99  if (ret == -1) {
100  int error = errno;
101  SCLogWarning(
102  "Unable to remove %s: %s (%d)", pinnedpath, strerror(error), error);
103  }
104  } else {
105  SCLogWarning("Unable to remove map %s", bpfinfo->array[i].name);
106  }
107  }
108  SCFree(bpfinfo->array[i].name);
109  }
110  }
111  SCFree(bpfinfo);
112 }
113 
114 static void BypassedListFree(void *ifl)
115 {
116  BypassedIfaceList *mifl = (BypassedIfaceList *)ifl;
117  BypassedIfaceList *nifl;
118  while (mifl) {
119  nifl = mifl->next;
120  SCFree(mifl);
121  mifl = nifl;
122  }
123 }
124 
125 void EBPFDeleteKey(int fd, void *key)
126 {
127  int ret = bpf_map_delete_elem(fd, key);
128  if (ret < 0) {
129  SCLogWarning("Unable to delete entry: %s (%d)", strerror(errno), errno);
130  }
131 }
132 
133 static struct bpf_maps_info *EBPFGetBpfMap(const char *iface)
134 {
135  LiveDevice *livedev = LiveGetDevice(iface);
136  if (livedev == NULL)
137  return NULL;
138  void *data = LiveDevGetStorageById(livedev, g_livedev_storage_id);
139 
140  return (struct bpf_maps_info *)data;
141 }
142 
143 /**
144  * Get the file descriptor of a map in the scope of an interface
145  *
146  * \param iface the interface where the map needs to be looked up
147  * \param name the name of the map
148  * \return the file descriptor or -1 in case of error
149  */
150 int EBPFGetMapFDByName(const char *iface, const char *name)
151 {
152  int i;
153 
154  if (iface == NULL || name == NULL)
155  return -1;
156  struct bpf_maps_info *bpf_maps = EBPFGetBpfMap(iface);
157  if (bpf_maps == NULL)
158  return -1;
159 
160  for (i = 0; i < BPF_MAP_MAX_COUNT; i++) {
161  if (!bpf_maps->array[i].name)
162  continue;
163  if (!strcmp(bpf_maps->array[i].name, name)) {
164  SCLogDebug("Got fd %d for eBPF map '%s'", bpf_maps->array[i].fd, name);
165  return bpf_maps->array[i].fd;
166  }
167  }
168 
169  return -1;
170 }
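/**
 * Editorial usage sketch (not part of the original file): look up the fd of
 * the per-interface "flow_table_v4" map and query a single entry. The
 * interface name is an assumption; the key and value layouts follow the
 * flowv4_keys and pair structures used by the iterators below. Per-CPU maps
 * need an array of struct pair (one per possible CPU) instead of a single value.
 *
 * \code
 * int fd = EBPFGetMapFDByName("eth0", "flow_table_v4");
 * if (fd >= 0) {
 *     struct flowv4_keys lookup_key = {};   // filled in by the caller
 *     struct pair counters = { 0, 0 };      // packets/bytes seen by the filter
 *     if (bpf_map_lookup_elem(fd, &lookup_key, &counters) == 0) {
 *         SCLogDebug("bypassed flow: %" PRIu64 " packets", counters.packets);
 *     }
 * }
 * \endcode
 */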
171 
172 static int EBPFLoadPinnedMapsFile(LiveDevice *livedev, const char *file)
173 {
174  char pinnedpath[1024];
175  snprintf(pinnedpath, sizeof(pinnedpath),
176  "/sys/fs/bpf/suricata-%s-%s",
177  livedev->dev,
178  file);
179 
180  return bpf_obj_get(pinnedpath);
181 }
182 
183 static int EBPFLoadPinnedMaps(LiveDevice *livedev, struct ebpf_timeout_config *config)
184 {
185  int fd_v4 = -1, fd_v6 = -1;
186 
187  /* First try to load the eBPF check map and return if found */
188  if (config->pinned_maps_name) {
189  int ret = EBPFLoadPinnedMapsFile(livedev, config->pinned_maps_name);
190  if (ret == 0) {
191  /* pinned maps found, let's just exit as XDP filter is in place */
192  return ret;
193  }
194  }
195 
196  if (config->mode == AFP_MODE_XDP_BYPASS) {
197  /* Get flow v4 table */
198  fd_v4 = EBPFLoadPinnedMapsFile(livedev, "flow_table_v4");
199  if (fd_v4 < 0) {
200  return fd_v4;
201  }
202 
203  /* Get flow v6 table */
204  fd_v6 = EBPFLoadPinnedMapsFile(livedev, "flow_table_v6");
205  if (fd_v6 < 0) {
206  SCLogWarning("Found a flow_table_v4 map but no flow_table_v6 map");
207  return fd_v6;
208  }
209  }
210 
211  struct bpf_maps_info *bpf_map_data = SCCalloc(1, sizeof(*bpf_map_data));
212  if (bpf_map_data == NULL) {
213  SCLogError("Can't allocate bpf map array");
214  return -1;
215  }
216 
217  if (config->mode == AFP_MODE_XDP_BYPASS) {
218  bpf_map_data->array[0].fd = fd_v4;
219  bpf_map_data->array[0].name = SCStrdup("flow_table_v4");
220  if (bpf_map_data->array[0].name == NULL) {
221  goto alloc_error;
222  }
223  bpf_map_data->array[1].fd = fd_v6;
224  bpf_map_data->array[1].name = SCStrdup("flow_table_v6");
225  if (bpf_map_data->array[1].name == NULL) {
226  goto alloc_error;
227  }
228  bpf_map_data->last = 2;
229  } else {
230  bpf_map_data->last = 0;
231  }
232 
233  /* Load other known maps: cpu_map, cpus_available, tx_peer, tx_peer_int */
234  int fd = EBPFLoadPinnedMapsFile(livedev, "cpu_map");
235  if (fd >= 0) {
236  bpf_map_data->array[bpf_map_data->last].fd = fd;
237  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("cpu_map");
238  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
239  goto alloc_error;
240  }
241  bpf_map_data->last++;
242  }
243  fd = EBPFLoadPinnedMapsFile(livedev, "cpus_available");
244  if (fd >= 0) {
245  bpf_map_data->array[bpf_map_data->last].fd = fd;
246  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("cpus_available");
247  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
248  goto alloc_error;
249  }
250  bpf_map_data->last++;
251  }
252  fd = EBPFLoadPinnedMapsFile(livedev, "tx_peer");
253  if (fd >= 0) {
254  bpf_map_data->array[bpf_map_data->last].fd = fd;
255  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("tx_peer");
256  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
257  goto alloc_error;
258  }
259  bpf_map_data->last++;
260  }
261  fd = EBPFLoadPinnedMapsFile(livedev, "tx_peer_int");
262  if (fd >= 0) {
263  bpf_map_data->array[bpf_map_data->last].fd = fd;
264  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("tx_peer_int");
265  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
266  goto alloc_error;
267  }
268  bpf_map_data->last++;
269  }
270 
271  /* Attach the bpf_maps_info to the LiveDevice via the device storage */
272  LiveDevSetStorageById(livedev, g_livedev_storage_id, bpf_map_data);
273  /* Declare that device will use bypass stats */
274  LiveDevUseBypass(livedev);
275 
276  return 0;
277 
278 alloc_error:
279  for (int i = 0; i < bpf_map_data->last; i++) {
280  SCFree(bpf_map_data->array[i].name);
281  }
282  bpf_map_data->last = 0;
283  SCLogError("Can't allocate bpf map name");
284  return -1;
285 }
286 
287 /**
288  * Load a section of an eBPF file
289  *
290  * This function loads a section of an eBPF object file and returns,
291  * via the parameter val, the file descriptor that will be used to
292  * inject the eBPF code into the kernel via a syscall.
293  *
294  * \param path the path of the eBPF file to load
295  * \param section the section in the eBPF file to load
296  * \param val a pointer to an integer that will receive the file descriptor
297  * \return -1 in case of error, 0 in case of success, 1 if pinned maps are loaded
298  */
299 int EBPFLoadFile(const char *iface, const char *path, const char * section,
300  int *val, struct ebpf_timeout_config *config)
301 {
302  int err, pfd;
303  bool found = false;
304  struct bpf_object *bpfobj = NULL;
305  struct bpf_program *bpfprog = NULL;
306  struct bpf_map *map = NULL;
307 
308  if (iface == NULL)
309  return -1;
310  LiveDevice *livedev = LiveGetDevice(iface);
311  if (livedev == NULL)
312  return -1;
313 
314  if (config->flags & EBPF_XDP_CODE && config->flags & EBPF_PINNED_MAPS) {
315  /* We try to get our flow table maps and if we have them we can simply return */
316  if (EBPFLoadPinnedMaps(livedev, config) == 0) {
317  SCLogInfo("Loaded pinned maps, will use already loaded eBPF filter");
318  return 1;
319  }
320  }
321 
322  if (! path) {
323  SCLogError("No file defined to load eBPF from");
324  return -1;
325  }
326 
327  /* Sending the eBPF code to the kernel requires a large amount of
328  * locked memory so we set it to unlimited to avoid an EPERM error */
329  struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
330  if (setrlimit(RLIMIT_MEMLOCK, &r) != 0) {
331  SCLogError("Unable to lock memory: %s (%d)", strerror(errno), errno);
332  return -1;
333  }
334 
335  /* Open the eBPF file and parse it */
336  bpfobj = bpf_object__open(path);
337  long error = libbpf_get_error(bpfobj);
338  if (error) {
339  char err_buf[128];
340  libbpf_strerror(error, err_buf,
341  sizeof(err_buf));
342  SCLogError("Unable to load eBPF objects in '%s': %s", path, err_buf);
343  return -1;
344  }
345 
346  if (config->flags & EBPF_XDP_HW_MODE) {
347  unsigned int ifindex = if_nametoindex(iface);
348  bpf_object__for_each_program(bpfprog, bpfobj) {
349  bpf_program__set_ifindex(bpfprog, ifindex);
350  }
351  bpf_map__for_each(map, bpfobj) {
352  bpf_map__set_ifindex(map, ifindex);
353  }
354  }
355 
356  /* Let's check that our section is here */
357  bpf_object__for_each_program(bpfprog, bpfobj) {
358 #ifdef HAVE_BPF_PROGRAM__SECTION_NAME
359  const char *title = bpf_program__section_name(bpfprog);
360 #else
361  const char *title = bpf_program__title(bpfprog, 0);
362 #endif
363  if (!strcmp(title, section)) {
364  if (config->flags & EBPF_SOCKET_FILTER) {
365 #ifdef HAVE_BPF_PROGRAM__SET_TYPE
366  bpf_program__set_type(bpfprog, BPF_PROG_TYPE_SOCKET_FILTER);
367 #else
368  /* Fall back to legacy API */
369  bpf_program__set_socket_filter(bpfprog);
370 #endif
371  } else {
372 #ifdef HAVE_BPF_PROGRAM__SET_TYPE
373  bpf_program__set_type(bpfprog, BPF_PROG_TYPE_XDP);
374 #else
375  /* Fall back to legacy API */
376  bpf_program__set_xdp(bpfprog);
377 #endif
378  }
379  found = true;
380  break;
381  }
382  }
383 
384  if (found == false) {
385  SCLogError("No section '%s' in '%s' file. Will not be able to use the file", section, path);
386  return -1;
387  }
388 
389  err = bpf_object__load(bpfobj);
390  if (err < 0) {
391  if (err == -EPERM) {
392  SCLogError("Permission issue when loading eBPF object"
393  " (check libbpf error on stdout)");
394  } else {
395  char buf[129];
396  libbpf_strerror(err, buf, sizeof(buf));
397  SCLogError("Unable to load eBPF object: %s (%d)", buf, err);
398  }
399  return -1;
400  }
401 
402  /* Kernel and userspace share data via maps. Userspace accesses a
403  * map via a file descriptor, so we need to store the map-to-fd
404  * mapping. For that we use bpf_maps_info:: */
405  struct bpf_maps_info *bpf_map_data = SCCalloc(1, sizeof(*bpf_map_data));
406  if (bpf_map_data == NULL) {
407  SCLogError("Can't allocate bpf map array");
408  return -1;
409  }
410 
411  /* Store the maps in bpf_maps_info:: */
412  bpf_map__for_each(map, bpfobj) {
413  if (bpf_map_data->last == BPF_MAP_MAX_COUNT) {
414  SCLogError("Too many BPF maps in eBPF files");
415  break;
416  }
417  SCLogDebug("Got a map '%s' with fd '%d'", bpf_map__name(map), bpf_map__fd(map));
418  bpf_map_data->array[bpf_map_data->last].fd = bpf_map__fd(map);
419  bpf_map_data->array[bpf_map_data->last].name = SCStrdup(bpf_map__name(map));
420  snprintf(bpf_map_data->array[bpf_map_data->last].iface, IFNAMSIZ,
421  "%s", iface);
422  if (!bpf_map_data->array[bpf_map_data->last].name) {
423  SCLogError("Unable to duplicate map name");
424  BpfMapsInfoFree(bpf_map_data);
425  return -1;
426  }
427  bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
428  if (config->flags & EBPF_PINNED_MAPS) {
429  SCLogConfig("Pinning: %d to %s", bpf_map_data->array[bpf_map_data->last].fd,
430  bpf_map_data->array[bpf_map_data->last].name);
431  char buf[1024];
432  snprintf(buf, sizeof(buf), "/sys/fs/bpf/suricata-%s-%s", iface,
433  bpf_map_data->array[bpf_map_data->last].name);
434  int ret = bpf_obj_pin(bpf_map_data->array[bpf_map_data->last].fd, buf);
435  if (ret != 0) {
436  SCLogWarning("Can not pin: %s", strerror(errno));
437  }
438  /* Don't unlink pinned maps in XDP mode to avoid a state reset */
439  if (config->flags & EBPF_XDP_CODE) {
440  bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
441  } else {
442  bpf_map_data->array[bpf_map_data->last].to_unlink = 1;
443  }
444  }
445  bpf_map_data->last++;
446  }
447 
448  /* Attach the bpf_maps_info to the LiveDevice via the device storage */
449  LiveDevSetStorageById(livedev, g_livedev_storage_id, bpf_map_data);
450  LiveDevUseBypass(livedev);
451 
452  /* Finally we get the file descriptor for our eBPF program. We will use
453  * the fd to attach the program to the socket (eBPF case) or to the device
454  * (XDP case). */
455  pfd = bpf_program__fd(bpfprog);
456  if (pfd == -1) {
457  SCLogError("Unable to find %s section", section);
458  return -1;
459  }
460 
461  SCLogInfo("Successfully loaded eBPF file '%s' on '%s'", path, iface);
462  *val = pfd;
463  return 0;
464 }
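/**
 * Editorial usage sketch (not part of the original file): load a section of
 * a compiled eBPF object as a socket filter. The path and section name are
 * assumptions for illustration; real callers in the AF_PACKET code take them
 * from the af-packet section of suricata.yaml and fill the
 * ebpf_timeout_config accordingly.
 *
 * \code
 * struct ebpf_timeout_config cfg = { .flags = EBPF_SOCKET_FILTER };
 * int prog_fd = -1;
 * int r = EBPFLoadFile("eth0", "/usr/libexec/suricata/ebpf/filter.bpf",
 *                      "filter", &prog_fd, &cfg);
 * if (r == 0) {
 *     // prog_fd can now be attached to the AF_PACKET socket, e.g. via
 *     // setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, sizeof(prog_fd));
 * }
 * \endcode
 */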
465 
466 /**
467  * Attach an XDP program identified by its file descriptor to a device
468  *
469  * \param iface the name of the interface
470  * \param fd the eBPF/XDP program file descriptor
471  * \param flags flags passed to the attach function, mostly used to select the XDP mode
472  * \return -1 in case of error, 0 in case of success
473  */
474 int EBPFSetupXDP(const char *iface, int fd, uint8_t flags)
475 {
476 #ifdef HAVE_PACKET_XDP
477  unsigned int ifindex = if_nametoindex(iface);
478  if (ifindex == 0) {
479  SCLogError("Unknown interface '%s'", iface);
480  return -1;
481  }
482 #ifdef HAVE_BPF_XDP_ATTACH
483  int err = bpf_xdp_attach(ifindex, fd, flags, NULL);
484 #else
485  /* Fall back to legacy API */
486  int err = bpf_set_link_xdp_fd(ifindex, fd, flags);
487 #endif
488  if (err != 0) {
489  char buf[129];
490  libbpf_strerror(err, buf, sizeof(buf));
491  SCLogError("Unable to set XDP on '%s': %s (%d)", iface, buf, err);
492  return -1;
493  }
494 #endif
495  return 0;
496 }
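/**
 * Editorial usage sketch (not part of the original file): load the XDP
 * bypass program and attach it in driver mode. XDP_FLAGS_DRV_MODE comes
 * from <linux/if_link.h>; the object path and the section name (which must
 * match the SEC() annotation in the XDP program) are assumptions.
 *
 * \code
 * struct ebpf_timeout_config cfg = { .mode = AFP_MODE_XDP_BYPASS, .flags = EBPF_XDP_CODE };
 * int prog_fd = -1;
 * if (EBPFLoadFile("eth0", "/usr/libexec/suricata/ebpf/xdp_filter.bpf",
 *                  "xdp", &prog_fd, &cfg) == 0) {
 *     if (EBPFSetupXDP("eth0", prog_fd, XDP_FLAGS_DRV_MODE) != 0) {
 *         SCLogError("Could not attach XDP program to eth0");
 *     }
 * }
 * \endcode
 */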
497 
498 /**
499  * Create a Flow in the table for a Flowkey
500  *
501  * \return false (this create function never returns true)
502  */
503 static bool EBPFCreateFlowForKey(struct flows_stats *flowstats, LiveDevice *dev, void *key,
504  size_t skey, FlowKey *flow_key, struct timespec *ctime,
505  uint64_t pkts_cnt, uint64_t bytes_cnt,
506  int mapfd, int cpus_count)
507 {
508  Flow *f = NULL;
509  uint32_t hash = FlowKeyGetHash(flow_key);
510 
511  f = FlowGetFromFlowKey(flow_key, ctime, hash);
512  if (f == NULL)
513  return false;
514 
515  /* Set accounting. We can't know the direction, so the first half flow
516  * seen is stored as to-destination and the second as to-source. We need
517  * these numbers as we will use them to see if new traffic comes in
518  * on the flow */
519  FlowBypassInfo *fc = FlowGetStorageById(f, GetFlowBypassInfoID());
520  if (fc == NULL) {
521  fc = SCCalloc(sizeof(FlowBypassInfo), 1);
522  if (fc) {
523  FlowUpdateState(f, FLOW_STATE_CAPTURE_BYPASSED);
524  FlowSetStorageById(f, GetFlowBypassInfoID(), fc);
525  fc->BypassUpdate = EBPFBypassUpdate;
526  fc->BypassFree = EBPFBypassFree;
527  fc->todstpktcnt = pkts_cnt;
528  fc->todstbytecnt = bytes_cnt;
529  f->livedev = dev;
530  EBPFBypassData *eb = SCCalloc(1, sizeof(EBPFBypassData));
531  if (eb == NULL) {
532  SCFree(fc);
533  FLOWLOCK_UNLOCK(f);
534  return false;
535  }
536  void *mkey = SCCalloc(1, skey);
537  if (mkey == NULL) {
538  SCFree(fc);
539  SCFree(eb);
540  FLOWLOCK_UNLOCK(f);
541  return false;
542  }
543  memcpy(mkey, key, skey);
544  eb->key[0] = mkey;
545  eb->mapfd = mapfd;
546  eb->cpus_count = cpus_count;
547  fc->bypass_data = eb;
548  flowstats->count++;
549  } else {
550  FLOWLOCK_UNLOCK(f);
551  return false;
552  }
553  } else {
554  EBPFBypassData *eb = (EBPFBypassData *) fc->bypass_data;
555  if (eb == NULL) {
556  FLOWLOCK_UNLOCK(f);
557  return false;
558  }
559  /* if both keys are here, then it is a flow bypassed by this
560  * instance so we ignore it */
561  if (eb->key[0] && eb->key[1]) {
562  FLOWLOCK_UNLOCK(f);
563  return false;
564  }
565  fc->tosrcpktcnt = pkts_cnt;
566  fc->tosrcbytecnt = bytes_cnt;
567  void *mkey = SCCalloc(1, skey);
568  if (mkey == NULL) {
569  FLOWLOCK_UNLOCK(f);
570  return false;
571  }
572  memcpy(mkey, key, skey);
573  eb->key[1] = mkey;
574  }
575  f->livedev = dev;
576  FLOWLOCK_UNLOCK(f);
577  return false;
578 }
579 
580 void EBPFBypassFree(void *data)
581 {
582  EBPFBypassData *eb = (EBPFBypassData *)data;
583  if (eb == NULL)
584  return;
585  SCFree(eb->key[0]);
586  if (eb->key[1]) {
587  SCFree(eb->key[1]);
588  }
589  SCFree(eb);
590  return;
591 }
592 
593 /**
594  *
595  * Compare eBPF half flow to Flow
596  *
597  * \return true if entries have activity, false if not
598  */
599 
600 static bool EBPFBypassCheckHalfFlow(Flow *f, FlowBypassInfo *fc,
601  EBPFBypassData *eb, void *key,
602  int index)
603 {
604  int i;
605  uint64_t pkts_cnt = 0;
606  uint64_t bytes_cnt = 0;
607  /* We use a per-CPU structure so we will get an array of values. But if nr_cpus
608  * is 1 then we have a single global hash. */
609  BPF_DECLARE_PERCPU(struct pair, values_array, eb->cpus_count);
610  memset(values_array, 0, sizeof(values_array));
611  int res = bpf_map_lookup_elem(eb->mapfd, key, values_array);
612  if (res < 0) {
613  SCLogDebug("errno: (%d) %s", errno, strerror(errno));
614  return false;
615  }
616  for (i = 0; i < eb->cpus_count; i++) {
617  /* let's start accumulating values so we can compute the counters */
618  SCLogDebug("%d: Adding pkts %lu bytes %lu", i,
619  BPF_PERCPU(values_array, i).packets,
620  BPF_PERCPU(values_array, i).bytes);
621  pkts_cnt += BPF_PERCPU(values_array, i).packets;
622  bytes_cnt += BPF_PERCPU(values_array, i).bytes;
623  }
624  if (index == 0) {
625  if (pkts_cnt != fc->todstpktcnt) {
626  fc->todstpktcnt = pkts_cnt;
627  fc->todstbytecnt = bytes_cnt;
628  return true;
629  }
630  } else {
631  if (pkts_cnt != fc->tosrcpktcnt) {
632  fc->tosrcpktcnt = pkts_cnt;
633  fc->tosrcbytecnt = bytes_cnt;
634  return true;
635  }
636  }
637 
638  return false;
639 }
640 
641 /** Check both half flows for update
642  *
643  * Update lastts in the flow and do accounting
644  *
645  */
646 bool EBPFBypassUpdate(Flow *f, void *data, time_t tsec)
647 {
648  EBPFBypassData *eb = (EBPFBypassData *)data;
649  if (eb == NULL) {
650  return false;
651  }
652  FlowBypassInfo *fc = FlowGetStorageById(f, GetFlowBypassInfoID());
653  if (fc == NULL) {
654  return false;
655  }
656  bool activity = EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[0], 0);
657  activity |= EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[1], 1);
658  if (!activity) {
659  SCLogDebug("Delete entry: %u (%ld)", FLOW_IS_IPV6(f), FlowGetId(f));
660  /* delete the entries if no time update */
661  EBPFDeleteKey(eb->mapfd, eb->key[0]);
662  EBPFDeleteKey(eb->mapfd, eb->key[1]);
663  SCLogDebug("Done delete entry: %u", FLOW_IS_IPV6(f));
664  } else {
665  f->lastts = SCTIME_FROM_SECS(tsec);
666  return true;
667  }
668  return false;
669 }
670 
671 typedef bool (*OpFlowForKey)(struct flows_stats * flowstats, LiveDevice*dev, void *key,
672  size_t skey, FlowKey *flow_key, struct timespec *ctime,
673  uint64_t pkts_cnt, uint64_t bytes_cnt,
674  int mapfd, int cpus_count);
675 
676 /**
677  * Bypassed flows iterator for IPv4
678  *
679  * This function iterates over all the flows of the IPv4 table,
680  * running a callback function on each flow.
681  */
682 static int EBPFForEachFlowV4Table(ThreadVars *th_v, LiveDevice *dev, const char *name,
683  struct timespec *ctime,
684  struct ebpf_timeout_config *tcfg,
685  OpFlowForKey EBPFOpFlowForKey
686  )
687 {
688  struct flows_stats flowstats = { 0, 0, 0};
689  int mapfd = EBPFGetMapFDByName(dev->dev, name);
690  if (mapfd == -1)
691  return -1;
692 
693  struct flowv4_keys key = {}, next_key;
694  int found = 0;
695  unsigned int i;
696  uint64_t hash_cnt = 0;
697 
698  if (tcfg->cpus_count == 0) {
699  return 0;
700  }
701 
702  bool dead_flow = false;
703  while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
704  uint64_t bytes_cnt = 0;
705  uint64_t pkts_cnt = 0;
706  hash_cnt++;
707  if (dead_flow) {
708  EBPFDeleteKey(mapfd, &key);
709  dead_flow = false;
710  }
711  /* We use a per-CPU structure so we will get an array of values. But if nr_cpus
712  * is 1 then we have a single global hash. */
713  BPF_DECLARE_PERCPU(struct pair, values_array, tcfg->cpus_count);
714  memset(values_array, 0, sizeof(values_array));
715  int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
716  if (res < 0) {
717  SCLogDebug("no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
718  SCLogDebug("errno: (%d) %s", errno, strerror(errno));
719  key = next_key;
720  continue;
721  }
722  for (i = 0; i < tcfg->cpus_count; i++) {
723  /* let's start accumulating values so we can compute the counters */
724  SCLogDebug("%d: Adding pkts %lu bytes %lu", i,
725  BPF_PERCPU(values_array, i).packets,
726  BPF_PERCPU(values_array, i).bytes);
727  pkts_cnt += BPF_PERCPU(values_array, i).packets;
728  bytes_cnt += BPF_PERCPU(values_array, i).bytes;
729  }
730  /* Get the corresponding Flow in the Flow table to compare and update
731  * its counters and lastseen if needed */
732  FlowKey flow_key;
733  if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
734  flow_key.sp = ntohs(next_key.port16[0]);
735  flow_key.dp = ntohs(next_key.port16[1]);
736  flow_key.src.addr_data32[0] = next_key.src;
737  flow_key.dst.addr_data32[0] = next_key.dst;
738  } else {
739  flow_key.sp = next_key.port16[0];
740  flow_key.dp = next_key.port16[1];
741  flow_key.src.addr_data32[0] = ntohl(next_key.src);
742  flow_key.dst.addr_data32[0] = ntohl(next_key.dst);
743  }
744  flow_key.src.family = AF_INET;
745  flow_key.src.addr_data32[1] = 0;
746  flow_key.src.addr_data32[2] = 0;
747  flow_key.src.addr_data32[3] = 0;
748  flow_key.dst.family = AF_INET;
749  flow_key.dst.addr_data32[1] = 0;
750  flow_key.dst.addr_data32[2] = 0;
751  flow_key.dst.addr_data32[3] = 0;
752  flow_key.vlan_id[0] = next_key.vlan0;
753  flow_key.vlan_id[1] = next_key.vlan1;
754  flow_key.vlan_id[2] = next_key.vlan2;
755  if (next_key.ip_proto == 1) {
756  flow_key.proto = IPPROTO_TCP;
757  } else {
758  flow_key.proto = IPPROTO_UDP;
759  }
760  flow_key.recursion_level = 0;
761  flow_key.livedev_id = dev->id;
762  dead_flow = EBPFOpFlowForKey(&flowstats, dev, &next_key, sizeof(next_key), &flow_key,
763  ctime, pkts_cnt, bytes_cnt,
764  mapfd, tcfg->cpus_count);
765  if (dead_flow) {
766  found = 1;
767  }
768 
769  if (TmThreadsCheckFlag(th_v, THV_KILL)) {
770  return 0;
771  }
772 
773  key = next_key;
774  }
775  if (dead_flow) {
776  EBPFDeleteKey(mapfd, &key);
777  found = 1;
778  }
779  SC_ATOMIC_ADD(dev->bypassed, flowstats.packets);
780 
781  LiveDevAddBypassStats(dev, flowstats.count, AF_INET);
782  SCLogInfo("IPv4 bypassed flow table size: %" PRIu64, hash_cnt);
783 
784  return found;
785 }
786 
787 /**
788  * Bypassed flows iterator for IPv6
789  *
790  * This function iterates over all the flows of the IPv6 table,
791  * running a callback function on each flow.
792  */
793 static int EBPFForEachFlowV6Table(ThreadVars *th_v,
794  LiveDevice *dev, const char *name,
795  struct timespec *ctime,
796  struct ebpf_timeout_config *tcfg,
797  OpFlowForKey EBPFOpFlowForKey
798  )
799 {
800  struct flows_stats flowstats = { 0, 0, 0};
801  int mapfd = EBPFGetMapFDByName(dev->dev, name);
802  if (mapfd == -1)
803  return -1;
804 
805  struct flowv6_keys key = {}, next_key;
806  int found = 0;
807  unsigned int i;
808  uint64_t hash_cnt = 0;
809 
810  if (tcfg->cpus_count == 0) {
811  SCLogWarning("CPU count should not be 0");
812  return 0;
813  }
814 
815  uint64_t pkts_cnt = 0;
816  while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
817  uint64_t bytes_cnt = 0;
818  hash_cnt++;
819  if (pkts_cnt > 0) {
820  EBPFDeleteKey(mapfd, &key);
821  }
822  pkts_cnt = 0;
823  /* We use a per-CPU structure so we will get an array of values. But if nr_cpus
824  * is 1 then we have a single global hash. */
825  BPF_DECLARE_PERCPU(struct pair, values_array, tcfg->cpus_count);
826  memset(values_array, 0, sizeof(values_array));
827  int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
828  if (res < 0) {
829  SCLogDebug("no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
830  key = next_key;
831  continue;
832  }
833  for (i = 0; i < tcfg->cpus_count; i++) {
834  /* let's start accumulating values so we can compute the counters */
835  SCLogDebug("%d: Adding pkts %lu bytes %lu", i,
836  BPF_PERCPU(values_array, i).packets,
837  BPF_PERCPU(values_array, i).bytes);
838  pkts_cnt += BPF_PERCPU(values_array, i).packets;
839  bytes_cnt += BPF_PERCPU(values_array, i).bytes;
840  }
841  /* Get the corresponding Flow in the Flow table to compare and update
842  * its counters and lastseen if needed */
843  FlowKey flow_key;
844  if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
845  flow_key.sp = ntohs(next_key.port16[0]);
846  flow_key.dp = ntohs(next_key.port16[1]);
847  flow_key.src.family = AF_INET6;
848  flow_key.src.addr_data32[0] = next_key.src[0];
849  flow_key.src.addr_data32[1] = next_key.src[1];
850  flow_key.src.addr_data32[2] = next_key.src[2];
851  flow_key.src.addr_data32[3] = next_key.src[3];
852  flow_key.dst.family = AF_INET6;
853  flow_key.dst.addr_data32[0] = next_key.dst[0];
854  flow_key.dst.addr_data32[1] = next_key.dst[1];
855  flow_key.dst.addr_data32[2] = next_key.dst[2];
856  flow_key.dst.addr_data32[3] = next_key.dst[3];
857  } else {
858  flow_key.sp = next_key.port16[0];
859  flow_key.dp = next_key.port16[1];
860  flow_key.src.family = AF_INET6;
861  flow_key.src.addr_data32[0] = ntohl(next_key.src[0]);
862  flow_key.src.addr_data32[1] = ntohl(next_key.src[1]);
863  flow_key.src.addr_data32[2] = ntohl(next_key.src[2]);
864  flow_key.src.addr_data32[3] = ntohl(next_key.src[3]);
865  flow_key.dst.family = AF_INET6;
866  flow_key.dst.addr_data32[0] = ntohl(next_key.dst[0]);
867  flow_key.dst.addr_data32[1] = ntohl(next_key.dst[1]);
868  flow_key.dst.addr_data32[2] = ntohl(next_key.dst[2]);
869  flow_key.dst.addr_data32[3] = ntohl(next_key.dst[3]);
870  }
871  flow_key.vlan_id[0] = next_key.vlan0;
872  flow_key.vlan_id[1] = next_key.vlan1;
873  flow_key.vlan_id[2] = next_key.vlan2;
874  if (next_key.ip_proto == 1) {
875  flow_key.proto = IPPROTO_TCP;
876  } else {
877  flow_key.proto = IPPROTO_UDP;
878  }
879  flow_key.recursion_level = 0;
880  flow_key.livedev_id = dev->id;
881  pkts_cnt = EBPFOpFlowForKey(&flowstats, dev, &next_key, sizeof(next_key), &flow_key,
882  ctime, pkts_cnt, bytes_cnt,
883  mapfd, tcfg->cpus_count);
884  if (pkts_cnt > 0) {
885  found = 1;
886  }
887 
888  if (TmThreadsCheckFlag(th_v, THV_KILL)) {
889  return 0;
890  }
891 
892  key = next_key;
893  }
894  if (pkts_cnt > 0) {
895  EBPFDeleteKey(mapfd, &key);
896  found = 1;
897  }
898  SC_ATOMIC_ADD(dev->bypassed, flowstats.packets);
899 
900  LiveDevAddBypassStats(dev, flowstats.count, AF_INET6);
901  SCLogInfo("IPv6 bypassed flow table size: %" PRIu64, hash_cnt);
902  return found;
903 }
904 
905 
906 int EBPFCheckBypassedFlowCreate(ThreadVars *th_v, struct timespec *curtime, void *data)
907 {
908  LiveDevice *ldev = NULL, *ndev;
909  struct ebpf_timeout_config *cfg = (struct ebpf_timeout_config *)data;
910  while(LiveDeviceForEach(&ldev, &ndev)) {
911  EBPFForEachFlowV4Table(th_v, ldev, "flow_table_v4",
912  curtime,
913  cfg, EBPFCreateFlowForKey);
914  EBPFForEachFlowV6Table(th_v, ldev, "flow_table_v6",
915  curtime,
916  cfg, EBPFCreateFlowForKey);
917  }
918 
919  return 0;
920 }
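/**
 * Editorial note (not part of the original file): this callback is meant to
 * be run periodically by the bypassed-flow manager so that flows bypassed by
 * another Suricata instance (or surviving pinned maps) get a Flow entry. A
 * rough registration sketch, assuming the BypassedFlowManagerRegisterCheckFunc()
 * helper declared in flow-bypass.h and a long-lived ebpf_timeout_config:
 *
 * \code
 * static struct ebpf_timeout_config ebpf_cfg;   // filled from the yaml config
 * BypassedFlowManagerRegisterCheckFunc(EBPFCheckBypassedFlowCreate, NULL, &ebpf_cfg);
 * \endcode
 */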
921 
922 void EBPFRegisterExtension(void)
923 {
924  g_livedev_storage_id = LiveDevStorageRegister("bpfmap", sizeof(void *), NULL, BpfMapsInfoFree);
925  g_flow_storage_id = FlowStorageRegister("bypassedlist", sizeof(void *), NULL, BypassedListFree);
926 }
927 
928 
929 #ifdef HAVE_PACKET_XDP
930 
931 static uint32_t g_redirect_iface_cpu_counter = 0;
932 
933 static int EBPFAddCPUToMap(const char *iface, uint32_t i)
934 {
935  int cpumap = EBPFGetMapFDByName(iface, "cpu_map");
936  uint32_t queue_size = 4096;
937  int ret;
938 
939  if (cpumap < 0) {
940  SCLogError("Can't find cpu_map");
941  return -1;
942  }
943  ret = bpf_map_update_elem(cpumap, &i, &queue_size, 0);
944  if (ret) {
945  SCLogError("Create CPU entry failed (err:%d)", ret);
946  return -1;
947  }
948  int cpus_available = EBPFGetMapFDByName(iface, "cpus_available");
949  if (cpus_available < 0) {
950  SCLogError("Can't find cpus_available map");
951  return -1;
952  }
953 
954  ret = bpf_map_update_elem(cpus_available, &g_redirect_iface_cpu_counter, &i, 0);
955  if (ret) {
956  SCLogError("Create CPU entry failed (err:%d)", ret);
957  return -1;
958  }
959  return 0;
960 }
961 
962 static void EBPFRedirectMapAddCPU(int i, void *data)
963 {
964  if (EBPFAddCPUToMap(data, i) < 0) {
965  SCLogError("Unable to add CPU %d to set", i);
966  } else {
967  g_redirect_iface_cpu_counter++;
968  }
969 }
970 
971 void EBPFBuildCPUSet(ConfNode *node, char *iface)
972 {
973  uint32_t key0 = 0;
974  int mapfd = EBPFGetMapFDByName(iface, "cpus_count");
975  if (mapfd < 0) {
976  SCLogError("Unable to find 'cpus_count' map");
977  return;
978  }
979  g_redirect_iface_cpu_counter = 0;
980  if (node == NULL) {
981  bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
982  BPF_ANY);
983  return;
984  }
985  BuildCpusetWithCallback("xdp-cpu-redirect", node,
986  EBPFRedirectMapAddCPU,
987  iface);
988  bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
989  BPF_ANY);
990 }
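/**
 * Editorial usage sketch (not part of the original file): fill the CPU
 * redirect maps of an interface from its "xdp-cpu-redirect" configuration
 * node. The configuration path is an assumption; in Suricata the node is
 * normally taken from the matching af-packet interface entry in suricata.yaml.
 *
 * \code
 * char iface[] = "eth0";
 * ConfNode *cpuset = ConfGetNode("af-packet.0.xdp-cpu-redirect");
 * EBPFBuildCPUSet(cpuset, iface);   // with a NULL node the cpus_count entry is set to 0
 * \endcode
 */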
991 
992 /**
993  * Setup peer interface in XDP system
994  *
995  * This function sets up the peer interface in the XDP maps used by the
996  * bypass filter. The first map, tx_peer, is a device map and is
997  * used to store the peer. The second map, tx_peer_int, is used by the
998  * eBPF code to check whether a peer is defined for this interface.
999  *
1000  * As the maps are per device, we just need maps with a single element.
1001  * In both cases we store the element at key 0 so the XDP kernel code
1002  * uses the same key.
1003  */
1004 int EBPFSetPeerIface(const char *iface, const char *out_iface)
1005 {
1006  int mapfd = EBPFGetMapFDByName(iface, "tx_peer");
1007  if (mapfd < 0) {
1008  SCLogError("Unable to find 'tx_peer' map");
1009  return -1;
1010  }
1011  int intmapfd = EBPFGetMapFDByName(iface, "tx_peer_int");
1012  if (intmapfd < 0) {
1013  SCLogError("Unable to find 'tx_peer_int' map");
1014  return -1;
1015  }
1016 
1017  int key0 = 0;
1018  unsigned int peer_index = if_nametoindex(out_iface);
1019  if (peer_index == 0) {
1020  SCLogError("No iface '%s'", out_iface);
1021  return -1;
1022  }
1023  int ret = bpf_map_update_elem(mapfd, &key0, &peer_index, BPF_ANY);
1024  if (ret) {
1025  SCLogError("Create peer entry failed (err:%d)", ret);
1026  return -1;
1027  }
1028  ret = bpf_map_update_elem(intmapfd, &key0, &peer_index, BPF_ANY);
1029  if (ret) {
1030  SCLogError("Create peer entry failed (err:%d)", ret);
1031  return -1;
1032  }
1033  return 0;
1034 }
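/**
 * Editorial usage sketch (not part of the original file): wire two devices
 * as an XDP forwarding pair for IPS mode, so packets bypassed on one side
 * are transmitted out of the peer. Interface names are assumptions; both
 * directions have to be configured.
 *
 * \code
 * if (EBPFSetPeerIface("eth0", "eth1") != 0 ||
 *         EBPFSetPeerIface("eth1", "eth0") != 0) {
 *     SCLogError("Could not set up XDP peering between eth0 and eth1");
 * }
 * \endcode
 */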
1035 
1036 /**
1037  * Bypass the flow on all ifaces it is seen on. This is used
1038  * in IPS mode.
1039  */
1040 
1041 int EBPFUpdateFlow(Flow *f, Packet *p, void *data)
1042 {
1043  BypassedIfaceList *ifl = (BypassedIfaceList *)FlowGetStorageById(f, g_flow_storage_id);
1044  if (ifl == NULL) {
1045  ifl = SCCalloc(1, sizeof(*ifl));
1046  if (ifl == NULL) {
1047  return 0;
1048  }
1049  ifl->dev = p->livedev;
1050  FlowSetStorageById(f, g_flow_storage_id, ifl);
1051  return 1;
1052  }
1053  /* Look for packet iface in the list */
1054  BypassedIfaceList *ldev = ifl;
1055  while (ldev) {
1056  if (p->livedev == ldev->dev) {
1057  return 1;
1058  }
1059  ldev = ldev->next;
1060  }
1061  /* Call the bypass function since the iface is not yet in the list */
1062  p->BypassPacketsFlow(p);
1063 
1064  /* Add iface to the list */
1065  BypassedIfaceList *nifl = SCCalloc(1, sizeof(*nifl));
1066  if (nifl == NULL) {
1067  return 0;
1068  }
1069  nifl->dev = p->livedev;
1070  nifl->next = ifl;
1071  FlowSetStorageById(f, g_flow_storage_id, nifl);
1072  return 1;
1073 }
1074 
1075 #endif /* HAVE_PACKET_XDP */
1076 
1077 #endif /* HAVE_PACKET_EBPF */