suricata
util-ebpf.c
Go to the documentation of this file.
1 /* Copyright (C) 2018-2021 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \ingroup afppacket
20  *
21  * @{
22  */
23 
24 /**
25  * \file
26  *
27  * \author Eric Leblond <eric@regit.org>
28  *
29  * eBPF utility
30  *
31  */
32 
33 #define SC_PCAP_DONT_INCLUDE_PCAP_H 1
34 
35 #include "suricata-common.h"
36 #include "flow-bypass.h"
37 
38 #ifdef HAVE_PACKET_EBPF
39 
40 #include <sys/time.h>
41 #include <sys/resource.h>
42 
43 #include "util-ebpf.h"
44 #include "util-affinity.h"
45 #include "util-cpu.h"
46 #include "util-device-private.h"
47 
48 #include "device-storage.h"
49 #include "flow-storage.h"
50 #include "flow.h"
51 #include "flow-hash.h"
52 #include "tm-threads.h"
53 
54 #include <bpf/libbpf.h>
55 #include <bpf/bpf.h>
56 #include <net/if.h>
57 #include "autoconf.h"
58 
59 #define BPF_MAP_MAX_COUNT 16
60 
61 #define BYPASSED_FLOW_TIMEOUT 60
62 
63 static LiveDevStorageId g_livedev_storage_id = { .id = -1 };
64 static FlowStorageId g_flow_storage_id = { .id = -1 };
65 
66 struct bpf_map_item {
67  char iface[IFNAMSIZ];
68  char * name;
69  int fd;
70  uint8_t to_unlink;
71 };
72 
73 struct bpf_maps_info {
74  struct bpf_map_item array[BPF_MAP_MAX_COUNT];
75  int last;
76 };
77 
78 typedef struct BypassedIfaceList_ {
79  LiveDevice *dev;
80  struct BypassedIfaceList_ *next;
81 } BypassedIfaceList;
82 
83 static void BpfMapsInfoFree(void *bpf)
84 {
85  struct bpf_maps_info *bpfinfo = (struct bpf_maps_info *)bpf;
86  int i;
87  for (i = 0; i < bpfinfo->last; i ++) {
88  if (bpfinfo->array[i].name) {
89  if (bpfinfo->array[i].to_unlink) {
90  char pinnedpath[PATH_MAX];
91  int ret = snprintf(pinnedpath, sizeof(pinnedpath),
92  "/sys/fs/bpf/suricata-%s-%s",
93  bpfinfo->array[i].iface,
94  bpfinfo->array[i].name);
95  if (ret > 0) {
96  /* Unlink the pinned entry */
97  ret = unlink(pinnedpath);
98  if (ret == -1) {
99  int error = errno;
100  SCLogWarning(
101  "Unable to remove %s: %s (%d)", pinnedpath, strerror(error), error);
102  }
103  } else {
104  SCLogWarning("Unable to remove map %s", bpfinfo->array[i].name);
105  }
106  }
107  SCFree(bpfinfo->array[i].name);
108  }
109  }
110  SCFree(bpfinfo);
111 }
112 
113 static void BypassedListFree(void *ifl)
114 {
115  BypassedIfaceList *mifl = (BypassedIfaceList *)ifl;
116  BypassedIfaceList *nifl;
117  while (mifl) {
118  nifl = mifl->next;
119  SCFree(mifl);
120  mifl = nifl;
121  }
122 }
123 
/**
 * Delete an entry from an eBPF map, logging a warning on failure.
 *
 * \param fd file descriptor of the eBPF map
 * \param key pointer to the key identifying the entry to remove
 */
void EBPFDeleteKey(int fd, void *key)
{
    if (bpf_map_delete_elem(fd, key) < 0) {
        SCLogWarning("Unable to delete entry: %s (%d)", strerror(errno), errno);
    }
}
131 
132 static struct bpf_maps_info *EBPFGetBpfMap(const char *iface)
133 {
134  LiveDevice *livedev = LiveGetDevice(iface);
135  if (livedev == NULL)
136  return NULL;
137  void *data = LiveDevGetStorageById(livedev, g_livedev_storage_id);
138 
139  return (struct bpf_maps_info *)data;
140 }
141 
142 /**
143  * Get file descriptor of a map in the scope of a interface
144  *
145  * \param iface the interface where the map need to be looked for
146  * \param name the name of the map
147  * \return the file descriptor or -1 in case of error
148  */
149 int EBPFGetMapFDByName(const char *iface, const char *name)
150 {
151  int i;
152 
153  if (iface == NULL || name == NULL)
154  return -1;
155  struct bpf_maps_info *bpf_maps = EBPFGetBpfMap(iface);
156  if (bpf_maps == NULL)
157  return -1;
158 
159  for (i = 0; i < BPF_MAP_MAX_COUNT; i++) {
160  if (!bpf_maps->array[i].name)
161  continue;
162  if (!strcmp(bpf_maps->array[i].name, name)) {
163  SCLogDebug("Got fd %d for eBPF map '%s'", bpf_maps->array[i].fd, name);
164  return bpf_maps->array[i].fd;
165  }
166  }
167 
168  return -1;
169 }
170 
171 static int EBPFLoadPinnedMapsFile(LiveDevice *livedev, const char *file)
172 {
173  char pinnedpath[1024];
174  snprintf(pinnedpath, sizeof(pinnedpath),
175  "/sys/fs/bpf/suricata-%s-%s",
176  livedev->dev,
177  file);
178 
179  return bpf_obj_get(pinnedpath);
180 }
181 
182 static int EBPFLoadPinnedMaps(LiveDevice *livedev, struct ebpf_timeout_config *config)
183 {
184  int fd_v4 = -1, fd_v6 = -1;
185 
186  /* First try to load the eBPF check map and return if found */
187  if (config->pinned_maps_name) {
188  int ret = EBPFLoadPinnedMapsFile(livedev, config->pinned_maps_name);
189  if (ret == 0) {
190  /* pinned maps found, let's just exit as XDP filter is in place */
191  return ret;
192  }
193  }
194 
195  if (config->mode == AFP_MODE_XDP_BYPASS) {
196  /* Get flow v4 table */
197  fd_v4 = EBPFLoadPinnedMapsFile(livedev, "flow_table_v4");
198  if (fd_v4 < 0) {
199  return fd_v4;
200  }
201 
202  /* Get flow v6 table */
203  fd_v6 = EBPFLoadPinnedMapsFile(livedev, "flow_table_v6");
204  if (fd_v6 < 0) {
205  SCLogWarning("Found a flow_table_v4 map but no flow_table_v6 map");
206  return fd_v6;
207  }
208  }
209 
210  struct bpf_maps_info *bpf_map_data = SCCalloc(1, sizeof(*bpf_map_data));
211  if (bpf_map_data == NULL) {
212  SCLogError("Can't allocate bpf map array");
213  return -1;
214  }
215 
216  if (config->mode == AFP_MODE_XDP_BYPASS) {
217  bpf_map_data->array[0].fd = fd_v4;
218  bpf_map_data->array[0].name = SCStrdup("flow_table_v4");
219  if (bpf_map_data->array[0].name == NULL) {
220  goto alloc_error;
221  }
222  bpf_map_data->array[1].fd = fd_v6;
223  bpf_map_data->array[1].name = SCStrdup("flow_table_v6");
224  if (bpf_map_data->array[1].name == NULL) {
225  goto alloc_error;
226  }
227  bpf_map_data->last = 2;
228  } else {
229  bpf_map_data->last = 0;
230  }
231 
232  /* Load other known maps: cpu_map, cpus_available, tx_peer, tx_peer_int */
233  int fd = EBPFLoadPinnedMapsFile(livedev, "cpu_map");
234  if (fd >= 0) {
235  bpf_map_data->array[bpf_map_data->last].fd = fd;
236  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("cpu_map");
237  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
238  goto alloc_error;
239  }
240  bpf_map_data->last++;
241  }
242  fd = EBPFLoadPinnedMapsFile(livedev, "cpus_available");
243  if (fd >= 0) {
244  bpf_map_data->array[bpf_map_data->last].fd = fd;
245  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("cpus_available");
246  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
247  goto alloc_error;
248  }
249  bpf_map_data->last++;
250  }
251  fd = EBPFLoadPinnedMapsFile(livedev, "tx_peer");
252  if (fd >= 0) {
253  bpf_map_data->array[bpf_map_data->last].fd = fd;
254  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("tx_peer");
255  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
256  goto alloc_error;
257  }
258  bpf_map_data->last++;
259  }
260  fd = EBPFLoadPinnedMapsFile(livedev, "tx_peer_int");
261  if (fd >= 0) {
262  bpf_map_data->array[bpf_map_data->last].fd = fd;
263  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("tx_peer_int");
264  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
265  goto alloc_error;
266  }
267  bpf_map_data->last++;
268  }
269 
270  /* Attach the bpf_maps_info to the LiveDevice via the device storage */
271  LiveDevSetStorageById(livedev, g_livedev_storage_id, bpf_map_data);
272  /* Declare that device will use bypass stats */
273  LiveDevUseBypass(livedev);
274 
275  return 0;
276 
277 alloc_error:
278  for (int i = 0; i < bpf_map_data->last; i++) {
279  SCFree(bpf_map_data->array[i].name);
280  }
281  bpf_map_data->last = 0;
282  SCLogError("Can't allocate bpf map name");
283  return -1;
284 }
285 
/**
 * Load a section of an eBPF file
 *
 * This function loads a section inside an eBPF file and returns, via the
 * parameter val, the file descriptor that will be used to inject the eBPF
 * code into the kernel via a syscall.
 *
 * \param iface the interface the eBPF code is loaded for (must be a known LiveDevice)
 * \param path the path of the eBPF file to load
 * \param section the section in the eBPF file to load
 * \param val a pointer to an integer that will receive the program file descriptor
 * \param config timeout/bypass configuration (flags select XDP vs socket filter,
 *        pinned maps and hardware offload)
 * \return -1 in case of error, 0 in case of success, 1 if pinned maps are loaded
 */
int EBPFLoadFile(const char *iface, const char *path, const char * section,
                 int *val, struct ebpf_timeout_config *config)
{
    int err, pfd;
    bool found = false;
    struct bpf_object *bpfobj = NULL;
    struct bpf_program *bpfprog = NULL;
    struct bpf_map *map = NULL;

    if (iface == NULL)
        return -1;
    LiveDevice *livedev = LiveGetDevice(iface);
    if (livedev == NULL)
        return -1;

    if (config->flags & EBPF_XDP_CODE && config->flags & EBPF_PINNED_MAPS) {
        /* We try to get our flow table maps and if we have them we can simply return */
        if (EBPFLoadPinnedMaps(livedev, config) == 0) {
            SCLogInfo("Loaded pinned maps, will use already loaded eBPF filter");
            return 1;
        }
    }

    if (! path) {
        SCLogError("No file defined to load eBPF from");
        return -1;
    }

    /* Sending the eBPF code to the kernel requires a large amount of
     * locked memory so we set it to unlimited to avoid a ENOPERM error */
    struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
    if (setrlimit(RLIMIT_MEMLOCK, &r) != 0) {
        SCLogError("Unable to lock memory: %s (%d)", strerror(errno), errno);
        return -1;
    }

    /* Open the eBPF file and parse it */
    bpfobj = bpf_object__open(path);
    long error = libbpf_get_error(bpfobj);
    if (error) {
        char err_buf[128];
        libbpf_strerror(error, err_buf,
                        sizeof(err_buf));
        SCLogError("Unable to load eBPF objects in '%s': %s", path, err_buf);
        return -1;
    }

    /* Hardware offload: bind every program and map to the target NIC
     * before loading so the driver can JIT them onto the card. */
    if (config->flags & EBPF_XDP_HW_MODE) {
        unsigned int ifindex = if_nametoindex(iface);
        bpf_object__for_each_program(bpfprog, bpfobj) {
            bpf_program__set_ifindex(bpfprog, ifindex);
        }
        bpf_map__for_each(map, bpfobj) {
            bpf_map__set_ifindex(map, ifindex);
        }
    }

    /* Let's check that our section is here */
    bpf_object__for_each_program(bpfprog, bpfobj) {
#ifdef HAVE_BPF_PROGRAM__SECTION_NAME
        const char *title = bpf_program__section_name(bpfprog);
#else
        const char *title = bpf_program__title(bpfprog, 0);
#endif
        if (!strcmp(title, section)) {
            /* Set the program type before load: socket filter for the
             * eBPF filter/bypass case, XDP otherwise. */
            if (config->flags & EBPF_SOCKET_FILTER) {
#ifdef HAVE_BPF_PROGRAM__SET_TYPE
                bpf_program__set_type(bpfprog, BPF_PROG_TYPE_SOCKET_FILTER);
#else
                /* Fall back to legacy API */
                bpf_program__set_socket_filter(bpfprog);
#endif
            } else {
#ifdef HAVE_BPF_PROGRAM__SET_TYPE
                bpf_program__set_type(bpfprog, BPF_PROG_TYPE_XDP);
#else
                /* Fall back to legacy API */
                bpf_program__set_xdp(bpfprog);
#endif
            }
            found = true;
            break;
        }
    }

    /* NOTE(review): on the error paths below bpf_object__close(bpfobj) is
     * never called, so the parsed object leaks — confirm and fix upstream. */
    if (!found) {
        SCLogError("No section '%s' in '%s' file. Will not be able to use the file", section, path);
        return -1;
    }

    err = bpf_object__load(bpfobj);
    if (err < 0) {
        if (err == -EPERM) {
            SCLogError("Permission issue when loading eBPF object"
                    " (check libbpf error on stdout)");
        } else {
            char buf[129];
            libbpf_strerror(err, buf, sizeof(buf));
            SCLogError("Unable to load eBPF object: %s (%d)", buf, err);
        }
        return -1;
    }

    /* Kernel and userspace are sharing data via map. Userspace access to the
     * map via a file descriptor. So we need to store the map to fd info. For
     * that we use bpf_maps_info:: */
    struct bpf_maps_info *bpf_map_data = SCCalloc(1, sizeof(*bpf_map_data));
    if (bpf_map_data == NULL) {
        SCLogError("Can't allocate bpf map array");
        return -1;
    }

    /* Store the maps in bpf_maps_info:: */
    bpf_map__for_each(map, bpfobj) {
        if (bpf_map_data->last == BPF_MAP_MAX_COUNT) {
            SCLogError("Too many BPF maps in eBPF files");
            break;
        }
        /* Sanity check: the userspace key structs must match the kernel
         * side ones or every lookup/delete would silently fail. */
        if (strcmp(bpf_map__name(map), "flow_table_v4") == 0) {
            if (bpf_map__key_size(map) != sizeof(struct flowv4_keys)) {
                SCLogError("Incompatible flow_table_v4");
                break;
            }
        }
        if (strcmp(bpf_map__name(map), "flow_table_v6") == 0) {
            if (bpf_map__key_size(map) != sizeof(struct flowv6_keys)) {
                SCLogError("Incompatible flow_table_v6");
                break;
            }
        }
        SCLogDebug("Got a map '%s' with fd '%d'", bpf_map__name(map), bpf_map__fd(map));
        bpf_map_data->array[bpf_map_data->last].fd = bpf_map__fd(map);
        bpf_map_data->array[bpf_map_data->last].name = SCStrdup(bpf_map__name(map));
        snprintf(bpf_map_data->array[bpf_map_data->last].iface, IFNAMSIZ,
                 "%s", iface);
        if (!bpf_map_data->array[bpf_map_data->last].name) {
            SCLogError("Unable to duplicate map name");
            BpfMapsInfoFree(bpf_map_data);
            return -1;
        }
        bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
        if (config->flags & EBPF_PINNED_MAPS) {
            SCLogConfig("Pinning: %d to %s", bpf_map_data->array[bpf_map_data->last].fd,
                    bpf_map_data->array[bpf_map_data->last].name);
            char buf[1024];
            snprintf(buf, sizeof(buf), "/sys/fs/bpf/suricata-%s-%s", iface,
                    bpf_map_data->array[bpf_map_data->last].name);
            int ret = bpf_obj_pin(bpf_map_data->array[bpf_map_data->last].fd, buf);
            if (ret != 0) {
                SCLogWarning("Can not pin: %s", strerror(errno));
            }
            /* Don't unlink pinned maps in XDP mode to avoid a state reset */
            if (config->flags & EBPF_XDP_CODE) {
                bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
            } else {
                bpf_map_data->array[bpf_map_data->last].to_unlink = 1;
            }
        }
        bpf_map_data->last++;
    }

    /* Attach the bpf_maps_info to the LiveDevice via the device storage */
    LiveDevSetStorageById(livedev, g_livedev_storage_id, bpf_map_data);
    LiveDevUseBypass(livedev);

    /* Finally we get the file descriptor for our eBPF program. We will use
     * the fd to attach the program to the socket (eBPF case) or to the device
     * (XDP case). */
    pfd = bpf_program__fd(bpfprog);
    if (pfd == -1) {
        SCLogError("Unable to find %s section", section);
        return -1;
    }

    SCLogInfo("Successfully loaded eBPF file '%s' on '%s'", path, iface);
    *val = pfd;
    return 0;
}
476 
477 /**
478  * Attach a XDP program identified by its file descriptor to a device
479  *
480  * \param iface the name of interface
481  * \param fd the eBPF/XDP program file descriptor
482  * \param a flag to pass to attach function mostly used to set XDP mode
483  * \return -1 in case of error, 0 if success
484  */
/* Attach an XDP program to a device; see the doc comment above for the
 * contract. Without HAVE_PACKET_XDP this compiles to a no-op returning 0. */
int EBPFSetupXDP(const char *iface, int fd, uint8_t flags)
{
#ifdef HAVE_PACKET_XDP
    unsigned int ifindex = if_nametoindex(iface);
    if (ifindex == 0) {
        SCLogError("Unknown interface '%s'", iface);
        return -1;
    }
#ifdef HAVE_BPF_XDP_ATTACH
    /* modern libbpf API (>= 0.8) */
    int err = bpf_xdp_attach(ifindex, fd, flags, NULL);
#else
    /* Fall back to legacy API */
    int err = bpf_set_link_xdp_fd(ifindex, fd, flags);
#endif
    if (err != 0) {
        char buf[129];
        libbpf_strerror(err, buf, sizeof(buf));
        SCLogError("Unable to set XDP on '%s': %s (%d)", iface, buf, err);
        return -1;
    }
#endif
    return 0;
}
508 
/**
 * Create a Flow in the Flow table for an eBPF half-flow key.
 *
 * Called from the flow-table iterators when bypass maps are rebuilt from
 * pinned state. If the Flow has no bypass info yet, one is allocated and
 * this half flow's counters are stored as to-destination; otherwise the
 * counters are stored as to-source and the key recorded as the second half.
 *
 * \return false (this create function never returns true: the eBPF map
 *         entry must be kept, not deleted)
 */
static bool EBPFCreateFlowForKey(struct flows_stats *flowstats, LiveDevice *dev, void *key,
                                 size_t skey, FlowKey *flow_key, struct timespec *ctime,
                                 uint64_t pkts_cnt, uint64_t bytes_cnt,
                                 int mapfd, int cpus_count)
{
    Flow *f = NULL;
    uint32_t hash = FlowKeyGetHash(flow_key);

    /* NOTE(review): FlowGetFromFlowKey presumably returns the flow locked —
     * every exit path below unlocks it; confirm against flow-hash.c. */
    f = FlowGetFromFlowKey(flow_key, ctime, hash);
    if (f == NULL)
        return false;

    /* set accounting, we can't know the direction, so let's just start to
     * serve them if we already have something from server to client. We need
     * these numbers as we will use it to see if we have new traffic coming
     * on the flow */
    /* NOTE(review): the line declaring 'fc' (fetching the FlowBypassInfo from
     * flow storage, likely via GetFlowBypassInfoID()) is missing from this
     * extraction — restore it from the original source before building. */
    if (fc == NULL) {
        fc = SCCalloc(sizeof(FlowBypassInfo), 1);
        if (fc) {
            FlowUpdateState(f, FLOW_STATE_CAPTURE_BYPASSED);
            /* NOTE(review): a line storing 'fc' into the flow's storage
             * appears to be missing from this extraction as well. */
            fc->BypassUpdate = EBPFBypassUpdate;
            fc->BypassFree = EBPFBypassFree;
            fc->todstpktcnt = pkts_cnt;
            fc->todstbytecnt = bytes_cnt;
            f->livedev = dev;
            EBPFBypassData *eb = SCCalloc(1, sizeof(EBPFBypassData));
            if (eb == NULL) {
                SCFree(fc);
                FLOWLOCK_UNLOCK(f);
                return false;
            }
            /* duplicate the eBPF map key so it outlives the iterator */
            void *mkey = SCCalloc(1, skey);
            if (mkey == NULL) {
                SCFree(fc);
                SCFree(eb);
                FLOWLOCK_UNLOCK(f);
                return false;
            }
            memcpy(mkey, key, skey);
            eb->key[0] = mkey;
            eb->mapfd = mapfd;
            eb->cpus_count = cpus_count;
            fc->bypass_data = eb;
            flowstats->count++;
        } else {
            FLOWLOCK_UNLOCK(f);
            return false;
        }
    } else {
        EBPFBypassData *eb = (EBPFBypassData *) fc->bypass_data;
        if (eb == NULL) {
            FLOWLOCK_UNLOCK(f);
            return false;
        }
        /* if both keys are here, then it is a flow bypassed by this
         * instance so we ignore it */
        if (eb->key[0] && eb->key[1]) {
            FLOWLOCK_UNLOCK(f);
            return false;
        }
        fc->tosrcpktcnt = pkts_cnt;
        fc->tosrcbytecnt = bytes_cnt;
        void *mkey = SCCalloc(1, skey);
        if (mkey == NULL) {
            FLOWLOCK_UNLOCK(f);
            return false;
        }
        memcpy(mkey, key, skey);
        eb->key[1] = mkey;
    }
    f->livedev = dev;
    FLOWLOCK_UNLOCK(f);
    return false;
}
590 
591 void EBPFBypassFree(void *data)
592 {
593  EBPFBypassData *eb = (EBPFBypassData *)data;
594  if (eb == NULL)
595  return;
596  SCFree(eb->key[0]);
597  if (eb->key[1]) {
598  SCFree(eb->key[1]);
599  }
600  SCFree(eb);
601 }
602 
603 /**
604  *
605  * Compare eBPF half flow to Flow
606  *
607  * \return true if entries have activity, false if not
608  */
609 
610 static bool EBPFBypassCheckHalfFlow(Flow *f, FlowBypassInfo *fc,
611  EBPFBypassData *eb, void *key,
612  int index)
613 {
614  int i;
615  uint64_t pkts_cnt = 0;
616  uint64_t bytes_cnt = 0;
617  /* We use a per CPU structure so we will get a array of values. But if nr_cpus
618  * is 1 then we have a global hash. */
619  BPF_DECLARE_PERCPU(struct pair, values_array, eb->cpus_count);
620  memset(values_array, 0, sizeof(values_array));
621  int res = bpf_map_lookup_elem(eb->mapfd, key, values_array);
622  if (res < 0) {
623  SCLogDebug("errno: (%d) %s", errno, strerror(errno));
624  return false;
625  }
626  for (i = 0; i < eb->cpus_count; i++) {
627  /* let's start accumulating value so we can compute the counters */
628  SCLogDebug("%d: Adding pkts %lu bytes %lu", i,
629  BPF_PERCPU(values_array, i).packets,
630  BPF_PERCPU(values_array, i).bytes);
631  pkts_cnt += BPF_PERCPU(values_array, i).packets;
632  bytes_cnt += BPF_PERCPU(values_array, i).bytes;
633  }
634  if (index == 0) {
635  if (pkts_cnt != fc->todstpktcnt) {
636  fc->todstpktcnt = pkts_cnt;
637  fc->todstbytecnt = bytes_cnt;
638  return true;
639  }
640  } else {
641  if (pkts_cnt != fc->tosrcpktcnt) {
642  fc->tosrcpktcnt = pkts_cnt;
643  fc->tosrcbytecnt = bytes_cnt;
644  return true;
645  }
646  }
647 
648  return false;
649 }
650 
/** Check both half flows for update
 *
 * Update lastts in the flow and do accounting. When neither half flow shows
 * activity, both eBPF map entries are deleted so the kernel stops bypassing
 * the flow.
 *
 * \return true if the flow showed activity, false otherwise
 * */
bool EBPFBypassUpdate(Flow *f, void *data, time_t tsec)
{
    EBPFBypassData *eb = (EBPFBypassData *)data;
    if (eb == NULL) {
        return false;
    }
    /* NOTE(review): the line declaring 'fc' (fetching the FlowBypassInfo
     * from flow storage, likely via GetFlowBypassInfoID()) is missing from
     * this extraction — restore it from the original source before building. */
    if (fc == NULL) {
        return false;
    }
    bool activity = EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[0], 0);
    activity |= EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[1], 1);
    if (!activity) {
        SCLogDebug("Delete entry: %u (%" PRIu64 ")", FLOW_IS_IPV6(f), FlowGetId(f));
        /* delete the entries if no time update */
        EBPFDeleteKey(eb->mapfd, eb->key[0]);
        EBPFDeleteKey(eb->mapfd, eb->key[1]);
        SCLogDebug("Done delete entry: %u", FLOW_IS_IPV6(f));
    } else {
        f->lastts = SCTIME_FROM_SECS(tsec);
        return true;
    }
    return false;
}
680 
/**
 * Callback run by the flow-table iterators on each half flow found in an
 * eBPF map. Returns true when the iterator should delete the map entry.
 */
typedef bool (*OpFlowForKey)(struct flows_stats * flowstats, LiveDevice*dev, void *key,
                             size_t skey, FlowKey *flow_key, struct timespec *ctime,
                             uint64_t pkts_cnt, uint64_t bytes_cnt,
                             int mapfd, int cpus_count);
685 
686 /**
687  * Bypassed flows iterator for IPv4
688  *
689  * This function iterates on all the flows of the IPv4 table
690  * running a callback function on each flow.
691  */
692 static int EBPFForEachFlowV4Table(ThreadVars *th_v, LiveDevice *dev, const char *name,
693  struct timespec *ctime,
694  struct ebpf_timeout_config *tcfg,
695  OpFlowForKey EBPFOpFlowForKey
696  )
697 {
698  struct flows_stats flowstats = { 0, 0, 0};
699  int mapfd = EBPFGetMapFDByName(dev->dev, name);
700  if (mapfd == -1)
701  return -1;
702 
703  struct flowv4_keys key = {}, next_key;
704  int found = 0;
705  unsigned int i;
706  uint64_t hash_cnt = 0;
707 
708  if (tcfg->cpus_count == 0) {
709  return 0;
710  }
711 
712  bool dead_flow = false;
713  while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
714  uint64_t bytes_cnt = 0;
715  uint64_t pkts_cnt = 0;
716  hash_cnt++;
717  if (dead_flow) {
718  EBPFDeleteKey(mapfd, &key);
719  dead_flow = false;
720  }
721  /* We use a per CPU structure so we will get a array of values. But if nr_cpus
722  * is 1 then we have a global hash. */
723  BPF_DECLARE_PERCPU(struct pair, values_array, tcfg->cpus_count);
724  memset(values_array, 0, sizeof(values_array));
725  int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
726  if (res < 0) {
727  SCLogDebug("no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
728  SCLogDebug("errno: (%d) %s", errno, strerror(errno));
729  key = next_key;
730  continue;
731  }
732  for (i = 0; i < tcfg->cpus_count; i++) {
733  /* let's start accumulating value so we can compute the counters */
734  SCLogDebug("%d: Adding pkts %lu bytes %lu", i,
735  BPF_PERCPU(values_array, i).packets,
736  BPF_PERCPU(values_array, i).bytes);
737  pkts_cnt += BPF_PERCPU(values_array, i).packets;
738  bytes_cnt += BPF_PERCPU(values_array, i).bytes;
739  }
740  /* Get the corresponding Flow in the Flow table to compare and update
741  * its counters and lastseen if needed */
742  FlowKey flow_key;
743  if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
744  flow_key.sp = ntohs(next_key.port16[0]);
745  flow_key.dp = ntohs(next_key.port16[1]);
746  flow_key.src.addr_data32[0] = next_key.src;
747  flow_key.dst.addr_data32[0] = next_key.dst;
748  } else {
749  flow_key.sp = next_key.port16[0];
750  flow_key.dp = next_key.port16[1];
751  flow_key.src.addr_data32[0] = ntohl(next_key.src);
752  flow_key.dst.addr_data32[0] = ntohl(next_key.dst);
753  }
754  flow_key.src.family = AF_INET;
755  flow_key.src.addr_data32[1] = 0;
756  flow_key.src.addr_data32[2] = 0;
757  flow_key.src.addr_data32[3] = 0;
758  flow_key.dst.family = AF_INET;
759  flow_key.dst.addr_data32[1] = 0;
760  flow_key.dst.addr_data32[2] = 0;
761  flow_key.dst.addr_data32[3] = 0;
762  flow_key.vlan_id[0] = next_key.vlan0;
763  flow_key.vlan_id[1] = next_key.vlan1;
764  if (next_key.ip_proto == 1) {
765  flow_key.proto = IPPROTO_TCP;
766  } else {
767  flow_key.proto = IPPROTO_UDP;
768  }
769  flow_key.recursion_level = 0;
770  flow_key.livedev_id = dev->id;
771  dead_flow = EBPFOpFlowForKey(&flowstats, dev, &next_key, sizeof(next_key), &flow_key,
772  ctime, pkts_cnt, bytes_cnt,
773  mapfd, tcfg->cpus_count);
774  if (dead_flow) {
775  found = 1;
776  }
777 
778  if (TmThreadsCheckFlag(th_v, THV_KILL)) {
779  return 0;
780  }
781 
782  key = next_key;
783  }
784  if (dead_flow) {
785  EBPFDeleteKey(mapfd, &key);
786  found = 1;
787  }
788  SC_ATOMIC_ADD(dev->bypassed, flowstats.packets);
789 
790  LiveDevAddBypassStats(dev, flowstats.count, AF_INET);
791  SCLogInfo("IPv4 bypassed flow table size: %" PRIu64, hash_cnt);
792 
793  return found;
794 }
795 
796 /**
797  * Bypassed flows iterator for IPv6
798  *
799  * This function iterates on all the flows of the IPv4 table
800  * running a callback function on each flow.
801  */
802 static int EBPFForEachFlowV6Table(ThreadVars *th_v,
803  LiveDevice *dev, const char *name,
804  struct timespec *ctime,
805  struct ebpf_timeout_config *tcfg,
806  OpFlowForKey EBPFOpFlowForKey
807  )
808 {
809  struct flows_stats flowstats = { 0, 0, 0};
810  int mapfd = EBPFGetMapFDByName(dev->dev, name);
811  if (mapfd == -1)
812  return -1;
813 
814  struct flowv6_keys key = {}, next_key;
815  int found = 0;
816  unsigned int i;
817  uint64_t hash_cnt = 0;
818 
819  if (tcfg->cpus_count == 0) {
820  SCLogWarning("CPU count should not be 0");
821  return 0;
822  }
823 
824  uint64_t pkts_cnt = 0;
825  while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
826  uint64_t bytes_cnt = 0;
827  hash_cnt++;
828  if (pkts_cnt > 0) {
829  EBPFDeleteKey(mapfd, &key);
830  }
831  pkts_cnt = 0;
832  /* We use a per CPU structure so we will get a array of values. But if nr_cpus
833  * is 1 then we have a global hash. */
834  BPF_DECLARE_PERCPU(struct pair, values_array, tcfg->cpus_count);
835  memset(values_array, 0, sizeof(values_array));
836  int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
837  if (res < 0) {
838  SCLogDebug("no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
839  key = next_key;
840  continue;
841  }
842  for (i = 0; i < tcfg->cpus_count; i++) {
843  /* let's start accumulating value so we can compute the counters */
844  SCLogDebug("%d: Adding pkts %lu bytes %lu", i,
845  BPF_PERCPU(values_array, i).packets,
846  BPF_PERCPU(values_array, i).bytes);
847  pkts_cnt += BPF_PERCPU(values_array, i).packets;
848  bytes_cnt += BPF_PERCPU(values_array, i).bytes;
849  }
850  /* Get the corresponding Flow in the Flow table to compare and update
851  * its counters and lastseen if needed */
852  FlowKey flow_key;
853  if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
854  flow_key.sp = ntohs(next_key.port16[0]);
855  flow_key.dp = ntohs(next_key.port16[1]);
856  flow_key.src.family = AF_INET6;
857  flow_key.src.addr_data32[0] = next_key.src[0];
858  flow_key.src.addr_data32[1] = next_key.src[1];
859  flow_key.src.addr_data32[2] = next_key.src[2];
860  flow_key.src.addr_data32[3] = next_key.src[3];
861  flow_key.dst.family = AF_INET6;
862  flow_key.dst.addr_data32[0] = next_key.dst[0];
863  flow_key.dst.addr_data32[1] = next_key.dst[1];
864  flow_key.dst.addr_data32[2] = next_key.dst[2];
865  flow_key.dst.addr_data32[3] = next_key.dst[3];
866  } else {
867  flow_key.sp = next_key.port16[0];
868  flow_key.dp = next_key.port16[1];
869  flow_key.src.family = AF_INET6;
870  flow_key.src.addr_data32[0] = ntohl(next_key.src[0]);
871  flow_key.src.addr_data32[1] = ntohl(next_key.src[1]);
872  flow_key.src.addr_data32[2] = ntohl(next_key.src[2]);
873  flow_key.src.addr_data32[3] = ntohl(next_key.src[3]);
874  flow_key.dst.family = AF_INET6;
875  flow_key.dst.addr_data32[0] = ntohl(next_key.dst[0]);
876  flow_key.dst.addr_data32[1] = ntohl(next_key.dst[1]);
877  flow_key.dst.addr_data32[2] = ntohl(next_key.dst[2]);
878  flow_key.dst.addr_data32[3] = ntohl(next_key.dst[3]);
879  }
880  flow_key.vlan_id[0] = next_key.vlan0;
881  flow_key.vlan_id[1] = next_key.vlan1;
882  if (next_key.ip_proto == 1) {
883  flow_key.proto = IPPROTO_TCP;
884  } else {
885  flow_key.proto = IPPROTO_UDP;
886  }
887  flow_key.recursion_level = 0;
888  flow_key.livedev_id = dev->id;
889  pkts_cnt = EBPFOpFlowForKey(&flowstats, dev, &next_key, sizeof(next_key), &flow_key,
890  ctime, pkts_cnt, bytes_cnt,
891  mapfd, tcfg->cpus_count);
892  if (pkts_cnt > 0) {
893  found = 1;
894  }
895 
896  if (TmThreadsCheckFlag(th_v, THV_KILL)) {
897  return 0;
898  }
899 
900  key = next_key;
901  }
902  if (pkts_cnt > 0) {
903  EBPFDeleteKey(mapfd, &key);
904  found = 1;
905  }
906  SC_ATOMIC_ADD(dev->bypassed, flowstats.packets);
907 
908  LiveDevAddBypassStats(dev, flowstats.count, AF_INET6);
909  SCLogInfo("IPv6 bypassed flow table size: %" PRIu64, hash_cnt);
910  return found;
911 }
912 
913 
914 int EBPFCheckBypassedFlowCreate(ThreadVars *th_v, struct timespec *curtime, void *data)
915 {
916  LiveDevice *ldev = NULL, *ndev;
917  struct ebpf_timeout_config *cfg = (struct ebpf_timeout_config *)data;
918  while(LiveDeviceForEach(&ldev, &ndev)) {
919  EBPFForEachFlowV4Table(th_v, ldev, "flow_table_v4",
920  curtime,
921  cfg, EBPFCreateFlowForKey);
922  EBPFForEachFlowV6Table(th_v, ldev, "flow_table_v6",
923  curtime,
924  cfg, EBPFCreateFlowForKey);
925  }
926 
927  return 0;
928 }
929 
/**
 * Register the storage slots used by the eBPF extension: a per-device slot
 * holding the bpf_maps_info (freed by BpfMapsInfoFree) and a per-flow slot
 * holding the list of bypassed ifaces (freed by BypassedListFree).
 */
void EBPFRegisterExtension(void)
{
    g_livedev_storage_id = LiveDevStorageRegister("bpfmap", sizeof(void *), NULL, BpfMapsInfoFree);
    g_flow_storage_id = FlowStorageRegister("bypassedlist", sizeof(void *), NULL, BypassedListFree);
}
935 
936 
937 #ifdef HAVE_PACKET_XDP
938 
939 static uint32_t g_redirect_iface_cpu_counter = 0;
940 
941 static int EBPFAddCPUToMap(const char *iface, uint32_t i)
942 {
943  int cpumap = EBPFGetMapFDByName(iface, "cpu_map");
944  uint32_t queue_size = 4096;
945  int ret;
946 
947  if (cpumap < 0) {
948  SCLogError("Can't find cpu_map");
949  return -1;
950  }
951  ret = bpf_map_update_elem(cpumap, &i, &queue_size, 0);
952  if (ret) {
953  SCLogError("Create CPU entry failed (err:%d)", ret);
954  return -1;
955  }
956  int cpus_available = EBPFGetMapFDByName(iface, "cpus_available");
957  if (cpus_available < 0) {
958  SCLogError("Can't find cpus_available map");
959  return -1;
960  }
961 
962  ret = bpf_map_update_elem(cpus_available, &g_redirect_iface_cpu_counter, &i, 0);
963  if (ret) {
964  SCLogError("Create CPU entry failed (err:%d)", ret);
965  return -1;
966  }
967  return 0;
968 }
969 
970 static void EBPFRedirectMapAddCPU(int i, void *data)
971 {
972  if (EBPFAddCPUToMap(data, i) < 0) {
973  SCLogError("Unable to add CPU %d to set", i);
974  } else {
975  g_redirect_iface_cpu_counter++;
976  }
977 }
978 
979 void EBPFBuildCPUSet(SCConfNode *node, char *iface)
980 {
981  uint32_t key0 = 0;
982  int mapfd = EBPFGetMapFDByName(iface, "cpus_count");
983  if (mapfd < 0) {
984  SCLogError("Unable to find 'cpus_count' map");
985  return;
986  }
987  g_redirect_iface_cpu_counter = 0;
988  if (node == NULL) {
989  bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
990  BPF_ANY);
991  return;
992  }
993  if (BuildCpusetWithCallback("xdp-cpu-redirect", node, EBPFRedirectMapAddCPU, iface) < 0) {
994  SCLogWarning("Failed to parse XDP CPU redirect configuration");
995  return;
996  }
997  bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
998  BPF_ANY);
999 }
1000 
1001 /**
1002  * Setup peer interface in XDP system
1003  *
1004  * Ths function set up the peer interface in the XDP maps used by the
1005  * bypass filter. The first map tx_peer has type device map and is
1006  * used to store the peer. The second map tx_peer_int is used by the
1007  * code to check if we have a peer defined for this interface.
1008  *
1009  * As the map are per device we just need maps with one single element.
1010  * In both case, we use the key 0 to enter element so XDP kernel code
1011  * is using the same key.
1012  */
1013 int EBPFSetPeerIface(const char *iface, const char *out_iface)
1014 {
1015  int mapfd = EBPFGetMapFDByName(iface, "tx_peer");
1016  if (mapfd < 0) {
1017  SCLogError("Unable to find 'tx_peer' map");
1018  return -1;
1019  }
1020  int intmapfd = EBPFGetMapFDByName(iface, "tx_peer_int");
1021  if (intmapfd < 0) {
1022  SCLogError("Unable to find 'tx_peer_int' map");
1023  return -1;
1024  }
1025 
1026  int key0 = 0;
1027  unsigned int peer_index = if_nametoindex(out_iface);
1028  if (peer_index == 0) {
1029  SCLogError("No iface '%s'", out_iface);
1030  return -1;
1031  }
1032  int ret = bpf_map_update_elem(mapfd, &key0, &peer_index, BPF_ANY);
1033  if (ret) {
1034  SCLogError("Create peer entry failed (err:%d)", ret);
1035  return -1;
1036  }
1037  ret = bpf_map_update_elem(intmapfd, &key0, &peer_index, BPF_ANY);
1038  if (ret) {
1039  SCLogError("Create peer entry failed (err:%d)", ret);
1040  return -1;
1041  }
1042  return 0;
1043 }
1044 
1045 /**
1046  * Bypass the flow on all ifaces it is seen on. This is used
1047  * in IPS mode.
1048  */
1049 
1050 int EBPFUpdateFlow(Flow *f, Packet *p, void *data)
1051 {
1052  BypassedIfaceList *ifl = (BypassedIfaceList *)FlowGetStorageById(f, g_flow_storage_id);
1053  if (ifl == NULL) {
1054  ifl = SCCalloc(1, sizeof(*ifl));
1055  if (ifl == NULL) {
1056  return 0;
1057  }
1058  ifl->dev = p->livedev;
1059  FlowSetStorageById(f, g_flow_storage_id, ifl);
1060  return 1;
1061  }
1062  /* Look for packet iface in the list */
1063  BypassedIfaceList *ldev = ifl;
1064  while (ldev) {
1065  if (p->livedev == ldev->dev) {
1066  return 1;
1067  }
1068  ldev = ldev->next;
1069  }
1070  /* Call bypass function if ever not in the list */
1071  p->BypassPacketsFlow(p);
1072 
1073  /* Add iface to the list */
1074  BypassedIfaceList *nifl = SCCalloc(1, sizeof(*nifl));
1075  if (nifl == NULL) {
1076  return 0;
1077  }
1078  nifl->dev = p->livedev;
1079  nifl->next = ifl;
1080  FlowSetStorageById(f, g_flow_storage_id, nifl);
1081  return 1;
1082 }
1083 
1084 #endif /* HAVE_PACKET_XDP */
1085 
1086 #endif
FlowStorageId
Definition: flow-storage.h:31
util-device-private.h
tm-threads.h
flow-bypass.h
FLOW_IS_IPV6
#define FLOW_IS_IPV6(f)
Definition: flow.h:163
LiveDevStorageId_
Definition: device-storage.h:31
FlowKey_::src
Address src
Definition: flow.h:301
GetFlowBypassInfoID
FlowStorageId GetFlowBypassInfoID(void)
Definition: flow-util.c:222
FlowBypassInfo_
Definition: flow.h:522
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:282
next
struct HtpBodyChunk_ * next
Definition: app-layer-htp.h:0
flows_stats::count
uint64_t count
Definition: flow-bypass.h:30
name
const char * name
Definition: detect-engine-proto.c:48
FlowKeyGetHash
uint32_t FlowKeyGetHash(FlowKey *fk)
Definition: flow-hash.c:314
Flow_
Flow data structure.
Definition: flow.h:347
LiveDevice_
Definition: util-device-private.h:32
SC_ATOMIC_ADD
#define SC_ATOMIC_ADD(name, val)
add a value to our atomic variable
Definition: util-atomic.h:332
LiveDevice_::id
uint16_t id
Definition: util-device-private.h:38
flow-hash.h
FlowBypassInfo_::tosrcbytecnt
uint64_t tosrcbytecnt
Definition: flow.h:527
LiveDeviceForEach
LiveDevice * LiveDeviceForEach(LiveDevice **ldev, LiveDevice **ndev)
Definition: util-device.c:440
device-storage.h
Packet_::BypassPacketsFlow
int(* BypassPacketsFlow)(struct Packet_ *)
Definition: decode.h:594
FLOWLOCK_UNLOCK
#define FLOWLOCK_UNLOCK(fb)
Definition: flow.h:264
LiveDevGetStorageById
void * LiveDevGetStorageById(LiveDevice *d, LiveDevStorageId id)
Get a value from a given LiveDevice storage.
Definition: device-storage.c:90
LiveDevStorageRegister
LiveDevStorageId LiveDevStorageRegister(const char *name, const unsigned int size, void *(*Alloc)(unsigned int), void(*Free)(void *))
Register a LiveDevice storage.
Definition: device-storage.c:61
SCTIME_FROM_SECS
#define SCTIME_FROM_SECS(s)
Definition: util-time.h:69
FlowBypassInfo_::todstbytecnt
uint64_t todstbytecnt
Definition: flow.h:529
FlowBypassInfo_::BypassUpdate
bool(* BypassUpdate)(Flow *f, void *data, time_t tsec)
Definition: flow.h:523
util-cpu.h
FlowBypassInfo_::BypassFree
void(* BypassFree)(void *data)
Definition: flow.h:524
LiveGetDevice
LiveDevice * LiveGetDevice(const char *name)
Get a pointer to the device at idx.
Definition: util-device.c:268
Flow_::lastts
SCTime_t lastts
Definition: flow.h:411
FlowKey_::recursion_level
uint8_t recursion_level
Definition: flow.h:304
util-ebpf.h
ThreadVars_
Per thread variable structure.
Definition: threadvars.h:58
util-affinity.h
THV_KILL
#define THV_KILL
Definition: threadvars.h:40
FlowSetStorageById
int FlowSetStorageById(Flow *f, FlowStorageId id, void *ptr)
Definition: flow-storage.c:45
FlowBypassInfo_::todstpktcnt
uint64_t todstpktcnt
Definition: flow.h:528
FlowStorageRegister
FlowStorageId FlowStorageRegister(const char *name, const unsigned int size, void *(*Alloc)(unsigned int), void(*Free)(void *))
Definition: flow-storage.c:66
LiveDevice_::dev
char * dev
Definition: util-device-private.h:33
FlowBypassInfo_::bypass_data
void * bypass_data
Definition: flow.h:525
SCLogWarning
#define SCLogWarning(...)
Macro used to log WARNING messages.
Definition: util-debug.h:262
FlowKey_::livedev_id
uint16_t livedev_id
Definition: flow.h:305
FlowKey_::sp
Port sp
Definition: flow.h:302
Packet_
Definition: decode.h:501
Packet_::livedev
struct LiveDevice_ * livedev
Definition: decode.h:618
LiveDevUseBypass
int LiveDevUseBypass(LiveDevice *dev)
Definition: util-device.c:494
LiveDevAddBypassStats
void LiveDevAddBypassStats(LiveDevice *dev, uint64_t cnt, int family)
Definition: util-device.c:516
FlowBypassInfo_::tosrcpktcnt
uint64_t tosrcpktcnt
Definition: flow.h:526
SCLogInfo
#define SCLogInfo(...)
Macro used to log INFORMATIONAL messages.
Definition: util-debug.h:232
FlowGetStorageById
void * FlowGetStorageById(const Flow *f, FlowStorageId id)
Definition: flow-storage.c:40
FlowUpdateState
void FlowUpdateState(Flow *f, const enum FlowState s)
Definition: flow.c:1186
flow-storage.h
flags
uint8_t flags
Definition: decode-gre.h:0
suricata-common.h
FlowKey_::dst
Address dst
Definition: flow.h:301
SCStrdup
#define SCStrdup(s)
Definition: util-mem.h:56
Flow_::livedev
struct LiveDevice_ * livedev
Definition: flow.h:392
SCLogConfig
struct SCLogConfig_ SCLogConfig
Holds the config state used by the logging api.
FlowKey_::proto
uint8_t proto
Definition: flow.h:303
BuildCpusetWithCallback
int BuildCpusetWithCallback(const char *name, SCConfNode *node, void(*Callback)(int i, void *data), void *data)
Definition: util-affinity.c:227
SCLogError
#define SCLogError(...)
Macro used to log ERROR messages.
Definition: util-debug.h:274
SCFree
#define SCFree(p)
Definition: util-mem.h:61
FlowGetFromFlowKey
Flow * FlowGetFromFlowKey(FlowKey *key, struct timespec *ttime, const uint32_t hash)
Get or create a Flow using a FlowKey.
Definition: flow-hash.c:1102
FlowStorageId::id
int id
Definition: flow-storage.h:32
LiveDevSetStorageById
int LiveDevSetStorageById(LiveDevice *d, LiveDevStorageId id, void *ptr)
Store a pointer in a given LiveDevice storage.
Definition: device-storage.c:77
FlowKey_
Definition: flow.h:300
Address_::family
char family
Definition: decode.h:113
FlowKey_::vlan_id
uint16_t vlan_id[VLAN_MAX_LAYERS]
Definition: flow.h:306
flow.h
TmThreadsCheckFlag
int TmThreadsCheckFlag(ThreadVars *tv, uint32_t flag)
Check if a thread flag is set.
Definition: tm-threads.c:95
SCCalloc
#define SCCalloc(nm, sz)
Definition: util-mem.h:53
LiveDevStorageId_::id
int id
Definition: device-storage.h:32
FlowKey_::dp
Port dp
Definition: flow.h:302
flows_stats::packets
uint64_t packets
Definition: flow-bypass.h:31
SCConfNode_
Definition: conf.h:37
flows_stats
Definition: flow-bypass.h:29