suricata
util-ebpf.c
Go to the documentation of this file.
1 /* Copyright (C) 2018-2019 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \ingroup afppacket
20  *
21  * @{
22  */
23 
24 /**
25  * \file
26  *
27  * \author Eric Leblond <eric@regit.org>
28  *
29  * eBPF utility
30  *
31  */
32 
33 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34 #define SC_PCAP_DONT_INCLUDE_PCAP_H 1
35 
36 #include "suricata-common.h"
37 #include "flow-bypass.h"
38 
39 #ifdef HAVE_PACKET_EBPF
40 
41 #include <sys/time.h>
42 #include <sys/resource.h>
43 
44 #include "util-ebpf.h"
45 #include "util-cpu.h"
46 #include "util-device.h"
47 
48 #include "device-storage.h"
49 #include "flow-storage.h"
50 #include "flow.h"
51 #include "flow-hash.h"
52 #include "tm-threads.h"
53 
54 #include <bpf/libbpf.h>
55 #include <bpf/bpf.h>
56 #include <net/if.h>
57 #include "config.h"
58 
59 #define BPF_MAP_MAX_COUNT 16
60 
61 #define BYPASSED_FLOW_TIMEOUT 60
62 
63 static int g_livedev_storage_id = -1;
64 static int g_flow_storage_id = -1;
65 
66 struct bpf_map_item {
67  char iface[IFNAMSIZ];
68  char * name;
69  int fd;
70  uint8_t to_unlink;
71 };
72 
73 struct bpf_maps_info {
74  struct bpf_map_item array[BPF_MAP_MAX_COUNT];
75  int last;
76 };
77 
78 typedef struct BypassedIfaceList_ {
79  LiveDevice *dev;
80  struct BypassedIfaceList_ *next;
81 } BypassedIfaceList;
82 
83 static void BpfMapsInfoFree(void *bpf)
84 {
85  struct bpf_maps_info *bpfinfo = (struct bpf_maps_info *)bpf;
86  int i;
87  for (i = 0; i < bpfinfo->last; i ++) {
88  if (bpfinfo->array[i].name) {
89  if (bpfinfo->array[i].to_unlink) {
90  char pinnedpath[PATH_MAX];
91  int ret = snprintf(pinnedpath, sizeof(pinnedpath),
92  "/sys/fs/bpf/suricata-%s-%s",
93  bpfinfo->array[i].iface,
94  bpfinfo->array[i].name);
95  if (ret > 0) {
96  /* Unlink the pinned entry */
97  ret = unlink(pinnedpath);
98  if (ret == -1) {
99  int error = errno;
101  "Unable to remove %s: %s (%d)",
102  pinnedpath,
103  strerror(error),
104  error);
105  }
106  } else {
107  SCLogWarning(SC_ERR_SPRINTF, "Unable to remove map %s",
108  bpfinfo->array[i].name);
109  }
110  }
111  SCFree(bpfinfo->array[i].name);
112  }
113  }
114  SCFree(bpfinfo);
115 }
116 
117 static void BypassedListFree(void *ifl)
118 {
119  BypassedIfaceList *mifl = (BypassedIfaceList *)ifl;
120  BypassedIfaceList *nifl;
121  while (mifl) {
122  nifl = mifl->next;
123  SCFree(mifl);
124  mifl = nifl;
125  }
126 }
127 
128 void EBPFDeleteKey(int fd, void *key)
129 {
130  int ret = bpf_map_delete_elem(fd, key);
131  if (ret < 0) {
133  "Unable to delete entry: %s (%d)",
134  strerror(errno),
135  errno);
136  }
137 }
138 
139 static struct bpf_maps_info *EBPFGetBpfMap(const char *iface)
140 {
141  LiveDevice *livedev = LiveGetDevice(iface);
142  if (livedev == NULL)
143  return NULL;
144  void *data = LiveDevGetStorageById(livedev, g_livedev_storage_id);
145 
146  return (struct bpf_maps_info *)data;
147 }
148 
/**
 * Get file descriptor of a map in the scope of an interface
 *
 * \param iface the interface where the map needs to be looked for
 * \param name the name of the map
 * \return the file descriptor or -1 in case of error
 */
156 int EBPFGetMapFDByName(const char *iface, const char *name)
157 {
158  int i;
159 
160  if (iface == NULL || name == NULL)
161  return -1;
162  struct bpf_maps_info *bpf_maps = EBPFGetBpfMap(iface);
163  if (bpf_maps == NULL)
164  return -1;
165 
166  for (i = 0; i < BPF_MAP_MAX_COUNT; i++) {
167  if (!bpf_maps->array[i].name)
168  continue;
169  if (!strcmp(bpf_maps->array[i].name, name)) {
170  SCLogDebug("Got fd %d for eBPF map '%s'", bpf_maps->array[i].fd, name);
171  return bpf_maps->array[i].fd;
172  }
173  }
174 
175  return -1;
176 }
177 
178 static int EBPFLoadPinnedMapsFile(LiveDevice *livedev, const char *file)
179 {
180  char pinnedpath[1024];
181  snprintf(pinnedpath, sizeof(pinnedpath),
182  "/sys/fs/bpf/suricata-%s-%s",
183  livedev->dev,
184  file);
185 
186  return bpf_obj_get(pinnedpath);
187 }
188 
189 static int EBPFLoadPinnedMaps(LiveDevice *livedev, struct ebpf_timeout_config *config)
190 {
191  int fd_v4 = -1, fd_v6 = -1;
192 
193  /* First try to load the eBPF check map and return if found */
194  if (config->pinned_maps_name) {
195  int ret = EBPFLoadPinnedMapsFile(livedev, config->pinned_maps_name);
196  if (ret == 0) {
197  /* pinned maps found, let's just exit as XDP filter is in place */
198  return ret;
199  }
200  }
201 
202  if (config->mode == AFP_MODE_XDP_BYPASS) {
203  /* Get flow v4 table */
204  fd_v4 = EBPFLoadPinnedMapsFile(livedev, "flow_table_v4");
205  if (fd_v4 < 0) {
206  return fd_v4;
207  }
208 
209  /* Get flow v6 table */
210  fd_v6 = EBPFLoadPinnedMapsFile(livedev, "flow_table_v6");
211  if (fd_v6 < 0) {
213  "Found a flow_table_v4 map but no flow_table_v6 map");
214  return fd_v6;
215  }
216  }
217 
218  struct bpf_maps_info *bpf_map_data = SCCalloc(1, sizeof(*bpf_map_data));
219  if (bpf_map_data == NULL) {
220  SCLogError(SC_ERR_MEM_ALLOC, "Can't allocate bpf map array");
221  return -1;
222  }
223 
224  if (config->mode == AFP_MODE_XDP_BYPASS) {
225  bpf_map_data->array[0].fd = fd_v4;
226  bpf_map_data->array[0].name = SCStrdup("flow_table_v4");
227  if (bpf_map_data->array[0].name == NULL) {
228  goto alloc_error;
229  }
230  bpf_map_data->array[1].fd = fd_v6;
231  bpf_map_data->array[1].name = SCStrdup("flow_table_v6");
232  if (bpf_map_data->array[1].name == NULL) {
233  goto alloc_error;
234  }
235  bpf_map_data->last = 2;
236  } else {
237  bpf_map_data->last = 0;
238  }
239 
240  /* Load other known maps: cpu_map, cpus_available, tx_peer, tx_peer_int */
241  int fd = EBPFLoadPinnedMapsFile(livedev, "cpu_map");
242  if (fd >= 0) {
243  bpf_map_data->array[bpf_map_data->last].fd = fd;
244  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("cpu_map");
245  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
246  goto alloc_error;
247  }
248  bpf_map_data->last++;
249  }
250  fd = EBPFLoadPinnedMapsFile(livedev, "cpus_available");
251  if (fd >= 0) {
252  bpf_map_data->array[bpf_map_data->last].fd = fd;
253  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("cpus_available");
254  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
255  goto alloc_error;
256  }
257  bpf_map_data->last++;
258  }
259  fd = EBPFLoadPinnedMapsFile(livedev, "tx_peer");
260  if (fd >= 0) {
261  bpf_map_data->array[bpf_map_data->last].fd = fd;
262  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("tx_peer");
263  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
264  goto alloc_error;
265  }
266  bpf_map_data->last++;
267  }
268  fd = EBPFLoadPinnedMapsFile(livedev, "tx_peer_int");
269  if (fd >= 0) {
270  bpf_map_data->array[bpf_map_data->last].fd = fd;
271  bpf_map_data->array[bpf_map_data->last].name = SCStrdup("tx_peer_int");
272  if (bpf_map_data->array[bpf_map_data->last].name == NULL) {
273  goto alloc_error;
274  }
275  bpf_map_data->last++;
276  }
277 
278  /* Attach the bpf_maps_info to the LiveDevice via the device storage */
279  LiveDevSetStorageById(livedev, g_livedev_storage_id, bpf_map_data);
280  /* Declare that device will use bypass stats */
281  LiveDevUseBypass(livedev);
282 
283  return 0;
284 
285 alloc_error:
286  for (int i = 0; i < bpf_map_data->last; i++) {
287  SCFree(bpf_map_data->array[i].name);
288  }
289  bpf_map_data->last = 0;
290  SCLogError(SC_ERR_MEM_ALLOC, "Can't allocate bpf map name");
291  return -1;
292 }
293 
294 /**
295  * Load a section of an eBPF file
296  *
297  * This function loads a section inside an eBPF and return
298  * via the parameter val the file descriptor that will be used to
299  * inject the eBPF code into the kernel via a syscall.
300  *
301  * \param path the path of the eBPF file to load
302  * \param section the section in the eBPF file to load
303  * \param val a pointer to an integer that will be the file desc
304  * \return -1 in case of error, 0 in case of success, 1 if pinned maps is loaded
305  */
306 int EBPFLoadFile(const char *iface, const char *path, const char * section,
307  int *val, struct ebpf_timeout_config *config)
308 {
309  int err, pfd;
310  bool found = false;
311  struct bpf_object *bpfobj = NULL;
312  struct bpf_program *bpfprog = NULL;
313  struct bpf_map *map = NULL;
314 
315  if (iface == NULL)
316  return -1;
317  LiveDevice *livedev = LiveGetDevice(iface);
318  if (livedev == NULL)
319  return -1;
320 
321  if (config->flags & EBPF_XDP_CODE && config->flags & EBPF_PINNED_MAPS) {
322  /* We try to get our flow table maps and if we have them we can simply return */
323  if (EBPFLoadPinnedMaps(livedev, config) == 0) {
324  SCLogInfo("Loaded pinned maps, will use already loaded eBPF filter");
325  return 1;
326  }
327  }
328 
329  if (! path) {
330  SCLogError(SC_ERR_INVALID_VALUE, "No file defined to load eBPF from");
331  return -1;
332  }
333 
334  /* Sending the eBPF code to the kernel requires a large amount of
335  * locked memory so we set it to unlimited to avoid a ENOPERM error */
336  struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
337  if (setrlimit(RLIMIT_MEMLOCK, &r) != 0) {
338  SCLogError(SC_ERR_MEM_ALLOC, "Unable to lock memory: %s (%d)",
339  strerror(errno), errno);
340  return -1;
341  }
342 
343  /* Open the eBPF file and parse it */
344  bpfobj = bpf_object__open(path);
345  long error = libbpf_get_error(bpfobj);
346  if (error) {
347  char err_buf[128];
348  libbpf_strerror(error, err_buf,
349  sizeof(err_buf));
351  "Unable to load eBPF objects in '%s': %s",
352  path, err_buf);
353  return -1;
354  }
355 
356  if (config->flags & EBPF_XDP_HW_MODE) {
357  unsigned int ifindex = if_nametoindex(iface);
358  bpf_object__for_each_program(bpfprog, bpfobj) {
359  bpf_program__set_ifindex(bpfprog, ifindex);
360  }
361  bpf_map__for_each(map, bpfobj) {
362  bpf_map__set_ifindex(map, ifindex);
363  }
364  }
365 
366  /* Let's check that our section is here */
367  bpf_object__for_each_program(bpfprog, bpfobj) {
368  const char *title = bpf_program__title(bpfprog, 0);
369  if (!strcmp(title, section)) {
370  if (config->flags & EBPF_SOCKET_FILTER) {
371  bpf_program__set_socket_filter(bpfprog);
372  } else {
373  bpf_program__set_xdp(bpfprog);
374  }
375  found = true;
376  break;
377  }
378  }
379 
380  if (found == false) {
382  "No section '%s' in '%s' file. Will not be able to use the file",
383  section,
384  path);
385  return -1;
386  }
387 
388  err = bpf_object__load(bpfobj);
389  if (err < 0) {
390  if (err == -EPERM) {
392  "Permission issue when loading eBPF object"
393  " (check libbpf error on stdout)");
394  } else {
395  char buf[129];
396  libbpf_strerror(err, buf, sizeof(buf));
398  "Unable to load eBPF object: %s (%d)",
399  buf,
400  err);
401  }
402  return -1;
403  }
404 
405  /* Kernel and userspace are sharing data via map. Userspace access to the
406  * map via a file descriptor. So we need to store the map to fd info. For
407  * that we use bpf_maps_info:: */
408  struct bpf_maps_info *bpf_map_data = SCCalloc(1, sizeof(*bpf_map_data));
409  if (bpf_map_data == NULL) {
410  SCLogError(SC_ERR_MEM_ALLOC, "Can't allocate bpf map array");
411  return -1;
412  }
413 
414  /* Store the maps in bpf_maps_info:: */
415  bpf_map__for_each(map, bpfobj) {
416  if (bpf_map_data->last == BPF_MAP_MAX_COUNT) {
417  SCLogError(SC_ERR_NOT_SUPPORTED, "Too many BPF maps in eBPF files");
418  break;
419  }
420  SCLogDebug("Got a map '%s' with fd '%d'", bpf_map__name(map), bpf_map__fd(map));
421  bpf_map_data->array[bpf_map_data->last].fd = bpf_map__fd(map);
422  bpf_map_data->array[bpf_map_data->last].name = SCStrdup(bpf_map__name(map));
423  snprintf(bpf_map_data->array[bpf_map_data->last].iface, IFNAMSIZ,
424  "%s", iface);
425  if (!bpf_map_data->array[bpf_map_data->last].name) {
426  SCLogError(SC_ERR_MEM_ALLOC, "Unable to duplicate map name");
427  BpfMapsInfoFree(bpf_map_data);
428  return -1;
429  }
430  bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
431  if (config->flags & EBPF_PINNED_MAPS) {
432  SCLogConfig("Pinning: %d to %s", bpf_map_data->array[bpf_map_data->last].fd,
433  bpf_map_data->array[bpf_map_data->last].name);
434  char buf[1024];
435  snprintf(buf, sizeof(buf), "/sys/fs/bpf/suricata-%s-%s", iface,
436  bpf_map_data->array[bpf_map_data->last].name);
437  int ret = bpf_obj_pin(bpf_map_data->array[bpf_map_data->last].fd, buf);
438  if (ret != 0) {
439  SCLogWarning(SC_ERR_AFP_CREATE, "Can not pin: %s", strerror(errno));
440  }
441  /* Don't unlink pinned maps in XDP mode to avoid a state reset */
442  if (config->flags & EBPF_XDP_CODE) {
443  bpf_map_data->array[bpf_map_data->last].to_unlink = 0;
444  } else {
445  bpf_map_data->array[bpf_map_data->last].to_unlink = 1;
446  }
447  }
448  bpf_map_data->last++;
449  }
450 
451  /* Attach the bpf_maps_info to the LiveDevice via the device storage */
452  LiveDevSetStorageById(livedev, g_livedev_storage_id, bpf_map_data);
453  LiveDevUseBypass(livedev);
454 
455  /* Finally we get the file descriptor for our eBPF program. We will use
456  * the fd to attach the program to the socket (eBPF case) or to the device
457  * (XDP case). */
458  pfd = bpf_program__fd(bpfprog);
459  if (pfd == -1) {
461  "Unable to find %s section", section);
462  return -1;
463  }
464 
465  SCLogInfo("Successfully loaded eBPF file '%s' on '%s'", path, iface);
466  *val = pfd;
467  return 0;
468 }
469 
470 /**
471  * Attach a XDP program identified by its file descriptor to a device
472  *
473  * \param iface the name of interface
474  * \param fd the eBPF/XDP program file descriptor
475  * \param a flag to pass to attach function mostly used to set XDP mode
476  * \return -1 in case of error, 0 if success
477  */
/**
 * Attach a XDP program identified by its file descriptor to a device
 *
 * \param iface the name of interface
 * \param fd the eBPF/XDP program file descriptor
 * \param flags flags to pass to attach function, mostly used to set XDP mode
 * \return -1 in case of error, 0 if success
 */
int EBPFSetupXDP(const char *iface, int fd, uint8_t flags)
{
#ifdef HAVE_PACKET_XDP
    unsigned int ifindex = if_nametoindex(iface);
    if (ifindex == 0) {
        /* restored: this log call was lost in extraction */
        SCLogError(SC_ERR_INVALID_VALUE,
                "Unknown interface '%s'", iface);
        return -1;
    }
    int err = bpf_set_link_xdp_fd(ifindex, fd, flags);
    if (err != 0) {
        char buf[129];
        libbpf_strerror(err, buf, sizeof(buf));
        SCLogError(SC_ERR_INVALID_VALUE, "Unable to set XDP on '%s': %s (%d)",
                   iface, buf, err);
        return -1;
    }
#endif
    return 0;
}
498 
499 /**
500  * Create a Flow in the table for a Flowkey
501  *
502  * \return false (this create function never returns true)
503  */
504 static bool EBPFCreateFlowForKey(struct flows_stats *flowstats, LiveDevice *dev, void *key,
505  size_t skey, FlowKey *flow_key, struct timespec *ctime,
506  uint64_t pkts_cnt, uint64_t bytes_cnt,
507  int mapfd, int cpus_count)
508 {
509  Flow *f = NULL;
510  uint32_t hash = FlowKeyGetHash(flow_key);
511 
512  f = FlowGetFromFlowKey(flow_key, ctime, hash);
513  if (f == NULL)
514  return false;
515 
516  /* set accounting, we can't know the direction, so let's just start to
517  * server then if we already have something in to server to client. We need
518  * these numbers as we will use it to see if we have new traffic coming
519  * on the flow */
521  if (fc == NULL) {
522  fc = SCCalloc(sizeof(FlowBypassInfo), 1);
523  if (fc) {
524  FlowUpdateState(f, FLOW_STATE_CAPTURE_BYPASSED);
526  fc->BypassUpdate = EBPFBypassUpdate;
527  fc->BypassFree = EBPFBypassFree;
528  fc->todstpktcnt = pkts_cnt;
529  fc->todstbytecnt = bytes_cnt;
530  f->livedev = dev;
531  EBPFBypassData *eb = SCCalloc(1, sizeof(EBPFBypassData));
532  if (eb == NULL) {
533  SCFree(fc);
534  FLOWLOCK_UNLOCK(f);
535  return false;
536  }
537  void *mkey = SCCalloc(1, skey);
538  if (mkey == NULL) {
539  SCFree(fc);
540  SCFree(eb);
541  FLOWLOCK_UNLOCK(f);
542  return false;
543  }
544  memcpy(mkey, key, skey);
545  eb->key[0] = mkey;
546  eb->mapfd = mapfd;
547  eb->cpus_count = cpus_count;
548  fc->bypass_data = eb;
549  flowstats->count++;
550  } else {
551  FLOWLOCK_UNLOCK(f);
552  return false;
553  }
554  } else {
555  EBPFBypassData *eb = (EBPFBypassData *) fc->bypass_data;
556  if (eb == NULL) {
557  FLOWLOCK_UNLOCK(f);
558  return false;
559  }
560  /* if both keys are here, then it is a flow bypassed by this
561  * instance so we ignore it */
562  if (eb->key[0] && eb->key[1]) {
563  FLOWLOCK_UNLOCK(f);
564  return false;
565  }
566  fc->tosrcpktcnt = pkts_cnt;
567  fc->tosrcbytecnt = bytes_cnt;
568  void *mkey = SCCalloc(1, skey);
569  if (mkey == NULL) {
570  FLOWLOCK_UNLOCK(f);
571  return false;
572  }
573  memcpy(mkey, key, skey);
574  eb->key[1] = mkey;
575  }
576  f->livedev = dev;
577  FLOWLOCK_UNLOCK(f);
578  return false;
579 }
580 
581 void EBPFBypassFree(void *data)
582 {
583  EBPFBypassData *eb = (EBPFBypassData *)data;
584  if (eb == NULL)
585  return;
586  SCFree(eb->key[0]);
587  if (eb->key[1]) {
588  SCFree(eb->key[1]);
589  }
590  SCFree(eb);
591  return;
592 }
593 
594 /**
595  *
596  * Compare eBPF half flow to Flow
597  *
598  * \return true if entries have activity, false if not
599  */
600 
601 static bool EBPFBypassCheckHalfFlow(Flow *f, FlowBypassInfo *fc,
602  EBPFBypassData *eb, void *key,
603  int index)
604 {
605  int i;
606  uint64_t pkts_cnt = 0;
607  uint64_t bytes_cnt = 0;
608  /* We use a per CPU structure so we will get a array of values. But if nr_cpus
609  * is 1 then we have a global hash. */
610  BPF_DECLARE_PERCPU(struct pair, values_array, eb->cpus_count);
611  memset(values_array, 0, sizeof(values_array));
612  int res = bpf_map_lookup_elem(eb->mapfd, key, values_array);
613  if (res < 0) {
614  SCLogDebug("errno: (%d) %s", errno, strerror(errno));
615  return false;
616  }
617  for (i = 0; i < eb->cpus_count; i++) {
618  /* let's start accumulating value so we can compute the counters */
619  SCLogDebug("%d: Adding pkts %lu bytes %lu", i,
620  BPF_PERCPU(values_array, i).packets,
621  BPF_PERCPU(values_array, i).bytes);
622  pkts_cnt += BPF_PERCPU(values_array, i).packets;
623  bytes_cnt += BPF_PERCPU(values_array, i).bytes;
624  }
625  if (index == 0) {
626  if (pkts_cnt != fc->todstpktcnt) {
627  fc->todstpktcnt = pkts_cnt;
628  fc->todstbytecnt = bytes_cnt;
629  return true;
630  }
631  } else {
632  if (pkts_cnt != fc->tosrcpktcnt) {
633  fc->tosrcpktcnt = pkts_cnt;
634  fc->tosrcbytecnt = bytes_cnt;
635  return true;
636  }
637  }
638 
639  return false;
640 }
641 
642 /** Check both half flows for update
643  *
644  * Update lastts in the flow and do accounting
645  *
646  * */
647 bool EBPFBypassUpdate(Flow *f, void *data, time_t tsec)
648 {
649  EBPFBypassData *eb = (EBPFBypassData *)data;
650  if (eb == NULL) {
651  return false;
652  }
654  if (fc == NULL) {
655  return false;
656  }
657  bool activity = EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[0], 0);
658  activity |= EBPFBypassCheckHalfFlow(f, fc, eb, eb->key[1], 1);
659  if (!activity) {
660  SCLogDebug("Delete entry: %u (%ld)", FLOW_IS_IPV6(f), FlowGetId(f));
661  /* delete the entries if no time update */
662  EBPFDeleteKey(eb->mapfd, eb->key[0]);
663  EBPFDeleteKey(eb->mapfd, eb->key[1]);
664  SCLogDebug("Done delete entry: %u", FLOW_IS_IPV6(f));
665  } else {
666  f->lastts.tv_sec = tsec;
667  return true;
668  }
669  return false;
670 }
671 
672 typedef bool (*OpFlowForKey)(struct flows_stats * flowstats, LiveDevice*dev, void *key,
673  size_t skey, FlowKey *flow_key, struct timespec *ctime,
674  uint64_t pkts_cnt, uint64_t bytes_cnt,
675  int mapfd, int cpus_count);
676 
/**
 * Bypassed flows cleaning for IPv4
 *
 * This function iterates on all the flows of the IPv4 table
 * looking for timed-out flows to delete from the flow table.
 */
/**
 * Bypassed flows cleaning for IPv4
 *
 * This function iterates on all the flows of the IPv4 table
 * looking for timed-out flows to delete from the flow table.
 *
 * Deletion is deferred by one iteration: bpf_map_get_next_key() is
 * used as the cursor, so the previous key is only deleted after the
 * next one has been fetched.
 *
 * \param th_v thread vars, used to honor the kill flag
 * \param dev device whose map is walked
 * \param name name of the eBPF flow map ("flow_table_v4")
 * \param ctime current time, forwarded to the per-key callback
 * \param tcfg timeout config (mode and CPU count)
 * \param EBPFOpFlowForKey callback invoked for each live map entry
 * \return 1 if a dead flow was found, 0 otherwise, -1 on error
 */
static int EBPFForEachFlowV4Table(ThreadVars *th_v, LiveDevice *dev, const char *name,
                                  struct timespec *ctime,
                                  struct ebpf_timeout_config *tcfg,
                                  OpFlowForKey EBPFOpFlowForKey
                                  )
{
    struct flows_stats flowstats = { 0, 0, 0};
    int mapfd = EBPFGetMapFDByName(dev->dev, name);
    if (mapfd == -1)
        return -1;

    struct flowv4_keys key = {}, next_key;
    int found = 0;
    unsigned int i;
    uint64_t hash_cnt = 0;

    /* no CPUs configured: nothing to accumulate, bail out */
    if (tcfg->cpus_count == 0) {
        return 0;
    }

    bool dead_flow = false;
    while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
        uint64_t bytes_cnt = 0;
        uint64_t pkts_cnt = 0;
        hash_cnt++;
        /* delete the previous key if the callback flagged it dead */
        if (dead_flow) {
            EBPFDeleteKey(mapfd, &key);
            dead_flow = false;
        }
        /* We use a per CPU structure so we will get a array of values. But if nr_cpus
         * is 1 then we have a global hash. */
        BPF_DECLARE_PERCPU(struct pair, values_array, tcfg->cpus_count);
        memset(values_array, 0, sizeof(values_array));
        int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
        if (res < 0) {
            SCLogDebug("no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
            SCLogDebug("errno: (%d) %s", errno, strerror(errno));
            key = next_key;
            continue;
        }
        for (i = 0; i < tcfg->cpus_count; i++) {
            /* let's start accumulating value so we can compute the counters */
            SCLogDebug("%d: Adding pkts %lu bytes %lu", i,
                       BPF_PERCPU(values_array, i).packets,
                       BPF_PERCPU(values_array, i).bytes);
            pkts_cnt += BPF_PERCPU(values_array, i).packets;
            bytes_cnt += BPF_PERCPU(values_array, i).bytes;
        }
        /* Get the corresponding Flow in the Flow table to compare and update
         * its counters and lastseen if needed */
        FlowKey flow_key;
        /* XDP stores ports/addresses in network order; the socket filter
         * path stores ports in host order and addresses in network order */
        if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
            flow_key.sp = ntohs(next_key.port16[0]);
            flow_key.dp = ntohs(next_key.port16[1]);
            flow_key.src.addr_data32[0] = next_key.src;
            flow_key.dst.addr_data32[0] = next_key.dst;
        } else {
            flow_key.sp = next_key.port16[0];
            flow_key.dp = next_key.port16[1];
            flow_key.src.addr_data32[0] = ntohl(next_key.src);
            flow_key.dst.addr_data32[0] = ntohl(next_key.dst);
        }
        flow_key.src.family = AF_INET;
        flow_key.src.addr_data32[1] = 0;
        flow_key.src.addr_data32[2] = 0;
        flow_key.src.addr_data32[3] = 0;
        flow_key.dst.family = AF_INET;
        flow_key.dst.addr_data32[1] = 0;
        flow_key.dst.addr_data32[2] = 0;
        flow_key.dst.addr_data32[3] = 0;
        flow_key.vlan_id[0] = next_key.vlan0;
        flow_key.vlan_id[1] = next_key.vlan1;
        /* map encodes TCP as ip_proto 1, anything else is UDP */
        if (next_key.ip_proto == 1) {
            flow_key.proto = IPPROTO_TCP;
        } else {
            flow_key.proto = IPPROTO_UDP;
        }
        flow_key.recursion_level = 0;
        dead_flow = EBPFOpFlowForKey(&flowstats, dev, &next_key, sizeof(next_key), &flow_key,
                                     ctime, pkts_cnt, bytes_cnt,
                                     mapfd, tcfg->cpus_count);
        if (dead_flow) {
            found = 1;
        }

        /* shutting down: stop walking the map */
        if (TmThreadsCheckFlag(th_v, THV_KILL)) {
            return 0;
        }

        key = next_key;
    }
    /* the last key of the walk may still need deletion */
    if (dead_flow) {
        EBPFDeleteKey(mapfd, &key);
        found = 1;
    }
    SC_ATOMIC_ADD(dev->bypassed, flowstats.packets);

    LiveDevAddBypassStats(dev, flowstats.count, AF_INET);
    SCLogInfo("IPv4 bypassed flow table size: %" PRIu64, hash_cnt);

    return found;
}
785 
/**
 * Bypassed flows cleaning for IPv6
 *
 * This function iterates on all the flows of the IPv6 table
 * looking for timed-out flows to delete from the flow table.
 */
792 static int EBPFForEachFlowV6Table(ThreadVars *th_v,
793  LiveDevice *dev, const char *name,
794  struct timespec *ctime,
795  struct ebpf_timeout_config *tcfg,
796  OpFlowForKey EBPFOpFlowForKey
797  )
798 {
799  struct flows_stats flowstats = { 0, 0, 0};
800  int mapfd = EBPFGetMapFDByName(dev->dev, name);
801  if (mapfd == -1)
802  return -1;
803 
804  struct flowv6_keys key = {}, next_key;
805  int found = 0;
806  unsigned int i;
807  uint64_t hash_cnt = 0;
808 
809  if (tcfg->cpus_count == 0) {
810  SCLogWarning(SC_ERR_INVALID_VALUE, "CPU count should not be 0");
811  return 0;
812  }
813 
814  uint64_t pkts_cnt = 0;
815  while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
816  uint64_t bytes_cnt = 0;
817  hash_cnt++;
818  if (pkts_cnt > 0) {
819  EBPFDeleteKey(mapfd, &key);
820  }
821  pkts_cnt = 0;
822  /* We use a per CPU structure so we will get a array of values. But if nr_cpus
823  * is 1 then we have a global hash. */
824  BPF_DECLARE_PERCPU(struct pair, values_array, tcfg->cpus_count);
825  memset(values_array, 0, sizeof(values_array));
826  int res = bpf_map_lookup_elem(mapfd, &next_key, values_array);
827  if (res < 0) {
828  SCLogDebug("no entry in v4 table for %d -> %d", key.port16[0], key.port16[1]);
829  key = next_key;
830  continue;
831  }
832  for (i = 0; i < tcfg->cpus_count; i++) {
833  /* let's start accumulating value so we can compute the counters */
834  SCLogDebug("%d: Adding pkts %lu bytes %lu", i,
835  BPF_PERCPU(values_array, i).packets,
836  BPF_PERCPU(values_array, i).bytes);
837  pkts_cnt += BPF_PERCPU(values_array, i).packets;
838  bytes_cnt += BPF_PERCPU(values_array, i).bytes;
839  }
840  /* Get the corresponding Flow in the Flow table to compare and update
841  * its counters and lastseen if needed */
842  FlowKey flow_key;
843  if (tcfg->mode == AFP_MODE_XDP_BYPASS) {
844  flow_key.sp = ntohs(next_key.port16[0]);
845  flow_key.dp = ntohs(next_key.port16[1]);
846  flow_key.src.family = AF_INET6;
847  flow_key.src.addr_data32[0] = next_key.src[0];
848  flow_key.src.addr_data32[1] = next_key.src[1];
849  flow_key.src.addr_data32[2] = next_key.src[2];
850  flow_key.src.addr_data32[3] = next_key.src[3];
851  flow_key.dst.family = AF_INET6;
852  flow_key.dst.addr_data32[0] = next_key.dst[0];
853  flow_key.dst.addr_data32[1] = next_key.dst[1];
854  flow_key.dst.addr_data32[2] = next_key.dst[2];
855  flow_key.dst.addr_data32[3] = next_key.dst[3];
856  } else {
857  flow_key.sp = next_key.port16[0];
858  flow_key.dp = next_key.port16[1];
859  flow_key.src.family = AF_INET6;
860  flow_key.src.addr_data32[0] = ntohl(next_key.src[0]);
861  flow_key.src.addr_data32[1] = ntohl(next_key.src[1]);
862  flow_key.src.addr_data32[2] = ntohl(next_key.src[2]);
863  flow_key.src.addr_data32[3] = ntohl(next_key.src[3]);
864  flow_key.dst.family = AF_INET6;
865  flow_key.dst.addr_data32[0] = ntohl(next_key.dst[0]);
866  flow_key.dst.addr_data32[1] = ntohl(next_key.dst[1]);
867  flow_key.dst.addr_data32[2] = ntohl(next_key.dst[2]);
868  flow_key.dst.addr_data32[3] = ntohl(next_key.dst[3]);
869  }
870  flow_key.vlan_id[0] = next_key.vlan0;
871  flow_key.vlan_id[1] = next_key.vlan1;
872  if (next_key.ip_proto == 1) {
873  flow_key.proto = IPPROTO_TCP;
874  } else {
875  flow_key.proto = IPPROTO_UDP;
876  }
877  flow_key.recursion_level = 0;
878  pkts_cnt = EBPFOpFlowForKey(&flowstats, dev, &next_key, sizeof(next_key), &flow_key,
879  ctime, pkts_cnt, bytes_cnt,
880  mapfd, tcfg->cpus_count);
881  if (pkts_cnt > 0) {
882  found = 1;
883  }
884 
885  if (TmThreadsCheckFlag(th_v, THV_KILL)) {
886  return 0;
887  }
888 
889  key = next_key;
890  }
891  if (pkts_cnt > 0) {
892  EBPFDeleteKey(mapfd, &key);
893  found = 1;
894  }
895  SC_ATOMIC_ADD(dev->bypassed, flowstats.packets);
896 
897  LiveDevAddBypassStats(dev, flowstats.count, AF_INET6);
898  SCLogInfo("IPv6 bypassed flow table size: %" PRIu64, hash_cnt);
899  return found;
900 }
901 
902 
903 int EBPFCheckBypassedFlowCreate(ThreadVars *th_v, struct timespec *curtime, void *data)
904 {
905  LiveDevice *ldev = NULL, *ndev;
906  struct ebpf_timeout_config *cfg = (struct ebpf_timeout_config *)data;
907  while(LiveDeviceForEach(&ldev, &ndev)) {
908  EBPFForEachFlowV4Table(th_v, ldev, "flow_table_v4",
909  curtime,
910  cfg, EBPFCreateFlowForKey);
911  EBPFForEachFlowV6Table(th_v, ldev, "flow_table_v6",
912  curtime,
913  cfg, EBPFCreateFlowForKey);
914  }
915 
916  return 0;
917 }
918 
919 void EBPFRegisterExtension(void)
920 {
921  g_livedev_storage_id = LiveDevStorageRegister("bpfmap", sizeof(void *), NULL, BpfMapsInfoFree);
922  g_flow_storage_id = FlowStorageRegister("bypassedlist", sizeof(void *), NULL, BypassedListFree);
923 }
924 
925 
926 #ifdef HAVE_PACKET_XDP
927 
928 static uint32_t g_redirect_iface_cpu_counter = 0;
929 
930 static int EBPFAddCPUToMap(const char *iface, uint32_t i)
931 {
932  int cpumap = EBPFGetMapFDByName(iface, "cpu_map");
933  uint32_t queue_size = 4096;
934  int ret;
935 
936  if (cpumap < 0) {
937  SCLogError(SC_ERR_AFP_CREATE, "Can't find cpu_map");
938  return -1;
939  }
940  ret = bpf_map_update_elem(cpumap, &i, &queue_size, 0);
941  if (ret) {
942  SCLogError(SC_ERR_AFP_CREATE, "Create CPU entry failed (err:%d)", ret);
943  return -1;
944  }
945  int cpus_available = EBPFGetMapFDByName(iface, "cpus_available");
946  if (cpus_available < 0) {
947  SCLogError(SC_ERR_AFP_CREATE, "Can't find cpus_available map");
948  return -1;
949  }
950 
951  ret = bpf_map_update_elem(cpus_available, &g_redirect_iface_cpu_counter, &i, 0);
952  if (ret) {
953  SCLogError(SC_ERR_AFP_CREATE, "Create CPU entry failed (err:%d)", ret);
954  return -1;
955  }
956  return 0;
957 }
958 
959 static void EBPFRedirectMapAddCPU(int i, void *data)
960 {
961  if (EBPFAddCPUToMap(data, i) < 0) {
963  "Unable to add CPU %d to set", i);
964  } else {
965  g_redirect_iface_cpu_counter++;
966  }
967 }
968 
969 void EBPFBuildCPUSet(ConfNode *node, char *iface)
970 {
971  uint32_t key0 = 0;
972  int mapfd = EBPFGetMapFDByName(iface, "cpus_count");
973  if (mapfd < 0) {
975  "Unable to find 'cpus_count' map");
976  return;
977  }
978  g_redirect_iface_cpu_counter = 0;
979  if (node == NULL) {
980  bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
981  BPF_ANY);
982  return;
983  }
984  BuildCpusetWithCallback("xdp-cpu-redirect", node,
985  EBPFRedirectMapAddCPU,
986  iface);
987  bpf_map_update_elem(mapfd, &key0, &g_redirect_iface_cpu_counter,
988  BPF_ANY);
989 }
990 
/**
 * Setup peer interface in XDP system
 *
 * This function sets up the peer interface in the XDP maps used by the
 * bypass filter. The first map tx_peer has type device map and is
 * used to store the peer. The second map tx_peer_int is used by the
 * code to check if we have a peer defined for this interface.
 *
 * As the maps are per device we just need maps with one single element.
 * In both cases, we use the key 0 to enter the element so XDP kernel
 * code is using the same key.
 */
1003 int EBPFSetPeerIface(const char *iface, const char *out_iface)
1004 {
1005  int mapfd = EBPFGetMapFDByName(iface, "tx_peer");
1006  if (mapfd < 0) {
1008  "Unable to find 'tx_peer' map");
1009  return -1;
1010  }
1011  int intmapfd = EBPFGetMapFDByName(iface, "tx_peer_int");
1012  if (intmapfd < 0) {
1014  "Unable to find 'tx_peer_int' map");
1015  return -1;
1016  }
1017 
1018  int key0 = 0;
1019  unsigned int peer_index = if_nametoindex(out_iface);
1020  if (peer_index == 0) {
1021  SCLogError(SC_ERR_INVALID_VALUE, "No iface '%s'", out_iface);
1022  return -1;
1023  }
1024  int ret = bpf_map_update_elem(mapfd, &key0, &peer_index, BPF_ANY);
1025  if (ret) {
1026  SCLogError(SC_ERR_AFP_CREATE, "Create peer entry failed (err:%d)", ret);
1027  return -1;
1028  }
1029  ret = bpf_map_update_elem(intmapfd, &key0, &peer_index, BPF_ANY);
1030  if (ret) {
1031  SCLogError(SC_ERR_AFP_CREATE, "Create peer entry failed (err:%d)", ret);
1032  return -1;
1033  }
1034  return 0;
1035 }
1036 
1037 /**
1038  * Bypass the flow on all ifaces it is seen on. This is used
1039  * in IPS mode.
1040  */
1041 
1042 int EBPFUpdateFlow(Flow *f, Packet *p, void *data)
1043 {
1044  BypassedIfaceList *ifl = (BypassedIfaceList *)FlowGetStorageById(f, g_flow_storage_id);
1045  if (ifl == NULL) {
1046  ifl = SCCalloc(1, sizeof(*ifl));
1047  if (ifl == NULL) {
1048  return 0;
1049  }
1050  ifl->dev = p->livedev;
1051  FlowSetStorageById(f, g_flow_storage_id, ifl);
1052  return 1;
1053  }
1054  /* Look for packet iface in the list */
1055  BypassedIfaceList *ldev = ifl;
1056  while (ldev) {
1057  if (p->livedev == ldev->dev) {
1058  return 1;
1059  }
1060  ldev = ldev->next;
1061  }
1062  /* Call bypass function if ever not in the list */
1063  p->BypassPacketsFlow(p);
1064 
1065  /* Add iface to the list */
1066  BypassedIfaceList *nifl = SCCalloc(1, sizeof(*nifl));
1067  if (nifl == NULL) {
1068  return 0;
1069  }
1070  nifl->dev = p->livedev;
1071  nifl->next = ifl;
1072  FlowSetStorageById(f, g_flow_storage_id, nifl);
1073  return 1;
1074 }
1075 
1076 #endif /* HAVE_PACKET_XDP */
1077 
1078 #endif
uint16_t flags
#define SCLogDebug(...)
Definition: util-debug.h:335
uint64_t tosrcbytecnt
Definition: flow.h:492
int(* BypassPacketsFlow)(struct Packet_ *)
Definition: decode.h:487
struct HtpBodyChunk_ * next
LiveDevice * LiveDeviceForEach(LiveDevice **ldev, LiveDevice **ndev)
Definition: util-device.c:408
#define FLOWLOCK_UNLOCK(fb)
Definition: flow.h:243
int LiveDevSetStorageById(LiveDevice *d, int id, void *ptr)
Store a pointer in a given LiveDevice storage.
uint16_t vlan_id[2]
Definition: flow.h:285
uint64_t count
Definition: flow-bypass.h:28
#define SC_ATOMIC_ADD(name, val)
add a value to our atomic variable
Definition: util-atomic.h:107
Address src
Definition: flow.h:281
uint32_t FlowKeyGetHash(FlowKey *fk)
Definition: flow-hash.c:223
void BuildCpusetWithCallback(const char *name, ConfNode *node, void(*Callback)(int i, void *data), void *data)
Definition: util-affinity.c:98
uint64_t todstpktcnt
Definition: flow.h:493
uint64_t todstbytecnt
Definition: flow.h:494
char * dev
Definition: util-device.h:41
#define SCCalloc(nm, a)
Definition: util-mem.h:253
Port sp
Definition: flow.h:282
char family
Definition: decode.h:111
#define SCLogError(err_code,...)
Macro used to log ERROR messages.
Definition: util-debug.h:294
uint8_t recursion_level
Definition: flow.h:284
void * bypass_data
Definition: flow.h:490
int LiveDevUseBypass(LiveDevice *dev)
Definition: util-device.c:462
int FlowSetStorageById(Flow *f, int id, void *ptr)
Definition: flow-storage.c:44
LiveDevice * LiveGetDevice(const char *name)
Get a pointer to the device at idx.
Definition: util-device.c:236
struct LiveDevice_ * livedev
Definition: flow.h:350
struct timeval lastts
Definition: flow.h:358
void LiveDevAddBypassStats(LiveDevice *dev, uint64_t cnt, int family)
Definition: util-device.c:503
#define THV_KILL
Definition: threadvars.h:39
#define SCLogWarning(err_code,...)
Macro used to log WARNING messages.
Definition: util-debug.h:281
Definition: conf.h:32
void * LiveDevGetStorageById(LiveDevice *d, int id)
Get a value from a given LiveDevice storage.
#define SCLogInfo(...)
Macro used to log INFORMATIONAL messages.
Definition: util-debug.h:254
int FlowStorageRegister(const char *name, const unsigned int size, void *(*Alloc)(unsigned int), void(*Free)(void *))
Definition: flow-storage.c:65
#define SCFree(a)
Definition: util-mem.h:322
PoolThreadReserved res
#define FLOW_IS_IPV6(f)
Definition: flow.h:136
Address dst
Definition: flow.h:281
void(* BypassFree)(void *data)
Definition: flow.h:489
Definition: flow.h:279
uint64_t tosrcpktcnt
Definition: flow.h:491
void FlowUpdateState(Flow *f, enum FlowState s)
Definition: flow.c:1104
int TmThreadsCheckFlag(ThreadVars *tv, uint16_t flag)
Check if a thread flag is set.
Definition: tm-threads.c:90
int LiveDevStorageRegister(const char *name, const unsigned int size, void *(*Alloc)(unsigned int), void(*Free)(void *))
Register a LiveDevice storage.
void * FlowGetStorageById(Flow *f, int id)
Definition: flow-storage.c:39
bool(* BypassUpdate)(Flow *f, void *data, time_t tsec)
Definition: flow.h:488
#define SCStrdup(a)
Definition: util-mem.h:268
int GetFlowBypassInfoID(void)
Definition: flow-util.c:209
struct LiveDevice_ * livedev
Definition: decode.h:553
Per thread variable structure.
Definition: threadvars.h:57
uint8_t proto
Definition: flow.h:283
struct SCLogConfig_ SCLogConfig
Holds the config state used by the logging api.
Flow * FlowGetFromFlowKey(FlowKey *key, struct timespec *ttime, const uint32_t hash)
Get or create a Flow using a FlowKey.
Definition: flow-hash.c:746
Flow data structure.
Definition: flow.h:325
Port dp
Definition: flow.h:282
uint64_t packets
Definition: flow-bypass.h:29