suricata
source-af-packet.c
Go to the documentation of this file.
1 /* Copyright (C) 2011-2018 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \defgroup afppacket AF_PACKET running mode
20  *
21  * @{
22  */
23 
24 /**
25  * \file
26  *
27  * \author Eric Leblond <eric@regit.org>
28  *
29  * AF_PACKET socket acquisition support
30  *
31  */
32 
33 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34 #define SC_PCAP_DONT_INCLUDE_PCAP_H 1
35 #include "suricata-common.h"
36 #include "config.h"
37 #include "suricata.h"
38 #include "decode.h"
39 #include "packet-queue.h"
40 #include "threads.h"
41 #include "threadvars.h"
42 #include "tm-queuehandlers.h"
43 #include "tm-modules.h"
44 #include "tm-threads.h"
45 #include "tm-threads-common.h"
46 #include "conf.h"
47 #include "util-cpu.h"
48 #include "util-debug.h"
49 #include "util-device.h"
50 #include "util-ebpf.h"
51 #include "util-error.h"
52 #include "util-privs.h"
53 #include "util-optimize.h"
54 #include "util-checksum.h"
55 #include "util-ioctl.h"
56 #include "util-host-info.h"
57 #include "tmqh-packetpool.h"
58 #include "source-af-packet.h"
59 #include "runmodes.h"
60 
61 #ifdef HAVE_AF_PACKET
62 
63 #if HAVE_SYS_IOCTL_H
64 #include <sys/ioctl.h>
65 #endif
66 
67 #ifdef HAVE_PACKET_EBPF
68 #include "util-ebpf.h"
69 #include <bpf/libbpf.h>
70 #include <bpf/bpf.h>
71 #endif
72 
/* Local declaration of the BPF program descriptor.
 * NOTE(review): presumably needed because PCAP_DONT_INCLUDE_PCAP_BPF_H is
 * defined at the top of this file before the pcap headers are included --
 * confirm against the pcap headers in use. */
struct bpf_program {
    unsigned int bf_len;        /* presumably the number of entries in bf_insns -- TODO confirm */
    struct bpf_insn *bf_insns;  /* pointer to the filter instructions */
};
77 
78 #ifdef HAVE_PCAP_H
79 #include <pcap.h>
80 #endif
81 
82 #ifdef HAVE_PCAP_PCAP_H
83 #include <pcap/pcap.h>
84 #endif
85 
86 #include "util-bpf.h"
87 
88 #if HAVE_LINUX_IF_ETHER_H
89 #include <linux/if_ether.h>
90 #endif
91 
92 #if HAVE_LINUX_IF_PACKET_H
93 #include <linux/if_packet.h>
94 #endif
95 
96 #if HAVE_LINUX_IF_ARP_H
97 #include <linux/if_arp.h>
98 #endif
99 
100 #if HAVE_LINUX_FILTER_H
101 #include <linux/filter.h>
102 #endif
103 
104 #if HAVE_SYS_MMAN_H
105 #include <sys/mman.h>
106 #endif
107 
108 #ifdef HAVE_HW_TIMESTAMPING
109 #include <linux/net_tstamp.h>
110 #endif
111 
112 #endif /* HAVE_AF_PACKET */
113 
114 extern int max_pending_packets;
115 
116 #ifndef HAVE_AF_PACKET
117 
118 TmEcode NoAFPSupportExit(ThreadVars *, const void *, void **);
119 
120 void TmModuleReceiveAFPRegister (void)
121 {
122  tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
123  tmm_modules[TMM_RECEIVEAFP].ThreadInit = NoAFPSupportExit;
130 }
131 
132 /**
133  * \brief Registration Function for DecodeAFP.
134  */
135 void TmModuleDecodeAFPRegister (void)
136 {
137  tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
138  tmm_modules[TMM_DECODEAFP].ThreadInit = NoAFPSupportExit;
145 }
146 
147 /**
148  * \brief this function prints an error message and exits.
149  */
150 TmEcode NoAFPSupportExit(ThreadVars *tv, const void *initdata, void **data)
151 {
152  SCLogError(SC_ERR_NO_AF_PACKET,"Error creating thread %s: you do not have "
153  "support for AF_PACKET enabled, on Linux host please recompile "
154  "with --enable-af-packet", tv->name);
155  exit(EXIT_FAILURE);
156 }
157 
158 #else /* We have AF_PACKET support */
159 
160 #define AFP_IFACE_NAME_LENGTH 48
161 
162 #define AFP_STATE_DOWN 0
163 #define AFP_STATE_UP 1
164 
165 #define AFP_RECONNECT_TIMEOUT 500000
166 #define AFP_DOWN_COUNTER_INTERVAL 40
167 
168 #define POLL_TIMEOUT 100
169 
170 #ifndef TP_STATUS_USER_BUSY
171 /* for new use latest bit available in tp_status */
172 #define TP_STATUS_USER_BUSY (1 << 31)
173 #endif
174 
175 #ifndef TP_STATUS_VLAN_VALID
176 #define TP_STATUS_VLAN_VALID (1 << 4)
177 #endif
178 
179 enum {
182  /** Error during treatment by other functions of Suricata */
185 };
186 
187 enum {
190 };
191 
/* Overlay used to look at one ring frame through the header layout that
 * matches the TPACKET version in use, or as an untyped address. */
union thdr {
    struct tpacket2_hdr *h2;    /* frame seen as a TPACKET_V2 header */
#ifdef HAVE_TPACKET_V3
    struct tpacket3_hdr *h3;    /* frame seen as a TPACKET_V3 header */
#endif
    void *raw;                  /* untyped address of the frame */
};
199 
200 static int AFPBypassCallback(Packet *p);
201 static int AFPXDPBypassCallback(Packet *p);
202 
203 #define MAX_MAPS 32
204 /**
205  * \brief Structure to hold thread specific variables.
206  */
207 typedef struct AFPThreadVars_
208 {
209  union AFPRing {
210  char *v2;
211  struct iovec *v3;
212  } ring;
213 
214  /* counters */
215  uint64_t pkts;
216 
220  /* data link type for the thread */
221  uint32_t datalink;
222 
223 #ifdef HAVE_PACKET_EBPF
224  /* File descriptor of the IPv4 flow bypass table maps */
225  int v4_map_fd;
226  /* File descriptor of the IPv6 flow bypass table maps */
227  int v6_map_fd;
228 #endif
229 
230  unsigned int frame_offset;
231 
233 
234  /* references to packet and drop counters */
237  uint16_t capture_errors;
238 
239  /* handle state */
240  uint8_t afp_state;
241  uint8_t copy_mode;
242  unsigned int flags;
243 
244  /* IPS peer */
246 
247  /* no mmap mode */
248  uint8_t *data; /** Per function and thread data */
249  int datalen; /** Length of per function and thread data */
250  int cooked;
251 
252  /*
253  * Init related members
254  */
255 
256  /* thread specific socket */
257  int socket;
258 
262  /* socket buffer size */
264  /* Filter */
265  const char *bpf_filter;
268 
269  int promisc;
270 
272 
275 
276  int threads;
277 
279  struct tpacket_req v2;
280 #ifdef HAVE_TPACKET_V3
281  struct tpacket_req3 v3;
282 #endif
283  } req;
284 
286  /* IPS output iface */
287  char out_iface[AFP_IFACE_NAME_LENGTH];
288 
289  /* mmap'ed ring buffer */
290  unsigned int ring_buflen;
291  uint8_t *ring_buf;
292 
293  uint8_t xdp_mode;
294 
295 } AFPThreadVars;
296 
298 TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
299 void ReceiveAFPThreadExitStats(ThreadVars *, void *);
301 TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
302 
303 TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
304 TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
306 
308 static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
309 static int AFPGetDevFlags(int fd, const char *ifname);
310 static int AFPDerefSocket(AFPPeer* peer);
311 static int AFPRefSocket(AFPPeer* peer);
312 
313 
314 static unsigned int nr_cpus;
315 
316 /**
317  * \brief Registration Function for ReceiveAFP.
318  * \todo Unit tests are needed for this module.
319  */
321 {
322  tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
332 
334 }
335 
336 
337 /**
338  * \defgroup afppeers AFP peers list
339  *
340  * AF_PACKET has an IPS mode where interfaces are peered: packets from
341  * one interface are sent to the peered interface and the other way. The ::AFPPeer
342  * list is maintaining the list of peers. Each ::AFPPeer is storing the needed
343  * information to be able to send packets on the interface.
344  * An element of the list must not be destroyed during the run of Suricata as it
345  * is used by ::Packet and other threads.
346  *
347  * @{
348  */
349 
350 typedef struct AFPPeersList_ {
351  TAILQ_HEAD(, AFPPeer_) peers; /**< Head of list of fragments. */
352  int cnt;
353  int peered;
354  int turn; /**< Next value for initialisation order */
355  SC_ATOMIC_DECLARE(int, reached); /**< Counter used to synchronize start */
356 } AFPPeersList;
357 
358 /**
359  * \brief Update the peer.
360  *
361  * Update the AFPPeer of a thread ie set new state, socket number
362  * or iface index.
363  *
364  */
365 static void AFPPeerUpdate(AFPThreadVars *ptv)
366 {
367  if (ptv->mpeer == NULL) {
368  return;
369  }
370  (void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
371  (void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
372  (void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
373 }
374 
375 /**
376  * \brief Clean and free resources used by an ::AFPPeer
377  */
378 static void AFPPeerClean(AFPPeer *peer)
379 {
380  if (peer->flags & AFP_SOCK_PROTECT)
382  SC_ATOMIC_DESTROY(peer->socket);
383  SC_ATOMIC_DESTROY(peer->if_idx);
384  SC_ATOMIC_DESTROY(peer->state);
385  SCFree(peer);
386 }
387 
389 
390 
391 /**
392  * \brief Init the global list of ::AFPPeer
393  */
395 {
396  SCEnter();
397  TAILQ_INIT(&peerslist.peers);
398  peerslist.peered = 0;
399  peerslist.cnt = 0;
400  peerslist.turn = 0;
401  SC_ATOMIC_INIT(peerslist.reached);
402  (void) SC_ATOMIC_SET(peerslist.reached, 0);
404 }
405 
406 /**
407  * \brief Check that all ::AFPPeer got a peer
408  *
409  * \retval TM_ECODE_FAILED if some threads are not peered or TM_ECODE_OK else.
410  */
412 {
413 #define AFP_PEERS_MAX_TRY 4
414 #define AFP_PEERS_WAIT 20000
415  int try = 0;
416  SCEnter();
417  while (try < AFP_PEERS_MAX_TRY) {
418  if (peerslist.cnt != peerslist.peered) {
419  usleep(AFP_PEERS_WAIT);
420  } else {
422  }
423  try++;
424  }
425  SCLogError(SC_ERR_AFP_CREATE, "Threads number not equals");
427 }
428 
429 /**
430  * \brief Declare a new AFP thread to AFP peers list.
431  */
432 static TmEcode AFPPeersListAdd(AFPThreadVars *ptv)
433 {
434  SCEnter();
435  AFPPeer *peer = SCMalloc(sizeof(AFPPeer));
436  AFPPeer *pitem;
437  int mtu, out_mtu;
438 
439  if (unlikely(peer == NULL)) {
441  }
442  memset(peer, 0, sizeof(AFPPeer));
443  SC_ATOMIC_INIT(peer->socket);
444  SC_ATOMIC_INIT(peer->sock_usage);
445  SC_ATOMIC_INIT(peer->if_idx);
446  SC_ATOMIC_INIT(peer->state);
447  peer->flags = ptv->flags;
448  peer->turn = peerslist.turn++;
449 
450  if (peer->flags & AFP_SOCK_PROTECT) {
451  SCMutexInit(&peer->sock_protect, NULL);
452  }
453 
454  (void)SC_ATOMIC_SET(peer->sock_usage, 0);
455  (void)SC_ATOMIC_SET(peer->state, AFP_STATE_DOWN);
457  ptv->mpeer = peer;
458  /* add element to iface list */
459  TAILQ_INSERT_TAIL(&peerslist.peers, peer, next);
460 
461  if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
462  peerslist.cnt++;
463 
464  /* Iter to find a peer */
465  TAILQ_FOREACH(pitem, &peerslist.peers, next) {
466  if (pitem->peer)
467  continue;
468  if (strcmp(pitem->iface, ptv->out_iface))
469  continue;
470  peer->peer = pitem;
471  pitem->peer = peer;
472  mtu = GetIfaceMTU(ptv->iface);
473  out_mtu = GetIfaceMTU(ptv->out_iface);
474  if (mtu != out_mtu) {
476  "MTU on %s (%d) and %s (%d) are not equal, "
477  "transmission of packets bigger than %d will fail.",
478  ptv->iface, mtu,
479  ptv->out_iface, out_mtu,
480  (out_mtu > mtu) ? mtu : out_mtu);
481  }
482  peerslist.peered += 2;
483  break;
484  }
485  }
486 
487  AFPPeerUpdate(ptv);
488 
490 }
491 
492 static int AFPPeersListWaitTurn(AFPPeer *peer)
493 {
494  /* If turn is zero, we already have started threads once */
495  if (peerslist.turn == 0)
496  return 0;
497 
498  if (peer->turn == SC_ATOMIC_GET(peerslist.reached))
499  return 0;
500  return 1;
501 }
502 
503 static void AFPPeersListReachedInc(void)
504 {
505  if (peerslist.turn == 0)
506  return;
507 
508  if (SC_ATOMIC_ADD(peerslist.reached, 1) == peerslist.turn) {
509  SCLogInfo("All AFP capture threads are running.");
510  (void)SC_ATOMIC_SET(peerslist.reached, 0);
511  /* Set turn to 0 to skip syncrhonization when ReceiveAFPLoop is
512  * restarted.
513  */
514  peerslist.turn = 0;
515  }
516 }
517 
518 static int AFPPeersListStarted(void)
519 {
520  return !peerslist.turn;
521 }
522 
523 /**
524  * \brief Clean the global peers list.
525  */
527 {
528  AFPPeer *pitem;
529 
530  while ((pitem = TAILQ_FIRST(&peerslist.peers))) {
531  TAILQ_REMOVE(&peerslist.peers, pitem, next);
532  AFPPeerClean(pitem);
533  }
534 }
535 
536 /**
537  * @}
538  */
539 
540 /**
541  * \brief Registration Function for DecodeAFP.
542  * \todo Unit tests are needed for this module.
543  */
545 {
546  tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
554 }
555 
556 
557 static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose);
558 
559 static inline void AFPDumpCounters(AFPThreadVars *ptv)
560 {
561 #ifdef PACKET_STATISTICS
562  struct tpacket_stats kstats;
563  socklen_t len = sizeof (struct tpacket_stats);
564  if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
565  &kstats, &len) > -1) {
566  SCLogDebug("(%s) Kernel: Packets %" PRIu32 ", dropped %" PRIu32 "",
567  ptv->tv->name,
568  kstats.tp_packets, kstats.tp_drops);
569  StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, kstats.tp_packets);
570  StatsAddUI64(ptv->tv, ptv->capture_kernel_drops, kstats.tp_drops);
571  (void) SC_ATOMIC_ADD(ptv->livedev->drop, (uint64_t) kstats.tp_drops);
572  (void) SC_ATOMIC_ADD(ptv->livedev->pkts, (uint64_t) kstats.tp_packets);
573  }
574 #endif
575 }
576 
577 /**
578  * \brief AF packet read function.
579  *
580  * This function fills
581  * From here the packets are picked up by the DecodeAFP thread.
582  *
583  * \param user pointer to AFPThreadVars
584  * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
585  */
586 static int AFPRead(AFPThreadVars *ptv)
587 {
588  Packet *p = NULL;
589  /* XXX should try to use read that get directly to packet */
590  int offset = 0;
591  int caplen;
592  struct sockaddr_ll from;
593  struct iovec iov;
594  struct msghdr msg;
595  struct cmsghdr *cmsg;
596  union {
597  struct cmsghdr cmsg;
598  char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
599  } cmsg_buf;
600  unsigned char aux_checksum = 0;
601 
602  msg.msg_name = &from;
603  msg.msg_namelen = sizeof(from);
604  msg.msg_iov = &iov;
605  msg.msg_iovlen = 1;
606  msg.msg_control = &cmsg_buf;
607  msg.msg_controllen = sizeof(cmsg_buf);
608  msg.msg_flags = 0;
609 
610  if (ptv->cooked)
611  offset = SLL_HEADER_LEN;
612  else
613  offset = 0;
614  iov.iov_len = ptv->datalen - offset;
615  iov.iov_base = ptv->data + offset;
616 
617  caplen = recvmsg(ptv->socket, &msg, MSG_TRUNC);
618 
619  if (caplen < 0) {
620  SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
621  errno);
623  }
624 
626  if (p == NULL) {
628  }
630  if (ptv->flags & AFP_BYPASS) {
631  p->BypassPacketsFlow = AFPBypassCallback;
632 #ifdef HAVE_PACKET_EBPF
633  p->afp_v.v4_map_fd = ptv->v4_map_fd;
634  p->afp_v.v6_map_fd = ptv->v6_map_fd;
635 #endif
636  }
637  if (ptv->flags & AFP_XDPBYPASS) {
638  p->BypassPacketsFlow = AFPXDPBypassCallback;
639 #ifdef HAVE_PACKET_EBPF
640  p->afp_v.v4_map_fd = ptv->v4_map_fd;
641  p->afp_v.v6_map_fd = ptv->v6_map_fd;
642 #endif
643  }
644 
645  /* get timestamp of packet via ioctl */
646  if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
647  SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
648  errno);
649  TmqhOutputPacketpool(ptv->tv, p);
651  }
652 
653  ptv->pkts++;
654  p->livedev = ptv->livedev;
655 
656  /* add forged header */
657  if (ptv->cooked) {
658  SllHdr * hdrp = (SllHdr *)ptv->data;
659  /* XXX this is minimalist, but this seems enough */
660  hdrp->sll_protocol = from.sll_protocol;
661  }
662 
663  p->datalink = ptv->datalink;
664  SET_PKT_LEN(p, caplen + offset);
665  if (PacketCopyData(p, ptv->data, GET_PKT_LEN(p)) == -1) {
666  TmqhOutputPacketpool(ptv->tv, p);
668  }
669  SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
670  GET_PKT_LEN(p), p, GET_PKT_DATA(p));
671 
672  /* We only check for checksum disable */
675  } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
676  if (ptv->livedev->ignore_checksum) {
678  } else if (ChecksumAutoModeCheck(ptv->pkts,
679  SC_ATOMIC_GET(ptv->livedev->pkts),
680  SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
681  ptv->livedev->ignore_checksum = 1;
683  }
684  } else {
685  aux_checksum = 1;
686  }
687 
688  /* List is NULL if we don't have activated auxiliary data */
689  for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
690  struct tpacket_auxdata *aux;
691 
692  if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
693  cmsg->cmsg_level != SOL_PACKET ||
694  cmsg->cmsg_type != PACKET_AUXDATA)
695  continue;
696 
697  aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
698 
699  if (aux_checksum && (aux->tp_status & TP_STATUS_CSUMNOTREADY)) {
701  }
702  break;
703  }
704 
705  if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
706  TmqhOutputPacketpool(ptv->tv, p);
708  }
710 }
711 
712 /**
713  * \brief AF packet write function.
714  *
715  * This function has to be called before the memory
716  * related to Packet in ring buffer is released.
717  *
718  * \param pointer to Packet
719  * \param version of capture: TPACKET_V2 or TPACKET_V3
720  * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
721  *
722  */
723 static TmEcode AFPWritePacket(Packet *p, int version)
724 {
725  struct sockaddr_ll socket_address;
726  int socket;
727  uint8_t *pstart;
728  size_t plen;
729  union thdr h;
730  uint16_t vlan_tci = 0;
731 
732  if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
734  return TM_ECODE_OK;
735  }
736  }
737 
738  if (SC_ATOMIC_GET(p->afp_v.peer->state) == AFP_STATE_DOWN)
739  return TM_ECODE_OK;
740 
741  if (p->ethh == NULL) {
742  SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
743  return TM_ECODE_FAILED;
744  }
745  /* Index of the network device */
746  socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
747  /* Address length*/
748  socket_address.sll_halen = ETH_ALEN;
749  /* Destination MAC */
750  memcpy(socket_address.sll_addr, p->ethh, 6);
751 
752  /* Send packet, locking the socket if necessary */
753  if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
754  SCMutexLock(&p->afp_v.peer->sock_protect);
755  socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
756 
757  h.raw = p->afp_v.relptr;
758 
759  if (version == TPACKET_V2) {
760  /* Copy VLAN header from ring memory. For post june 2011 kernel we test
761  * the flag. It is not defined for older kernel so we go best effort
762  * and test for non zero value of the TCI header. */
763  if (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci) {
764  vlan_tci = h.h2->tp_vlan_tci;
765  }
766  } else {
767 #ifdef HAVE_TPACKET_V3
768  if (h.h3->tp_status & TP_STATUS_VLAN_VALID || h.h3->hv1.tp_vlan_tci) {
769  vlan_tci = h.h3->hv1.tp_vlan_tci;
770  }
771 #else
772  /* Should not get here */
773  BUG_ON(1);
774 #endif
775  }
776 
777  if (vlan_tci != 0) {
778  pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
779  plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
780  /* move ethernet addresses */
781  memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
782  /* write vlan info */
783  *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
784  *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(vlan_tci);
785  } else {
786  pstart = GET_PKT_DATA(p);
787  plen = GET_PKT_LEN(p);
788  }
789 
790  if (sendto(socket, pstart, plen, 0,
791  (struct sockaddr*) &socket_address,
792  sizeof(struct sockaddr_ll)) < 0) {
793  SCLogWarning(SC_ERR_SOCKET, "Sending packet failed on socket %d: %s",
794  socket,
795  strerror(errno));
796  if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
797  SCMutexUnlock(&p->afp_v.peer->sock_protect);
798  return TM_ECODE_FAILED;
799  }
800  if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
801  SCMutexUnlock(&p->afp_v.peer->sock_protect);
802 
803  return TM_ECODE_OK;
804 }
805 
806 static void AFPReleaseDataFromRing(Packet *p)
807 {
808  /* Need to be in copy mode and need to detect early release
809  where Ethernet header could not be set (and pseudo packet) */
810  if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
811  AFPWritePacket(p, TPACKET_V2);
812  }
813 
814  if (AFPDerefSocket(p->afp_v.mpeer) == 0)
815  goto cleanup;
816 
817  if (p->afp_v.relptr) {
818  union thdr h;
819  h.raw = p->afp_v.relptr;
820  h.h2->tp_status = TP_STATUS_KERNEL;
821  }
822 
823 cleanup:
824  AFPV_CLEANUP(&p->afp_v);
825 }
826 
827 #ifdef HAVE_TPACKET_V3
828 static void AFPReleasePacketV3(Packet *p)
829 {
830  /* Need to be in copy mode and need to detect early release
831  where Ethernet header could not be set (and pseudo packet) */
832  if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
833  AFPWritePacket(p, TPACKET_V3);
834  }
836 }
837 #endif
838 
839 static void AFPReleasePacket(Packet *p)
840 {
841  AFPReleaseDataFromRing(p);
843 }
844 
845 /**
846  * \brief AF packet read function for ring
847  *
848  * This function fills
849  * From here the packets are picked up by the DecodeAFP thread.
850  *
851  * \param user pointer to AFPThreadVars
852  * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
853  */
854 static int AFPReadFromRing(AFPThreadVars *ptv)
855 {
856  Packet *p = NULL;
857  union thdr h;
858  uint8_t emergency_flush = 0;
859  int read_pkts = 0;
860  int loop_start = -1;
861 
862 
863  /* Loop till we have packets available */
864  while (1) {
865  if (unlikely(suricata_ctl_flags != 0)) {
866  break;
867  }
868 
869  /* Read packet from ring */
870  h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
871  if (unlikely(h.raw == NULL)) {
872  /* Impossible we reach this point in normal condition, so trigger
873  * a failure in reading */
875  }
876 
877  if ((! h.h2->tp_status) || (h.h2->tp_status & TP_STATUS_USER_BUSY)) {
878  if (read_pkts == 0) {
879  if (loop_start == -1) {
880  loop_start = ptv->frame_offset;
881  } else if (unlikely(loop_start == (int)ptv->frame_offset)) {
883  }
884  if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
885  ptv->frame_offset = 0;
886  }
887  continue;
888  }
889  if ((emergency_flush) && (ptv->flags & AFP_EMERGENCY_MODE)) {
891  } else {
893  }
894  }
895 
896  read_pkts++;
897  loop_start = -1;
898 
899  /* Our packet is still used by suricata, we exit read loop to
900  * gain some time */
901  if (h.h2->tp_status & TP_STATUS_USER_BUSY) {
903  }
904 
905  if ((ptv->flags & AFP_EMERGENCY_MODE) && (emergency_flush == 1)) {
906  h.h2->tp_status = TP_STATUS_KERNEL;
907  goto next_frame;
908  }
909 
911  if (p == NULL) {
913  }
915  if (ptv->flags & AFP_BYPASS) {
916  p->BypassPacketsFlow = AFPBypassCallback;
917 #ifdef HAVE_PACKET_EBPF
918  p->afp_v.v4_map_fd = ptv->v4_map_fd;
919  p->afp_v.v6_map_fd = ptv->v6_map_fd;
920 #endif
921  }
922  if (ptv->flags & AFP_XDPBYPASS) {
923  p->BypassPacketsFlow = AFPXDPBypassCallback;
924 #ifdef HAVE_PACKET_EBPF
925  p->afp_v.v4_map_fd = ptv->v4_map_fd;
926  p->afp_v.v6_map_fd = ptv->v6_map_fd;
927 #endif
928  }
929 
930  /* Suricata will treat packet so telling it is busy, this
931  * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
932  * function. */
933  h.h2->tp_status |= TP_STATUS_USER_BUSY;
934 
935  ptv->pkts++;
936  p->livedev = ptv->livedev;
937  p->datalink = ptv->datalink;
938 
939  if (h.h2->tp_len > h.h2->tp_snaplen) {
940  SCLogDebug("Packet length (%d) > snaplen (%d), truncating",
941  h.h2->tp_len, h.h2->tp_snaplen);
942  }
943 
944  /* get vlan id from header */
945  if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
946  (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
947  p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
948  p->vlan_idx = 1;
949  p->vlanh[0] = NULL;
950  }
951 
952  if (ptv->flags & AFP_ZERO_COPY) {
953  if (PacketSetData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
954  TmqhOutputPacketpool(ptv->tv, p);
956  } else {
957  p->afp_v.relptr = h.raw;
958  p->ReleasePacket = AFPReleasePacket;
959  p->afp_v.mpeer = ptv->mpeer;
960  AFPRefSocket(ptv->mpeer);
961 
962  p->afp_v.copy_mode = ptv->copy_mode;
963  if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
964  p->afp_v.peer = ptv->mpeer->peer;
965  } else {
966  p->afp_v.peer = NULL;
967  }
968  }
969  } else {
970  if (PacketCopyData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
971  /* As we can possibly fail to copy the data due to invalid data, let's
972  * skip this packet and switch to the next one.
973  */
974  h.h2->tp_status = TP_STATUS_KERNEL;
975  if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
976  ptv->frame_offset = 0;
977  }
978  TmqhOutputPacketpool(ptv->tv, p);
980  }
981  }
982 
983  /* Timestamp */
984  p->ts.tv_sec = h.h2->tp_sec;
985  p->ts.tv_usec = h.h2->tp_nsec/1000;
986  SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
987  GET_PKT_LEN(p), p, GET_PKT_DATA(p));
988 
989  /* We only check for checksum disable */
992  } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
993  if (ptv->livedev->ignore_checksum) {
995  } else if (ChecksumAutoModeCheck(ptv->pkts,
996  SC_ATOMIC_GET(ptv->livedev->pkts),
997  SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
998  ptv->livedev->ignore_checksum = 1;
1000  }
1001  } else {
1002  if (h.h2->tp_status & TP_STATUS_CSUMNOTREADY) {
1003  p->flags |= PKT_IGNORE_CHECKSUM;
1004  }
1005  }
1006  if (h.h2->tp_status & TP_STATUS_LOSING) {
1007  emergency_flush = 1;
1008  AFPDumpCounters(ptv);
1009  }
1010 
1011  /* release frame if not in zero copy mode */
1012  if (!(ptv->flags & AFP_ZERO_COPY)) {
1013  h.h2->tp_status = TP_STATUS_KERNEL;
1014  }
1015 
1016  if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
1017  h.h2->tp_status = TP_STATUS_KERNEL;
1018  if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
1019  ptv->frame_offset = 0;
1020  }
1021  TmqhOutputPacketpool(ptv->tv, p);
1023  }
1024 
1025 next_frame:
1026  if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
1027  ptv->frame_offset = 0;
1028  /* Get out of loop to be sure we will reach maintenance tasks */
1030  }
1031  }
1032 
1034 }
1035 
1036 #ifdef HAVE_TPACKET_V3
1037 static inline void AFPFlushBlock(struct tpacket_block_desc *pbd)
1038 {
1039  pbd->hdr.bh1.block_status = TP_STATUS_KERNEL;
1040 }
1041 
1042 static inline int AFPParsePacketV3(AFPThreadVars *ptv, struct tpacket_block_desc *pbd, struct tpacket3_hdr *ppd)
1043 {
1045  if (p == NULL) {
1047  }
1049  if (ptv->flags & AFP_BYPASS) {
1050  p->BypassPacketsFlow = AFPBypassCallback;
1051 #ifdef HAVE_PACKET_EBPF
1052  p->afp_v.v4_map_fd = ptv->v4_map_fd;
1053  p->afp_v.v6_map_fd = ptv->v6_map_fd;
1054 #endif
1055  } else if (ptv->flags & AFP_XDPBYPASS) {
1056  p->BypassPacketsFlow = AFPXDPBypassCallback;
1057 #ifdef HAVE_PACKET_EBPF
1058  p->afp_v.v4_map_fd = ptv->v4_map_fd;
1059  p->afp_v.v6_map_fd = ptv->v6_map_fd;
1060 #endif
1061  }
1062 
1063  ptv->pkts++;
1064  p->livedev = ptv->livedev;
1065  p->datalink = ptv->datalink;
1066 
1067  if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
1068  (ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
1069  p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
1070  p->vlan_idx = 1;
1071  p->vlanh[0] = NULL;
1072  }
1073 
1074  if (ptv->flags & AFP_ZERO_COPY) {
1075  if (PacketSetData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1076  TmqhOutputPacketpool(ptv->tv, p);
1078  }
1079  p->afp_v.relptr = ppd;
1080  p->ReleasePacket = AFPReleasePacketV3;
1081  p->afp_v.mpeer = ptv->mpeer;
1082  AFPRefSocket(ptv->mpeer);
1083 
1084  p->afp_v.copy_mode = ptv->copy_mode;
1085  if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
1086  p->afp_v.peer = ptv->mpeer->peer;
1087  } else {
1088  p->afp_v.peer = NULL;
1089  }
1090  } else {
1091  if (PacketCopyData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1092  TmqhOutputPacketpool(ptv->tv, p);
1094  }
1095  }
1096  /* Timestamp */
1097  p->ts.tv_sec = ppd->tp_sec;
1098  p->ts.tv_usec = ppd->tp_nsec/1000;
1099  SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
1100  GET_PKT_LEN(p), p, GET_PKT_DATA(p));
1101 
1102  /* We only check for checksum disable */
1104  p->flags |= PKT_IGNORE_CHECKSUM;
1105  } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
1106  if (ptv->livedev->ignore_checksum) {
1107  p->flags |= PKT_IGNORE_CHECKSUM;
1108  } else if (ChecksumAutoModeCheck(ptv->pkts,
1109  SC_ATOMIC_GET(ptv->livedev->pkts),
1110  SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
1111  ptv->livedev->ignore_checksum = 1;
1112  p->flags |= PKT_IGNORE_CHECKSUM;
1113  }
1114  } else {
1115  if (ppd->tp_status & TP_STATUS_CSUMNOTREADY) {
1116  p->flags |= PKT_IGNORE_CHECKSUM;
1117  }
1118  }
1119 
1120  if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
1121  TmqhOutputPacketpool(ptv->tv, p);
1123  }
1124 
1126 }
1127 
1128 static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
1129 {
1130  int num_pkts = pbd->hdr.bh1.num_pkts, i;
1131  uint8_t *ppd;
1132  int ret = 0;
1133 
1134  ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
1135  for (i = 0; i < num_pkts; ++i) {
1136  ret = AFPParsePacketV3(ptv, pbd,
1137  (struct tpacket3_hdr *)ppd);
1138  switch (ret) {
1139  case AFP_READ_OK:
1140  break;
1141  case AFP_SURI_FAILURE:
1142  /* Internal error but let's just continue and
1143  * treat the next packet */
1144  break;
1145  case AFP_READ_FAILURE:
1147  default:
1148  SCReturnInt(ret);
1149  }
1150  ppd = ppd + ((struct tpacket3_hdr *)ppd)->tp_next_offset;
1151  }
1152 
1154 }
1155 #endif /* HAVE_TPACKET_V3 */
1156 
1157 /**
1158  * \brief AF packet read function for ring
1159  *
1160  * This function fills
1161  * From here the packets are picked up by the DecodeAFP thread.
1162  *
1163  * \param user pointer to AFPThreadVars
1164  * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
1165  */
1166 static int AFPReadFromRingV3(AFPThreadVars *ptv)
1167 {
1168 #ifdef HAVE_TPACKET_V3
1169  struct tpacket_block_desc *pbd;
1170  int ret = 0;
1171 
1172  /* Loop till we have packets available */
1173  while (1) {
1174  if (unlikely(suricata_ctl_flags != 0)) {
1175  SCLogInfo("Exiting AFP V3 read loop");
1176  break;
1177  }
1178 
1179  pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
1180 
1181  /* block is not ready to be read */
1182  if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
1184  }
1185 
1186  ret = AFPWalkBlock(ptv, pbd);
1187  if (unlikely(ret != AFP_READ_OK)) {
1188  AFPFlushBlock(pbd);
1189  SCReturnInt(ret);
1190  }
1191 
1192  AFPFlushBlock(pbd);
1193  ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
1194  /* return to maintenance task after one loop on the ring */
1195  if (ptv->frame_offset == 0) {
1197  }
1198  }
1199 #endif
1201 }
1202 
1203 /**
1204  * \brief Reference socket
1205  *
1206  * \retval O in case of failure, 1 in case of success
1207  */
1208 static int AFPRefSocket(AFPPeer* peer)
1209 {
1210  if (unlikely(peer == NULL))
1211  return 0;
1212 
1213  (void)SC_ATOMIC_ADD(peer->sock_usage, 1);
1214  return 1;
1215 }
1216 
1217 
1218 /**
1219  * \brief Dereference socket
1220  *
1221  * \retval 1 if socket is still alive, 0 if not
1222  */
1223 static int AFPDerefSocket(AFPPeer* peer)
1224 {
1225  if (peer == NULL)
1226  return 1;
1227 
1228  if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 0) {
1229  if (SC_ATOMIC_GET(peer->state) == AFP_STATE_DOWN) {
1230  SCLogInfo("Cleaning socket connected to '%s'", peer->iface);
1231  close(SC_ATOMIC_GET(peer->socket));
1232  return 0;
1233  }
1234  }
1235  return 1;
1236 }
1237 
1238 static void AFPSwitchState(AFPThreadVars *ptv, int state)
1239 {
1240  ptv->afp_state = state;
1241  ptv->down_count = 0;
1242 
1243  AFPPeerUpdate(ptv);
1244 
1245  /* Do cleaning if switching to down state */
1246  if (state == AFP_STATE_DOWN) {
1247 #ifdef HAVE_TPACKET_V3
1248  if (ptv->flags & AFP_TPACKET_V3) {
1249  if (!ptv->ring.v3) {
1250  SCFree(ptv->ring.v3);
1251  ptv->ring.v3 = NULL;
1252  }
1253  } else {
1254 #endif
1255  if (ptv->ring.v2) {
1256  /* only used in reading phase, we can free it */
1257  SCFree(ptv->ring.v2);
1258  ptv->ring.v2 = NULL;
1259  }
1260 #ifdef HAVE_TPACKET_V3
1261  }
1262 #endif
1263  if (ptv->socket != -1) {
1264  /* we need to wait for all packets to return data */
1265  if (SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1) == 0) {
1266  SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
1267  munmap(ptv->ring_buf, ptv->ring_buflen);
1268  close(ptv->socket);
1269  ptv->socket = -1;
1270  }
1271  }
1272  }
1273  if (state == AFP_STATE_UP) {
1274  (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
1275  }
1276 }
1277 
1278 static int AFPReadAndDiscard(AFPThreadVars *ptv, struct timeval *synctv,
1279  uint64_t *discarded_pkts)
1280 {
1281  struct sockaddr_ll from;
1282  struct iovec iov;
1283  struct msghdr msg;
1284  struct timeval ts;
1285  union {
1286  struct cmsghdr cmsg;
1287  char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
1288  } cmsg_buf;
1289 
1290 
1291  if (unlikely(suricata_ctl_flags != 0)) {
1292  return 1;
1293  }
1294 
1295  msg.msg_name = &from;
1296  msg.msg_namelen = sizeof(from);
1297  msg.msg_iov = &iov;
1298  msg.msg_iovlen = 1;
1299  msg.msg_control = &cmsg_buf;
1300  msg.msg_controllen = sizeof(cmsg_buf);
1301  msg.msg_flags = 0;
1302 
1303  iov.iov_len = ptv->datalen;
1304  iov.iov_base = ptv->data;
1305 
1306  (void)recvmsg(ptv->socket, &msg, MSG_TRUNC);
1307 
1308  if (ioctl(ptv->socket, SIOCGSTAMP, &ts) == -1) {
1309  /* FIXME */
1310  return -1;
1311  }
1312 
1313  if ((ts.tv_sec > synctv->tv_sec) ||
1314  (ts.tv_sec >= synctv->tv_sec &&
1315  ts.tv_usec > synctv->tv_usec)) {
1316  return 1;
1317  }
1318  return 0;
1319 }
1320 
1321 static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
1322  uint64_t *discarded_pkts)
1323 {
1324  union thdr h;
1325 
1326  if (unlikely(suricata_ctl_flags != 0)) {
1327  return 1;
1328  }
1329 
1330 #ifdef HAVE_TPACKET_V3
1331  if (ptv->flags & AFP_TPACKET_V3) {
1332  int ret = 0;
1333  struct tpacket_block_desc *pbd;
1334  pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
1335  *discarded_pkts += pbd->hdr.bh1.num_pkts;
1336  struct tpacket3_hdr *ppd =
1337  (struct tpacket3_hdr *)((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);
1338  if (((time_t)ppd->tp_sec > synctv->tv_sec) ||
1339  ((time_t)ppd->tp_sec == synctv->tv_sec &&
1340  (suseconds_t) (ppd->tp_nsec / 1000) > (suseconds_t)synctv->tv_usec)) {
1341  ret = 1;
1342  }
1343  AFPFlushBlock(pbd);
1344  ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
1345  return ret;
1346 
1347  } else
1348 #endif
1349  {
1350  /* Read packet from ring */
1351  h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
1352  if (h.raw == NULL) {
1353  return -1;
1354  }
1355  (*discarded_pkts)++;
1356  if (((time_t)h.h2->tp_sec > synctv->tv_sec) ||
1357  ((time_t)h.h2->tp_sec == synctv->tv_sec &&
1358  (suseconds_t) (h.h2->tp_nsec / 1000) > synctv->tv_usec)) {
1359  return 1;
1360  }
1361 
1362  h.h2->tp_status = TP_STATUS_KERNEL;
1363  if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
1364  ptv->frame_offset = 0;
1365  }
1366  }
1367 
1368 
1369  return 0;
1370 }
1371 
1372 /** \brief wait for all afpacket threads to fully init
1373  *
1374  * Discard packets before all threads are ready, as the cluster
1375  * setup is not complete yet.
1376  *
1377  * if AFPPeersListStarted() returns true init is complete
1378  *
1379  * \retval r 1 = happy, otherwise unhappy
1380  */
1381 static int AFPSynchronizeStart(AFPThreadVars *ptv, uint64_t *discarded_pkts)
1382 {
1383  struct timeval synctv;
1384  struct pollfd fds;
1385 
1386  fds.fd = ptv->socket;
1387  fds.events = POLLIN;
1388 
1389  /* Set timeval to end of the world */
1390  synctv.tv_sec = 0xffffffff;
1391  synctv.tv_usec = 0xffffffff;
1392 
1393  while (1) {
1394  int r = poll(&fds, 1, POLL_TIMEOUT);
1395  if (r > 0 &&
1396  (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1397  SCLogWarning(SC_ERR_AFP_READ, "poll failed %02x",
1398  fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL));
1399  return 0;
1400  } else if (r > 0) {
1401  if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
1402  gettimeofday(&synctv, NULL);
1403  }
1404  if (ptv->flags & AFP_RING_MODE) {
1405  r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
1406  } else {
1407  r = AFPReadAndDiscard(ptv, &synctv, discarded_pkts);
1408  }
1409  SCLogDebug("Discarding on %s", ptv->tv->name);
1410  switch (r) {
1411  case 1:
1412  SCLogDebug("Starting to read on %s", ptv->tv->name);
1413  return 1;
1414  case -1:
1415  return r;
1416  }
1417  /* no packets */
1418  } else if (r == 0 && AFPPeersListStarted()) {
1419  SCLogDebug("Starting to read on %s", ptv->tv->name);
1420  return 1;
1421  } else if (r < 0) { /* only exit on error */
1422  SCLogWarning(SC_ERR_AFP_READ, "poll failed with retval %d", r);
1423  return 0;
1424  }
1425  }
1426  return 1;
1427 }
1428 
1429 /**
1430  * \brief Try to reopen socket
1431  *
1432  * \retval 0 in case of success, negative if error occurs or a condition
1433  * is not met.
1434  */
1435 static int AFPTryReopen(AFPThreadVars *ptv)
1436 {
1437  ptv->down_count++;
1438 
1439  /* Don't reconnect till we have packet that did not release data */
1440  if (SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0) {
1441  return -1;
1442  }
1443 
1444  int afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
1445  if (afp_activate_r != 0) {
1446  if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
1447  SCLogWarning(SC_ERR_AFP_CREATE, "Can not open iface '%s'",
1448  ptv->iface);
1449  }
1450  return afp_activate_r;
1451  }
1452 
1453  SCLogInfo("Interface '%s' is back", ptv->iface);
1454  return 0;
1455 }
1456 
1457 /**
1458  * \brief Main AF_PACKET reading Loop function
 *
 * Selects the read routine for the configured capture mode (plain socket,
 * TPACKET_V2 ring or TPACKET_V3 ring), creates the socket when the thread
 * starts in AFP_STATE_DOWN, runs the start-up synchronization, and then
 * polls the socket until suricata_ctl_flags becomes non-zero. Poll errors
 * and read failures switch the thread to AFP_STATE_DOWN, after which the
 * loop keeps calling AFPTryReopen() until it stops returning a negative
 * value.
 *
 * \param tv thread variables of the capture thread
 * \param data the AFPThreadVars of this thread
 * \param slot the TmSlot of this module; its slot_next is stored in ptv->slot
 *
 * NOTE(review): this listing skips several source lines inside the function
 * (the embedded numbering jumps 1497 -> 1499, 1567 -> 1569, 1590 -> 1592,
 * 1619 -> 1621 and 1623 -> 1626), so some statements, including the final
 * return, are not visible here. Check the complete file before editing.
1459  */
1460 TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot)
1461 {
1462  SCEnter();
1463
1464  AFPThreadVars *ptv = (AFPThreadVars *)data;
1465  struct pollfd fds;
1466  int r;
1467  TmSlot *s = (TmSlot *)slot;
1468  time_t last_dump = 0;
1469  time_t current_time;
1470  int (*AFPReadFunc) (AFPThreadVars *);
1471  uint64_t discarded_pkts = 0;
1472
1473  ptv->slot = s->slot_next;
1474
 /* pick the read routine once, based on the capture mode flags */
1475  if (ptv->flags & AFP_RING_MODE) {
1476  if (ptv->flags & AFP_TPACKET_V3) {
1477  AFPReadFunc = AFPReadFromRingV3;
1478  } else {
1479  AFPReadFunc = AFPReadFromRing;
1480  }
1481  } else {
1482  AFPReadFunc = AFPRead;
1483  }
1484
1485  if (ptv->afp_state == AFP_STATE_DOWN) {
1486  /* Wait for our turn, threads before us must have opened the socket */
1487  while (AFPPeersListWaitTurn(ptv->mpeer)) {
1488  usleep(1000);
1489  if (suricata_ctl_flags != 0) {
1490  break;
1491  }
1492  }
1493  r = AFPCreateSocket(ptv, ptv->iface, 1);
1494  if (r < 0) {
 /* AFPCreateSocket returns the negated error class */
1495  switch (-r) {
1496  case AFP_FATAL_ERROR:
1497  SCLogError(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, fatal error");
 /* NOTE(review): listing line 1498 is missing here; as shown, this
  * case would fall through to the next one -- confirm in the full file */
1499  case AFP_RECOVERABLE_ERROR:
1500  SCLogWarning(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, retrying soon");
1501  }
1502  }
1503  AFPPeersListReachedInc();
1504  }
1505  if (ptv->afp_state == AFP_STATE_UP) {
1506  SCLogDebug("Thread %s using socket %d", tv->name, ptv->socket);
 /* the return value of the synchronization is not checked */
1507  AFPSynchronizeStart(ptv, &discarded_pkts);
1508  /* let's reset counter as we will start the capture at the
1509  * next function call */
1510 #ifdef PACKET_STATISTICS
1511  struct tpacket_stats kstats;
1512  socklen_t len = sizeof (struct tpacket_stats);
1513  if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
1514  &kstats, &len) > -1) {
1515  uint64_t pkts = 0;
1516  SCLogDebug("(%s) Kernel socket startup: Packets %" PRIu32
1517  ", dropped %" PRIu32 "",
1518  ptv->tv->name,
1519  kstats.tp_packets, kstats.tp_drops);
 /* packets thrown away during synchronization and kernel drops are
  * subtracted before the counters are updated */
1520  pkts = kstats.tp_packets - discarded_pkts - kstats.tp_drops;
1521  StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, pkts);
1522  (void) SC_ATOMIC_ADD(ptv->livedev->pkts, pkts);
1523  }
1524 #endif
1525  }
1526
1527  fds.fd = ptv->socket;
1528  fds.events = POLLIN;
1529
1530  while (1) {
1531  /* Start by checking the state of our interface */
1532  if (unlikely(ptv->afp_state == AFP_STATE_DOWN)) {
1533  int dbreak = 0;
1534
 /* retry until the reopen stops failing or shutdown is requested */
1535  do {
1536  usleep(AFP_RECONNECT_TIMEOUT);
1537  if (suricata_ctl_flags != 0) {
1538  dbreak = 1;
1539  break;
1540  }
1541  r = AFPTryReopen(ptv);
 /* the reopen may have replaced the descriptor */
1542  fds.fd = ptv->socket;
1543  } while (r < 0);
1544  if (dbreak == 1)
1545  break;
1546  }
1547
1548  /* make sure we have at least one packet in the packet pool, to prevent
1549  * us from alloc'ing packets at line rate */
1550  PacketPoolWait();
1551
1552  r = poll(&fds, 1, POLL_TIMEOUT);
1553
1554  if (suricata_ctl_flags != 0) {
1555  break;
1556  }
1557
1558  if (r > 0 &&
1559  (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1560  if (fds.revents & (POLLHUP | POLLRDHUP)) {
1561  AFPSwitchState(ptv, AFP_STATE_DOWN);
1562  continue;
1563  } else if (fds.revents & POLLERR) {
1564  char c;
1565  /* Do a recv to get errno */
1566  if (recv(ptv->socket, &c, sizeof c, MSG_PEEK) != -1)
1567  continue; /* what, no error? */
 /* NOTE(review): listing line 1568 (start of the logging call the
  * next two lines belong to) is missing here */
1569  "Error reading data from iface '%s': (%d) %s",
1570  ptv->iface, errno, strerror(errno));
1571  AFPSwitchState(ptv, AFP_STATE_DOWN);
1572  continue;
1573  } else if (fds.revents & POLLNVAL) {
1574  SCLogError(SC_ERR_AFP_READ, "Invalid polling request");
1575  AFPSwitchState(ptv, AFP_STATE_DOWN);
1576  continue;
1577  }
1578  } else if (r > 0) {
1579  r = AFPReadFunc(ptv);
1580  switch (r) {
1581  case AFP_READ_OK:
1582  /* Trigger one dump of stats every second */
1583  current_time = time(NULL);
1584  if (current_time != last_dump) {
1585  AFPDumpCounters(ptv);
1586  last_dump = current_time;
1587  }
1588  break;
1589  case AFP_READ_FAILURE:
1590  /* AFPRead in error: best to reset the socket */
 /* NOTE(review): listing line 1591 (start of the logging call the
  * next two lines belong to) is missing here */
1592  "AFPRead error reading data from iface '%s': (%d) %s",
1593  ptv->iface, errno, strerror(errno));
1594  AFPSwitchState(ptv, AFP_STATE_DOWN);
1595  continue;
1596  case AFP_SURI_FAILURE:
1597  StatsIncr(ptv->tv, ptv->capture_errors);
1598  break;
1599  case AFP_KERNEL_DROP:
1600  AFPDumpCounters(ptv);
1601  break;
1602  }
1603  } else if (unlikely(r == 0)) {
1604  /* Trigger one dump of stats every second */
1605  current_time = time(NULL);
1606  if (current_time != last_dump) {
1607  AFPDumpCounters(ptv);
1608  last_dump = current_time;
1609  }
1610  /* poll timed out, lets see if we need to inject a fake packet */
1611  TmThreadsCaptureInjectPacket(tv, ptv->slot, NULL);
1612
 /* a poll interrupted by a signal (EINTR) is not treated as an error */
1613  } else if ((r < 0) && (errno != EINTR)) {
1614  SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d) %s",
1615  ptv->iface,
1616  errno, strerror(errno));
1617  AFPSwitchState(ptv, AFP_STATE_DOWN);
1618  continue;
1619  }
 /* NOTE(review): listing line 1620 is missing here */
1621  }
1622
1623  AFPDumpCounters(ptv);
 /* NOTE(review): listing lines 1624-1625 are missing here; no return
  * statement is visible for this TmEcode function */
1626 }
1627 
1628 static int AFPGetDevFlags(int fd, const char *ifname)
1629 {
1630  struct ifreq ifr;
1631 
1632  memset(&ifr, 0, sizeof(ifr));
1633  strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1634 
1635  if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) {
1636  SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1637  ifname, strerror(errno));
1638  return -1;
1639  }
1640 
1641  return ifr.ifr_flags;
1642 }
1643 
1644 
1645 static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose)
1646 {
1647  struct ifreq ifr;
1648 
1649  memset(&ifr, 0, sizeof(ifr));
1650  strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1651 
1652  if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
1653  if (verbose)
1654  SCLogError(SC_ERR_AFP_CREATE, "Unable to find iface %s: %s",
1655  ifname, strerror(errno));
1656  return -1;
1657  }
1658 
1659  return ifr.ifr_ifindex;
1660 }
1661 
1662 static int AFPGetDevLinktype(int fd, const char *ifname)
1663 {
1664  struct ifreq ifr;
1665 
1666  memset(&ifr, 0, sizeof(ifr));
1667  strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1668 
1669  if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1670  SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1671  ifname, strerror(errno));
1672  return -1;
1673  }
1674 
1675  switch (ifr.ifr_hwaddr.sa_family) {
1676  case ARPHRD_LOOPBACK:
1677  return LINKTYPE_ETHERNET;
1678  case ARPHRD_PPP:
1679  case ARPHRD_NONE:
1680  return LINKTYPE_RAW;
1681  default:
1682  return ifr.ifr_hwaddr.sa_family;
1683  }
1684 }
1685 
1686 int AFPGetLinkType(const char *ifname)
1687 {
1688  int ltype;
1689 
1690  int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1691  if (fd == -1) {
1692  SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
1693  return LINKTYPE_RAW;
1694  }
1695 
1696  ltype = AFPGetDevLinktype(fd, ifname);
1697  close(fd);
1698 
1699  return ltype;
1700 }
1701 
/**
 * \brief Compute the TPACKET_V2 RX ring request for a given block order
 *
 * Fills ptv->req.v2. The frame size is derived from the snaplen
 * (default_packet_size; when that is 0 the value returned by
 * GetIfaceMaxPacketSize(), with 1514 as fallback), the block size is
 * getpagesize() << order, and the frame count is derived from
 * ptv->ring_size and adjusted to a whole number of blocks.
 *
 * \param ptv thread variables; iface and ring_size are read, req.v2 is written
 * \param order shift applied to the page size to obtain the block size
 *
 * \retval 1 on success
 * \retval -1 if not even one frame fits in a block
 *
 * NOTE(review): listing line 1733 (start of the logging call around the
 * "Unable to get MTU" message) is missing from this listing.
 */
1702 static int AFPComputeRingParams(AFPThreadVars *ptv, int order)
1703 {
1704  /* Compute structure:
1705  Target is to store all pending packets
1706  with a size equal to MTU + auxdata
1707  And we keep a decent number of block
1708
1709  To do so:
1710  Compute frame_size (aligned to be able to fit in block
1711  Check which block size we need. Blocksize is a 2^n * pagesize
1712  We then need to get order, big enough to have
1713  frame_size < block size
1714  Find number of frame per block (divide)
1715  Fill in packet_req
1716
1717  Compute frame size:
1718  described in packet_mmap.txt
1719  dependant on snaplen (need to use a variable ?)
1720 snaplen: MTU ?
1721 tp_hdrlen determine_version in daq_afpacket
1722 in V1: sizeof(struct tpacket_hdr);
1723 in V2: val in getsockopt(instance->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len)
1724 frame size: TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1725
1726  */
1727  int tp_hdrlen = sizeof(struct tpacket_hdr);
1728  int snaplen = default_packet_size;
1729
 /* no configured packet size: ask the interface, fall back to 1514 */
1730  if (snaplen == 0) {
1731  snaplen = GetIfaceMaxPacketSize(ptv->iface);
1732  if (snaplen <= 0) {
1734  "Unable to get MTU, setting snaplen to sane default of 1514");
1735  snaplen = 1514;
1736  }
1737  }
1738
1739  ptv->req.v2.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1740  ptv->req.v2.tp_block_size = getpagesize() << order;
1741  int frames_per_block = ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size;
1742  if (frames_per_block == 0) {
1743  SCLogError(SC_ERR_INVALID_VALUE, "Frame size bigger than block size");
1744  return -1;
1745  }
 /* start from the requested ring size, then take one block more than the
  * integer quotient so the ring holds at least ring_size frames */
1746  ptv->req.v2.tp_frame_nr = ptv->ring_size;
1747  ptv->req.v2.tp_block_nr = ptv->req.v2.tp_frame_nr / frames_per_block + 1;
1748  /* exact division */
1749  ptv->req.v2.tp_frame_nr = ptv->req.v2.tp_block_nr * frames_per_block;
1750  SCLogPerf("AF_PACKET RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d",
1751  ptv->req.v2.tp_block_size, ptv->req.v2.tp_block_nr,
1752  ptv->req.v2.tp_frame_size, ptv->req.v2.tp_frame_nr);
1753  return 1;
1754 }
1755 
1756 #ifdef HAVE_TPACKET_V3
/**
 * \brief Compute the TPACKET_V3 RX ring request
 *
 * Fills ptv->req.v3. The block size comes from ptv->block_size, the frame
 * size is derived from the snaplen (default_packet_size; when that is 0 the
 * value returned by GetIfaceMaxPacketSize(), with 1514 as fallback), the
 * block count from ptv->ring_size, and the block retire timeout from
 * ptv->block_timeout.
 *
 * \param ptv thread variables; req.v3 is written
 *
 * \retval 1 on success
 * \retval -1 if the configured block size cannot hold a single frame
 *
 * NOTE(review): listing lines 1768 and 1778 (starts of two logging calls)
 * are missing from this listing.
 */
1757 static int AFPComputeRingParamsV3(AFPThreadVars *ptv)
1758 {
1759  ptv->req.v3.tp_block_size = ptv->block_size;
 /* initial value only; the frame size is recomputed from the snaplen below */
1760  ptv->req.v3.tp_frame_size = 2048;
1761  int frames_per_block = 0;
1762  int tp_hdrlen = sizeof(struct tpacket3_hdr);
1763  int snaplen = default_packet_size;
1764
 /* no configured packet size: ask the interface, fall back to 1514 */
1765  if (snaplen == 0) {
1766  snaplen = GetIfaceMaxPacketSize(ptv->iface);
1767  if (snaplen <= 0) {
1769  "Unable to get MTU, setting snaplen to sane default of 1514");
1770  snaplen = 1514;
1771  }
1772  }
1773
1774  ptv->req.v3.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1775  frames_per_block = ptv->req.v3.tp_block_size / ptv->req.v3.tp_frame_size;
1776
1777  if (frames_per_block == 0) {
1779  "Block size is too small, it should be at least %d",
1780  ptv->req.v3.tp_frame_size);
1781  return -1;
1782  }
 /* one block more than the integer quotient, so the ring holds at least
  * ring_size frames */
1783  ptv->req.v3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
1784  /* exact division */
1785  ptv->req.v3.tp_frame_nr = ptv->req.v3.tp_block_nr * frames_per_block;
1786  ptv->req.v3.tp_retire_blk_tov = ptv->block_timeout;
1787  ptv->req.v3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
1788  SCLogPerf("AF_PACKET V3 RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %d)",
1789  ptv->req.v3.tp_block_size, ptv->req.v3.tp_block_nr,
1790  ptv->req.v3.tp_frame_size, ptv->req.v3.tp_frame_nr,
1791  ptv->req.v3.tp_block_size * ptv->req.v3.tp_block_nr
1792  );
1793  return 1;
1794 }
1795 #endif
1796 
/**
 * \brief Set up the memory-mapped RX ring on the capture socket
 *
 * Selects the TPACKET version on the socket, optionally reserves head room
 * for a VLAN header (when copy_mode is not AFP_COPY_MODE_NONE), requests
 * the RX ring from the kernel, maps it into ptv->ring_buf and builds the
 * per-block (V3) or per-frame (V2) pointer array in ptv->ring.
 *
 * \param ptv thread variables; socket, flags and sizing fields are read,
 *            req, ring_buf, ring_buflen, ring and frame_offset are written
 * \param devname interface name, used in log messages only
 *
 * \retval 0 on success
 * \retval AFP_FATAL_ERROR on any failure
 *
 * NOTE(review): the starts of several logging calls are missing from this
 * listing (the embedded numbering skips 1815, 1818, 1834, 1844, 1858, 1874,
 * 1897 and 1908), which leaves dangling string literals below.
 */
1797 static int AFPSetupRing(AFPThreadVars *ptv, char *devname)
1798 {
1799  int val;
1800  unsigned int len = sizeof(val), i;
1801  int order;
1802  int r, mmap_flag;
1803
1804 #ifdef HAVE_TPACKET_V3
1805  if (ptv->flags & AFP_TPACKET_V3) {
1806  val = TPACKET_V3;
1807  } else
1808 #endif
1809  {
1810  val = TPACKET_V2;
1811  }
 /* val is passed in holding the TPACKET version; it is set again below
  * before being used for PACKET_VERSION */
1812  if (getsockopt(ptv->socket, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
1813  if (errno == ENOPROTOOPT) {
1814  if (ptv->flags & AFP_TPACKET_V3) {
1816  "Too old kernel giving up (need 3.2 for TPACKET_V3)");
1817  } else {
1819  "Too old kernel giving up (need 2.6.27 at least)");
1820  }
1821  }
1822  SCLogError(SC_ERR_AFP_CREATE, "Error when retrieving packet header len");
1823  return AFP_FATAL_ERROR;
1824  }
1825
1826  val = TPACKET_V2;
1827 #ifdef HAVE_TPACKET_V3
1828  if (ptv->flags & AFP_TPACKET_V3) {
1829  val = TPACKET_V3;
1830  }
1831 #endif
1832  if (setsockopt(ptv->socket, SOL_PACKET, PACKET_VERSION, &val,
1833  sizeof(val)) < 0) {
1835  "Can't activate TPACKET_V2/TPACKET_V3 on packet socket: %s",
1836  strerror(errno));
1837  return AFP_FATAL_ERROR;
1838  }
1839
1840 #ifdef HAVE_HW_TIMESTAMPING
 /* a failure to enable hardware timestamping does not abort the setup */
1841  int req = SOF_TIMESTAMPING_RAW_HARDWARE;
1842  if (setsockopt(ptv->socket, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req,
1843  sizeof(req)) < 0) {
1845  "Can't activate hardware timestamping on packet socket: %s",
1846  strerror(errno));
1847  }
1848 #endif
1849
1850  /* Let's reserve head room so we can add the VLAN header in IPS
1851  * or TAP mode before write the packet */
1852  if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
1853  /* Only one vlan is extracted from AFP header so
1854  * one VLAN header length is enough. */
1855  int reserve = VLAN_HEADER_LEN;
1856  if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *) &reserve,
1857  sizeof(reserve)) < 0) {
1859  "Can't activate reserve on packet socket: %s",
1860  strerror(errno));
1861  return AFP_FATAL_ERROR;
1862  }
1863  }
1864
1865  /* Allocate RX ring */
1866 #ifdef HAVE_TPACKET_V3
1867  if (ptv->flags & AFP_TPACKET_V3) {
1868  if (AFPComputeRingParamsV3(ptv) != 1) {
1869  return AFP_FATAL_ERROR;
1870  }
1871  r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1872  (void *) &ptv->req.v3, sizeof(ptv->req.v3));
1873  if (r < 0) {
1875  "Unable to allocate RX Ring for iface %s: (%d) %s",
1876  devname,
1877  errno,
1878  strerror(errno));
1879  return AFP_FATAL_ERROR;
1880  }
1881  } else {
1882 #endif
 /* V2: start with the default block order and retry with smaller
  * blocks each time the kernel answers ENOMEM */
1883  for (order = AFP_BLOCK_SIZE_DEFAULT_ORDER; order >= 0; order--) {
1884  if (AFPComputeRingParams(ptv, order) != 1) {
1885  SCLogInfo("Ring parameter are incorrect. Please correct the devel");
1886  return AFP_FATAL_ERROR;
1887  }
1888
 /* NOTE(review): this passes the whole ptv->req object and its size,
  * where the V3 path passes ptv->req.v3 -- confirm this is intended */
1889  r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1890  (void *) &ptv->req, sizeof(ptv->req));
1891
1892  if (r < 0) {
1893  if (errno == ENOMEM) {
1894  SCLogInfo("Memory issue with ring parameters. Retrying.");
1895  continue;
1896  }
1898  "Unable to allocate RX Ring for iface %s: (%d) %s",
1899  devname,
1900  errno,
1901  strerror(errno));
1902  return AFP_FATAL_ERROR;
1903  } else {
1904  break;
1905  }
1906  }
 /* the loop ran out of orders without a successful request */
1907  if (order < 0) {
1909  "Unable to allocate RX Ring for iface %s (order 0 failed)",
1910  devname);
1911  return AFP_FATAL_ERROR;
1912  }
1913 #ifdef HAVE_TPACKET_V3
1914  }
1915 #endif
1916
1917  /* Allocate the Ring */
1918 #ifdef HAVE_TPACKET_V3
1919  if (ptv->flags & AFP_TPACKET_V3) {
1920  ptv->ring_buflen = ptv->req.v3.tp_block_nr * ptv->req.v3.tp_block_size;
1921  } else {
1922 #endif
1923  ptv->ring_buflen = ptv->req.v2.tp_block_nr * ptv->req.v2.tp_block_size;
1924 #ifdef HAVE_TPACKET_V3
1925  }
1926 #endif
1927  mmap_flag = MAP_SHARED;
1928  if (ptv->flags & AFP_MMAP_LOCKED)
1929  mmap_flag |= MAP_LOCKED;
1930  ptv->ring_buf = mmap(0, ptv->ring_buflen, PROT_READ|PROT_WRITE,
1931  mmap_flag, ptv->socket, 0);
1932  if (ptv->ring_buf == MAP_FAILED) {
1933  SCLogError(SC_ERR_MEM_ALLOC, "Unable to mmap, error %s",
1934  strerror(errno));
1935  goto mmap_err;
1936  }
1937 #ifdef HAVE_TPACKET_V3
1938  if (ptv->flags & AFP_TPACKET_V3) {
 /* one iovec per block, each pointing into the mapping */
1939  ptv->ring.v3 = SCMalloc(ptv->req.v3.tp_block_nr * sizeof(*ptv->ring.v3));
1940  if (!ptv->ring.v3) {
1941  SCLogError(SC_ERR_MEM_ALLOC, "Unable to malloc ptv ring.v3");
1942  goto postmmap_err;
1943  }
1944  for (i = 0; i < ptv->req.v3.tp_block_nr; ++i) {
1945  ptv->ring.v3[i].iov_base = ptv->ring_buf + (i * ptv->req.v3.tp_block_size);
1946  ptv->ring.v3[i].iov_len = ptv->req.v3.tp_block_size;
1947  }
1948  } else {
1949 #endif
1950  /* allocate a ring for each frame header pointer*/
1951  ptv->ring.v2 = SCMalloc(ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
1952  if (ptv->ring.v2 == NULL) {
1953  SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate frame buf");
1954  goto postmmap_err;
1955  }
1956  memset(ptv->ring.v2, 0, ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
1957  /* fill the header ring with proper frame ptr*/
 /* frame_offset is used as the running index while filling, then reset */
1958  ptv->frame_offset = 0;
1959  for (i = 0; i < ptv->req.v2.tp_block_nr; ++i) {
1960  void *base = &(ptv->ring_buf[i * ptv->req.v2.tp_block_size]);
1961  unsigned int j;
1962  for (j = 0; j < ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size; ++j, ++ptv->frame_offset) {
1963  (((union thdr **)ptv->ring.v2)[ptv->frame_offset]) = base;
 /* arithmetic on a void pointer: relies on a compiler extension */
1964  base += ptv->req.v2.tp_frame_size;
1965  }
1966  }
1967  ptv->frame_offset = 0;
1968 #ifdef HAVE_TPACKET_V3
1969  }
1970 #endif
1971
1972  return 0;
1973
1974 postmmap_err:
1975  munmap(ptv->ring_buf, ptv->ring_buflen);
 /* NOTE(review): both ring.v2 and ring.v3 are freed here and neither is
  * reset to NULL; if these two members share storage this needs a second
  * look -- confirm against the AFPThreadVars declaration */
1976  if (ptv->ring.v2)
1977  SCFree(ptv->ring.v2);
1978  if (ptv->ring.v3)
1979  SCFree(ptv->ring.v3);
1980 mmap_err:
1981  /* Packet mmap does the cleaning when socket is closed */
1982  return AFP_FATAL_ERROR;
1983 }
1984 
1985 /** \brief test if we can use FANOUT. Older kernels like those in
1986  * CentOS6 have HAVE_PACKET_FANOUT defined but fail to work
 *
 * Opens a temporary AF_PACKET socket, tries to set PACKET_FANOUT on it and
 * closes it again.
 *
 * \retval 1 if the setsockopt() call succeeds
 * \retval 0 if the socket cannot be created, the option is refused, or
 *         HAVE_PACKET_FANOUT is not defined
 *
 * NOTE(review): the signature line of this function (listing line 1988)
 * and the declaration of `mode` (listing line 1995) are missing from this
 * listing.
1987  */
1989 {
1990 #ifdef HAVE_PACKET_FANOUT
1991  int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1992  if (fd < 0)
1993  return 0;
1994
1996  uint16_t id = 1;
 /* fanout option word: mode in the upper 16 bits, group id in the lower 16 */
1997  uint32_t option = (mode << 16) | (id & 0xffff);
1998  int r = setsockopt(fd, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
 /* NOTE(review): close() runs before errno is read for the message below,
  * so the reported error may not be the one from setsockopt() */
1999  close(fd);
2000
2001  if (r < 0) {
2002  SCLogPerf("fanout not supported by kernel: %s", strerror(errno));
2003  return 0;
2004  }
2005  return 1;
2006 #else
2007  return 0;
2008 #endif
2009 }
2010 
2011 #ifdef HAVE_PACKET_EBPF
2012 
/**
 * \brief Hand the eBPF load balancing program to the fanout group
 *
 * Passes ptv->ebpf_lb_fd to the socket through the PACKET_FANOUT_DATA
 * option.
 *
 * \param ptv thread variables; socket and ebpf_lb_fd are read
 *
 * \retval 0 on success
 * \retval -1 if the descriptor is -1 or setsockopt() fails
 *
 * NOTE(review): listing line 2017 (start of the logging call for the
 * "Fanout file descriptor is invalid" message) is missing from this listing.
 */
2013 static int SockFanoutSeteBPF(AFPThreadVars *ptv)
2014 {
2015  int pfd = ptv->ebpf_lb_fd;
2016  if (pfd == -1) {
2018  "Fanout file descriptor is invalid");
2019  return -1;
2020  }
2021
 /* any non-zero return from setsockopt() is treated as a failure */
2022  if (setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
2023  SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf");
2024  return -1;
2025  }
2026  SCLogInfo("Activated eBPF on socket");
2027
2028  return 0;
2029 }
2030 
/**
 * \brief Attach the eBPF filter program to the capture socket
 *
 * Passes ptv->ebpf_filter_fd to the socket through the SO_ATTACH_BPF
 * option.
 *
 * \param ptv thread variables; socket and ebpf_filter_fd are read
 *
 * \retval 0 on success
 * \retval -1 if the descriptor is -1 or setsockopt() fails
 *
 * NOTE(review): listing line 2035 (start of the logging call for the
 * "Filter file descriptor is invalid" message) is missing from this listing.
 */
2031 static int SetEbpfFilter(AFPThreadVars *ptv)
2032 {
2033  int pfd = ptv->ebpf_filter_fd;
2034  if (pfd == -1) {
2036  "Filter file descriptor is invalid");
2037  return -1;
2038  }
2039
 /* any non-zero return from setsockopt() is treated as a failure */
2040  if (setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_BPF, &pfd, sizeof(pfd))) {
2041  SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf: %s", strerror(errno));
2042  return -1;
2043  }
2044  SCLogInfo("Activated eBPF filter on socket");
2045
2046  return 0;
2047 }
2048 #endif
2049 
/**
 * \brief Create and configure the AF_PACKET capture socket
 *
 * Opens the socket, checks that the interface exists and is up, applies
 * the configured options (promiscuous mode, receive buffer size), binds
 * the socket to the interface, joins the fanout group when several threads
 * are used, sets up the ring in ring mode, installs the BPF filter and
 * finally switches the thread to AFP_STATE_UP.
 *
 * \param ptv thread variables of the capture thread
 * \param devname name of the interface to bind to
 * \param verbose when non-zero, recoverable failures are logged
 *
 * \retval 0 on success
 * \retval -AFP_RECOVERABLE_ERROR for conditions the code marks as
 *         recoverable (interface flags unavailable, interface down,
 *         bind failure)
 * \retval -AFP_FATAL_ERROR for other failures that reach the error labels
 *
 * NOTE(review): many lines are missing from this listing (the embedded
 * numbering skips 2086, 2094, 2109, 2116, 2120, 2122, 2135, 2146, 2150,
 * 2168 and 2180); the dangling string literals and the unmatched closing
 * brace after line 2121 are artifacts of that.
 */
2050 static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
2051 {
2052  int r;
 /* error class returned (negated) by the error labels unless overridden */
2053  int ret = AFP_FATAL_ERROR;
2054  struct packet_mreq sock_params;
2055  struct sockaddr_ll bind_address;
2056  int if_idx;
2057
2058  /* open socket */
2059  ptv->socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
2060  if (ptv->socket == -1) {
2061  SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
2062  goto error;
2063  }
2064
2065  if_idx = AFPGetIfnumByDev(ptv->socket, devname, verbose);
2066
2067  if (if_idx == -1) {
2068  goto socket_err;
2069  }
2070
2071  /* bind socket */
2072  memset(&bind_address, 0, sizeof(bind_address));
2073  bind_address.sll_family = AF_PACKET;
2074  bind_address.sll_protocol = htons(ETH_P_ALL);
2075  bind_address.sll_ifindex = if_idx;
 /* if_idx == -1 has already been handled above, so this test cannot
  * succeed on the paths visible here */
2076  if (bind_address.sll_ifindex == -1) {
2077  if (verbose)
2078  SCLogError(SC_ERR_AFP_CREATE, "Couldn't find iface %s", devname);
2079  ret = AFP_RECOVERABLE_ERROR;
2080  goto socket_err;
2081  }
2082
2083  int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
2084  if (if_flags == -1) {
2085  if (verbose) {
2087  "Couldn't get flags for interface '%s'",
2088  ptv->iface);
2089  }
2090  ret = AFP_RECOVERABLE_ERROR;
2091  goto socket_err;
 /* the interface counts as down only when neither IFF_UP nor IFF_RUNNING
  * is set */
2092  } else if ((if_flags & (IFF_UP | IFF_RUNNING)) == 0) {
2093  if (verbose) {
2095  "Interface '%s' is down",
2096  ptv->iface);
2097  }
2098  ret = AFP_RECOVERABLE_ERROR;
2099  goto socket_err;
2100  }
2101
2102  if (ptv->promisc != 0) {
2103  /* Force promiscuous mode */
2104  memset(&sock_params, 0, sizeof(sock_params));
2105  sock_params.mr_type = PACKET_MR_PROMISC;
2106  sock_params.mr_ifindex = bind_address.sll_ifindex;
2107  r = setsockopt(ptv->socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP,(void *)&sock_params, sizeof(sock_params));
2108  if (r < 0) {
2110  "Couldn't switch iface %s to promiscuous, error %s",
2111  devname, strerror(errno));
2112  goto socket_err;
2113  }
2114  }
2115
 /* NOTE(review): the condition opening this block (listing line 2116) and
  * the statements at 2120 and 2122 are missing from this listing */
2117  int val = 1;
2118  if (setsockopt(ptv->socket, SOL_PACKET, PACKET_AUXDATA, &val,
2119  sizeof(val)) == -1 && errno != ENOPROTOOPT) {
2121  "'kernel' checksum mode not supported, falling back to full mode.");
2123  }
2124  }
2125
2126  /* set socket recv buffer size */
2127  if (ptv->buffer_size != 0) {
2128  /*
2129  * Set the socket buffer size to the specified value.
2130  */
2131  SCLogPerf("Setting AF_PACKET socket buffer to %d", ptv->buffer_size);
2132  if (setsockopt(ptv->socket, SOL_SOCKET, SO_RCVBUF,
2133  &ptv->buffer_size,
2134  sizeof(ptv->buffer_size)) == -1) {
2136  "Couldn't set buffer size to %d on iface %s, error %s",
2137  ptv->buffer_size, devname, strerror(errno));
2138  goto socket_err;
2139  }
2140  }
2141
2142  r = bind(ptv->socket, (struct sockaddr *)&bind_address, sizeof(bind_address));
2143  if (r < 0) {
2144  if (verbose) {
2145  if (errno == ENETDOWN) {
2147  "Couldn't bind AF_PACKET socket, iface %s is down",
2148  devname);
2149  } else {
2151  "Couldn't bind AF_PACKET socket to iface %s, error %s",
2152  devname, strerror(errno));
2153  }
2154  }
2155  ret = AFP_RECOVERABLE_ERROR;
2156  goto socket_err;
2157  }
2158
2159
2160 #ifdef HAVE_PACKET_FANOUT
2161  /* add binded socket to fanout group */
2162  if (ptv->threads > 1) {
2163  uint16_t mode = ptv->cluster_type;
2164  uint16_t id = ptv->cluster_id;
 /* fanout option word: mode in the upper 16 bits, group id in the lower 16 */
2165  uint32_t option = (mode << 16) | (id & 0xffff);
2166  r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
2167  if (r < 0) {
2169  "Couldn't set fanout mode, error %s",
2170  strerror(errno));
2171  goto socket_err;
2172  }
2173  }
2174 #endif
2175
2176 #ifdef HAVE_PACKET_EBPF
2177  if (ptv->cluster_type == PACKET_FANOUT_EBPF) {
2178  r = SockFanoutSeteBPF(ptv);
2179  if (r < 0) {
2181  "Coudn't set EBPF, error %s",
2182  strerror(errno));
2183  goto socket_err;
2184  }
2185  }
2186 #endif
2187
2188  if (ptv->flags & AFP_RING_MODE) {
 /* a non-zero return is kept in ret and negated by the error path */
2189  ret = AFPSetupRing(ptv, devname);
2190  if (ret != 0)
2191  goto socket_err;
2192  }
2193
2194  SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);
2195
2196  ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
2197  switch (ptv->datalink) {
2198  case ARPHRD_PPP:
2199  case ARPHRD_ATM:
2200  ptv->cooked = 1;
2201  break;
2202  }
2203
2204  TmEcode rc = AFPSetBPFFilter(ptv);
2205  if (rc == TM_ECODE_FAILED) {
2206  ret = AFP_FATAL_ERROR;
2207  goto socket_err;
2208  }
2209
2210  /* Init is ok */
2211  AFPSwitchState(ptv, AFP_STATE_UP);
2212  return 0;
2213
2214 socket_err:
 /* NOTE(review): this path closes the socket and frees the ring pointer
  * array, but no munmap() of ptv->ring_buf is visible here even when the
  * ring was already mapped (filter failure after a successful ring setup)
  * -- confirm whether closing the socket is enough */
2215  close(ptv->socket);
2216  ptv->socket = -1;
2217  if (ptv->flags & AFP_TPACKET_V3) {
2218  if (ptv->ring.v3) {
2219  SCFree(ptv->ring.v3);
2220  ptv->ring.v3 = NULL;
2221  }
2222  } else {
2223  if (ptv->ring.v2) {
2224  SCFree(ptv->ring.v2);
2225  ptv->ring.v2 = NULL;
2226  }
2227  }
2228
2229 error:
 /* callers receive the error class as a negative value */
2230  return -ret;
2231 }
2232 
/* Installs the packet filter on the capture socket: the eBPF filter when an
 * eBPF filter descriptor is configured, otherwise the compiled form of
 * ptv->bpf_filter, or nothing when no filter string is set.
 *
 * NOTE(review): the signature line of this function (listing line 2233) is
 * missing from this listing; the body below uses a variable named ptv and
 * returns TM_ECODE_OK / TM_ECODE_FAILED. */
2234 {
2235  struct bpf_program filter;
2236  struct sock_fprog fcode;
2237  int rc;
2238
2239 #ifdef HAVE_PACKET_EBPF
2240  if (ptv->ebpf_filter_fd != -1) {
 /* NOTE(review): SetEbpfFilter() returns 0 or -1, which is passed through
  * unchanged here while the other paths return TM_ECODE_* values --
  * confirm that callers comparing against TM_ECODE_FAILED still detect
  * an eBPF failure */
2241  return SetEbpfFilter(ptv);
2242  }
2243 #endif
2244
 /* no filter configured: nothing to install */
2245  if (!ptv->bpf_filter)
2246  return TM_ECODE_OK;
2247
2248  SCLogInfo("Using BPF '%s' on iface '%s'",
2249  ptv->bpf_filter,
2250  ptv->iface);
2251
2252  char errbuf[PCAP_ERRBUF_SIZE];
2253  if (SCBPFCompile(default_packet_size, /* snaplen_arg */
2254  ptv->datalink, /* linktype_arg */
2255  &filter, /* program */
2256  ptv->bpf_filter, /* const char *buf */
2257  1, /* optimize */
2258  0, /* mask */
2259  errbuf,
2260  sizeof(errbuf)) == -1) {
2261  SCLogError(SC_ERR_AFP_CREATE, "Failed to compile BPF \"%s\": %s",
2262  ptv->bpf_filter,
2263  errbuf);
2264  return TM_ECODE_FAILED;
2265  }
2266
 /* hand the compiled instructions to the kernel in sock_fprog form */
2267  fcode.len = filter.bf_len;
2268  fcode.filter = (struct sock_filter*)filter.bf_insns;
2269
2270  rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_FILTER, &fcode, sizeof(fcode));
2271
 /* the compiled program is released whether or not the attach worked */
2272  SCBPFFree(&filter);
2273  if(rc == -1) {
2274  SCLogError(SC_ERR_AFP_CREATE, "Failed to attach filter: %s", strerror(errno));
2275  return TM_ECODE_FAILED;
2276  }
2277
2278  return TM_ECODE_OK;
2279 }
2280 
2281 #ifdef HAVE_PACKET_EBPF
2282 /**
2283  * Insert a half flow in the kernel bypass table
2284  *
2285  * \param mapfd file descriptor of the protocol bypass table
2286  * \param key data to use as key in the table
2287  * \param inittime time of creation of the entry (in monotonic clock)
2288  * \return 0 in case of error, 1 if success
2289  */
2290 static int AFPInsertHalfFlow(int mapd, void *key, uint64_t inittime)
2291 {
2292  struct pair value[nr_cpus];
2293  unsigned int i;
2294 
2295  if (mapd == -1) {
2296  return 0;
2297  }
2298 
2299  /* We use a per CPU structure so we have to set an array of values as the kernel
2300  * is not duplicating the data on each CPU by itself. */
2301  for (i = 0; i < nr_cpus; i++) {
2302  value[i].time = inittime;
2303  value[i].packets = 0;
2304  value[i].bytes = 0;
2305  }
2306  SCLogDebug("Inserting element in eBPF mapping: %lu", inittime);
2307  if (bpf_map_update_elem(mapd, key, value, BPF_NOEXIST) != 0) {
2308  switch (errno) {
2309  /* no more place in the hash */
2310  case E2BIG:
2311  return 0;
2312  /* if we already have the key then bypass is a success */
2313  case EEXIST:
2314  return 1;
2315  /* Not supposed to be there so issue a error */
2316  default:
2317  SCLogError(SC_ERR_BPF, "Can't update eBPF map: %s (%d)",
2318  strerror(errno),
2319  errno);
2320  return 0;
2321  }
2322  }
2323  return 1;
2324 }
2325 #endif
2326 
2327 /**
2328  * Bypass function for AF_PACKET capture in eBPF mode
2329  *
2330  * This function creates two half flows in the map shared with the kernel
2331  * to trigger bypass.
2332  *
2333  * The implementation of bypass is done via an IPv4 and an IPv6 flow table.
2334  * This table contains the list of half flows to bypass. The in-kernel filter
2335  * will skip/drop the packet if they belong to a flow in one of the flows
2336  * table.
2337  *
2338  * \param p the packet belonging to the flow to bypass
2339  * \return 0 if unable to bypass, 1 if success
2340  */
2341 static int AFPBypassCallback(Packet *p)
2342 {
2343 #ifdef HAVE_PACKET_EBPF
2344  SCLogDebug("Calling af_packet callback function");
2345  /* Only bypass TCP and UDP */
2346  if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2347  return 0;
2348  }
2349 
2350  /* Bypassing tunneled packets is currently not supported
2351  * because we can't discard the inner packet only due to
2352  * primitive parsing in eBPF */
2353  if (IS_TUNNEL_PKT(p)) {
2354  return 0;
2355  }
2356  struct timespec curtime;
2357  uint64_t inittime = 0;
2358  /* In eBPF, the function that we have use to get time return the
2359  * monotonic clock (the time since start of the computer). So we
2360  * can't use the timestamp of the packet. */
2361  if (clock_gettime(CLOCK_MONOTONIC, &curtime) == 0) {
2362  inittime = curtime.tv_sec * 1000000000;
2363  }
2364  if (PKT_IS_IPV4(p)) {
2365  SCLogDebug("add an IPv4");
2366  if (p->afp_v.v4_map_fd == -1) {
2367  return 0;
2368  }
2369  struct flowv4_keys key = {};
2370  key.src = htonl(GET_IPV4_SRC_ADDR_U32(p));
2371  key.dst = htonl(GET_IPV4_DST_ADDR_U32(p));
2372  key.port16[0] = GET_TCP_SRC_PORT(p);
2373  key.port16[1] = GET_TCP_DST_PORT(p);
2374 
2375  key.ip_proto = IPV4_GET_IPPROTO(p);
2376  if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
2377  return 0;
2378  }
2379  key.src = htonl(GET_IPV4_DST_ADDR_U32(p));
2380  key.dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
2381  key.port16[0] = GET_TCP_DST_PORT(p);
2382  key.port16[1] = GET_TCP_SRC_PORT(p);
2383  if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
2384  return 0;
2385  }
2386  EBPFUpdateFlow(p->flow, p);
2387  return 1;
2388  }
2389  /* For IPv6 case we don't handle extended header in eBPF */
2390  if (PKT_IS_IPV6(p) &&
2391  ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
2392  int i;
2393  if (p->afp_v.v6_map_fd == -1) {
2394  return 0;
2395  }
2396  SCLogDebug("add an IPv6");
2397  struct flowv6_keys key = {};
2398  for (i = 0; i < 4; i++) {
2399  key.src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2400  key.dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2401  }
2402  key.port16[0] = GET_TCP_SRC_PORT(p);
2403  key.port16[1] = GET_TCP_DST_PORT(p);
2404  key.ip_proto = IPV6_GET_NH(p);
2405  if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
2406  return 0;
2407  }
2408  for (i = 0; i < 4; i++) {
2409  key.src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2410  key.dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2411  }
2412  key.port16[0] = GET_TCP_DST_PORT(p);
2413  key.port16[1] = GET_TCP_SRC_PORT(p);
2414  if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
2415  return 0;
2416  }
2417  EBPFUpdateFlow(p->flow, p);
2418  return 1;
2419  }
2420 #endif
2421  return 0;
2422 }
2423 
2424 /**
2425  * Bypass function for AF_PACKET capture in XDP mode
2426  *
2427  * This function creates two half flows in the map shared with the kernel
2428  * to trigger bypass. This function is similar to AFPBypassCallback() but
2429  * the bytes order is changed for some data due to the way we get the data
2430  * in the XDP case.
2431  *
2432  * \param p the packet belonging to the flow to bypass
2433  * \return 0 if unable to bypass, 1 if success
2434  */
2435 static int AFPXDPBypassCallback(Packet *p)
2436 {
2437 #ifdef HAVE_PACKET_XDP
2438  SCLogDebug("Calling af_packet callback function");
2439  /* Only bypass TCP and UDP */
2440  if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2441  return 0;
2442  }
2443 
2444  /* Bypassing tunneled packets is currently not supported
2445  * because we can't discard the inner packet only due to
2446  * primitive parsing in eBPF */
2447  if (IS_TUNNEL_PKT(p)) {
2448  return 0;
2449  }
2450  struct timespec curtime;
2451  uint64_t inittime = 0;
2452  /* In eBPF, the function that we have use to get time return the
2453  * monotonic clock (the time since start of the computer). So we
2454  * can't use the timestamp of the packet. */
2455  if (clock_gettime(CLOCK_MONOTONIC, &curtime) == 0) {
2456  inittime = curtime.tv_sec * 1000000000;
2457  }
2458  if (PKT_IS_IPV4(p)) {
2459  struct flowv4_keys key = {};
2460  if (p->afp_v.v4_map_fd == -1) {
2461  return 0;
2462  }
2463  key.src = GET_IPV4_SRC_ADDR_U32(p);
2464  key.dst = GET_IPV4_DST_ADDR_U32(p);
2465  /* In the XDP filter we get port from parsing of packet and not from skb
2466  * (as in eBPF filter) so we need to pass from host to network order */
2467  key.port16[0] = htons(GET_TCP_SRC_PORT(p));
2468  key.port16[1] = htons(GET_TCP_DST_PORT(p));
2469  key.ip_proto = IPV4_GET_IPPROTO(p);
2470  if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
2471  return 0;
2472  }
2473  key.src = GET_IPV4_DST_ADDR_U32(p);
2474  key.dst = GET_IPV4_SRC_ADDR_U32(p);
2475  key.port16[0] = htons(GET_TCP_DST_PORT(p));
2476  key.port16[1] = htons(GET_TCP_SRC_PORT(p));
2477  if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
2478  return 0;
2479  }
2480  return 1;
2481  }
2482  /* For IPv6 case we don't handle extended header in eBPF */
2483  if (PKT_IS_IPV6(p) &&
2484  ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
2485  SCLogDebug("add an IPv6");
2486  if (p->afp_v.v6_map_fd == -1) {
2487  return 0;
2488  }
2489  int i;
2490  struct flowv6_keys key = {};
2491  for (i = 0; i < 4; i++) {
2492  key.src[i] = GET_IPV6_SRC_ADDR(p)[i];
2493  key.dst[i] = GET_IPV6_DST_ADDR(p)[i];
2494  }
2495  key.port16[0] = htons(GET_TCP_SRC_PORT(p));
2496  key.port16[1] = htons(GET_TCP_DST_PORT(p));
2497  key.ip_proto = IPV6_GET_NH(p);
2498  if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
2499  return 0;
2500  }
2501  for (i = 0; i < 4; i++) {
2502  key.src[i] = GET_IPV6_DST_ADDR(p)[i];
2503  key.dst[i] = GET_IPV6_SRC_ADDR(p)[i];
2504  }
2505  key.port16[0] = htons(GET_TCP_DST_PORT(p));
2506  key.port16[1] = htons(GET_TCP_SRC_PORT(p));
2507  if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
2508  return 0;
2509  }
2510  return 1;
2511  }
2512 #endif
2513  return 0;
2514 }
2515 
2516 /**
2517  * \brief Init function for ReceiveAFP.
2518  *
2519  * \param tv pointer to ThreadVars
2520  * \param initdata pointer to the interface passed from the user
2521  * \param data pointer gets populated with AFPThreadVars
2522  *
2523  * \todo Create a general AFP setup function.
2524  */
2525 TmEcode ReceiveAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
2526 {
2527  SCEnter();
2528  AFPIfaceConfig *afpconfig = (AFPIfaceConfig *)initdata;
2529 
2530  if (initdata == NULL) {
2531  SCLogError(SC_ERR_INVALID_ARGUMENT, "initdata == NULL");
2533  }
2534 
2535  AFPThreadVars *ptv = SCMalloc(sizeof(AFPThreadVars));
2536  if (unlikely(ptv == NULL)) {
2537  afpconfig->DerefFunc(afpconfig);
2539  }
2540  memset(ptv, 0, sizeof(AFPThreadVars));
2541 
2542  ptv->tv = tv;
2543  ptv->cooked = 0;
2544 
2545  strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
2546  ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2547 
2548  ptv->livedev = LiveGetDevice(ptv->iface);
2549  if (ptv->livedev == NULL) {
2550  SCLogError(SC_ERR_INVALID_VALUE, "Unable to find Live device");
2551  SCFree(ptv);
2553  }
2554 
2555  ptv->buffer_size = afpconfig->buffer_size;
2556  ptv->ring_size = afpconfig->ring_size;
2557  ptv->block_size = afpconfig->block_size;
2558 
2559  ptv->promisc = afpconfig->promisc;
2560  ptv->checksum_mode = afpconfig->checksum_mode;
2561  ptv->bpf_filter = NULL;
2562 
2563  ptv->threads = 1;
2564 #ifdef HAVE_PACKET_FANOUT
2566  ptv->cluster_id = 1;
2567  /* We only set cluster info if the number of reader threads is greater than 1 */
2568  if (afpconfig->threads > 1) {
2569  ptv->cluster_id = afpconfig->cluster_id;
2570  ptv->cluster_type = afpconfig->cluster_type;
2571  ptv->threads = afpconfig->threads;
2572  }
2573 #endif
2574  ptv->flags = afpconfig->flags;
2575 
2576  if (afpconfig->bpf_filter) {
2577  ptv->bpf_filter = afpconfig->bpf_filter;
2578  }
2579  ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
2580  ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
2581  ptv->xdp_mode = afpconfig->xdp_mode;
2582 
2583 #ifdef HAVE_PACKET_EBPF
2584  if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
2585  ptv->v4_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v4");
2586  if (ptv->v4_map_fd == -1) {
2587  SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v4");
2588  }
2589  ptv->v6_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v6");
2590  if (ptv->v6_map_fd == -1) {
2591  SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v6");
2592  }
2593  }
2594 #endif
2595 
2596 #ifdef PACKET_STATISTICS
2597  ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
2598  ptv->tv);
2599  ptv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
2600  ptv->tv);
2601  ptv->capture_errors = StatsRegisterCounter("capture.errors",
2602  ptv->tv);
2603 #endif
2604 
2605  ptv->copy_mode = afpconfig->copy_mode;
2606  if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
2607  strlcpy(ptv->out_iface, afpconfig->out_iface, AFP_IFACE_NAME_LENGTH);
2608  ptv->out_iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2609  /* Warn about BPF filter consequence */
2610  if (ptv->bpf_filter) {
2611  SCLogWarning(SC_WARN_UNCOMMON, "Enabling a BPF filter in IPS mode result"
2612  " in dropping all non matching packets.");
2613  }
2614  }
2615 
2616 
2617  if (AFPPeersListAdd(ptv) == TM_ECODE_FAILED) {
2618  SCFree(ptv);
2619  afpconfig->DerefFunc(afpconfig);
2621  }
2622 
2623 #define T_DATA_SIZE 70000
2624  ptv->data = SCMalloc(T_DATA_SIZE);
2625  if (ptv->data == NULL) {
2626  afpconfig->DerefFunc(afpconfig);
2627  SCFree(ptv);
2629  }
2630  ptv->datalen = T_DATA_SIZE;
2631 #undef T_DATA_SIZE
2632 
2633  *data = (void *)ptv;
2634 
2635  afpconfig->DerefFunc(afpconfig);
2636 
2637  /* A bit strange to have this here but we only have vlan information
2638  * during reading so we need to know if we want to keep vlan during
2639  * the capture phase */
2640  int vlanbool = 0;
2641  if ((ConfGetBool("vlan.use-for-tracking", &vlanbool)) == 1 && vlanbool == 0) {
2642  ptv->flags |= AFP_VLAN_DISABLED;
2643  }
2644 
2645  /* If kernel is older than 3.0, VLAN is not stripped so we don't
2646  * get the info from packet extended header but we will use a standard
2647  * parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
2648  if (! SCKernelVersionIsAtLeast(3, 0)) {
2649  ptv->flags |= AFP_VLAN_DISABLED;
2650  }
2651 
2653 }
2654 
2655 /**
2656  * \brief This function prints stats to the screen at exit.
2657  * \param tv pointer to ThreadVars
2658  * \param data pointer that gets cast into AFPThreadVars for ptv
2659  */
2661 {
2662  SCEnter();
2663  AFPThreadVars *ptv = (AFPThreadVars *)data;
2664 
2665 #ifdef PACKET_STATISTICS
2666  AFPDumpCounters(ptv);
2667  SCLogPerf("(%s) Kernel: Packets %" PRIu64 ", dropped %" PRIu64 "",
2668  tv->name,
2671 #endif
2672 }
2673 
2674 /**
2675  * \brief DeInit function closes af packet socket at exit.
2676  * \param tv pointer to ThreadVars
2677  * \param data pointer that gets cast into AFPThreadVars for ptv
2678  */
2680 {
2681  AFPThreadVars *ptv = (AFPThreadVars *)data;
2682 
2683  AFPSwitchState(ptv, AFP_STATE_DOWN);
2684 
2685 #ifdef HAVE_PACKET_XDP
2686  EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
2687 #endif
2688  if (ptv->data != NULL) {
2689  SCFree(ptv->data);
2690  ptv->data = NULL;
2691  }
2692  ptv->datalen = 0;
2693 
2694  ptv->bpf_filter = NULL;
2695  if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring.v3) {
2696  SCFree(ptv->ring.v3);
2697  } else {
2698  if (ptv->ring.v2)
2699  SCFree(ptv->ring.v2);
2700  }
2701 
2702  SCFree(ptv);
2704 }
2705 
2706 /**
2707  * \brief This function passes off to link type decoders.
2708  *
2709  * DecodeAFP reads packets from the PacketQueue and passes
2710  * them off to the proper link type decoder.
2711  *
2712  * \param t pointer to ThreadVars
2713  * \param p pointer to the current packet
2714  * \param data pointer that gets cast into AFPThreadVars for ptv
2715  * \param pq pointer to the current PacketQueue
2716  */
2717 TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, PacketQueue *postpq)
2718 {
2719  SCEnter();
2720  DecodeThreadVars *dtv = (DecodeThreadVars *)data;
2721 
2722  /* XXX HACK: flow timeout can call us for injected pseudo packets
2723  * see bug: https://redmine.openinfosecfoundation.org/issues/1107 */
2724  if (p->flags & PKT_PSEUDO_STREAM_END)
2725  return TM_ECODE_OK;
2726 
2727  /* update counters */
2728  DecodeUpdatePacketCounters(tv, dtv, p);
2729 
2730  /* If suri has set vlan during reading, we increase vlan counter */
2731  if (p->vlan_idx) {
2732  StatsIncr(tv, dtv->counter_vlan);
2733  }
2734 
2735  /* call the decoder */
2736  switch (p->datalink) {
2737  case LINKTYPE_ETHERNET:
2738  DecodeEthernet(tv, dtv, p,GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2739  break;
2740  case LINKTYPE_LINUX_SLL:
2741  DecodeSll(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2742  break;
2743  case LINKTYPE_PPP:
2744  DecodePPP(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2745  break;
2746  case LINKTYPE_RAW:
2747  case LINKTYPE_GRE_OVER_IP:
2748  DecodeRaw(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2749  break;
2750  case LINKTYPE_NULL:
2751  DecodeNull(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2752  break;
2753  default:
2754  SCLogError(SC_ERR_DATALINK_UNIMPLEMENTED, "Error: datalink type %" PRId32 " not yet supported in module DecodeAFP", p->datalink);
2755  break;
2756  }
2757 
2758  PacketDecodeFinalize(tv, dtv, p);
2759 
2761 }
2762 
2763 TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
2764 {
2765  SCEnter();
2766  DecodeThreadVars *dtv = NULL;
2767 
2768  dtv = DecodeThreadVarsAlloc(tv);
2769 
2770  if (dtv == NULL)
2772 
2773  DecodeRegisterPerfCounters(dtv, tv);
2774 
2775  *data = (void *)dtv;
2776 
2778 }
2779 
2781 {
2782  if (data != NULL)
2783  DecodeThreadVarsFree(tv, data);
2785 }
2786 
2787 #endif /* HAVE_AF_PACKET */
2788 /* eof */
2789 /**
2790  * @}
2791  */
ChecksumValidationMode checksum_mode
char iface[AFP_IFACE_NAME_LENGTH]
EthernetHdr * ethh
Definition: decode.h:491
#define GET_IPV4_SRC_ADDR_U32(p)
Definition: decode.h:210
TmEcode AFPPeersListInit()
Init the global list of AFPPeer.
#define TM_FLAG_DECODE_TM
Definition: tm-modules.h:32
SCMutex sock_protect
DecodeThreadVars * DecodeThreadVarsAlloc(ThreadVars *tv)
Alloc and setup DecodeThreadVars.
Definition: decode.c:592
#define SCLogDebug(...)
Definition: util-debug.h:335
void TmModuleDecodeAFPRegister(void)
Registration Function for DecodeAFP.
int(* BypassPacketsFlow)(struct Packet_ *)
Definition: decode.h:485
#define AFP_BLOCK_SIZE_DEFAULT_ORDER
#define TAILQ_FIRST(head)
Definition: queue.h:339
struct Flow_ * flow
Definition: decode.h:443
#define SC_ATOMIC_DECLARE(type, name)
wrapper to declare an atomic variable including a (spin) lock to protect it.
Definition: util-atomic.h:56
uint16_t UtilCpuGetNumProcessorsConfigured(void)
Get the number of cpus configured in the system.
Definition: util-cpu.c:58
uint8_t cap_flags
Definition: tm-modules.h:67
#define SLL_HEADER_LEN
Definition: decode-sll.h:27
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:350
struct HtpBodyChunk_ * next
uint16_t capture_kernel_packets
AFPPeersList peerslist
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: util-strlcpyu.c:43
#define LINKTYPE_LINUX_SLL
Definition: decode.h:1073
int DecodePPP(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p, uint8_t *pkt, uint32_t len, PacketQueue *pq)
Definition: decode-ppp.c:43
#define BUG_ON(x)
uint8_t flags
Definition: tm-modules.h:70
#define PACKET_TEST_ACTION(p, a)
Definition: decode.h:860
struct bpf_insn * bf_insns
#define SET_PKT_LEN(p, len)
Definition: decode.h:227
TmEcode AFPPeersListCheck()
Check that all AFPPeer got a peer.
Structure to hold thread specific variables.
#define AFP_RECONNECT_TIMEOUT
void PacketDecodeFinalize(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p)
Finalize decoding of a packet.
Definition: decode.c:114
#define IPV4_GET_IPPROTO(p)
Definition: decode-ipv4.h:148
#define POLL_TIMEOUT
#define unlikely(expr)
Definition: util-optimize.h:35
int AFPGetLinkType(const char *ifname)
void AFPPeersListClean()
Clean the global peers list.
struct AFPPeersList_ AFPPeersList
int DecodeRaw(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p, uint8_t *pkt, uint32_t len, PacketQueue *pq)
Definition: decode-raw.c:46
TmEcode(* Func)(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *)
Definition: tm-modules.h:52
int ConfGetBool(const char *name, int *val)
Retrieve a configuration value as a boolean.
Definition: conf.c:517
#define GET_IPV4_DST_ADDR_U32(p)
Definition: decode.h:211
char iface[AFP_IFACE_NAME_LENGTH]
void DecodeRegisterPerfCounters(DecodeThreadVars *dtv, ThreadVars *tv)
Definition: decode.c:453
union AFPThreadVars_::AFPTpacketReq req
TmEcode ReceiveAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *)
int GetIfaceMaxPacketSize(const char *pcap_dev)
output max packet size for a link
Definition: util-ioctl.c:132
#define SC_ATOMIC_ADD(name, val)
add a value to our atomic variable
Definition: util-atomic.h:107
uint64_t offset
#define TP_STATUS_VLAN_VALID
#define PACKET_FANOUT_LB
uint16_t capture_kernel_drops
#define IS_TUNNEL_PKT(p)
Definition: decode.h:884
#define PKT_IS_IPV6(p)
Definition: decode.h:250
#define AFP_ZERO_COPY
#define VLAN_HEADER_LEN
Definition: decode-vlan.h:50
volatile uint8_t suricata_ctl_flags
Definition: suricata.c:199
struct tpacket2_hdr * h2
#define SC_ATOMIC_SUB(name, val)
sub a value from our atomic variable
Definition: util-atomic.h:124
#define TAILQ_HEAD(name, type)
Definition: queue.h:321
int AFPIsFanoutSupported(void)
test if we can use FANOUT. Older kernels like those in CentOS6 have HAVE_PACKET_FANOUT defined but fa...
#define AFP_PEERS_WAIT
int DecodeNull(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p, uint8_t *pkt, uint32_t len, PacketQueue *pq)
Definition: decode-null.c:48
Packet * PacketGetFromQueueOrAlloc(void)
Get a packet. We try to get a packet from the packetpool first, but if that is empty we alloc a packe...
Definition: decode.c:176
TmEcode(* PktAcqLoop)(ThreadVars *, void *, void *)
Definition: tm-modules.h:54
void(* ReleasePacket)(struct Packet_ *)
Definition: decode.h:482
#define SCMutexLock(mut)
#define AFP_STATE_DOWN
unsigned int flags
void PacketFreeOrRelease(Packet *p)
Return a packet to where it was allocated.
Definition: decode.c:161
#define PKT_IS_IPV4(p)
Definition: decode.h:249
const char * out_iface
#define SC_ATOMIC_DESTROY(name)
Destroy the lock used to protect this variable.
Definition: util-atomic.h:97
uint16_t vlan_id[2]
Definition: decode.h:433
uint16_t counter_vlan
Definition: decode.h:661
#define PKT_SET_SRC(p, src_val)
Definition: decode.h:1134
int SCKernelVersionIsAtLeast(int major, int minor)
#define TM_FLAG_RECEIVE_TM
Definition: tm-modules.h:31
int ChecksumAutoModeCheck(uint64_t thread_count, uint64_t iface_count, uint64_t iface_fail)
Check if the number of invalid checksums indicate checksum offloading in place.
Definition: util-checksum.c:70
TmEcode(* PktAcqBreakLoop)(ThreadVars *, void *)
Definition: tm-modules.h:57
#define LINKTYPE_PPP
Definition: decode.h:1074
uint16_t StatsRegisterCounter(const char *name, struct ThreadVars_ *tv)
Registers a normal, unqualified counter.
Definition: counters.c:939
#define LINKTYPE_NULL
Definition: decode.h:1071
unsigned int bf_len
#define SC_ATOMIC_INIT(name)
Initialize the previously declared atomic variable and its lock.
Definition: util-atomic.h:81
#define SCMutexUnlock(mut)
#define AFP_COPY_MODE_NONE
void TmqhOutputPacketpool(ThreadVars *t, Packet *p)
TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot)
Main AF_PACKET reading Loop function.
TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *)
DeInit function closes af packet socket at exit.
#define PACKET_FANOUT_HASH
int max_pending_packets
Definition: suricata.c:213
unsigned int ring_buflen
int datalink
Definition: decode.h:574
#define GET_IPV6_DST_ADDR(p)
Definition: decode.h:218
void TmModuleReceiveAFPRegister(void)
Registration Function for ReceiveAFP.
#define TAILQ_INIT(head)
Definition: queue.h:370
int DecodeEthernet(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p, uint8_t *pkt, uint32_t len, PacketQueue *pq)
#define T_DATA_SIZE
#define SCLogError(err_code,...)
Macro used to log ERROR messages.
Definition: util-debug.h:294
TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **)
Init function for ReceiveAFP.
Structure to hold thread specific data for all decode modules.
Definition: decode.h:632
#define TAILQ_REMOVE(head, elm, field)
Definition: queue.h:412
void(* RegisterTests)(void)
Definition: tm-modules.h:65
TmEcode(* ThreadDeinit)(ThreadVars *, void *)
Definition: tm-modules.h:49
#define SCEnter(...)
Definition: util-debug.h:337
#define AFP_DOWN_COUNTER_INTERVAL
int GetIfaceMTU(const char *pcap_dev)
output the link MTU
Definition: util-ioctl.c:91
struct TmSlot_ * slot_next
Definition: tm-threads.h:87
unsigned int flags
int SCBPFCompile(int snaplen_arg, int linktype_arg, struct bpf_program *program, const char *buf, int optimize, uint32_t mask, char *errbuf, size_t errbuf_len)
Definition: util-bpf.c:41
void StatsIncr(ThreadVars *tv, uint16_t id)
Increments the local counter.
Definition: counters.c:163
#define PKT_IGNORE_CHECKSUM
Definition: decode.h:1101
unsigned int frame_offset
#define PKT_PSEUDO_STREAM_END
Definition: decode.h:1093
#define IPV6_GET_NH(p)
Definition: decode-ipv6.h:85
#define SCMutexInit(mut, mutattrs)
TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
#define SCReturnInt(x)
Definition: util-debug.h:341
LiveDevice * LiveGetDevice(const char *name)
Get a pointer to the device at idx.
Definition: util-device.c:248
TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **)
void(* ThreadExitPrintStats)(ThreadVars *, void *)
Definition: tm-modules.h:48
#define LINKTYPE_RAW
Definition: decode.h:1075
#define AFPV_CLEANUP(afpv)
#define LINKTYPE_GRE_OVER_IP
Definition: decode.h:1080
#define SCLogWarning(err_code,...)
Macro used to log WARNING messages.
Definition: util-debug.h:281
TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data)
int DecodeSll(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p, uint8_t *pkt, uint32_t len, PacketQueue *pq)
Definition: decode-sll.c:39
#define TAILQ_INSERT_TAIL(head, elm, field)
Definition: queue.h:385
uint8_t vlan_idx
Definition: decode.h:434
#define SC_CAP_NET_RAW
Definition: util-privs.h:32
void * raw
void PacketPoolWait(void)
const char * name
Definition: tm-modules.h:44
int ignore_checksum
Definition: util-device.h:45
void(* DerefFunc)(void *)
#define SCMalloc(a)
Definition: util-mem.h:166
#define GET_IPV6_SRC_ADDR(p)
Definition: decode.h:217
char iface[AFP_IFACE_NAME_LENGTH]
#define SC_ATOMIC_SET(name, val)
Set the value for the atomic variable.
Definition: util-atomic.h:207
#define SCLogInfo(...)
Macro used to log INFORMATIONAL messages.
Definition: util-debug.h:254
#define PACKET_FANOUT
int PacketSetData(Packet *p, uint8_t *pktdata, uint32_t pktlen)
Set data for Packet and set length when zero copy is used.
Definition: decode.c:638
uint8_t version
Definition: decode-gre.h:405
#define SCFree(a)
Definition: util-mem.h:228
LiveDevice * livedev
#define PKT_IS_TCP(p)
Definition: decode.h:251
TmModule tmm_modules[TMM_SIZE]
Definition: tm-modules.h:73
#define AFP_SOCK_PROTECT
VLANHdr * vlanh[2]
Definition: decode.h:536
uint32_t default_packet_size
Definition: decode.h:617
#define AFP_XDPBYPASS
#define SCLogPerf(...)
Definition: util-debug.h:261
#define AFP_MMAP_LOCKED
struct AFPPeer_ * peer
struct AFPThreadVars_ AFPThreadVars
Structure to hold thread specific variables.
#define StatsSyncCountersIfSignalled(tv)
Definition: counters.h:136
const char * bpf_filter
TmEcode(* ThreadInit)(ThreadVars *, const void *, void **)
Definition: tm-modules.h:47
#define AFP_BYPASS
#define GET_TCP_DST_PORT(p)
Definition: decode.h:220
#define AFP_RING_MODE
ChecksumValidationMode
Definition: decode.h:40
#define SC_ATOMIC_GET(name)
Get the value from the atomic variable.
Definition: util-atomic.h:192
#define PACKET_FANOUT_FLAG_DEFRAG
#define GET_PKT_DATA(p)
Definition: decode.h:223
#define TP_STATUS_USER_BUSY
void DecodeUpdatePacketCounters(ThreadVars *tv, const DecodeThreadVars *dtv, const Packet *p)
Definition: decode.c:558
char name[16]
Definition: threadvars.h:59
#define PKT_IS_PSEUDOPKT(p)
return 1 if the packet is a pseudo packet
Definition: decode.h:1131
ChecksumValidationMode checksum_mode
#define LINKTYPE_ETHERNET
Definition: decode.h:1072
struct LiveDevice_ * livedev
Definition: decode.h:553
#define AFP_EMERGENCY_MODE
#define PKT_IS_UDP(p)
Definition: decode.h:252
uint8_t len
#define AFP_PEERS_MAX_TRY
Per thread variable structure.
Definition: threadvars.h:57
struct timeval ts
Definition: decode.h:449
void ReceiveAFPThreadExitStats(ThreadVars *, void *)
This function prints stats to the screen at exit.
#define GET_PKT_LEN(p)
Definition: decode.h:222
#define AFP_COPY_MODE_IPS
#define AFP_VLAN_DISABLED
#define ACTION_DROP
uint32_t flags
Definition: decode.h:441
void StatsAddUI64(ThreadVars *tv, uint16_t id, uint64_t x)
Adds a value of type uint64_t to the local counter.
Definition: counters.c:142
union AFPThreadVars_::AFPRing ring
char out_iface[AFP_IFACE_NAME_LENGTH]
void SCBPFFree(struct bpf_program *program)
Definition: util-bpf.c:35
void DecodeThreadVarsFree(ThreadVars *tv, DecodeThreadVars *dtv)
Definition: decode.c:618
#define GET_TCP_SRC_PORT(p)
Definition: decode.h:219
#define AFP_STATE_UP
const char * bpf_filter
#define AFP_IFACE_NAME_LENGTH
ThreadVars * tv
int PacketCopyData(Packet *p, uint8_t *pktdata, uint32_t pktlen)
Copy data to Packet payload and set packet length.
Definition: decode.c:258
TmEcode DecodeAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *)
This function passes off to link type decoders.
#define AFP_TPACKET_V3
uint16_t capture_errors
uint64_t StatsGetLocalCounterValue(ThreadVars *tv, uint16_t id)
Get the value of the local copy of the counter that hold this id.
Definition: counters.c:1251
#define SCMutexDestroy