1/* $NetBSD: pcap-linux.c,v 1.7 2023/08/17 15:18:12 christos Exp $ */ 2 3/* 4 * pcap-linux.c: Packet capture interface to the Linux kernel 5 * 6 * Copyright (c) 2000 Torsten Landschoff <torsten@debian.org> 7 * Sebastian Krahmer <krahmer@cs.uni-potsdam.de> 8 * 9 * License: BSD 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in 19 * the documentation and/or other materials provided with the 20 * distribution. 21 * 3. The names of the authors may not be used to endorse or promote 22 * products derived from this software without specific prior 23 * written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 26 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 27 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 28 * 29 * Modifications: Added PACKET_MMAP support 30 * Paolo Abeni <paolo.abeni@email.it> 31 * Added TPACKET_V3 support 32 * Gabor Tatarka <gabor.tatarka@ericsson.com> 33 * 34 * based on previous works of: 35 * Simon Patarin <patarin@cs.unibo.it> 36 * Phil Wood <cpw@lanl.gov> 37 * 38 * Monitor-mode support for mac80211 includes code taken from the iw 39 * command; the copyright notice for that code is 40 * 41 * Copyright (c) 2007, 2008 Johannes Berg 42 * Copyright (c) 2007 Andy Lutomirski 43 * Copyright (c) 2007 Mike Kershaw 44 * Copyright (c) 2008 Gábor Stefanik 45 * 46 * All rights reserved. 47 * 48 * Redistribution and use in source and binary forms, with or without 49 * modification, are permitted provided that the following conditions 50 * are met: 51 * 1. 
Redistributions of source code must retain the above copyright 52 * notice, this list of conditions and the following disclaimer. 53 * 2. Redistributions in binary form must reproduce the above copyright 54 * notice, this list of conditions and the following disclaimer in the 55 * documentation and/or other materials provided with the distribution. 56 * 3. The name of the author may not be used to endorse or promote products 57 * derived from this software without specific prior written permission. 58 * 59 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 60 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 61 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 62 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 63 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 64 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 65 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 66 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 67 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 * SUCH DAMAGE. 
70 */ 71 72 73#define _GNU_SOURCE 74 75#include <sys/cdefs.h> 76__RCSID("$NetBSD: pcap-linux.c,v 1.7 2023/08/17 15:18:12 christos Exp $"); 77 78#ifdef HAVE_CONFIG_H 79#include <config.h> 80#endif 81 82#include <errno.h> 83#include <stdio.h> 84#include <stdlib.h> 85#include <unistd.h> 86#include <fcntl.h> 87#include <string.h> 88#include <limits.h> 89#include <sys/stat.h> 90#include <sys/socket.h> 91#include <sys/ioctl.h> 92#include <sys/utsname.h> 93#include <sys/mman.h> 94#include <linux/if.h> 95#include <linux/if_packet.h> 96#include <linux/sockios.h> 97#include <linux/ethtool.h> 98#include <netinet/in.h> 99#include <linux/if_ether.h> 100#include <linux/if_arp.h> 101#include <poll.h> 102#include <dirent.h> 103#include <sys/eventfd.h> 104 105#include "pcap-int.h" 106#include "pcap/sll.h" 107#include "pcap/vlan.h" 108#include "pcap/can_socketcan.h" 109 110#include "diag-control.h" 111 112/* 113 * We require TPACKET_V2 support. 114 */ 115#ifndef TPACKET2_HDRLEN 116#error "Libpcap will only work if TPACKET_V2 is supported; you must build for a 2.6.27 or later kernel" 117#endif 118 119/* check for memory mapped access availability. We assume every needed 120 * struct is defined if the macro TPACKET_HDRLEN is defined, because it 121 * uses many ring related structs and macros */ 122#ifdef TPACKET3_HDRLEN 123# define HAVE_TPACKET3 124#endif /* TPACKET3_HDRLEN */ 125 126/* 127 * Not all compilers that are used to compile code to run on Linux have 128 * these builtins. For example, older versions of GCC don't, and at 129 * least some people are doing cross-builds for MIPS with older versions 130 * of GCC. 
131 */ 132#ifndef HAVE___ATOMIC_LOAD_N 133#define __atomic_load_n(ptr, memory_model) (*(ptr)) 134#endif 135#ifndef HAVE___ATOMIC_STORE_N 136#define __atomic_store_n(ptr, val, memory_model) *(ptr) = (val) 137#endif 138 139#define packet_mmap_acquire(pkt) \ 140 (__atomic_load_n(&pkt->tp_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL) 141#define packet_mmap_release(pkt) \ 142 (__atomic_store_n(&pkt->tp_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE)) 143#define packet_mmap_v3_acquire(pkt) \ 144 (__atomic_load_n(&pkt->hdr.bh1.block_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL) 145#define packet_mmap_v3_release(pkt) \ 146 (__atomic_store_n(&pkt->hdr.bh1.block_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE)) 147 148#include <linux/types.h> 149#include <linux/filter.h> 150 151#ifdef HAVE_LINUX_NET_TSTAMP_H 152#include <linux/net_tstamp.h> 153#endif 154 155/* 156 * For checking whether a device is a bonding device. 157 */ 158#include <linux/if_bonding.h> 159 160/* 161 * Got libnl? 162 */ 163#ifdef HAVE_LIBNL 164#include <linux/nl80211.h> 165 166#include <netlink/genl/genl.h> 167#include <netlink/genl/family.h> 168#include <netlink/genl/ctrl.h> 169#include <netlink/msg.h> 170#include <netlink/attr.h> 171#endif /* HAVE_LIBNL */ 172 173#ifndef HAVE_SOCKLEN_T 174typedef int socklen_t; 175#endif 176 177#define MAX_LINKHEADER_SIZE 256 178 179/* 180 * When capturing on all interfaces we use this as the buffer size. 181 * Should be bigger then all MTUs that occur in real life. 182 * 64kB should be enough for now. 183 */ 184#define BIGGER_THAN_ALL_MTUS (64*1024) 185 186/* 187 * Private data for capturing on Linux PF_PACKET sockets. 
188 */ 189struct pcap_linux { 190 long long sysfs_dropped; /* packets reported dropped by /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors */ 191 struct pcap_stat stat; 192 193 char *device; /* device name */ 194 int filter_in_userland; /* must filter in userland */ 195 int blocks_to_filter_in_userland; 196 int must_do_on_close; /* stuff we must do when we close */ 197 int timeout; /* timeout for buffering */ 198 int cooked; /* using SOCK_DGRAM rather than SOCK_RAW */ 199 int ifindex; /* interface index of device we're bound to */ 200 int lo_ifindex; /* interface index of the loopback device */ 201 int netdown; /* we got an ENETDOWN and haven't resolved it */ 202 bpf_u_int32 oldmode; /* mode to restore when turning monitor mode off */ 203 char *mondevice; /* mac80211 monitor device we created */ 204 u_char *mmapbuf; /* memory-mapped region pointer */ 205 size_t mmapbuflen; /* size of region */ 206 int vlan_offset; /* offset at which to insert vlan tags; if -1, don't insert */ 207 u_int tp_version; /* version of tpacket_hdr for mmaped ring */ 208 u_int tp_hdrlen; /* hdrlen of tpacket_hdr for mmaped ring */ 209 u_char *oneshot_buffer; /* buffer for copy of packet */ 210 int poll_timeout; /* timeout to use in poll() */ 211#ifdef HAVE_TPACKET3 212 unsigned char *current_packet; /* Current packet within the TPACKET_V3 block. Move to next block if NULL. */ 213 int packets_left; /* Unhandled packets left within the block from previous call to pcap_read_linux_mmap_v3 in case of TPACKET_V3. */ 214#endif 215 int poll_breakloop_fd; /* fd to an eventfd to break from blocking operations */ 216}; 217 218/* 219 * Stuff to do when we close. 220 */ 221#define MUST_CLEAR_RFMON 0x00000001 /* clear rfmon (monitor) mode */ 222#define MUST_DELETE_MONIF 0x00000002 /* delete monitor-mode interface */ 223 224/* 225 * Prototypes for internal functions and methods. 
226 */ 227static int get_if_flags(const char *, bpf_u_int32 *, char *); 228static int is_wifi(const char *); 229static void map_arphrd_to_dlt(pcap_t *, int, const char *, int); 230static int pcap_activate_linux(pcap_t *); 231static int setup_socket(pcap_t *, int); 232static int setup_mmapped(pcap_t *, int *); 233static int pcap_can_set_rfmon_linux(pcap_t *); 234static int pcap_inject_linux(pcap_t *, const void *, int); 235static int pcap_stats_linux(pcap_t *, struct pcap_stat *); 236static int pcap_setfilter_linux(pcap_t *, struct bpf_program *); 237static int pcap_setdirection_linux(pcap_t *, pcap_direction_t); 238static int pcap_set_datalink_linux(pcap_t *, int); 239static void pcap_cleanup_linux(pcap_t *); 240 241union thdr { 242 struct tpacket2_hdr *h2; 243#ifdef HAVE_TPACKET3 244 struct tpacket_block_desc *h3; 245#endif 246 u_char *raw; 247}; 248 249#define RING_GET_FRAME_AT(h, offset) (((u_char **)h->buffer)[(offset)]) 250#define RING_GET_CURRENT_FRAME(h) RING_GET_FRAME_AT(h, h->offset) 251 252static void destroy_ring(pcap_t *handle); 253static int create_ring(pcap_t *handle, int *status); 254static int prepare_tpacket_socket(pcap_t *handle); 255static int pcap_read_linux_mmap_v2(pcap_t *, int, pcap_handler , u_char *); 256#ifdef HAVE_TPACKET3 257static int pcap_read_linux_mmap_v3(pcap_t *, int, pcap_handler , u_char *); 258#endif 259static int pcap_setnonblock_linux(pcap_t *p, int nonblock); 260static int pcap_getnonblock_linux(pcap_t *p); 261static void pcap_oneshot_linux(u_char *user, const struct pcap_pkthdr *h, 262 const u_char *bytes); 263 264/* 265 * In pre-3.0 kernels, the tp_vlan_tci field is set to whatever the 266 * vlan_tci field in the skbuff is. 0 can either mean "not on a VLAN" 267 * or "on VLAN 0". There is no flag set in the tp_status field to 268 * distinguish between them. 
269 * 270 * In 3.0 and later kernels, if there's a VLAN tag present, the tp_vlan_tci 271 * field is set to the VLAN tag, and the TP_STATUS_VLAN_VALID flag is set 272 * in the tp_status field, otherwise the tp_vlan_tci field is set to 0 and 273 * the TP_STATUS_VLAN_VALID flag isn't set in the tp_status field. 274 * 275 * With a pre-3.0 kernel, we cannot distinguish between packets with no 276 * VLAN tag and packets on VLAN 0, so we will mishandle some packets, and 277 * there's nothing we can do about that. 278 * 279 * So, on those systems, which never set the TP_STATUS_VLAN_VALID flag, we 280 * continue the behavior of earlier libpcaps, wherein we treated packets 281 * with a VLAN tag of 0 as being packets without a VLAN tag rather than packets 282 * on VLAN 0. We do this by treating packets with a tp_vlan_tci of 0 and 283 * with the TP_STATUS_VLAN_VALID flag not set in tp_status as not having 284 * VLAN tags. This does the right thing on 3.0 and later kernels, and 285 * continues the old unfixably-imperfect behavior on pre-3.0 kernels. 286 * 287 * If TP_STATUS_VLAN_VALID isn't defined, we test it as the 0x10 bit; it 288 * has that value in 3.0 and later kernels. 289 */ 290#ifdef TP_STATUS_VLAN_VALID 291 #define VLAN_VALID(hdr, hv) ((hv)->tp_vlan_tci != 0 || ((hdr)->tp_status & TP_STATUS_VLAN_VALID)) 292#else 293 /* 294 * This is being compiled on a system that lacks TP_STATUS_VLAN_VALID, 295 * so we testwith the value it has in the 3.0 and later kernels, so 296 * we can test it if we're running on a system that has it. (If we're 297 * running on a system that doesn't have it, it won't be set in the 298 * tp_status field, so the tests of it will always fail; that means 299 * we behave the way we did before we introduced this macro.) 
300 */ 301 #define VLAN_VALID(hdr, hv) ((hv)->tp_vlan_tci != 0 || ((hdr)->tp_status & 0x10)) 302#endif 303 304#ifdef TP_STATUS_VLAN_TPID_VALID 305# define VLAN_TPID(hdr, hv) (((hv)->tp_vlan_tpid || ((hdr)->tp_status & TP_STATUS_VLAN_TPID_VALID)) ? (hv)->tp_vlan_tpid : ETH_P_8021Q) 306#else 307# define VLAN_TPID(hdr, hv) ETH_P_8021Q 308#endif 309 310/* 311 * Required select timeout if we're polling for an "interface disappeared" 312 * indication - 1 millisecond. 313 */ 314static const struct timeval netdown_timeout = { 315 0, 1000 /* 1000 microseconds = 1 millisecond */ 316}; 317 318/* 319 * Wrap some ioctl calls 320 */ 321static int iface_get_id(int fd, const char *device, char *ebuf); 322static int iface_get_mtu(int fd, const char *device, char *ebuf); 323static int iface_get_arptype(int fd, const char *device, char *ebuf); 324static int iface_bind(int fd, int ifindex, char *ebuf, int protocol); 325static int enter_rfmon_mode(pcap_t *handle, int sock_fd, 326 const char *device); 327static int iface_get_ts_types(const char *device, pcap_t *handle, 328 char *ebuf); 329static int iface_get_offload(pcap_t *handle); 330 331static int fix_program(pcap_t *handle, struct sock_fprog *fcode); 332static int fix_offset(pcap_t *handle, struct bpf_insn *p); 333static int set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode); 334static int reset_kernel_filter(pcap_t *handle); 335 336static struct sock_filter total_insn 337 = BPF_STMT(BPF_RET | BPF_K, 0); 338static struct sock_fprog total_fcode 339 = { 1, &total_insn }; 340 341static int iface_dsa_get_proto_info(const char *device, pcap_t *handle); 342 343pcap_t * 344pcap_create_interface(const char *device, char *ebuf) 345{ 346 pcap_t *handle; 347 348 handle = PCAP_CREATE_COMMON(ebuf, struct pcap_linux); 349 if (handle == NULL) 350 return NULL; 351 352 handle->activate_op = pcap_activate_linux; 353 handle->can_set_rfmon_op = pcap_can_set_rfmon_linux; 354 355 /* 356 * See what time stamp types we support. 
357 */ 358 if (iface_get_ts_types(device, handle, ebuf) == -1) { 359 pcap_close(handle); 360 return NULL; 361 } 362 363 /* 364 * We claim that we support microsecond and nanosecond time 365 * stamps. 366 * 367 * XXX - with adapter-supplied time stamps, can we choose 368 * microsecond or nanosecond time stamps on arbitrary 369 * adapters? 370 */ 371 handle->tstamp_precision_list = malloc(2 * sizeof(u_int)); 372 if (handle->tstamp_precision_list == NULL) { 373 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 374 errno, "malloc"); 375 pcap_close(handle); 376 return NULL; 377 } 378 handle->tstamp_precision_list[0] = PCAP_TSTAMP_PRECISION_MICRO; 379 handle->tstamp_precision_list[1] = PCAP_TSTAMP_PRECISION_NANO; 380 handle->tstamp_precision_count = 2; 381 382 struct pcap_linux *handlep = handle->priv; 383 handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK); 384 385 return handle; 386} 387 388#ifdef HAVE_LIBNL 389/* 390 * If interface {if_name} is a mac80211 driver, the file 391 * /sys/class/net/{if_name}/phy80211 is a symlink to 392 * /sys/class/ieee80211/{phydev_name}, for some {phydev_name}. 393 * 394 * On Fedora 9, with a 2.6.26.3-29 kernel, my Zydas stick, at 395 * least, has a "wmaster0" device and a "wlan0" device; the 396 * latter is the one with the IP address. Both show up in 397 * "tcpdump -D" output. Capturing on the wmaster0 device 398 * captures with 802.11 headers. 399 * 400 * airmon-ng searches through /sys/class/net for devices named 401 * monN, starting with mon0; as soon as one *doesn't* exist, 402 * it chooses that as the monitor device name. If the "iw" 403 * command exists, it does 404 * 405 * iw dev {if_name} interface add {monif_name} type monitor 406 * 407 * where {monif_name} is the monitor device. It then (sigh) sleeps 408 * .1 second, and then configures the device up. 
Otherwise, if 409 * /sys/class/ieee80211/{phydev_name}/add_iface is a file, it writes 410 * {mondev_name}, without a newline, to that file, and again (sigh) 411 * sleeps .1 second, and then iwconfig's that device into monitor 412 * mode and configures it up. Otherwise, you can't do monitor mode. 413 * 414 * All these devices are "glued" together by having the 415 * /sys/class/net/{if_name}/phy80211 links pointing to the same 416 * place, so, given a wmaster, wlan, or mon device, you can 417 * find the other devices by looking for devices with 418 * the same phy80211 link. 419 * 420 * To turn monitor mode off, delete the monitor interface, 421 * either with 422 * 423 * iw dev {monif_name} interface del 424 * 425 * or by sending {monif_name}, with no NL, down 426 * /sys/class/ieee80211/{phydev_name}/remove_iface 427 * 428 * Note: if you try to create a monitor device named "monN", and 429 * there's already a "monN" device, it fails, as least with 430 * the netlink interface (which is what iw uses), with a return 431 * value of -ENFILE. (Return values are negative errnos.) We 432 * could probably use that to find an unused device. 433 * 434 * Yes, you can have multiple monitor devices for a given 435 * physical device. 436 */ 437 438/* 439 * Is this a mac80211 device? If so, fill in the physical device path and 440 * return 1; if not, return 0. On an error, fill in handle->errbuf and 441 * return PCAP_ERROR. 442 */ 443static int 444get_mac80211_phydev(pcap_t *handle, const char *device, char *phydev_path, 445 size_t phydev_max_pathlen) 446{ 447 char *pathstr; 448 ssize_t bytes_read; 449 450 /* 451 * Generate the path string for the symlink to the physical device. 
452 */ 453 if (asprintf(&pathstr, "/sys/class/net/%s/phy80211", device) == -1) { 454 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 455 "%s: Can't generate path name string for /sys/class/net device", 456 device); 457 return PCAP_ERROR; 458 } 459 bytes_read = readlink(pathstr, phydev_path, phydev_max_pathlen); 460 if (bytes_read == -1) { 461 if (errno == ENOENT || errno == EINVAL) { 462 /* 463 * Doesn't exist, or not a symlink; assume that 464 * means it's not a mac80211 device. 465 */ 466 free(pathstr); 467 return 0; 468 } 469 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 470 errno, "%s: Can't readlink %s", device, pathstr); 471 free(pathstr); 472 return PCAP_ERROR; 473 } 474 free(pathstr); 475 phydev_path[bytes_read] = '\0'; 476 return 1; 477} 478 479struct nl80211_state { 480 struct nl_sock *nl_sock; 481 struct nl_cache *nl_cache; 482 struct genl_family *nl80211; 483}; 484 485static int 486nl80211_init(pcap_t *handle, struct nl80211_state *state, const char *device) 487{ 488 int err; 489 490 state->nl_sock = nl_socket_alloc(); 491 if (!state->nl_sock) { 492 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 493 "%s: failed to allocate netlink handle", device); 494 return PCAP_ERROR; 495 } 496 497 if (genl_connect(state->nl_sock)) { 498 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 499 "%s: failed to connect to generic netlink", device); 500 goto out_handle_destroy; 501 } 502 503 err = genl_ctrl_alloc_cache(state->nl_sock, &state->nl_cache); 504 if (err < 0) { 505 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 506 "%s: failed to allocate generic netlink cache: %s", 507 device, nl_geterror(-err)); 508 goto out_handle_destroy; 509 } 510 511 state->nl80211 = genl_ctrl_search_by_name(state->nl_cache, "nl80211"); 512 if (!state->nl80211) { 513 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 514 "%s: nl80211 not found", device); 515 goto out_cache_free; 516 } 517 518 return 0; 519 520out_cache_free: 521 nl_cache_free(state->nl_cache); 522out_handle_destroy: 523 
nl_socket_free(state->nl_sock); 524 return PCAP_ERROR; 525} 526 527static void 528nl80211_cleanup(struct nl80211_state *state) 529{ 530 genl_family_put(state->nl80211); 531 nl_cache_free(state->nl_cache); 532 nl_socket_free(state->nl_sock); 533} 534 535static int 536del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state, 537 const char *device, const char *mondevice); 538 539static int 540add_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state, 541 const char *device, const char *mondevice) 542{ 543 struct pcap_linux *handlep = handle->priv; 544 int ifindex; 545 struct nl_msg *msg; 546 int err; 547 548 ifindex = iface_get_id(sock_fd, device, handle->errbuf); 549 if (ifindex == -1) 550 return PCAP_ERROR; 551 552 msg = nlmsg_alloc(); 553 if (!msg) { 554 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 555 "%s: failed to allocate netlink msg", device); 556 return PCAP_ERROR; 557 } 558 559 genlmsg_put(msg, 0, 0, genl_family_get_id(state->nl80211), 0, 560 0, NL80211_CMD_NEW_INTERFACE, 0); 561 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex); 562DIAG_OFF_NARROWING 563 NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, mondevice); 564DIAG_ON_NARROWING 565 NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, NL80211_IFTYPE_MONITOR); 566 567 err = nl_send_auto_complete(state->nl_sock, msg); 568 if (err < 0) { 569 if (err == -NLE_FAILURE) { 570 /* 571 * Device not available; our caller should just 572 * keep trying. (libnl 2.x maps ENFILE to 573 * NLE_FAILURE; it can also map other errors 574 * to that, but there's not much we can do 575 * about that.) 576 */ 577 nlmsg_free(msg); 578 return 0; 579 } else { 580 /* 581 * Real failure, not just "that device is not 582 * available. 
583 */ 584 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 585 "%s: nl_send_auto_complete failed adding %s interface: %s", 586 device, mondevice, nl_geterror(-err)); 587 nlmsg_free(msg); 588 return PCAP_ERROR; 589 } 590 } 591 err = nl_wait_for_ack(state->nl_sock); 592 if (err < 0) { 593 if (err == -NLE_FAILURE) { 594 /* 595 * Device not available; our caller should just 596 * keep trying. (libnl 2.x maps ENFILE to 597 * NLE_FAILURE; it can also map other errors 598 * to that, but there's not much we can do 599 * about that.) 600 */ 601 nlmsg_free(msg); 602 return 0; 603 } else { 604 /* 605 * Real failure, not just "that device is not 606 * available. 607 */ 608 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 609 "%s: nl_wait_for_ack failed adding %s interface: %s", 610 device, mondevice, nl_geterror(-err)); 611 nlmsg_free(msg); 612 return PCAP_ERROR; 613 } 614 } 615 616 /* 617 * Success. 618 */ 619 nlmsg_free(msg); 620 621 /* 622 * Try to remember the monitor device. 623 */ 624 handlep->mondevice = strdup(mondevice); 625 if (handlep->mondevice == NULL) { 626 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 627 errno, "strdup"); 628 /* 629 * Get rid of the monitor device. 
630 */ 631 del_mon_if(handle, sock_fd, state, device, mondevice); 632 return PCAP_ERROR; 633 } 634 return 1; 635 636nla_put_failure: 637 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 638 "%s: nl_put failed adding %s interface", 639 device, mondevice); 640 nlmsg_free(msg); 641 return PCAP_ERROR; 642} 643 644static int 645del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state, 646 const char *device, const char *mondevice) 647{ 648 int ifindex; 649 struct nl_msg *msg; 650 int err; 651 652 ifindex = iface_get_id(sock_fd, mondevice, handle->errbuf); 653 if (ifindex == -1) 654 return PCAP_ERROR; 655 656 msg = nlmsg_alloc(); 657 if (!msg) { 658 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 659 "%s: failed to allocate netlink msg", device); 660 return PCAP_ERROR; 661 } 662 663 genlmsg_put(msg, 0, 0, genl_family_get_id(state->nl80211), 0, 664 0, NL80211_CMD_DEL_INTERFACE, 0); 665 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex); 666 667 err = nl_send_auto_complete(state->nl_sock, msg); 668 if (err < 0) { 669 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 670 "%s: nl_send_auto_complete failed deleting %s interface: %s", 671 device, mondevice, nl_geterror(-err)); 672 nlmsg_free(msg); 673 return PCAP_ERROR; 674 } 675 err = nl_wait_for_ack(state->nl_sock); 676 if (err < 0) { 677 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 678 "%s: nl_wait_for_ack failed adding %s interface: %s", 679 device, mondevice, nl_geterror(-err)); 680 nlmsg_free(msg); 681 return PCAP_ERROR; 682 } 683 684 /* 685 * Success. 
686 */ 687 nlmsg_free(msg); 688 return 1; 689 690nla_put_failure: 691 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 692 "%s: nl_put failed deleting %s interface", 693 device, mondevice); 694 nlmsg_free(msg); 695 return PCAP_ERROR; 696} 697#endif /* HAVE_LIBNL */ 698 699static int pcap_protocol(pcap_t *handle) 700{ 701 int protocol; 702 703 protocol = handle->opt.protocol; 704 if (protocol == 0) 705 protocol = ETH_P_ALL; 706 707 return htons(protocol); 708} 709 710static int 711pcap_can_set_rfmon_linux(pcap_t *handle) 712{ 713#ifdef HAVE_LIBNL 714 char phydev_path[PATH_MAX+1]; 715 int ret; 716#endif 717 718 if (strcmp(handle->opt.device, "any") == 0) { 719 /* 720 * Monitor mode makes no sense on the "any" device. 721 */ 722 return 0; 723 } 724 725#ifdef HAVE_LIBNL 726 /* 727 * Bleah. There doesn't seem to be a way to ask a mac80211 728 * device, through libnl, whether it supports monitor mode; 729 * we'll just check whether the device appears to be a 730 * mac80211 device and, if so, assume the device supports 731 * monitor mode. 732 */ 733 ret = get_mac80211_phydev(handle, handle->opt.device, phydev_path, 734 PATH_MAX); 735 if (ret < 0) 736 return ret; /* error */ 737 if (ret == 1) 738 return 1; /* mac80211 device */ 739#endif 740 741 return 0; 742} 743 744/* 745 * Grabs the number of missed packets by the interface from 746 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors. 747 * 748 * Compared to /proc/net/dev this avoids counting software drops, 749 * but may be unimplemented and just return 0. 750 * The author has found no straigthforward way to check for support. 
751 */ 752static long long int 753linux_get_stat(const char * if_name, const char * stat) { 754 ssize_t bytes_read; 755 int fd; 756 char buffer[PATH_MAX]; 757 758 snprintf(buffer, sizeof(buffer), "/sys/class/net/%s/statistics/%s", if_name, stat); 759 fd = open(buffer, O_RDONLY); 760 if (fd == -1) 761 return 0; 762 763 bytes_read = read(fd, buffer, sizeof(buffer) - 1); 764 close(fd); 765 if (bytes_read == -1) 766 return 0; 767 buffer[bytes_read] = '\0'; 768 769 return strtoll(buffer, NULL, 10); 770} 771 772static long long int 773linux_if_drops(const char * if_name) 774{ 775 long long int missed = linux_get_stat(if_name, "rx_missed_errors"); 776 long long int fifo = linux_get_stat(if_name, "rx_fifo_errors"); 777 return missed + fifo; 778} 779 780 781/* 782 * Monitor mode is kind of interesting because we have to reset the 783 * interface before exiting. The problem can't really be solved without 784 * some daemon taking care of managing usage counts. If we put the 785 * interface into monitor mode, we set a flag indicating that we must 786 * take it out of that mode when the interface is closed, and, when 787 * closing the interface, if that flag is set we take it out of monitor 788 * mode. 789 */ 790 791static void pcap_cleanup_linux( pcap_t *handle ) 792{ 793 struct pcap_linux *handlep = handle->priv; 794#ifdef HAVE_LIBNL 795 struct nl80211_state nlstate; 796 int ret; 797#endif /* HAVE_LIBNL */ 798 799 if (handlep->must_do_on_close != 0) { 800 /* 801 * There's something we have to do when closing this 802 * pcap_t. 
803 */ 804#ifdef HAVE_LIBNL 805 if (handlep->must_do_on_close & MUST_DELETE_MONIF) { 806 ret = nl80211_init(handle, &nlstate, handlep->device); 807 if (ret >= 0) { 808 ret = del_mon_if(handle, handle->fd, &nlstate, 809 handlep->device, handlep->mondevice); 810 nl80211_cleanup(&nlstate); 811 } 812 if (ret < 0) { 813 fprintf(stderr, 814 "Can't delete monitor interface %s (%s).\n" 815 "Please delete manually.\n", 816 handlep->mondevice, handle->errbuf); 817 } 818 } 819#endif /* HAVE_LIBNL */ 820 821 /* 822 * Take this pcap out of the list of pcaps for which we 823 * have to take the interface out of some mode. 824 */ 825 pcap_remove_from_pcaps_to_close(handle); 826 } 827 828 if (handle->fd != -1) { 829 /* 830 * Destroy the ring buffer (assuming we've set it up), 831 * and unmap it if it's mapped. 832 */ 833 destroy_ring(handle); 834 } 835 836 if (handlep->oneshot_buffer != NULL) { 837 free(handlep->oneshot_buffer); 838 handlep->oneshot_buffer = NULL; 839 } 840 841 if (handlep->mondevice != NULL) { 842 free(handlep->mondevice); 843 handlep->mondevice = NULL; 844 } 845 if (handlep->device != NULL) { 846 free(handlep->device); 847 handlep->device = NULL; 848 } 849 850 if (handlep->poll_breakloop_fd != -1) { 851 close(handlep->poll_breakloop_fd); 852 handlep->poll_breakloop_fd = -1; 853 } 854 pcap_cleanup_live_common(handle); 855} 856 857#ifdef HAVE_TPACKET3 858/* 859 * Some versions of TPACKET_V3 have annoying bugs/misfeatures 860 * around which we have to work. Determine if we have those 861 * problems or not. 862 * 3.19 is the first release with a fixed version of 863 * TPACKET_V3. We treat anything before that as 864 * not having a fixed version; that may really mean 865 * it has *no* version. 866 */ 867static int has_broken_tpacket_v3(void) 868{ 869 struct utsname utsname; 870 const char *release; 871 long major, minor; 872 int matches, verlen; 873 874 /* No version information, assume broken. 
*/ 875 if (uname(&utsname) == -1) 876 return 1; 877 release = utsname.release; 878 879 /* A malformed version, ditto. */ 880 matches = sscanf(release, "%ld.%ld%n", &major, &minor, &verlen); 881 if (matches != 2) 882 return 1; 883 if (release[verlen] != '.' && release[verlen] != '\0') 884 return 1; 885 886 /* OK, a fixed version. */ 887 if (major > 3 || (major == 3 && minor >= 19)) 888 return 0; 889 890 /* Too old :( */ 891 return 1; 892} 893#endif 894 895/* 896 * Set the timeout to be used in poll() with memory-mapped packet capture. 897 */ 898static void 899set_poll_timeout(struct pcap_linux *handlep) 900{ 901#ifdef HAVE_TPACKET3 902 int broken_tpacket_v3 = has_broken_tpacket_v3(); 903#endif 904 if (handlep->timeout == 0) { 905#ifdef HAVE_TPACKET3 906 /* 907 * XXX - due to a set of (mis)features in the TPACKET_V3 908 * kernel code prior to the 3.19 kernel, blocking forever 909 * with a TPACKET_V3 socket can, if few packets are 910 * arriving and passing the socket filter, cause most 911 * packets to be dropped. See libpcap issue #335 for the 912 * full painful story. 913 * 914 * The workaround is to have poll() time out very quickly, 915 * so we grab the frames handed to us, and return them to 916 * the kernel, ASAP. 917 */ 918 if (handlep->tp_version == TPACKET_V3 && broken_tpacket_v3) 919 handlep->poll_timeout = 1; /* don't block for very long */ 920 else 921#endif 922 handlep->poll_timeout = -1; /* block forever */ 923 } else if (handlep->timeout > 0) { 924#ifdef HAVE_TPACKET3 925 /* 926 * For TPACKET_V3, the timeout is handled by the kernel, 927 * so block forever; that way, we don't get extra timeouts. 928 * Don't do that if we have a broken TPACKET_V3, though. 
929 */ 930 if (handlep->tp_version == TPACKET_V3 && !broken_tpacket_v3) 931 handlep->poll_timeout = -1; /* block forever, let TPACKET_V3 wake us up */ 932 else 933#endif 934 handlep->poll_timeout = handlep->timeout; /* block for that amount of time */ 935 } else { 936 /* 937 * Non-blocking mode; we call poll() to pick up error 938 * indications, but we don't want it to wait for 939 * anything. 940 */ 941 handlep->poll_timeout = 0; 942 } 943} 944 945static void pcap_breakloop_linux(pcap_t *handle) 946{ 947 pcap_breakloop_common(handle); 948 struct pcap_linux *handlep = handle->priv; 949 950 uint64_t value = 1; 951 /* XXX - what if this fails? */ 952 if (handlep->poll_breakloop_fd != -1) 953 (void)write(handlep->poll_breakloop_fd, &value, sizeof(value)); 954} 955 956/* 957 * Set the offset at which to insert VLAN tags. 958 * That should be the offset of the type field. 959 */ 960static void 961set_vlan_offset(pcap_t *handle) 962{ 963 struct pcap_linux *handlep = handle->priv; 964 965 switch (handle->linktype) { 966 967 case DLT_EN10MB: 968 /* 969 * The type field is after the destination and source 970 * MAC address. 971 */ 972 handlep->vlan_offset = 2 * ETH_ALEN; 973 break; 974 975 case DLT_LINUX_SLL: 976 /* 977 * The type field is in the last 2 bytes of the 978 * DLT_LINUX_SLL header. 979 */ 980 handlep->vlan_offset = SLL_HDR_LEN - 2; 981 break; 982 983 default: 984 handlep->vlan_offset = -1; /* unknown */ 985 break; 986 } 987} 988 989/* 990 * Get a handle for a live capture from the given device. You can 991 * pass NULL as device to get all packages (without link level 992 * information of course). If you pass 1 as promisc the interface 993 * will be set to promiscuous mode (XXX: I think this usage should 994 * be deprecated and functions be added to select that later allow 995 * modification of that values -- Torsten). 
996 */ 997static int 998pcap_activate_linux(pcap_t *handle) 999{ 1000 struct pcap_linux *handlep = handle->priv; 1001 const char *device; 1002 int is_any_device; 1003 struct ifreq ifr; 1004 int status = 0; 1005 int status2 = 0; 1006 int ret; 1007 1008 device = handle->opt.device; 1009 1010 /* 1011 * Make sure the name we were handed will fit into the ioctls we 1012 * might perform on the device; if not, return a "No such device" 1013 * indication, as the Linux kernel shouldn't support creating 1014 * a device whose name won't fit into those ioctls. 1015 * 1016 * "Will fit" means "will fit, complete with a null terminator", 1017 * so if the length, which does *not* include the null terminator, 1018 * is greater than *or equal to* the size of the field into which 1019 * we'll be copying it, that won't fit. 1020 */ 1021 if (strlen(device) >= sizeof(ifr.ifr_name)) { 1022 /* 1023 * There's nothing more to say, so clear the error 1024 * message. 1025 */ 1026 handle->errbuf[0] = '\0'; 1027 status = PCAP_ERROR_NO_SUCH_DEVICE; 1028 goto fail; 1029 } 1030 1031 /* 1032 * Turn a negative snapshot value (invalid), a snapshot value of 1033 * 0 (unspecified), or a value bigger than the normal maximum 1034 * value, into the maximum allowed value. 1035 * 1036 * If some application really *needs* a bigger snapshot 1037 * length, we should just increase MAXIMUM_SNAPLEN. 1038 */ 1039 if (handle->snapshot <= 0 || handle->snapshot > MAXIMUM_SNAPLEN) 1040 handle->snapshot = MAXIMUM_SNAPLEN; 1041 1042 handlep->device = strdup(device); 1043 if (handlep->device == NULL) { 1044 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 1045 errno, "strdup"); 1046 status = PCAP_ERROR; 1047 goto fail; 1048 } 1049 1050 /* 1051 * The "any" device is a special device which causes us not 1052 * to bind to a particular device and thus to look at all 1053 * devices. 
1054 */ 1055 is_any_device = (strcmp(device, "any") == 0); 1056 if (is_any_device) { 1057 if (handle->opt.promisc) { 1058 handle->opt.promisc = 0; 1059 /* Just a warning. */ 1060 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 1061 "Promiscuous mode not supported on the \"any\" device"); 1062 status = PCAP_WARNING_PROMISC_NOTSUP; 1063 } 1064 } 1065 1066 /* copy timeout value */ 1067 handlep->timeout = handle->opt.timeout; 1068 1069 /* 1070 * If we're in promiscuous mode, then we probably want 1071 * to see when the interface drops packets too, so get an 1072 * initial count from 1073 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors 1074 */ 1075 if (handle->opt.promisc) 1076 handlep->sysfs_dropped = linux_if_drops(handlep->device); 1077 1078 /* 1079 * If the "any" device is specified, try to open a SOCK_DGRAM. 1080 * Otherwise, open a SOCK_RAW. 1081 */ 1082 ret = setup_socket(handle, is_any_device); 1083 if (ret < 0) { 1084 /* 1085 * Fatal error; the return value is the error code, 1086 * and handle->errbuf has been set to an appropriate 1087 * error message. 1088 */ 1089 status = ret; 1090 goto fail; 1091 } 1092 /* 1093 * Success. 1094 * Try to set up memory-mapped access. 1095 */ 1096 ret = setup_mmapped(handle, &status); 1097 if (ret == -1) { 1098 /* 1099 * We failed to set up to use it, or the 1100 * kernel supports it, but we failed to 1101 * enable it. status has been set to the 1102 * error status to return and, if it's 1103 * PCAP_ERROR, handle->errbuf contains 1104 * the error message. 1105 */ 1106 goto fail; 1107 } 1108 1109 /* 1110 * We succeeded. status has been set to the status to return, 1111 * which might be 0, or might be a PCAP_WARNING_ value. 1112 */ 1113 /* 1114 * Now that we have activated the mmap ring, we can 1115 * set the correct protocol. 
1116 */ 1117 if ((status2 = iface_bind(handle->fd, handlep->ifindex, 1118 handle->errbuf, pcap_protocol(handle))) != 0) { 1119 status = status2; 1120 goto fail; 1121 } 1122 1123 handle->inject_op = pcap_inject_linux; 1124 handle->setfilter_op = pcap_setfilter_linux; 1125 handle->setdirection_op = pcap_setdirection_linux; 1126 handle->set_datalink_op = pcap_set_datalink_linux; 1127 handle->setnonblock_op = pcap_setnonblock_linux; 1128 handle->getnonblock_op = pcap_getnonblock_linux; 1129 handle->cleanup_op = pcap_cleanup_linux; 1130 handle->stats_op = pcap_stats_linux; 1131 handle->breakloop_op = pcap_breakloop_linux; 1132 1133 switch (handlep->tp_version) { 1134 1135 case TPACKET_V2: 1136 handle->read_op = pcap_read_linux_mmap_v2; 1137 break; 1138#ifdef HAVE_TPACKET3 1139 case TPACKET_V3: 1140 handle->read_op = pcap_read_linux_mmap_v3; 1141 break; 1142#endif 1143 } 1144 handle->oneshot_callback = pcap_oneshot_linux; 1145 handle->selectable_fd = handle->fd; 1146 1147 return status; 1148 1149fail: 1150 pcap_cleanup_linux(handle); 1151 return status; 1152} 1153 1154static int 1155pcap_set_datalink_linux(pcap_t *handle, int dlt) 1156{ 1157 handle->linktype = dlt; 1158 1159 /* 1160 * Update the offset at which to insert VLAN tags for the 1161 * new link-layer type. 1162 */ 1163 set_vlan_offset(handle); 1164 1165 return 0; 1166} 1167 1168/* 1169 * linux_check_direction() 1170 * 1171 * Do checks based on packet direction. 1172 */ 1173static inline int 1174linux_check_direction(const pcap_t *handle, const struct sockaddr_ll *sll) 1175{ 1176 struct pcap_linux *handlep = handle->priv; 1177 1178 if (sll->sll_pkttype == PACKET_OUTGOING) { 1179 /* 1180 * Outgoing packet. 1181 * If this is from the loopback device, reject it; 1182 * we'll see the packet as an incoming packet as well, 1183 * and we don't want to see it twice. 
1184 */ 1185 if (sll->sll_ifindex == handlep->lo_ifindex) 1186 return 0; 1187 1188 /* 1189 * If this is an outgoing CAN or CAN FD frame, and 1190 * the user doesn't only want outgoing packets, 1191 * reject it; CAN devices and drivers, and the CAN 1192 * stack, always arrange to loop back transmitted 1193 * packets, so they also appear as incoming packets. 1194 * We don't want duplicate packets, and we can't 1195 * easily distinguish packets looped back by the CAN 1196 * layer than those received by the CAN layer, so we 1197 * eliminate this packet instead. 1198 * 1199 * We check whether this is a CAN or CAN FD frame 1200 * by checking whether the device's hardware type 1201 * is ARPHRD_CAN. 1202 */ 1203 if (sll->sll_hatype == ARPHRD_CAN && 1204 handle->direction != PCAP_D_OUT) 1205 return 0; 1206 1207 /* 1208 * If the user only wants incoming packets, reject it. 1209 */ 1210 if (handle->direction == PCAP_D_IN) 1211 return 0; 1212 } else { 1213 /* 1214 * Incoming packet. 1215 * If the user only wants outgoing packets, reject it. 1216 */ 1217 if (handle->direction == PCAP_D_OUT) 1218 return 0; 1219 } 1220 return 1; 1221} 1222 1223/* 1224 * Check whether the device to which the pcap_t is bound still exists. 1225 * We do so by asking what address the socket is bound to, and checking 1226 * whether the ifindex in the address is -1, meaning "that device is gone", 1227 * or some other value, meaning "that device still exists". 1228 */ 1229static int 1230device_still_exists(pcap_t *handle) 1231{ 1232 struct pcap_linux *handlep = handle->priv; 1233 struct sockaddr_ll addr; 1234 socklen_t addr_len; 1235 1236 /* 1237 * If handlep->ifindex is -1, the socket isn't bound, meaning 1238 * we're capturing on the "any" device; that device never 1239 * disappears. (It should also never be configured down, so 1240 * we shouldn't even get here, but let's make sure.) 
1241 */ 1242 if (handlep->ifindex == -1) 1243 return (1); /* it's still here */ 1244 1245 /* 1246 * OK, now try to get the address for the socket. 1247 */ 1248 addr_len = sizeof (addr); 1249 if (getsockname(handle->fd, (struct sockaddr *) &addr, &addr_len) == -1) { 1250 /* 1251 * Error - report an error and return -1. 1252 */ 1253 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 1254 errno, "getsockname failed"); 1255 return (-1); 1256 } 1257 if (addr.sll_ifindex == -1) { 1258 /* 1259 * This means the device went away. 1260 */ 1261 return (0); 1262 } 1263 1264 /* 1265 * The device presumably just went down. 1266 */ 1267 return (1); 1268} 1269 1270static int 1271pcap_inject_linux(pcap_t *handle, const void *buf, int size) 1272{ 1273 struct pcap_linux *handlep = handle->priv; 1274 int ret; 1275 1276 if (handlep->ifindex == -1) { 1277 /* 1278 * We don't support sending on the "any" device. 1279 */ 1280 pcap_strlcpy(handle->errbuf, 1281 "Sending packets isn't supported on the \"any\" device", 1282 PCAP_ERRBUF_SIZE); 1283 return (-1); 1284 } 1285 1286 if (handlep->cooked) { 1287 /* 1288 * We don't support sending on cooked-mode sockets. 1289 * 1290 * XXX - how do you send on a bound cooked-mode 1291 * socket? 1292 * Is a "sendto()" required there? 1293 */ 1294 pcap_strlcpy(handle->errbuf, 1295 "Sending packets isn't supported in cooked mode", 1296 PCAP_ERRBUF_SIZE); 1297 return (-1); 1298 } 1299 1300 ret = (int)send(handle->fd, buf, size, 0); 1301 if (ret == -1) { 1302 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 1303 errno, "send"); 1304 return (-1); 1305 } 1306 return (ret); 1307} 1308 1309/* 1310 * Get the statistics for the given packet capture handle. 
1311 */ 1312static int 1313pcap_stats_linux(pcap_t *handle, struct pcap_stat *stats) 1314{ 1315 struct pcap_linux *handlep = handle->priv; 1316#ifdef HAVE_TPACKET3 1317 /* 1318 * For sockets using TPACKET_V2, the extra stuff at the end 1319 * of a struct tpacket_stats_v3 will not be filled in, and 1320 * we don't look at it so this is OK even for those sockets. 1321 * In addition, the PF_PACKET socket code in the kernel only 1322 * uses the length parameter to compute how much data to 1323 * copy out and to indicate how much data was copied out, so 1324 * it's OK to base it on the size of a struct tpacket_stats. 1325 * 1326 * XXX - it's probably OK, in fact, to just use a 1327 * struct tpacket_stats for V3 sockets, as we don't 1328 * care about the tp_freeze_q_cnt stat. 1329 */ 1330 struct tpacket_stats_v3 kstats; 1331#else /* HAVE_TPACKET3 */ 1332 struct tpacket_stats kstats; 1333#endif /* HAVE_TPACKET3 */ 1334 socklen_t len = sizeof (struct tpacket_stats); 1335 1336 long long if_dropped = 0; 1337 1338 /* 1339 * To fill in ps_ifdrop, we parse 1340 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors 1341 * for the numbers 1342 */ 1343 if (handle->opt.promisc) 1344 { 1345 /* 1346 * XXX - is there any reason to do this by remembering 1347 * the last counts value, subtracting it from the 1348 * current counts value, and adding that to stat.ps_ifdrop, 1349 * maintaining stat.ps_ifdrop as a count, rather than just 1350 * saving the *initial* counts value and setting 1351 * stat.ps_ifdrop to the difference between the current 1352 * value and the initial value? 1353 * 1354 * One reason might be to handle the count wrapping 1355 * around, on platforms where the count is 32 bits 1356 * and where you might get more than 2^32 dropped 1357 * packets; is there any other reason? 1358 * 1359 * (We maintain the count as a long long int so that, 1360 * if the kernel maintains the counts as 64-bit even 1361 * on 32-bit platforms, we can handle the real count. 
1362 * 1363 * Unfortunately, we can't report 64-bit counts; we 1364 * need a better API for reporting statistics, such as 1365 * one that reports them in a style similar to the 1366 * pcapng Interface Statistics Block, so that 1) the 1367 * counts are 64-bit, 2) it's easier to add new statistics 1368 * without breaking the ABI, and 3) it's easier to 1369 * indicate to a caller that wants one particular 1370 * statistic that it's not available by just not supplying 1371 * it.) 1372 */ 1373 if_dropped = handlep->sysfs_dropped; 1374 handlep->sysfs_dropped = linux_if_drops(handlep->device); 1375 handlep->stat.ps_ifdrop += (u_int)(handlep->sysfs_dropped - if_dropped); 1376 } 1377 1378 /* 1379 * Try to get the packet counts from the kernel. 1380 */ 1381 if (getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, 1382 &kstats, &len) > -1) { 1383 /* 1384 * "ps_recv" counts only packets that *passed* the 1385 * filter, not packets that didn't pass the filter. 1386 * This includes packets later dropped because we 1387 * ran out of buffer space. 1388 * 1389 * "ps_drop" counts packets dropped because we ran 1390 * out of buffer space. It doesn't count packets 1391 * dropped by the interface driver. It counts only 1392 * packets that passed the filter. 1393 * 1394 * See above for ps_ifdrop. 1395 * 1396 * Both statistics include packets not yet read from 1397 * the kernel by libpcap, and thus not yet seen by 1398 * the application. 1399 * 1400 * In "linux/net/packet/af_packet.c", at least in 2.6.27 1401 * through 5.6 kernels, "tp_packets" is incremented for 1402 * every packet that passes the packet filter *and* is 1403 * successfully copied to the ring buffer; "tp_drops" is 1404 * incremented for every packet dropped because there's 1405 * not enough free space in the ring buffer. 
1406 * 1407 * When the statistics are returned for a PACKET_STATISTICS 1408 * "getsockopt()" call, "tp_drops" is added to "tp_packets", 1409 * so that "tp_packets" counts all packets handed to 1410 * the PF_PACKET socket, including packets dropped because 1411 * there wasn't room on the socket buffer - but not 1412 * including packets that didn't pass the filter. 1413 * 1414 * In the BSD BPF, the count of received packets is 1415 * incremented for every packet handed to BPF, regardless 1416 * of whether it passed the filter. 1417 * 1418 * We can't make "pcap_stats()" work the same on both 1419 * platforms, but the best approximation is to return 1420 * "tp_packets" as the count of packets and "tp_drops" 1421 * as the count of drops. 1422 * 1423 * Keep a running total because each call to 1424 * getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, .... 1425 * resets the counters to zero. 1426 */ 1427 handlep->stat.ps_recv += kstats.tp_packets; 1428 handlep->stat.ps_drop += kstats.tp_drops; 1429 *stats = handlep->stat; 1430 return 0; 1431 } 1432 1433 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, errno, 1434 "failed to get statistics from socket"); 1435 return -1; 1436} 1437 1438/* 1439 * Description string for the "any" device. 1440 */ 1441static const char any_descr[] = "Pseudo-device that captures on all interfaces"; 1442 1443/* 1444 * A PF_PACKET socket can be bound to any network interface. 1445 */ 1446static int 1447can_be_bound(const char *name _U_) 1448{ 1449 return (1); 1450} 1451 1452/* 1453 * Get a socket to use with various interface ioctls. 1454 */ 1455static int 1456get_if_ioctl_socket(void) 1457{ 1458 int fd; 1459 1460 /* 1461 * This is a bit ugly. 1462 * 1463 * There isn't a socket type that's guaranteed to work. 1464 * 1465 * AF_NETLINK will work *if* you have Netlink configured into the 1466 * kernel (can it be configured out if you have any networking 1467 * support at all?) 
*and* if you're running a sufficiently recent 1468 * kernel, but not all the kernels we support are sufficiently 1469 * recent - that feature was introduced in Linux 4.6. 1470 * 1471 * AF_UNIX will work *if* you have UNIX-domain sockets configured 1472 * into the kernel and *if* you're not on a system that doesn't 1473 * allow them - some SELinux systems don't allow you create them. 1474 * Most systems probably have them configured in, but not all systems 1475 * have them configured in and allow them to be created. 1476 * 1477 * AF_INET will work *if* you have IPv4 configured into the kernel, 1478 * but, apparently, some systems have network adapters but have 1479 * kernels without IPv4 support. 1480 * 1481 * AF_INET6 will work *if* you have IPv6 configured into the 1482 * kernel, but if you don't have AF_INET, you might not have 1483 * AF_INET6, either (that is, independently on its own grounds). 1484 * 1485 * AF_PACKET would work, except that some of these calls should 1486 * work even if you *don't* have capture permission (you should be 1487 * able to enumerate interfaces and get information about them 1488 * without capture permission; you shouldn't get a failure until 1489 * you try pcap_activate()). (If you don't allow programs to 1490 * get as much information as possible about interfaces if you 1491 * don't have permission to capture, you run the risk of users 1492 * asking "why isn't it showing XXX" - or, worse, if you don't 1493 * show interfaces *at all* if you don't have permission to 1494 * capture on them, "why do no interfaces show up?" - when the 1495 * real problem is a permissions problem. Error reports of that 1496 * type require a lot more back-and-forth to debug, as evidenced 1497 * by many Wireshark bugs/mailing list questions/Q&A questions.) 
1498 * 1499 * So: 1500 * 1501 * we first try an AF_NETLINK socket, where "try" includes 1502 * "try to do a device ioctl on it", as, in the future, once 1503 * pre-4.6 kernels are sufficiently rare, that will probably 1504 * be the mechanism most likely to work; 1505 * 1506 * if that fails, we try an AF_UNIX socket, as that's less 1507 * likely to be configured out on a networking-capable system 1508 * than is IP; 1509 * 1510 * if that fails, we try an AF_INET6 socket; 1511 * 1512 * if that fails, we try an AF_INET socket. 1513 */ 1514 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 1515 if (fd != -1) { 1516 /* 1517 * OK, let's make sure we can do an SIOCGIFNAME 1518 * ioctl. 1519 */ 1520 struct ifreq ifr; 1521 1522 memset(&ifr, 0, sizeof(ifr)); 1523 if (ioctl(fd, SIOCGIFNAME, &ifr) == 0 || 1524 errno != EOPNOTSUPP) { 1525 /* 1526 * It succeeded, or failed for some reason 1527 * other than "netlink sockets don't support 1528 * device ioctls". Go with the AF_NETLINK 1529 * socket. 1530 */ 1531 return (fd); 1532 } 1533 1534 /* 1535 * OK, that didn't work, so it's as bad as "netlink 1536 * sockets aren't available". Close the socket and 1537 * drive on. 1538 */ 1539 close(fd); 1540 } 1541 1542 /* 1543 * Now try an AF_UNIX socket. 1544 */ 1545 fd = socket(AF_UNIX, SOCK_RAW, 0); 1546 if (fd != -1) { 1547 /* 1548 * OK, we got it! 1549 */ 1550 return (fd); 1551 } 1552 1553 /* 1554 * Now try an AF_INET6 socket. 1555 */ 1556 fd = socket(AF_INET6, SOCK_DGRAM, 0); 1557 if (fd != -1) { 1558 return (fd); 1559 } 1560 1561 /* 1562 * Now try an AF_INET socket. 1563 * 1564 * XXX - if that fails, is there anything else we should try? 1565 * AF_CAN, for embedded systems in vehicles, in case they're 1566 * built without Internet protocol support? Any other socket 1567 * types popular in non-Internet embedded systems? 1568 */ 1569 return (socket(AF_INET, SOCK_DGRAM, 0)); 1570} 1571 1572/* 1573 * Get additional flags for a device, using SIOCGIFMEDIA. 
1574 */ 1575static int 1576get_if_flags(const char *name, bpf_u_int32 *flags, char *errbuf) 1577{ 1578 int sock; 1579 FILE *fh; 1580 unsigned int arptype; 1581 struct ifreq ifr; 1582 struct ethtool_value info; 1583 1584 if (*flags & PCAP_IF_LOOPBACK) { 1585 /* 1586 * Loopback devices aren't wireless, and "connected"/ 1587 * "disconnected" doesn't apply to them. 1588 */ 1589 *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE; 1590 return 0; 1591 } 1592 1593 sock = get_if_ioctl_socket(); 1594 if (sock == -1) { 1595 pcap_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE, errno, 1596 "Can't create socket to get ethtool information for %s", 1597 name); 1598 return -1; 1599 } 1600 1601 /* 1602 * OK, what type of network is this? 1603 * In particular, is it wired or wireless? 1604 */ 1605 if (is_wifi(name)) { 1606 /* 1607 * Wi-Fi, hence wireless. 1608 */ 1609 *flags |= PCAP_IF_WIRELESS; 1610 } else { 1611 /* 1612 * OK, what does /sys/class/net/{if_name}/type contain? 1613 * (We don't use that for Wi-Fi, as it'll report 1614 * "Ethernet", i.e. ARPHRD_ETHER, for non-monitor- 1615 * mode devices.) 1616 */ 1617 char *pathstr; 1618 1619 if (asprintf(&pathstr, "/sys/class/net/%s/type", name) == -1) { 1620 snprintf(errbuf, PCAP_ERRBUF_SIZE, 1621 "%s: Can't generate path name string for /sys/class/net device", 1622 name); 1623 close(sock); 1624 return -1; 1625 } 1626 fh = fopen(pathstr, "r"); 1627 if (fh != NULL) { 1628 if (fscanf(fh, "%u", &arptype) == 1) { 1629 /* 1630 * OK, we got an ARPHRD_ type; what is it? 1631 */ 1632 switch (arptype) { 1633 1634 case ARPHRD_LOOPBACK: 1635 /* 1636 * These are types to which 1637 * "connected" and "disconnected" 1638 * don't apply, so don't bother 1639 * asking about it. 1640 * 1641 * XXX - add other types? 
1642 */ 1643 close(sock); 1644 fclose(fh); 1645 free(pathstr); 1646 return 0; 1647 1648 case ARPHRD_IRDA: 1649 case ARPHRD_IEEE80211: 1650 case ARPHRD_IEEE80211_PRISM: 1651 case ARPHRD_IEEE80211_RADIOTAP: 1652#ifdef ARPHRD_IEEE802154 1653 case ARPHRD_IEEE802154: 1654#endif 1655#ifdef ARPHRD_IEEE802154_MONITOR 1656 case ARPHRD_IEEE802154_MONITOR: 1657#endif 1658#ifdef ARPHRD_6LOWPAN 1659 case ARPHRD_6LOWPAN: 1660#endif 1661 /* 1662 * Various wireless types. 1663 */ 1664 *flags |= PCAP_IF_WIRELESS; 1665 break; 1666 } 1667 } 1668 fclose(fh); 1669 } 1670 free(pathstr); 1671 } 1672 1673#ifdef ETHTOOL_GLINK 1674 memset(&ifr, 0, sizeof(ifr)); 1675 pcap_strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); 1676 info.cmd = ETHTOOL_GLINK; 1677 /* 1678 * XXX - while Valgrind handles SIOCETHTOOL and knows that 1679 * the ETHTOOL_GLINK command sets the .data member of the 1680 * structure, Memory Sanitizer doesn't yet do so: 1681 * 1682 * https://bugs.llvm.org/show_bug.cgi?id=45814 1683 * 1684 * For now, we zero it out to squelch warnings; if the bug 1685 * in question is fixed, we can remove this. 1686 */ 1687 info.data = 0; 1688 ifr.ifr_data = (caddr_t)&info; 1689 if (ioctl(sock, SIOCETHTOOL, &ifr) == -1) { 1690 int save_errno = errno; 1691 1692 switch (save_errno) { 1693 1694 case EOPNOTSUPP: 1695 case EINVAL: 1696 /* 1697 * OK, this OS version or driver doesn't support 1698 * asking for this information. 1699 * XXX - distinguish between "this doesn't 1700 * support ethtool at all because it's not 1701 * that type of device" vs. "this doesn't 1702 * support ethtool even though it's that 1703 * type of device", and return "unknown". 1704 */ 1705 *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE; 1706 close(sock); 1707 return 0; 1708 1709 case ENODEV: 1710 /* 1711 * OK, no such device. 1712 * The user will find that out when they try to 1713 * activate the device; just say "OK" and 1714 * don't set anything. 
1715 */ 1716 close(sock); 1717 return 0; 1718 1719 default: 1720 /* 1721 * Other error. 1722 */ 1723 pcap_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE, 1724 save_errno, 1725 "%s: SIOCETHTOOL(ETHTOOL_GLINK) ioctl failed", 1726 name); 1727 close(sock); 1728 return -1; 1729 } 1730 } 1731 1732 /* 1733 * Is it connected? 1734 */ 1735 if (info.data) { 1736 /* 1737 * It's connected. 1738 */ 1739 *flags |= PCAP_IF_CONNECTION_STATUS_CONNECTED; 1740 } else { 1741 /* 1742 * It's disconnected. 1743 */ 1744 *flags |= PCAP_IF_CONNECTION_STATUS_DISCONNECTED; 1745 } 1746#endif 1747 1748 close(sock); 1749 return 0; 1750} 1751 1752int 1753pcap_platform_finddevs(pcap_if_list_t *devlistp, char *errbuf) 1754{ 1755 /* 1756 * Get the list of regular interfaces first. 1757 */ 1758 if (pcap_findalldevs_interfaces(devlistp, errbuf, can_be_bound, 1759 get_if_flags) == -1) 1760 return (-1); /* failure */ 1761 1762 /* 1763 * Add the "any" device. 1764 * As it refers to all network devices, not to any particular 1765 * network device, the notion of "connected" vs. "disconnected" 1766 * doesn't apply. 1767 */ 1768 if (add_dev(devlistp, "any", 1769 PCAP_IF_UP|PCAP_IF_RUNNING|PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE, 1770 any_descr, errbuf) == NULL) 1771 return (-1); 1772 1773 return (0); 1774} 1775 1776/* 1777 * Set direction flag: Which packets do we accept on a forwarding 1778 * single device? IN, OUT or both? 1779 */ 1780static int 1781pcap_setdirection_linux(pcap_t *handle, pcap_direction_t d) 1782{ 1783 /* 1784 * It's guaranteed, at this point, that d is a valid 1785 * direction value. 1786 */ 1787 handle->direction = d; 1788 return 0; 1789} 1790 1791static int 1792is_wifi(const char *device) 1793{ 1794 char *pathstr; 1795 struct stat statb; 1796 1797 /* 1798 * See if there's a sysfs wireless directory for it. 1799 * If so, it's a wireless interface. 1800 */ 1801 if (asprintf(&pathstr, "/sys/class/net/%s/wireless", device) == -1) { 1802 /* 1803 * Just give up here. 
1804 */ 1805 return 0; 1806 } 1807 if (stat(pathstr, &statb) == 0) { 1808 free(pathstr); 1809 return 1; 1810 } 1811 free(pathstr); 1812 1813 return 0; 1814} 1815 1816/* 1817 * Linux uses the ARP hardware type to identify the type of an 1818 * interface. pcap uses the DLT_xxx constants for this. This 1819 * function takes a pointer to a "pcap_t", and an ARPHRD_xxx 1820 * constant, as arguments, and sets "handle->linktype" to the 1821 * appropriate DLT_XXX constant and sets "handle->offset" to 1822 * the appropriate value (to make "handle->offset" plus link-layer 1823 * header length be a multiple of 4, so that the link-layer payload 1824 * will be aligned on a 4-byte boundary when capturing packets). 1825 * (If the offset isn't set here, it'll be 0; add code as appropriate 1826 * for cases where it shouldn't be 0.) 1827 * 1828 * If "cooked_ok" is non-zero, we can use DLT_LINUX_SLL and capture 1829 * in cooked mode; otherwise, we can't use cooked mode, so we have 1830 * to pick some type that works in raw mode, or fail. 1831 * 1832 * Sets the link type to -1 if unable to map the type. 1833 */ 1834static void map_arphrd_to_dlt(pcap_t *handle, int arptype, 1835 const char *device, int cooked_ok) 1836{ 1837 static const char cdma_rmnet[] = "cdma_rmnet"; 1838 1839 switch (arptype) { 1840 1841 case ARPHRD_ETHER: 1842 /* 1843 * For various annoying reasons having to do with DHCP 1844 * software, some versions of Android give the mobile- 1845 * phone-network interface an ARPHRD_ value of 1846 * ARPHRD_ETHER, even though the packets supplied by 1847 * that interface have no link-layer header, and begin 1848 * with an IP header, so that the ARPHRD_ value should 1849 * be ARPHRD_NONE. 1850 * 1851 * Detect those devices by checking the device name, and 1852 * use DLT_RAW for them. 1853 */ 1854 if (strncmp(device, cdma_rmnet, sizeof cdma_rmnet - 1) == 0) { 1855 handle->linktype = DLT_RAW; 1856 return; 1857 } 1858 1859 /* 1860 * Is this a real Ethernet device? 
If so, give it a 1861 * link-layer-type list with DLT_EN10MB and DLT_DOCSIS, so 1862 * that an application can let you choose it, in case you're 1863 * capturing DOCSIS traffic that a Cisco Cable Modem 1864 * Termination System is putting out onto an Ethernet (it 1865 * doesn't put an Ethernet header onto the wire, it puts raw 1866 * DOCSIS frames out on the wire inside the low-level 1867 * Ethernet framing). 1868 * 1869 * XXX - are there any other sorts of "fake Ethernet" that 1870 * have ARPHRD_ETHER but that shouldn't offer DLT_DOCSIS as 1871 * a Cisco CMTS won't put traffic onto it or get traffic 1872 * bridged onto it? ISDN is handled in "setup_socket()", 1873 * as we fall back on cooked mode there, and we use 1874 * is_wifi() to check for 802.11 devices; are there any 1875 * others? 1876 */ 1877 if (!is_wifi(device)) { 1878 int ret; 1879 1880 /* 1881 * This is not a Wi-Fi device but it could be 1882 * a DSA master/management network device. 1883 */ 1884 ret = iface_dsa_get_proto_info(device, handle); 1885 if (ret < 0) 1886 return; 1887 1888 if (ret == 1) { 1889 /* 1890 * This is a DSA master/management network 1891 * device linktype is already set by 1892 * iface_dsa_get_proto_info() set an 1893 * appropriate offset here. 1894 */ 1895 handle->offset = 2; 1896 break; 1897 } 1898 1899 /* 1900 * It's not a Wi-Fi device; offer DOCSIS. 1901 */ 1902 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2); 1903 /* 1904 * If that fails, just leave the list empty. 
1905 */ 1906 if (handle->dlt_list != NULL) { 1907 handle->dlt_list[0] = DLT_EN10MB; 1908 handle->dlt_list[1] = DLT_DOCSIS; 1909 handle->dlt_count = 2; 1910 } 1911 } 1912 /* FALLTHROUGH */ 1913 1914 case ARPHRD_METRICOM: 1915 case ARPHRD_LOOPBACK: 1916 handle->linktype = DLT_EN10MB; 1917 handle->offset = 2; 1918 break; 1919 1920 case ARPHRD_EETHER: 1921 handle->linktype = DLT_EN3MB; 1922 break; 1923 1924 case ARPHRD_AX25: 1925 handle->linktype = DLT_AX25_KISS; 1926 break; 1927 1928 case ARPHRD_PRONET: 1929 handle->linktype = DLT_PRONET; 1930 break; 1931 1932 case ARPHRD_CHAOS: 1933 handle->linktype = DLT_CHAOS; 1934 break; 1935#ifndef ARPHRD_CAN 1936#define ARPHRD_CAN 280 1937#endif 1938 case ARPHRD_CAN: 1939 handle->linktype = DLT_CAN_SOCKETCAN; 1940 break; 1941 1942#ifndef ARPHRD_IEEE802_TR 1943#define ARPHRD_IEEE802_TR 800 /* From Linux 2.4 */ 1944#endif 1945 case ARPHRD_IEEE802_TR: 1946 case ARPHRD_IEEE802: 1947 handle->linktype = DLT_IEEE802; 1948 handle->offset = 2; 1949 break; 1950 1951 case ARPHRD_ARCNET: 1952 handle->linktype = DLT_ARCNET_LINUX; 1953 break; 1954 1955#ifndef ARPHRD_FDDI /* From Linux 2.2.13 */ 1956#define ARPHRD_FDDI 774 1957#endif 1958 case ARPHRD_FDDI: 1959 handle->linktype = DLT_FDDI; 1960 handle->offset = 3; 1961 break; 1962 1963#ifndef ARPHRD_ATM /* FIXME: How to #include this? */ 1964#define ARPHRD_ATM 19 1965#endif 1966 case ARPHRD_ATM: 1967 /* 1968 * The Classical IP implementation in ATM for Linux 1969 * supports both what RFC 1483 calls "LLC Encapsulation", 1970 * in which each packet has an LLC header, possibly 1971 * with a SNAP header as well, prepended to it, and 1972 * what RFC 1483 calls "VC Based Multiplexing", in which 1973 * different virtual circuits carry different network 1974 * layer protocols, and no header is prepended to packets. 
1975 * 1976 * They both have an ARPHRD_ type of ARPHRD_ATM, so 1977 * you can't use the ARPHRD_ type to find out whether 1978 * captured packets will have an LLC header, and, 1979 * while there's a socket ioctl to *set* the encapsulation 1980 * type, there's no ioctl to *get* the encapsulation type. 1981 * 1982 * This means that 1983 * 1984 * programs that dissect Linux Classical IP frames 1985 * would have to check for an LLC header and, 1986 * depending on whether they see one or not, dissect 1987 * the frame as LLC-encapsulated or as raw IP (I 1988 * don't know whether there's any traffic other than 1989 * IP that would show up on the socket, or whether 1990 * there's any support for IPv6 in the Linux 1991 * Classical IP code); 1992 * 1993 * filter expressions would have to compile into 1994 * code that checks for an LLC header and does 1995 * the right thing. 1996 * 1997 * Both of those are a nuisance - and, at least on systems 1998 * that support PF_PACKET sockets, we don't have to put 1999 * up with those nuisances; instead, we can just capture 2000 * in cooked mode. That's what we'll do, if we can. 2001 * Otherwise, we'll just fail. 
2002 */ 2003 if (cooked_ok) 2004 handle->linktype = DLT_LINUX_SLL; 2005 else 2006 handle->linktype = -1; 2007 break; 2008 2009#ifndef ARPHRD_IEEE80211 /* From Linux 2.4.6 */ 2010#define ARPHRD_IEEE80211 801 2011#endif 2012 case ARPHRD_IEEE80211: 2013 handle->linktype = DLT_IEEE802_11; 2014 break; 2015 2016#ifndef ARPHRD_IEEE80211_PRISM /* From Linux 2.4.18 */ 2017#define ARPHRD_IEEE80211_PRISM 802 2018#endif 2019 case ARPHRD_IEEE80211_PRISM: 2020 handle->linktype = DLT_PRISM_HEADER; 2021 break; 2022 2023#ifndef ARPHRD_IEEE80211_RADIOTAP /* new */ 2024#define ARPHRD_IEEE80211_RADIOTAP 803 2025#endif 2026 case ARPHRD_IEEE80211_RADIOTAP: 2027 handle->linktype = DLT_IEEE802_11_RADIO; 2028 break; 2029 2030 case ARPHRD_PPP: 2031 /* 2032 * Some PPP code in the kernel supplies no link-layer 2033 * header whatsoever to PF_PACKET sockets; other PPP 2034 * code supplies PPP link-layer headers ("syncppp.c"); 2035 * some PPP code might supply random link-layer 2036 * headers (PPP over ISDN - there's code in Ethereal, 2037 * for example, to cope with PPP-over-ISDN captures 2038 * with which the Ethereal developers have had to cope, 2039 * heuristically trying to determine which of the 2040 * oddball link-layer headers particular packets have). 2041 * 2042 * As such, we just punt, and run all PPP interfaces 2043 * in cooked mode, if we can; otherwise, we just treat 2044 * it as DLT_RAW, for now - if somebody needs to capture, 2045 * on a 2.0[.x] kernel, on PPP devices that supply a 2046 * link-layer header, they'll have to add code here to 2047 * map to the appropriate DLT_ type (possibly adding a 2048 * new DLT_ type, if necessary). 2049 */ 2050 if (cooked_ok) 2051 handle->linktype = DLT_LINUX_SLL; 2052 else { 2053 /* 2054 * XXX - handle ISDN types here? 
We can't fall 2055 * back on cooked sockets, so we'd have to 2056 * figure out from the device name what type of 2057 * link-layer encapsulation it's using, and map 2058 * that to an appropriate DLT_ value, meaning 2059 * we'd map "isdnN" devices to DLT_RAW (they 2060 * supply raw IP packets with no link-layer 2061 * header) and "isdY" devices to a new DLT_I4L_IP 2062 * type that has only an Ethernet packet type as 2063 * a link-layer header. 2064 * 2065 * But sometimes we seem to get random crap 2066 * in the link-layer header when capturing on 2067 * ISDN devices.... 2068 */ 2069 handle->linktype = DLT_RAW; 2070 } 2071 break; 2072 2073#ifndef ARPHRD_CISCO 2074#define ARPHRD_CISCO 513 /* previously ARPHRD_HDLC */ 2075#endif 2076 case ARPHRD_CISCO: 2077 handle->linktype = DLT_C_HDLC; 2078 break; 2079 2080 /* Not sure if this is correct for all tunnels, but it 2081 * works for CIPE */ 2082 case ARPHRD_TUNNEL: 2083#ifndef ARPHRD_SIT 2084#define ARPHRD_SIT 776 /* From Linux 2.2.13 */ 2085#endif 2086 case ARPHRD_SIT: 2087 case ARPHRD_CSLIP: 2088 case ARPHRD_SLIP6: 2089 case ARPHRD_CSLIP6: 2090 case ARPHRD_ADAPT: 2091 case ARPHRD_SLIP: 2092#ifndef ARPHRD_RAWHDLC 2093#define ARPHRD_RAWHDLC 518 2094#endif 2095 case ARPHRD_RAWHDLC: 2096#ifndef ARPHRD_DLCI 2097#define ARPHRD_DLCI 15 2098#endif 2099 case ARPHRD_DLCI: 2100 /* 2101 * XXX - should some of those be mapped to DLT_LINUX_SLL 2102 * instead? Should we just map all of them to DLT_LINUX_SLL? 2103 */ 2104 handle->linktype = DLT_RAW; 2105 break; 2106 2107#ifndef ARPHRD_FRAD 2108#define ARPHRD_FRAD 770 2109#endif 2110 case ARPHRD_FRAD: 2111 handle->linktype = DLT_FRELAY; 2112 break; 2113 2114 case ARPHRD_LOCALTLK: 2115 handle->linktype = DLT_LTALK; 2116 break; 2117 2118 case 18: 2119 /* 2120 * RFC 4338 defines an encapsulation for IP and ARP 2121 * packets that's compatible with the RFC 2625 2122 * encapsulation, but that uses a different ARP 2123 * hardware type and hardware addresses. 
That 2124 * ARP hardware type is 18; Linux doesn't define 2125 * any ARPHRD_ value as 18, but if it ever officially 2126 * supports RFC 4338-style IP-over-FC, it should define 2127 * one. 2128 * 2129 * For now, we map it to DLT_IP_OVER_FC, in the hopes 2130 * that this will encourage its use in the future, 2131 * should Linux ever officially support RFC 4338-style 2132 * IP-over-FC. 2133 */ 2134 handle->linktype = DLT_IP_OVER_FC; 2135 break; 2136 2137#ifndef ARPHRD_FCPP 2138#define ARPHRD_FCPP 784 2139#endif 2140 case ARPHRD_FCPP: 2141#ifndef ARPHRD_FCAL 2142#define ARPHRD_FCAL 785 2143#endif 2144 case ARPHRD_FCAL: 2145#ifndef ARPHRD_FCPL 2146#define ARPHRD_FCPL 786 2147#endif 2148 case ARPHRD_FCPL: 2149#ifndef ARPHRD_FCFABRIC 2150#define ARPHRD_FCFABRIC 787 2151#endif 2152 case ARPHRD_FCFABRIC: 2153 /* 2154 * Back in 2002, Donald Lee at Cray wanted a DLT_ for 2155 * IP-over-FC: 2156 * 2157 * https://www.mail-archive.com/tcpdump-workers@sandelman.ottawa.on.ca/msg01043.html 2158 * 2159 * and one was assigned. 2160 * 2161 * In a later private discussion (spun off from a message 2162 * on the ethereal-users list) on how to get that DLT_ 2163 * value in libpcap on Linux, I ended up deciding that 2164 * the best thing to do would be to have him tweak the 2165 * driver to set the ARPHRD_ value to some ARPHRD_FCxx 2166 * type, and map all those types to DLT_IP_OVER_FC: 2167 * 2168 * I've checked into the libpcap and tcpdump CVS tree 2169 * support for DLT_IP_OVER_FC. In order to use that, 2170 * you'd have to modify your modified driver to return 2171 * one of the ARPHRD_FCxxx types, in "fcLINUXfcp.c" - 2172 * change it to set "dev->type" to ARPHRD_FCFABRIC, for 2173 * example (the exact value doesn't matter, it can be 2174 * any of ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, or 2175 * ARPHRD_FCFABRIC). 
2176 * 2177 * 11 years later, Christian Svensson wanted to map 2178 * various ARPHRD_ values to DLT_FC_2 and 2179 * DLT_FC_2_WITH_FRAME_DELIMS for raw Fibre Channel 2180 * frames: 2181 * 2182 * https://github.com/mcr/libpcap/pull/29 2183 * 2184 * There doesn't seem to be any network drivers that uses 2185 * any of the ARPHRD_FC* values for IP-over-FC, and 2186 * it's not exactly clear what the "Dummy types for non 2187 * ARP hardware" are supposed to mean (link-layer 2188 * header type? Physical network type?), so it's 2189 * not exactly clear why the ARPHRD_FC* types exist 2190 * in the first place. 2191 * 2192 * For now, we map them to DLT_FC_2, and provide an 2193 * option of DLT_FC_2_WITH_FRAME_DELIMS, as well as 2194 * DLT_IP_OVER_FC just in case there's some old 2195 * driver out there that uses one of those types for 2196 * IP-over-FC on which somebody wants to capture 2197 * packets. 2198 */ 2199 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 3); 2200 /* 2201 * If that fails, just leave the list empty. 2202 */ 2203 if (handle->dlt_list != NULL) { 2204 handle->dlt_list[0] = DLT_FC_2; 2205 handle->dlt_list[1] = DLT_FC_2_WITH_FRAME_DELIMS; 2206 handle->dlt_list[2] = DLT_IP_OVER_FC; 2207 handle->dlt_count = 3; 2208 } 2209 handle->linktype = DLT_FC_2; 2210 break; 2211 2212#ifndef ARPHRD_IRDA 2213#define ARPHRD_IRDA 783 2214#endif 2215 case ARPHRD_IRDA: 2216 /* Don't expect IP packet out of this interfaces... */ 2217 handle->linktype = DLT_LINUX_IRDA; 2218 /* We need to save packet direction for IrDA decoding, 2219 * so let's use "Linux-cooked" mode. Jean II 2220 * 2221 * XXX - this is handled in setup_socket(). */ 2222 /* handlep->cooked = 1; */ 2223 break; 2224 2225 /* ARPHRD_LAPD is unofficial and randomly allocated, if reallocation 2226 * is needed, please report it to <daniele@orlandi.com> */ 2227#ifndef ARPHRD_LAPD 2228#define ARPHRD_LAPD 8445 2229#endif 2230 case ARPHRD_LAPD: 2231 /* Don't expect IP packet out of this interfaces... 
*/ 2232 handle->linktype = DLT_LINUX_LAPD; 2233 break; 2234 2235#ifndef ARPHRD_NONE 2236#define ARPHRD_NONE 0xFFFE 2237#endif 2238 case ARPHRD_NONE: 2239 /* 2240 * No link-layer header; packets are just IP 2241 * packets, so use DLT_RAW. 2242 */ 2243 handle->linktype = DLT_RAW; 2244 break; 2245 2246#ifndef ARPHRD_IEEE802154 2247#define ARPHRD_IEEE802154 804 2248#endif 2249 case ARPHRD_IEEE802154: 2250 handle->linktype = DLT_IEEE802_15_4_NOFCS; 2251 break; 2252 2253#ifndef ARPHRD_NETLINK 2254#define ARPHRD_NETLINK 824 2255#endif 2256 case ARPHRD_NETLINK: 2257 handle->linktype = DLT_NETLINK; 2258 /* 2259 * We need to use cooked mode, so that in sll_protocol we 2260 * pick up the netlink protocol type such as NETLINK_ROUTE, 2261 * NETLINK_GENERIC, NETLINK_FIB_LOOKUP, etc. 2262 * 2263 * XXX - this is handled in setup_socket(). 2264 */ 2265 /* handlep->cooked = 1; */ 2266 break; 2267 2268#ifndef ARPHRD_VSOCKMON 2269#define ARPHRD_VSOCKMON 826 2270#endif 2271 case ARPHRD_VSOCKMON: 2272 handle->linktype = DLT_VSOCK; 2273 break; 2274 2275 default: 2276 handle->linktype = -1; 2277 break; 2278 } 2279} 2280 2281static void 2282set_dlt_list_cooked(pcap_t *handle) 2283{ 2284 /* 2285 * Support both DLT_LINUX_SLL and DLT_LINUX_SLL2. 2286 */ 2287 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2); 2288 2289 /* 2290 * If that failed, just leave the list empty. 2291 */ 2292 if (handle->dlt_list != NULL) { 2293 handle->dlt_list[0] = DLT_LINUX_SLL; 2294 handle->dlt_list[1] = DLT_LINUX_SLL2; 2295 handle->dlt_count = 2; 2296 } 2297} 2298 2299/* 2300 * Try to set up a PF_PACKET socket. 2301 * Returns 0 on success and a PCAP_ERROR_ value on failure. 
2302 */ 2303static int 2304setup_socket(pcap_t *handle, int is_any_device) 2305{ 2306 struct pcap_linux *handlep = handle->priv; 2307 const char *device = handle->opt.device; 2308 int status = 0; 2309 int sock_fd, arptype; 2310 int val; 2311 int err = 0; 2312 struct packet_mreq mr; 2313#if defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) 2314 int bpf_extensions; 2315 socklen_t len = sizeof(bpf_extensions); 2316#endif 2317 2318 /* 2319 * Open a socket with protocol family packet. If cooked is true, 2320 * we open a SOCK_DGRAM socket for the cooked interface, otherwise 2321 * we open a SOCK_RAW socket for the raw interface. 2322 * 2323 * The protocol is set to 0. This means we will receive no 2324 * packets until we "bind" the socket with a non-zero 2325 * protocol. This allows us to setup the ring buffers without 2326 * dropping any packets. 2327 */ 2328 sock_fd = is_any_device ? 2329 socket(PF_PACKET, SOCK_DGRAM, 0) : 2330 socket(PF_PACKET, SOCK_RAW, 0); 2331 2332 if (sock_fd == -1) { 2333 if (errno == EPERM || errno == EACCES) { 2334 /* 2335 * You don't have permission to open the 2336 * socket. 2337 */ 2338 status = PCAP_ERROR_PERM_DENIED; 2339 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2340 "Attempt to create packet socket failed - CAP_NET_RAW may be required"); 2341 } else { 2342 /* 2343 * Other error. 2344 */ 2345 status = PCAP_ERROR; 2346 } 2347 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2348 errno, "socket"); 2349 return status; 2350 } 2351 2352 /* 2353 * Get the interface index of the loopback device. 2354 * If the attempt fails, don't fail, just set the 2355 * "handlep->lo_ifindex" to -1. 2356 * 2357 * XXX - can there be more than one device that loops 2358 * packets back, i.e. devices other than "lo"? If so, 2359 * we'd need to find them all, and have an array of 2360 * indices for them, and check all of them in 2361 * "pcap_read_packet()". 
2362 */ 2363 handlep->lo_ifindex = iface_get_id(sock_fd, "lo", handle->errbuf); 2364 2365 /* 2366 * Default value for offset to align link-layer payload 2367 * on a 4-byte boundary. 2368 */ 2369 handle->offset = 0; 2370 2371 /* 2372 * What kind of frames do we have to deal with? Fall back 2373 * to cooked mode if we have an unknown interface type 2374 * or a type we know doesn't work well in raw mode. 2375 */ 2376 if (!is_any_device) { 2377 /* Assume for now we don't need cooked mode. */ 2378 handlep->cooked = 0; 2379 2380 if (handle->opt.rfmon) { 2381 /* 2382 * We were asked to turn on monitor mode. 2383 * Do so before we get the link-layer type, 2384 * because entering monitor mode could change 2385 * the link-layer type. 2386 */ 2387 err = enter_rfmon_mode(handle, sock_fd, device); 2388 if (err < 0) { 2389 /* Hard failure */ 2390 close(sock_fd); 2391 return err; 2392 } 2393 if (err == 0) { 2394 /* 2395 * Nothing worked for turning monitor mode 2396 * on. 2397 */ 2398 close(sock_fd); 2399 return PCAP_ERROR_RFMON_NOTSUP; 2400 } 2401 2402 /* 2403 * Either monitor mode has been turned on for 2404 * the device, or we've been given a different 2405 * device to open for monitor mode. If we've 2406 * been given a different device, use it. 
2407 */ 2408 if (handlep->mondevice != NULL) 2409 device = handlep->mondevice; 2410 } 2411 arptype = iface_get_arptype(sock_fd, device, handle->errbuf); 2412 if (arptype < 0) { 2413 close(sock_fd); 2414 return arptype; 2415 } 2416 map_arphrd_to_dlt(handle, arptype, device, 1); 2417 if (handle->linktype == -1 || 2418 handle->linktype == DLT_LINUX_SLL || 2419 handle->linktype == DLT_LINUX_IRDA || 2420 handle->linktype == DLT_LINUX_LAPD || 2421 handle->linktype == DLT_NETLINK || 2422 (handle->linktype == DLT_EN10MB && 2423 (strncmp("isdn", device, 4) == 0 || 2424 strncmp("isdY", device, 4) == 0))) { 2425 /* 2426 * Unknown interface type (-1), or a 2427 * device we explicitly chose to run 2428 * in cooked mode (e.g., PPP devices), 2429 * or an ISDN device (whose link-layer 2430 * type we can only determine by using 2431 * APIs that may be different on different 2432 * kernels) - reopen in cooked mode. 2433 * 2434 * If the type is unknown, return a warning; 2435 * map_arphrd_to_dlt() has already set the 2436 * warning message. 2437 */ 2438 if (close(sock_fd) == -1) { 2439 pcap_fmt_errmsg_for_errno(handle->errbuf, 2440 PCAP_ERRBUF_SIZE, errno, "close"); 2441 return PCAP_ERROR; 2442 } 2443 sock_fd = socket(PF_PACKET, SOCK_DGRAM, 0); 2444 if (sock_fd < 0) { 2445 /* 2446 * Fatal error. We treat this as 2447 * a generic error; we already know 2448 * that we were able to open a 2449 * PF_PACKET/SOCK_RAW socket, so 2450 * any failure is a "this shouldn't 2451 * happen" case. 2452 */ 2453 pcap_fmt_errmsg_for_errno(handle->errbuf, 2454 PCAP_ERRBUF_SIZE, errno, "socket"); 2455 return PCAP_ERROR; 2456 } 2457 handlep->cooked = 1; 2458 2459 /* 2460 * Get rid of any link-layer type list 2461 * we allocated - this only supports cooked 2462 * capture. 
2463 */ 2464 if (handle->dlt_list != NULL) { 2465 free(handle->dlt_list); 2466 handle->dlt_list = NULL; 2467 handle->dlt_count = 0; 2468 set_dlt_list_cooked(handle); 2469 } 2470 2471 if (handle->linktype == -1) { 2472 /* 2473 * Warn that we're falling back on 2474 * cooked mode; we may want to 2475 * update "map_arphrd_to_dlt()" 2476 * to handle the new type. 2477 */ 2478 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2479 "arptype %d not " 2480 "supported by libpcap - " 2481 "falling back to cooked " 2482 "socket", 2483 arptype); 2484 } 2485 2486 /* 2487 * IrDA capture is not a real "cooked" capture, 2488 * it's IrLAP frames, not IP packets. The 2489 * same applies to LAPD capture. 2490 */ 2491 if (handle->linktype != DLT_LINUX_IRDA && 2492 handle->linktype != DLT_LINUX_LAPD && 2493 handle->linktype != DLT_NETLINK) 2494 handle->linktype = DLT_LINUX_SLL; 2495 if (handle->linktype == -1) { 2496 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2497 "unknown arptype %d, defaulting to cooked mode", 2498 arptype); 2499 status = PCAP_WARNING; 2500 } 2501 } 2502 2503 handlep->ifindex = iface_get_id(sock_fd, device, 2504 handle->errbuf); 2505 if (handlep->ifindex == -1) { 2506 close(sock_fd); 2507 return PCAP_ERROR; 2508 } 2509 2510 if ((err = iface_bind(sock_fd, handlep->ifindex, 2511 handle->errbuf, 0)) != 0) { 2512 close(sock_fd); 2513 return err; 2514 } 2515 } else { 2516 /* 2517 * The "any" device. 2518 */ 2519 if (handle->opt.rfmon) { 2520 /* 2521 * It doesn't support monitor mode. 2522 */ 2523 close(sock_fd); 2524 return PCAP_ERROR_RFMON_NOTSUP; 2525 } 2526 2527 /* 2528 * It uses cooked mode. 2529 */ 2530 handlep->cooked = 1; 2531 handle->linktype = DLT_LINUX_SLL; 2532 handle->dlt_list = NULL; 2533 handle->dlt_count = 0; 2534 set_dlt_list_cooked(handle); 2535 2536 /* 2537 * We're not bound to a device. 2538 * For now, we're using this as an indication 2539 * that we can't transmit; stop doing that only 2540 * if we figure out how to transmit in cooked 2541 * mode. 
2542 */ 2543 handlep->ifindex = -1; 2544 } 2545 2546 /* 2547 * Select promiscuous mode on if "promisc" is set. 2548 * 2549 * Do not turn allmulti mode on if we don't select 2550 * promiscuous mode - on some devices (e.g., Orinoco 2551 * wireless interfaces), allmulti mode isn't supported 2552 * and the driver implements it by turning promiscuous 2553 * mode on, and that screws up the operation of the 2554 * card as a normal networking interface, and on no 2555 * other platform I know of does starting a non- 2556 * promiscuous capture affect which multicast packets 2557 * are received by the interface. 2558 */ 2559 2560 /* 2561 * Hmm, how can we set promiscuous mode on all interfaces? 2562 * I am not sure if that is possible at all. For now, we 2563 * silently ignore attempts to turn promiscuous mode on 2564 * for the "any" device (so you don't have to explicitly 2565 * disable it in programs such as tcpdump). 2566 */ 2567 2568 if (!is_any_device && handle->opt.promisc) { 2569 memset(&mr, 0, sizeof(mr)); 2570 mr.mr_ifindex = handlep->ifindex; 2571 mr.mr_type = PACKET_MR_PROMISC; 2572 if (setsockopt(sock_fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, 2573 &mr, sizeof(mr)) == -1) { 2574 pcap_fmt_errmsg_for_errno(handle->errbuf, 2575 PCAP_ERRBUF_SIZE, errno, "setsockopt (PACKET_ADD_MEMBERSHIP)"); 2576 close(sock_fd); 2577 return PCAP_ERROR; 2578 } 2579 } 2580 2581 /* 2582 * Enable auxiliary data and reserve room for reconstructing 2583 * VLAN headers. 2584 * 2585 * XXX - is enabling auxiliary data necessary, now that we 2586 * only support memory-mapped capture? The kernel's memory-mapped 2587 * capture code doesn't seem to check whether auxiliary data 2588 * is enabled, it seems to provide it whether it is or not. 
2589 */ 2590 val = 1; 2591 if (setsockopt(sock_fd, SOL_PACKET, PACKET_AUXDATA, &val, 2592 sizeof(val)) == -1 && errno != ENOPROTOOPT) { 2593 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2594 errno, "setsockopt (PACKET_AUXDATA)"); 2595 close(sock_fd); 2596 return PCAP_ERROR; 2597 } 2598 handle->offset += VLAN_TAG_LEN; 2599 2600 /* 2601 * If we're in cooked mode, make the snapshot length 2602 * large enough to hold a "cooked mode" header plus 2603 * 1 byte of packet data (so we don't pass a byte 2604 * count of 0 to "recvfrom()"). 2605 * XXX - we don't know whether this will be DLT_LINUX_SLL 2606 * or DLT_LINUX_SLL2, so make sure it's big enough for 2607 * a DLT_LINUX_SLL2 "cooked mode" header; a snapshot length 2608 * that small is silly anyway. 2609 */ 2610 if (handlep->cooked) { 2611 if (handle->snapshot < SLL2_HDR_LEN + 1) 2612 handle->snapshot = SLL2_HDR_LEN + 1; 2613 } 2614 handle->bufsize = handle->snapshot; 2615 2616 /* 2617 * Set the offset at which to insert VLAN tags. 2618 */ 2619 set_vlan_offset(handle); 2620 2621 if (handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO) { 2622 int nsec_tstamps = 1; 2623 2624 if (setsockopt(sock_fd, SOL_SOCKET, SO_TIMESTAMPNS, &nsec_tstamps, sizeof(nsec_tstamps)) < 0) { 2625 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "setsockopt: unable to set SO_TIMESTAMPNS"); 2626 close(sock_fd); 2627 return PCAP_ERROR; 2628 } 2629 } 2630 2631 /* 2632 * We've succeeded. Save the socket FD in the pcap structure. 2633 */ 2634 handle->fd = sock_fd; 2635 2636#if defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) 2637 /* 2638 * Can we generate special code for VLAN checks? 2639 * (XXX - what if we need the special code but it's not supported 2640 * by the OS? Is that possible?) 2641 */ 2642 if (getsockopt(sock_fd, SOL_SOCKET, SO_BPF_EXTENSIONS, 2643 &bpf_extensions, &len) == 0) { 2644 if (bpf_extensions >= SKF_AD_VLAN_TAG_PRESENT) { 2645 /* 2646 * Yes, we can. Request that we do so. 
2647 */ 2648 handle->bpf_codegen_flags |= BPF_SPECIAL_VLAN_HANDLING; 2649 } 2650 } 2651#endif /* defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) */ 2652 2653 return status; 2654} 2655 2656/* 2657 * Attempt to setup memory-mapped access. 2658 * 2659 * On success, returns 1, and sets *status to 0 if there are no warnings 2660 * or to a PCAP_WARNING_ code if there is a warning. 2661 * 2662 * On error, returns -1, and sets *status to the appropriate error code; 2663 * if that is PCAP_ERROR, sets handle->errbuf to the appropriate message. 2664 */ 2665static int 2666setup_mmapped(pcap_t *handle, int *status) 2667{ 2668 struct pcap_linux *handlep = handle->priv; 2669 int ret; 2670 2671 /* 2672 * Attempt to allocate a buffer to hold the contents of one 2673 * packet, for use by the oneshot callback. 2674 */ 2675 handlep->oneshot_buffer = malloc(handle->snapshot); 2676 if (handlep->oneshot_buffer == NULL) { 2677 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2678 errno, "can't allocate oneshot buffer"); 2679 *status = PCAP_ERROR; 2680 return -1; 2681 } 2682 2683 if (handle->opt.buffer_size == 0) { 2684 /* by default request 2M for the ring buffer */ 2685 handle->opt.buffer_size = 2*1024*1024; 2686 } 2687 ret = prepare_tpacket_socket(handle); 2688 if (ret == -1) { 2689 free(handlep->oneshot_buffer); 2690 handlep->oneshot_buffer = NULL; 2691 *status = PCAP_ERROR; 2692 return ret; 2693 } 2694 ret = create_ring(handle, status); 2695 if (ret == -1) { 2696 /* 2697 * Error attempting to enable memory-mapped capture; 2698 * fail. create_ring() has set *status. 2699 */ 2700 free(handlep->oneshot_buffer); 2701 handlep->oneshot_buffer = NULL; 2702 return -1; 2703 } 2704 2705 /* 2706 * Success. *status has been set either to 0 if there are no 2707 * warnings or to a PCAP_WARNING_ value if there is a warning. 2708 * 2709 * handle->offset is used to get the current position into the rx ring. 2710 * handle->cc is used to store the ring size. 
2711 */ 2712 2713 /* 2714 * Set the timeout to use in poll() before returning. 2715 */ 2716 set_poll_timeout(handlep); 2717 2718 return 1; 2719} 2720 2721/* 2722 * Attempt to set the socket to the specified version of the memory-mapped 2723 * header. 2724 * 2725 * Return 0 if we succeed; return 1 if we fail because that version isn't 2726 * supported; return -1 on any other error, and set handle->errbuf. 2727 */ 2728static int 2729init_tpacket(pcap_t *handle, int version, const char *version_str) 2730{ 2731 struct pcap_linux *handlep = handle->priv; 2732 int val = version; 2733 socklen_t len = sizeof(val); 2734 2735 /* 2736 * Probe whether kernel supports the specified TPACKET version; 2737 * this also gets the length of the header for that version. 2738 * 2739 * This socket option was introduced in 2.6.27, which was 2740 * also the first release with TPACKET_V2 support. 2741 */ 2742 if (getsockopt(handle->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) { 2743 if (errno == EINVAL) { 2744 /* 2745 * EINVAL means this specific version of TPACKET 2746 * is not supported. Tell the caller they can try 2747 * with a different one; if they've run out of 2748 * others to try, let them set the error message 2749 * appropriately. 2750 */ 2751 return 1; 2752 } 2753 2754 /* 2755 * All other errors are fatal. 2756 */ 2757 if (errno == ENOPROTOOPT) { 2758 /* 2759 * PACKET_HDRLEN isn't supported, which means 2760 * that memory-mapped capture isn't supported. 2761 * Indicate that in the message. 2762 */ 2763 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2764 "Kernel doesn't support memory-mapped capture; a 2.6.27 or later 2.x kernel is required, with CONFIG_PACKET_MMAP specified for 2.x kernels"); 2765 } else { 2766 /* 2767 * Some unexpected error. 
2768 */ 2769 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2770 errno, "can't get %s header len on packet socket", 2771 version_str); 2772 } 2773 return -1; 2774 } 2775 handlep->tp_hdrlen = val; 2776 2777 val = version; 2778 if (setsockopt(handle->fd, SOL_PACKET, PACKET_VERSION, &val, 2779 sizeof(val)) < 0) { 2780 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2781 errno, "can't activate %s on packet socket", version_str); 2782 return -1; 2783 } 2784 handlep->tp_version = version; 2785 2786 return 0; 2787} 2788 2789/* 2790 * Attempt to set the socket to version 3 of the memory-mapped header and, 2791 * if that fails because version 3 isn't supported, attempt to fall 2792 * back to version 2. If version 2 isn't supported, just fail. 2793 * 2794 * Return 0 if we succeed and -1 on any other error, and set handle->errbuf. 2795 */ 2796static int 2797prepare_tpacket_socket(pcap_t *handle) 2798{ 2799 int ret; 2800 2801#ifdef HAVE_TPACKET3 2802 /* 2803 * Try setting the version to TPACKET_V3. 2804 * 2805 * The only mode in which buffering is done on PF_PACKET 2806 * sockets, so that packets might not be delivered 2807 * immediately, is TPACKET_V3 mode. 2808 * 2809 * The buffering cannot be disabled in that mode, so 2810 * if the user has requested immediate mode, we don't 2811 * use TPACKET_V3. 2812 */ 2813 if (!handle->opt.immediate) { 2814 ret = init_tpacket(handle, TPACKET_V3, "TPACKET_V3"); 2815 if (ret == 0) { 2816 /* 2817 * Success. 2818 */ 2819 return 0; 2820 } 2821 if (ret == -1) { 2822 /* 2823 * We failed for some reason other than "the 2824 * kernel doesn't support TPACKET_V3". 2825 */ 2826 return -1; 2827 } 2828 2829 /* 2830 * This means it returned 1, which means "the kernel 2831 * doesn't support TPACKET_V3"; try TPACKET_V2. 2832 */ 2833 } 2834#endif /* HAVE_TPACKET3 */ 2835 2836 /* 2837 * Try setting the version to TPACKET_V2. 
2838 */ 2839 ret = init_tpacket(handle, TPACKET_V2, "TPACKET_V2"); 2840 if (ret == 0) { 2841 /* 2842 * Success. 2843 */ 2844 return 0; 2845 } 2846 2847 if (ret == 1) { 2848 /* 2849 * OK, the kernel supports memory-mapped capture, but 2850 * not TPACKET_V2. Set the error message appropriately. 2851 */ 2852 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2853 "Kernel doesn't support TPACKET_V2; a 2.6.27 or later kernel is required"); 2854 } 2855 2856 /* 2857 * We failed. 2858 */ 2859 return -1; 2860} 2861 2862#define MAX(a,b) ((a)>(b)?(a):(b)) 2863 2864/* 2865 * Attempt to set up memory-mapped access. 2866 * 2867 * On success, returns 1, and sets *status to 0 if there are no warnings 2868 * or to a PCAP_WARNING_ code if there is a warning. 2869 * 2870 * On error, returns -1, and sets *status to the appropriate error code; 2871 * if that is PCAP_ERROR, sets handle->errbuf to the appropriate message. 2872 */ 2873static int 2874create_ring(pcap_t *handle, int *status) 2875{ 2876 struct pcap_linux *handlep = handle->priv; 2877 unsigned i, j, frames_per_block; 2878#ifdef HAVE_TPACKET3 2879 /* 2880 * For sockets using TPACKET_V2, the extra stuff at the end of a 2881 * struct tpacket_req3 will be ignored, so this is OK even for 2882 * those sockets. 2883 */ 2884 struct tpacket_req3 req; 2885#else 2886 struct tpacket_req req; 2887#endif 2888 socklen_t len; 2889 unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff; 2890 unsigned int frame_size; 2891 2892 /* 2893 * Start out assuming no warnings or errors. 2894 */ 2895 *status = 0; 2896 2897 /* 2898 * Reserve space for VLAN tag reconstruction. 2899 */ 2900 tp_reserve = VLAN_TAG_LEN; 2901 2902 /* 2903 * If we're capturing in cooked mode, reserve space for 2904 * a DLT_LINUX_SLL2 header; we don't know yet whether 2905 * we'll be using DLT_LINUX_SLL or DLT_LINUX_SLL2, as 2906 * that can be changed on an open device, so we reserve 2907 * space for the larger of the two. 
2908 * 2909 * XXX - we assume that the kernel is still adding 2910 * 16 bytes of extra space, so we subtract 16 from 2911 * SLL2_HDR_LEN to get the additional space needed. 2912 * (Are they doing that for DLT_LINUX_SLL, the link- 2913 * layer header for which is 16 bytes?) 2914 * 2915 * XXX - should we use TPACKET_ALIGN(SLL2_HDR_LEN - 16)? 2916 */ 2917 if (handlep->cooked) 2918 tp_reserve += SLL2_HDR_LEN - 16; 2919 2920 /* 2921 * Try to request that amount of reserve space. 2922 * This must be done before creating the ring buffer. 2923 */ 2924 len = sizeof(tp_reserve); 2925 if (setsockopt(handle->fd, SOL_PACKET, PACKET_RESERVE, 2926 &tp_reserve, len) < 0) { 2927 pcap_fmt_errmsg_for_errno(handle->errbuf, 2928 PCAP_ERRBUF_SIZE, errno, 2929 "setsockopt (PACKET_RESERVE)"); 2930 *status = PCAP_ERROR; 2931 return -1; 2932 } 2933 2934 switch (handlep->tp_version) { 2935 2936 case TPACKET_V2: 2937 /* Note that with large snapshot length (say 256K, which is 2938 * the default for recent versions of tcpdump, Wireshark, 2939 * TShark, dumpcap or 64K, the value that "-s 0" has given for 2940 * a long time with tcpdump), if we use the snapshot 2941 * length to calculate the frame length, only a few frames 2942 * will be available in the ring even with pretty 2943 * large ring size (and a lot of memory will be unused). 2944 * 2945 * Ideally, we should choose a frame length based on the 2946 * minimum of the specified snapshot length and the maximum 2947 * packet size. That's not as easy as it sounds; consider, 2948 * for example, an 802.11 interface in monitor mode, where 2949 * the frame would include a radiotap header, where the 2950 * maximum radiotap header length is device-dependent. 2951 * 2952 * So, for now, we just do this for Ethernet devices, where 2953 * there's no metadata header, and the link-layer header is 2954 * fixed length. 
We can get the maximum packet size by 2955 * adding 18, the Ethernet header length plus the CRC length 2956 * (just in case we happen to get the CRC in the packet), to 2957 * the MTU of the interface; we fetch the MTU in the hopes 2958 * that it reflects support for jumbo frames. (Even if the 2959 * interface is just being used for passive snooping, the 2960 * driver might set the size of buffers in the receive ring 2961 * based on the MTU, so that the MTU limits the maximum size 2962 * of packets that we can receive.) 2963 * 2964 * If segmentation/fragmentation or receive offload are 2965 * enabled, we can get reassembled/aggregated packets larger 2966 * than MTU, but bounded to 65535 plus the Ethernet overhead, 2967 * due to kernel and protocol constraints */ 2968 frame_size = handle->snapshot; 2969 if (handle->linktype == DLT_EN10MB) { 2970 unsigned int max_frame_len; 2971 int mtu; 2972 int offload; 2973 2974 mtu = iface_get_mtu(handle->fd, handle->opt.device, 2975 handle->errbuf); 2976 if (mtu == -1) { 2977 *status = PCAP_ERROR; 2978 return -1; 2979 } 2980 offload = iface_get_offload(handle); 2981 if (offload == -1) { 2982 *status = PCAP_ERROR; 2983 return -1; 2984 } 2985 if (offload) 2986 max_frame_len = MAX(mtu, 65535); 2987 else 2988 max_frame_len = mtu; 2989 max_frame_len += 18; 2990 2991 if (frame_size > max_frame_len) 2992 frame_size = max_frame_len; 2993 } 2994 2995 /* NOTE: calculus matching those in tpacket_rcv() 2996 * in linux-2.6/net/packet/af_packet.c 2997 */ 2998 len = sizeof(sk_type); 2999 if (getsockopt(handle->fd, SOL_SOCKET, SO_TYPE, &sk_type, 3000 &len) < 0) { 3001 pcap_fmt_errmsg_for_errno(handle->errbuf, 3002 PCAP_ERRBUF_SIZE, errno, "getsockopt (SO_TYPE)"); 3003 *status = PCAP_ERROR; 3004 return -1; 3005 } 3006 maclen = (sk_type == SOCK_DGRAM) ? 
0 : MAX_LINKHEADER_SIZE; 3007 /* XXX: in the kernel maclen is calculated from 3008 * LL_ALLOCATED_SPACE(dev) and vnet_hdr.hdr_len 3009 * in: packet_snd() in linux-2.6/net/packet/af_packet.c 3010 * then packet_alloc_skb() in linux-2.6/net/packet/af_packet.c 3011 * then sock_alloc_send_pskb() in linux-2.6/net/core/sock.c 3012 * but I see no way to get those sizes in userspace, 3013 * like for instance with an ifreq ioctl(); 3014 * the best thing I've found so far is MAX_HEADER in 3015 * the kernel part of linux-2.6/include/linux/netdevice.h 3016 * which goes up to 128+48=176; since pcap-linux.c 3017 * defines a MAX_LINKHEADER_SIZE of 256 which is 3018 * greater than that, let's use it.. maybe is it even 3019 * large enough to directly replace macoff.. 3020 */ 3021 tp_hdrlen = TPACKET_ALIGN(handlep->tp_hdrlen) + sizeof(struct sockaddr_ll) ; 3022 netoff = TPACKET_ALIGN(tp_hdrlen + (maclen < 16 ? 16 : maclen)) + tp_reserve; 3023 /* NOTE: AFAICS tp_reserve may break the TPACKET_ALIGN 3024 * of netoff, which contradicts 3025 * linux-2.6/Documentation/networking/packet_mmap.txt 3026 * documenting that: 3027 * "- Gap, chosen so that packet data (Start+tp_net) 3028 * aligns to TPACKET_ALIGNMENT=16" 3029 */ 3030 /* NOTE: in linux-2.6/include/linux/skbuff.h: 3031 * "CPUs often take a performance hit 3032 * when accessing unaligned memory locations" 3033 */ 3034 macoff = netoff - maclen; 3035 req.tp_frame_size = TPACKET_ALIGN(macoff + frame_size); 3036 /* 3037 * Round the buffer size up to a multiple of the 3038 * frame size (rather than rounding down, which 3039 * would give a buffer smaller than our caller asked 3040 * for, and possibly give zero frames if the requested 3041 * buffer size is too small for one frame). 
3042 */ 3043 req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size; 3044 break; 3045 3046#ifdef HAVE_TPACKET3 3047 case TPACKET_V3: 3048 /* The "frames" for this are actually buffers that 3049 * contain multiple variable-sized frames. 3050 * 3051 * We pick a "frame" size of MAXIMUM_SNAPLEN to leave 3052 * enough room for at least one reasonably-sized packet 3053 * in the "frame". */ 3054 req.tp_frame_size = MAXIMUM_SNAPLEN; 3055 /* 3056 * Round the buffer size up to a multiple of the 3057 * "frame" size (rather than rounding down, which 3058 * would give a buffer smaller than our caller asked 3059 * for, and possibly give zero "frames" if the requested 3060 * buffer size is too small for one "frame"). 3061 */ 3062 req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size; 3063 break; 3064#endif 3065 default: 3066 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3067 "Internal error: unknown TPACKET_ value %u", 3068 handlep->tp_version); 3069 *status = PCAP_ERROR; 3070 return -1; 3071 } 3072 3073 /* compute the minimum block size that will handle this frame. 3074 * The block has to be page size aligned. 3075 * The max block size allowed by the kernel is arch-dependent and 3076 * it's not explicitly checked here. */ 3077 req.tp_block_size = getpagesize(); 3078 while (req.tp_block_size < req.tp_frame_size) 3079 req.tp_block_size <<= 1; 3080 3081 frames_per_block = req.tp_block_size/req.tp_frame_size; 3082 3083 /* 3084 * PACKET_TIMESTAMP was added after linux/net_tstamp.h was, 3085 * so we check for PACKET_TIMESTAMP. We check for 3086 * linux/net_tstamp.h just in case a system somehow has 3087 * PACKET_TIMESTAMP but not linux/net_tstamp.h; that might 3088 * be unnecessary. 
3089 * 3090 * SIOCSHWTSTAMP was introduced in the patch that introduced 3091 * linux/net_tstamp.h, so we don't bother checking whether 3092 * SIOCSHWTSTAMP is defined (if your Linux system has 3093 * linux/net_tstamp.h but doesn't define SIOCSHWTSTAMP, your 3094 * Linux system is badly broken). 3095 */ 3096#if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) 3097 /* 3098 * If we were told to do so, ask the kernel and the driver 3099 * to use hardware timestamps. 3100 * 3101 * Hardware timestamps are only supported with mmapped 3102 * captures. 3103 */ 3104 if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER || 3105 handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER_UNSYNCED) { 3106 struct hwtstamp_config hwconfig; 3107 struct ifreq ifr; 3108 int timesource; 3109 3110 /* 3111 * Ask for hardware time stamps on all packets, 3112 * including transmitted packets. 3113 */ 3114 memset(&hwconfig, 0, sizeof(hwconfig)); 3115 hwconfig.tx_type = HWTSTAMP_TX_ON; 3116 hwconfig.rx_filter = HWTSTAMP_FILTER_ALL; 3117 3118 memset(&ifr, 0, sizeof(ifr)); 3119 pcap_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name)); 3120 ifr.ifr_data = (void *)&hwconfig; 3121 3122 /* 3123 * This may require CAP_NET_ADMIN. 3124 */ 3125 if (ioctl(handle->fd, SIOCSHWTSTAMP, &ifr) < 0) { 3126 switch (errno) { 3127 3128 case EPERM: 3129 /* 3130 * Treat this as an error, as the 3131 * user should try to run this 3132 * with the appropriate privileges - 3133 * and, if they can't, shouldn't 3134 * try requesting hardware time stamps. 3135 */ 3136 *status = PCAP_ERROR_PERM_DENIED; 3137 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3138 "Attempt to set hardware timestamp failed - CAP_NET_ADMIN may be required"); 3139 return -1; 3140 3141 case EOPNOTSUPP: 3142 case ERANGE: 3143 /* 3144 * Treat this as a warning, as the 3145 * only way to fix the warning is to 3146 * get an adapter that supports hardware 3147 * time stamps for *all* packets. 
3148 * (ERANGE means "we support hardware 3149 * time stamps, but for packets matching 3150 * that particular filter", so it means 3151 * "we don't support hardware time stamps 3152 * for all incoming packets" here.) 3153 * 3154 * We'll just fall back on the standard 3155 * host time stamps. 3156 */ 3157 *status = PCAP_WARNING_TSTAMP_TYPE_NOTSUP; 3158 break; 3159 3160 default: 3161 pcap_fmt_errmsg_for_errno(handle->errbuf, 3162 PCAP_ERRBUF_SIZE, errno, 3163 "SIOCSHWTSTAMP failed"); 3164 *status = PCAP_ERROR; 3165 return -1; 3166 } 3167 } else { 3168 /* 3169 * Well, that worked. Now specify the type of 3170 * hardware time stamp we want for this 3171 * socket. 3172 */ 3173 if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER) { 3174 /* 3175 * Hardware timestamp, synchronized 3176 * with the system clock. 3177 */ 3178 timesource = SOF_TIMESTAMPING_SYS_HARDWARE; 3179 } else { 3180 /* 3181 * PCAP_TSTAMP_ADAPTER_UNSYNCED - hardware 3182 * timestamp, not synchronized with the 3183 * system clock. 3184 */ 3185 timesource = SOF_TIMESTAMPING_RAW_HARDWARE; 3186 } 3187 if (setsockopt(handle->fd, SOL_PACKET, PACKET_TIMESTAMP, 3188 (void *)×ource, sizeof(timesource))) { 3189 pcap_fmt_errmsg_for_errno(handle->errbuf, 3190 PCAP_ERRBUF_SIZE, errno, 3191 "can't set PACKET_TIMESTAMP"); 3192 *status = PCAP_ERROR; 3193 return -1; 3194 } 3195 } 3196 } 3197#endif /* HAVE_LINUX_NET_TSTAMP_H && PACKET_TIMESTAMP */ 3198 3199 /* ask the kernel to create the ring */ 3200retry: 3201 req.tp_block_nr = req.tp_frame_nr / frames_per_block; 3202 3203 /* req.tp_frame_nr is requested to match frames_per_block*req.tp_block_nr */ 3204 req.tp_frame_nr = req.tp_block_nr * frames_per_block; 3205 3206#ifdef HAVE_TPACKET3 3207 /* timeout value to retire block - use the configured buffering timeout, or default if <0. 
*/ 3208 if (handlep->timeout > 0) { 3209 /* Use the user specified timeout as the block timeout */ 3210 req.tp_retire_blk_tov = handlep->timeout; 3211 } else if (handlep->timeout == 0) { 3212 /* 3213 * In pcap, this means "infinite timeout"; TPACKET_V3 3214 * doesn't support that, so just set it to UINT_MAX 3215 * milliseconds. In the TPACKET_V3 loop, if the 3216 * timeout is 0, and we haven't yet seen any packets, 3217 * and we block and still don't have any packets, we 3218 * keep blocking until we do. 3219 */ 3220 req.tp_retire_blk_tov = UINT_MAX; 3221 } else { 3222 /* 3223 * XXX - this is not valid; use 0, meaning "have the 3224 * kernel pick a default", for now. 3225 */ 3226 req.tp_retire_blk_tov = 0; 3227 } 3228 /* private data not used */ 3229 req.tp_sizeof_priv = 0; 3230 /* Rx ring - feature request bits - none (rxhash will not be filled) */ 3231 req.tp_feature_req_word = 0; 3232#endif 3233 3234 if (setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING, 3235 (void *) &req, sizeof(req))) { 3236 if ((errno == ENOMEM) && (req.tp_block_nr > 1)) { 3237 /* 3238 * Memory failure; try to reduce the requested ring 3239 * size. 3240 * 3241 * We used to reduce this by half -- do 5% instead. 3242 * That may result in more iterations and a longer 3243 * startup, but the user will be much happier with 3244 * the resulting buffer size. 
3245 */ 3246 if (req.tp_frame_nr < 20) 3247 req.tp_frame_nr -= 1; 3248 else 3249 req.tp_frame_nr -= req.tp_frame_nr/20; 3250 goto retry; 3251 } 3252 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 3253 errno, "can't create rx ring on packet socket"); 3254 *status = PCAP_ERROR; 3255 return -1; 3256 } 3257 3258 /* memory map the rx ring */ 3259 handlep->mmapbuflen = req.tp_block_nr * req.tp_block_size; 3260 handlep->mmapbuf = mmap(0, handlep->mmapbuflen, 3261 PROT_READ|PROT_WRITE, MAP_SHARED, handle->fd, 0); 3262 if (handlep->mmapbuf == MAP_FAILED) { 3263 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 3264 errno, "can't mmap rx ring"); 3265 3266 /* clear the allocated ring on error*/ 3267 destroy_ring(handle); 3268 *status = PCAP_ERROR; 3269 return -1; 3270 } 3271 3272 /* allocate a ring for each frame header pointer*/ 3273 handle->cc = req.tp_frame_nr; 3274 handle->buffer = malloc(handle->cc * sizeof(union thdr *)); 3275 if (!handle->buffer) { 3276 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 3277 errno, "can't allocate ring of frame headers"); 3278 3279 destroy_ring(handle); 3280 *status = PCAP_ERROR; 3281 return -1; 3282 } 3283 3284 /* fill the header ring with proper frame ptr*/ 3285 handle->offset = 0; 3286 for (i=0; i<req.tp_block_nr; ++i) { 3287 u_char *base = &handlep->mmapbuf[i*req.tp_block_size]; 3288 for (j=0; j<frames_per_block; ++j, ++handle->offset) { 3289 RING_GET_CURRENT_FRAME(handle) = base; 3290 base += req.tp_frame_size; 3291 } 3292 } 3293 3294 handle->bufsize = req.tp_frame_size; 3295 handle->offset = 0; 3296 return 1; 3297} 3298 3299/* free all ring related resources*/ 3300static void 3301destroy_ring(pcap_t *handle) 3302{ 3303 struct pcap_linux *handlep = handle->priv; 3304 3305 /* 3306 * Tell the kernel to destroy the ring. 3307 * We don't check for setsockopt failure, as 1) we can't recover 3308 * from an error and 2) we might not yet have set it up in the 3309 * first place. 
3310 */ 3311 struct tpacket_req req; 3312 memset(&req, 0, sizeof(req)); 3313 (void)setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING, 3314 (void *) &req, sizeof(req)); 3315 3316 /* if ring is mapped, unmap it*/ 3317 if (handlep->mmapbuf) { 3318 /* do not test for mmap failure, as we can't recover from any error */ 3319 (void)munmap(handlep->mmapbuf, handlep->mmapbuflen); 3320 handlep->mmapbuf = NULL; 3321 } 3322} 3323 3324/* 3325 * Special one-shot callback, used for pcap_next() and pcap_next_ex(), 3326 * for Linux mmapped capture. 3327 * 3328 * The problem is that pcap_next() and pcap_next_ex() expect the packet 3329 * data handed to the callback to be valid after the callback returns, 3330 * but pcap_read_linux_mmap() has to release that packet as soon as 3331 * the callback returns (otherwise, the kernel thinks there's still 3332 * at least one unprocessed packet available in the ring, so a select() 3333 * will immediately return indicating that there's data to process), so, 3334 * in the callback, we have to make a copy of the packet. 3335 * 3336 * Yes, this means that, if the capture is using the ring buffer, using 3337 * pcap_next() or pcap_next_ex() requires more copies than using 3338 * pcap_loop() or pcap_dispatch(). If that bothers you, don't use 3339 * pcap_next() or pcap_next_ex(). 
3340 */ 3341static void 3342pcap_oneshot_linux(u_char *user, const struct pcap_pkthdr *h, 3343 const u_char *bytes) 3344{ 3345 struct oneshot_userdata *sp = (struct oneshot_userdata *)user; 3346 pcap_t *handle = sp->pd; 3347 struct pcap_linux *handlep = handle->priv; 3348 3349 *sp->hdr = *h; 3350 memcpy(handlep->oneshot_buffer, bytes, h->caplen); 3351 *sp->pkt = handlep->oneshot_buffer; 3352} 3353 3354static int 3355pcap_getnonblock_linux(pcap_t *handle) 3356{ 3357 struct pcap_linux *handlep = handle->priv; 3358 3359 /* use negative value of timeout to indicate non blocking ops */ 3360 return (handlep->timeout<0); 3361} 3362 3363static int 3364pcap_setnonblock_linux(pcap_t *handle, int nonblock) 3365{ 3366 struct pcap_linux *handlep = handle->priv; 3367 3368 /* 3369 * Set the file descriptor to non-blocking mode, as we use 3370 * it for sending packets. 3371 */ 3372 if (pcap_setnonblock_fd(handle, nonblock) == -1) 3373 return -1; 3374 3375 /* 3376 * Map each value to their corresponding negation to 3377 * preserve the timeout value provided with pcap_set_timeout. 3378 */ 3379 if (nonblock) { 3380 if (handlep->timeout >= 0) { 3381 /* 3382 * Indicate that we're switching to 3383 * non-blocking mode. 3384 */ 3385 handlep->timeout = ~handlep->timeout; 3386 } 3387 if (handlep->poll_breakloop_fd != -1) { 3388 /* Close the eventfd; we do not need it in nonblock mode. */ 3389 close(handlep->poll_breakloop_fd); 3390 handlep->poll_breakloop_fd = -1; 3391 } 3392 } else { 3393 if (handlep->poll_breakloop_fd == -1) { 3394 /* If we did not have an eventfd, open one now that we are blocking. 
*/ 3395 if ( ( handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK) ) == -1 ) { 3396 int save_errno = errno; 3397 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3398 "Could not open eventfd: %s", 3399 strerror(errno)); 3400 errno = save_errno; 3401 return -1; 3402 } 3403 } 3404 if (handlep->timeout < 0) { 3405 handlep->timeout = ~handlep->timeout; 3406 } 3407 } 3408 /* Update the timeout to use in poll(). */ 3409 set_poll_timeout(handlep); 3410 return 0; 3411} 3412 3413/* 3414 * Get the status field of the ring buffer frame at a specified offset. 3415 */ 3416static inline u_int 3417pcap_get_ring_frame_status(pcap_t *handle, int offset) 3418{ 3419 struct pcap_linux *handlep = handle->priv; 3420 union thdr h; 3421 3422 h.raw = RING_GET_FRAME_AT(handle, offset); 3423 switch (handlep->tp_version) { 3424 case TPACKET_V2: 3425 return __atomic_load_n(&h.h2->tp_status, __ATOMIC_ACQUIRE); 3426 break; 3427#ifdef HAVE_TPACKET3 3428 case TPACKET_V3: 3429 return __atomic_load_n(&h.h3->hdr.bh1.block_status, __ATOMIC_ACQUIRE); 3430 break; 3431#endif 3432 } 3433 /* This should not happen. */ 3434 return 0; 3435} 3436 3437/* 3438 * Block waiting for frames to be available. 3439 */ 3440static int pcap_wait_for_frames_mmap(pcap_t *handle) 3441{ 3442 struct pcap_linux *handlep = handle->priv; 3443 int timeout; 3444 struct ifreq ifr; 3445 int ret; 3446 struct pollfd pollinfo[2]; 3447 int numpollinfo; 3448 pollinfo[0].fd = handle->fd; 3449 pollinfo[0].events = POLLIN; 3450 if ( handlep->poll_breakloop_fd == -1 ) { 3451 numpollinfo = 1; 3452 pollinfo[1].revents = 0; 3453 /* 3454 * We set pollinfo[1].revents to zero, even though 3455 * numpollinfo = 1 meaning that poll() doesn't see 3456 * pollinfo[1], so that we do not have to add a 3457 * conditional of numpollinfo > 1 below when we 3458 * test pollinfo[1].revents. 
3459 */ 3460 } else { 3461 pollinfo[1].fd = handlep->poll_breakloop_fd; 3462 pollinfo[1].events = POLLIN; 3463 numpollinfo = 2; 3464 } 3465 3466 /* 3467 * Keep polling until we either get some packets to read, see 3468 * that we got told to break out of the loop, get a fatal error, 3469 * or discover that the device went away. 3470 * 3471 * In non-blocking mode, we must still do one poll() to catch 3472 * any pending error indications, but the poll() has a timeout 3473 * of 0, so that it doesn't block, and we quit after that one 3474 * poll(). 3475 * 3476 * If we've seen an ENETDOWN, it might be the first indication 3477 * that the device went away, or it might just be that it was 3478 * configured down. Unfortunately, there's no guarantee that 3479 * the device has actually been removed as an interface, because: 3480 * 3481 * 1) if, as appears to be the case at least some of the time, 3482 * the PF_PACKET socket code first gets a NETDEV_DOWN indication 3483 * for the device and then gets a NETDEV_UNREGISTER indication 3484 * for it, the first indication will cause a wakeup with ENETDOWN 3485 * but won't set the packet socket's field for the interface index 3486 * to -1, and the second indication won't cause a wakeup (because 3487 * the first indication also caused the protocol hook to be 3488 * unregistered) but will set the packet socket's field for the 3489 * interface index to -1; 3490 * 3491 * 2) even if just a NETDEV_UNREGISTER indication is registered, 3492 * the packet socket's field for the interface index only gets 3493 * set to -1 after the wakeup, so there's a small but non-zero 3494 * risk that a thread blocked waiting for the wakeup will get 3495 * to the "fetch the socket name" code before the interface index 3496 * gets set to -1, so it'll get the old interface index. 
3497 * 3498 * Therefore, if we got an ENETDOWN and haven't seen a packet 3499 * since then, we assume that we might be waiting for the interface 3500 * to disappear, and poll with a timeout to try again in a short 3501 * period of time. If we *do* see a packet, the interface has 3502 * come back up again, and is *definitely* still there, so we 3503 * don't need to poll. 3504 */ 3505 for (;;) { 3506 /* 3507 * Yes, we do this even in non-blocking mode, as it's 3508 * the only way to get error indications from a 3509 * tpacket socket. 3510 * 3511 * The timeout is 0 in non-blocking mode, so poll() 3512 * returns immediately. 3513 */ 3514 timeout = handlep->poll_timeout; 3515 3516 /* 3517 * If we got an ENETDOWN and haven't gotten an indication 3518 * that the device has gone away or that the device is up, 3519 * we don't yet know for certain whether the device has 3520 * gone away or not, do a poll() with a 1-millisecond timeout, 3521 * as we have to poll indefinitely for "device went away" 3522 * indications until we either get one or see that the 3523 * device is up. 3524 */ 3525 if (handlep->netdown) { 3526 if (timeout != 0) 3527 timeout = 1; 3528 } 3529 ret = poll(pollinfo, numpollinfo, timeout); 3530 if (ret < 0) { 3531 /* 3532 * Error. If it's not EINTR, report it. 3533 */ 3534 if (errno != EINTR) { 3535 pcap_fmt_errmsg_for_errno(handle->errbuf, 3536 PCAP_ERRBUF_SIZE, errno, 3537 "can't poll on packet socket"); 3538 return PCAP_ERROR; 3539 } 3540 3541 /* 3542 * It's EINTR; if we were told to break out of 3543 * the loop, do so. 3544 */ 3545 if (handle->break_loop) { 3546 handle->break_loop = 0; 3547 return PCAP_ERROR_BREAK; 3548 } 3549 } else if (ret > 0) { 3550 /* 3551 * OK, some descriptor is ready. 3552 * Check the socket descriptor first. 3553 * 3554 * As I read the Linux man page, pollinfo[0].revents 3555 * will either be POLLIN, POLLERR, POLLHUP, or POLLNVAL. 
3556 */ 3557 if (pollinfo[0].revents == POLLIN) { 3558 /* 3559 * OK, we may have packets to 3560 * read. 3561 */ 3562 break; 3563 } 3564 if (pollinfo[0].revents != 0) { 3565 /* 3566 * There's some indication other than 3567 * "you can read on this descriptor" on 3568 * the descriptor. 3569 */ 3570 if (pollinfo[0].revents & POLLNVAL) { 3571 snprintf(handle->errbuf, 3572 PCAP_ERRBUF_SIZE, 3573 "Invalid polling request on packet socket"); 3574 return PCAP_ERROR; 3575 } 3576 if (pollinfo[0].revents & (POLLHUP | POLLRDHUP)) { 3577 snprintf(handle->errbuf, 3578 PCAP_ERRBUF_SIZE, 3579 "Hangup on packet socket"); 3580 return PCAP_ERROR; 3581 } 3582 if (pollinfo[0].revents & POLLERR) { 3583 /* 3584 * Get the error. 3585 */ 3586 int err; 3587 socklen_t errlen; 3588 3589 errlen = sizeof(err); 3590 if (getsockopt(handle->fd, SOL_SOCKET, 3591 SO_ERROR, &err, &errlen) == -1) { 3592 /* 3593 * The call *itself* returned 3594 * an error; make *that* 3595 * the error. 3596 */ 3597 err = errno; 3598 } 3599 3600 /* 3601 * OK, we have the error. 3602 */ 3603 if (err == ENETDOWN) { 3604 /* 3605 * The device on which we're 3606 * capturing went away or the 3607 * interface was taken down. 3608 * 3609 * We don't know for certain 3610 * which happened, and the 3611 * next poll() may indicate 3612 * that there are packets 3613 * to be read, so just set 3614 * a flag to get us to do 3615 * checks later, and set 3616 * the required select 3617 * timeout to 1 millisecond 3618 * so that event loops that 3619 * check our socket descriptor 3620 * also time out so that 3621 * they can call us and we 3622 * can do the checks. 3623 */ 3624 handlep->netdown = 1; 3625 handle->required_select_timeout = &netdown_timeout; 3626 } else if (err == 0) { 3627 /* 3628 * This shouldn't happen, so 3629 * report a special indication 3630 * that it did. 
3631 */ 3632 snprintf(handle->errbuf, 3633 PCAP_ERRBUF_SIZE, 3634 "Error condition on packet socket: Reported error was 0"); 3635 return PCAP_ERROR; 3636 } else { 3637 pcap_fmt_errmsg_for_errno(handle->errbuf, 3638 PCAP_ERRBUF_SIZE, 3639 err, 3640 "Error condition on packet socket"); 3641 return PCAP_ERROR; 3642 } 3643 } 3644 } 3645 /* 3646 * Now check the event device. 3647 */ 3648 if (pollinfo[1].revents & POLLIN) { 3649 ssize_t nread; 3650 uint64_t value; 3651 3652 /* 3653 * This should never fail, but, just 3654 * in case.... 3655 */ 3656 nread = read(handlep->poll_breakloop_fd, &value, 3657 sizeof(value)); 3658 if (nread == -1) { 3659 pcap_fmt_errmsg_for_errno(handle->errbuf, 3660 PCAP_ERRBUF_SIZE, 3661 errno, 3662 "Error reading from event FD"); 3663 return PCAP_ERROR; 3664 } 3665 3666 /* 3667 * According to the Linux read(2) man 3668 * page, read() will transfer at most 3669 * 2^31-1 bytes, so the return value is 3670 * either -1 or a value between 0 3671 * and 2^31-1, so it's non-negative. 3672 * 3673 * Cast it to size_t to squelch 3674 * warnings from the compiler; add this 3675 * comment to squelch warnings from 3676 * humans reading the code. :-) 3677 * 3678 * Don't treat an EOF as an error, but 3679 * *do* treat a short read as an error; 3680 * that "shouldn't happen", but.... 3681 */ 3682 if (nread != 0 && 3683 (size_t)nread < sizeof(value)) { 3684 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3685 "Short read from event FD: expected %zu, got %zd", 3686 sizeof(value), nread); 3687 return PCAP_ERROR; 3688 } 3689 3690 /* 3691 * This event gets signaled by a 3692 * pcap_breakloop() call; if we were told 3693 * to break out of the loop, do so. 
3694 */ 3695 if (handle->break_loop) { 3696 handle->break_loop = 0; 3697 return PCAP_ERROR_BREAK; 3698 } 3699 } 3700 } 3701 3702 /* 3703 * Either: 3704 * 3705 * 1) we got neither an error from poll() nor any 3706 * readable descriptors, in which case there 3707 * are no packets waiting to read 3708 * 3709 * or 3710 * 3711 * 2) We got readable descriptors but the PF_PACKET 3712 * socket wasn't one of them, in which case there 3713 * are no packets waiting to read 3714 * 3715 * so, if we got an ENETDOWN, we've drained whatever 3716 * packets were available to read at the point of the 3717 * ENETDOWN. 3718 * 3719 * So, if we got an ENETDOWN and haven't gotten an indication 3720 * that the device has gone away or that the device is up, 3721 * we don't yet know for certain whether the device has 3722 * gone away or not, check whether the device exists and is 3723 * up. 3724 */ 3725 if (handlep->netdown) { 3726 if (!device_still_exists(handle)) { 3727 /* 3728 * The device doesn't exist any more; 3729 * report that. 3730 * 3731 * XXX - we should really return an 3732 * appropriate error for that, but 3733 * pcap_dispatch() etc. aren't documented 3734 * as having error returns other than 3735 * PCAP_ERROR or PCAP_ERROR_BREAK. 3736 */ 3737 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3738 "The interface disappeared"); 3739 return PCAP_ERROR; 3740 } 3741 3742 /* 3743 * The device still exists; try to see if it's up. 3744 */ 3745 memset(&ifr, 0, sizeof(ifr)); 3746 pcap_strlcpy(ifr.ifr_name, handlep->device, 3747 sizeof(ifr.ifr_name)); 3748 if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) { 3749 if (errno == ENXIO || errno == ENODEV) { 3750 /* 3751 * OK, *now* it's gone. 3752 * 3753 * XXX - see above comment. 
3754 */ 3755 snprintf(handle->errbuf, 3756 PCAP_ERRBUF_SIZE, 3757 "The interface disappeared"); 3758 return PCAP_ERROR; 3759 } else { 3760 pcap_fmt_errmsg_for_errno(handle->errbuf, 3761 PCAP_ERRBUF_SIZE, errno, 3762 "%s: Can't get flags", 3763 handlep->device); 3764 return PCAP_ERROR; 3765 } 3766 } 3767 if (ifr.ifr_flags & IFF_UP) { 3768 /* 3769 * It's up, so it definitely still exists. 3770 * Cancel the ENETDOWN indication - we 3771 * presumably got it due to the interface 3772 * going down rather than the device going 3773 * away - and revert to "no required select 3774 * timeout. 3775 */ 3776 handlep->netdown = 0; 3777 handle->required_select_timeout = NULL; 3778 } 3779 } 3780 3781 /* 3782 * If we're in non-blocking mode, just quit now, rather 3783 * than spinning in a loop doing poll()s that immediately 3784 * time out if there's no indication on any descriptor. 3785 */ 3786 if (handlep->poll_timeout == 0) 3787 break; 3788 } 3789 return 0; 3790} 3791 3792/* handle a single memory mapped packet */ 3793static int pcap_handle_packet_mmap( 3794 pcap_t *handle, 3795 pcap_handler callback, 3796 u_char *user, 3797 unsigned char *frame, 3798 unsigned int tp_len, 3799 unsigned int tp_mac, 3800 unsigned int tp_snaplen, 3801 unsigned int tp_sec, 3802 unsigned int tp_usec, 3803 int tp_vlan_tci_valid, 3804 __u16 tp_vlan_tci, 3805 __u16 tp_vlan_tpid) 3806{ 3807 struct pcap_linux *handlep = handle->priv; 3808 unsigned char *bp; 3809 struct sockaddr_ll *sll; 3810 struct pcap_pkthdr pcaphdr; 3811 pcap_can_socketcan_hdr *canhdr; 3812 unsigned int snaplen = tp_snaplen; 3813 struct utsname utsname; 3814 3815 /* perform sanity check on internal offset. */ 3816 if (tp_mac + tp_snaplen > handle->bufsize) { 3817 /* 3818 * Report some system information as a debugging aid. 
3819 */ 3820 if (uname(&utsname) != -1) { 3821 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3822 "corrupted frame on kernel ring mac " 3823 "offset %u + caplen %u > frame len %d " 3824 "(kernel %.32s version %s, machine %.16s)", 3825 tp_mac, tp_snaplen, handle->bufsize, 3826 utsname.release, utsname.version, 3827 utsname.machine); 3828 } else { 3829 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3830 "corrupted frame on kernel ring mac " 3831 "offset %u + caplen %u > frame len %d", 3832 tp_mac, tp_snaplen, handle->bufsize); 3833 } 3834 return -1; 3835 } 3836 3837 /* run filter on received packet 3838 * If the kernel filtering is enabled we need to run the 3839 * filter until all the frames present into the ring 3840 * at filter creation time are processed. 3841 * In this case, blocks_to_filter_in_userland is used 3842 * as a counter for the packet we need to filter. 3843 * Note: alternatively it could be possible to stop applying 3844 * the filter when the ring became empty, but it can possibly 3845 * happen a lot later... */ 3846 bp = frame + tp_mac; 3847 3848 /* if required build in place the sll header*/ 3849 sll = (void *)(frame + TPACKET_ALIGN(handlep->tp_hdrlen)); 3850 if (handlep->cooked) { 3851 if (handle->linktype == DLT_LINUX_SLL2) { 3852 struct sll2_header *hdrp; 3853 3854 /* 3855 * The kernel should have left us with enough 3856 * space for an sll header; back up the packet 3857 * data pointer into that space, as that'll be 3858 * the beginning of the packet we pass to the 3859 * callback. 3860 */ 3861 bp -= SLL2_HDR_LEN; 3862 3863 /* 3864 * Let's make sure that's past the end of 3865 * the tpacket header, i.e. >= 3866 * ((u_char *)thdr + TPACKET_HDRLEN), so we 3867 * don't step on the header when we construct 3868 * the sll header. 
3869 */ 3870 if (bp < (u_char *)frame + 3871 TPACKET_ALIGN(handlep->tp_hdrlen) + 3872 sizeof(struct sockaddr_ll)) { 3873 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3874 "cooked-mode frame doesn't have room for sll header"); 3875 return -1; 3876 } 3877 3878 /* 3879 * OK, that worked; construct the sll header. 3880 */ 3881 hdrp = (struct sll2_header *)bp; 3882 hdrp->sll2_protocol = sll->sll_protocol; 3883 hdrp->sll2_reserved_mbz = 0; 3884 hdrp->sll2_if_index = htonl(sll->sll_ifindex); 3885 hdrp->sll2_hatype = htons(sll->sll_hatype); 3886 hdrp->sll2_pkttype = sll->sll_pkttype; 3887 hdrp->sll2_halen = sll->sll_halen; 3888 memcpy(hdrp->sll2_addr, sll->sll_addr, SLL_ADDRLEN); 3889 3890 snaplen += sizeof(struct sll2_header); 3891 } else { 3892 struct sll_header *hdrp; 3893 3894 /* 3895 * The kernel should have left us with enough 3896 * space for an sll header; back up the packet 3897 * data pointer into that space, as that'll be 3898 * the beginning of the packet we pass to the 3899 * callback. 3900 */ 3901 bp -= SLL_HDR_LEN; 3902 3903 /* 3904 * Let's make sure that's past the end of 3905 * the tpacket header, i.e. >= 3906 * ((u_char *)thdr + TPACKET_HDRLEN), so we 3907 * don't step on the header when we construct 3908 * the sll header. 3909 */ 3910 if (bp < (u_char *)frame + 3911 TPACKET_ALIGN(handlep->tp_hdrlen) + 3912 sizeof(struct sockaddr_ll)) { 3913 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3914 "cooked-mode frame doesn't have room for sll header"); 3915 return -1; 3916 } 3917 3918 /* 3919 * OK, that worked; construct the sll header. 
3920 */ 3921 hdrp = (struct sll_header *)bp; 3922 hdrp->sll_pkttype = htons(sll->sll_pkttype); 3923 hdrp->sll_hatype = htons(sll->sll_hatype); 3924 hdrp->sll_halen = htons(sll->sll_halen); 3925 memcpy(hdrp->sll_addr, sll->sll_addr, SLL_ADDRLEN); 3926 hdrp->sll_protocol = sll->sll_protocol; 3927 3928 snaplen += sizeof(struct sll_header); 3929 } 3930 } else { 3931 /* 3932 * If this is a packet from a CAN device, so that 3933 * sll->sll_hatype is ARPHRD_CAN, then, as we're 3934 * not capturing in cooked mode, its link-layer 3935 * type is DLT_CAN_SOCKETCAN. Fix up the header 3936 * provided by the code below us to match what 3937 * DLT_CAN_SOCKETCAN is expected to provide. 3938 */ 3939 if (sll->sll_hatype == ARPHRD_CAN) { 3940 /* 3941 * DLT_CAN_SOCKETCAN is specified as having the 3942 * CAN ID and flags in network byte order, but 3943 * capturing on a CAN device provides it in host 3944 * byte order. Convert it to network byte order. 3945 */ 3946 canhdr = (pcap_can_socketcan_hdr *)bp; 3947 canhdr->can_id = htonl(canhdr->can_id); 3948 3949 /* 3950 * In addition, set the CANFD_FDF flag if 3951 * the protocol is LINUX_SLL_P_CANFD, as 3952 * the protocol field itself isn't in 3953 * the packet to indicate that it's a 3954 * CAN FD packet. 3955 */ 3956 uint16_t protocol = ntohs(sll->sll_protocol); 3957 if (protocol == LINUX_SLL_P_CANFD) { 3958 canhdr->fd_flags |= CANFD_FDF; 3959 3960 /* 3961 * Zero out all the unknown bits in 3962 * fd_flags and clear the reserved 3963 * fields, so that a program reading 3964 * this can assume that CANFD_FDF 3965 * is set because we set it, not 3966 * because some uninitialized crap 3967 * was provided in the fd_flags 3968 * field. 3969 * 3970 * (At least some LINKTYPE_CAN_SOCKETCAN 3971 * files attached to Wireshark bugs 3972 * had uninitialized junk there, so it 3973 * does happen.) 3974 * 3975 * Update this if Linux adds more flag 3976 * bits to the fd_flags field or uses 3977 * either of the reserved fields for 3978 * FD frames. 
3979 */ 3980 canhdr->fd_flags &= ~(CANFD_FDF|CANFD_ESI|CANFD_BRS); 3981 canhdr->reserved1 = 0; 3982 canhdr->reserved2 = 0; 3983 } else { 3984 /* 3985 * Clear CANFD_FDF if it's set (probably 3986 * again meaning that this field is 3987 * uninitialized junk). 3988 */ 3989 canhdr->fd_flags &= ~CANFD_FDF; 3990 } 3991 } 3992 } 3993 3994 if (handlep->filter_in_userland && handle->fcode.bf_insns) { 3995 struct pcap_bpf_aux_data aux_data; 3996 3997 aux_data.vlan_tag_present = tp_vlan_tci_valid; 3998 aux_data.vlan_tag = tp_vlan_tci & 0x0fff; 3999 4000 if (pcap_filter_with_aux_data(handle->fcode.bf_insns, 4001 bp, 4002 tp_len, 4003 snaplen, 4004 &aux_data) == 0) 4005 return 0; 4006 } 4007 4008 if (!linux_check_direction(handle, sll)) 4009 return 0; 4010 4011 /* get required packet info from ring header */ 4012 pcaphdr.ts.tv_sec = tp_sec; 4013 pcaphdr.ts.tv_usec = tp_usec; 4014 pcaphdr.caplen = tp_snaplen; 4015 pcaphdr.len = tp_len; 4016 4017 /* if required build in place the sll header*/ 4018 if (handlep->cooked) { 4019 /* update packet len */ 4020 if (handle->linktype == DLT_LINUX_SLL2) { 4021 pcaphdr.caplen += SLL2_HDR_LEN; 4022 pcaphdr.len += SLL2_HDR_LEN; 4023 } else { 4024 pcaphdr.caplen += SLL_HDR_LEN; 4025 pcaphdr.len += SLL_HDR_LEN; 4026 } 4027 } 4028 4029 if (tp_vlan_tci_valid && 4030 handlep->vlan_offset != -1 && 4031 tp_snaplen >= (unsigned int) handlep->vlan_offset) 4032 { 4033 struct vlan_tag *tag; 4034 4035 /* 4036 * Move everything in the header, except the type field, 4037 * down VLAN_TAG_LEN bytes, to allow us to insert the 4038 * VLAN tag between that stuff and the type field. 4039 */ 4040 bp -= VLAN_TAG_LEN; 4041 memmove(bp, bp + VLAN_TAG_LEN, handlep->vlan_offset); 4042 4043 /* 4044 * Now insert the tag. 4045 */ 4046 tag = (struct vlan_tag *)(bp + handlep->vlan_offset); 4047 tag->vlan_tpid = htons(tp_vlan_tpid); 4048 tag->vlan_tci = htons(tp_vlan_tci); 4049 4050 /* 4051 * Add the tag to the packet lengths. 
4052 */ 4053 pcaphdr.caplen += VLAN_TAG_LEN; 4054 pcaphdr.len += VLAN_TAG_LEN; 4055 } 4056 4057 /* 4058 * The only way to tell the kernel to cut off the 4059 * packet at a snapshot length is with a filter program; 4060 * if there's no filter program, the kernel won't cut 4061 * the packet off. 4062 * 4063 * Trim the snapshot length to be no longer than the 4064 * specified snapshot length. 4065 * 4066 * XXX - an alternative is to put a filter, consisting 4067 * of a "ret <snaplen>" instruction, on the socket 4068 * in the activate routine, so that the truncation is 4069 * done in the kernel even if nobody specified a filter; 4070 * that means that less buffer space is consumed in 4071 * the memory-mapped buffer. 4072 */ 4073 if (pcaphdr.caplen > (bpf_u_int32)handle->snapshot) 4074 pcaphdr.caplen = handle->snapshot; 4075 4076 /* pass the packet to the user */ 4077 callback(user, &pcaphdr, bp); 4078 4079 return 1; 4080} 4081 4082static int 4083pcap_read_linux_mmap_v2(pcap_t *handle, int max_packets, pcap_handler callback, 4084 u_char *user) 4085{ 4086 struct pcap_linux *handlep = handle->priv; 4087 union thdr h; 4088 int pkts = 0; 4089 int ret; 4090 4091 /* wait for frames availability.*/ 4092 h.raw = RING_GET_CURRENT_FRAME(handle); 4093 if (!packet_mmap_acquire(h.h2)) { 4094 /* 4095 * The current frame is owned by the kernel; wait for 4096 * a frame to be handed to us. 4097 */ 4098 ret = pcap_wait_for_frames_mmap(handle); 4099 if (ret) { 4100 return ret; 4101 } 4102 } 4103 4104 /* 4105 * This can conceivably process more than INT_MAX packets, 4106 * which would overflow the packet count, causing it either 4107 * to look like a negative number, and thus cause us to 4108 * return a value that looks like an error, or overflow 4109 * back into positive territory, and thus cause us to 4110 * return a too-low count. 
4111 * 4112 * Therefore, if the packet count is unlimited, we clip 4113 * it at INT_MAX; this routine is not expected to 4114 * process packets indefinitely, so that's not an issue. 4115 */ 4116 if (PACKET_COUNT_IS_UNLIMITED(max_packets)) 4117 max_packets = INT_MAX; 4118 4119 while (pkts < max_packets) { 4120 /* 4121 * Get the current ring buffer frame, and break if 4122 * it's still owned by the kernel. 4123 */ 4124 h.raw = RING_GET_CURRENT_FRAME(handle); 4125 if (!packet_mmap_acquire(h.h2)) 4126 break; 4127 4128 ret = pcap_handle_packet_mmap( 4129 handle, 4130 callback, 4131 user, 4132 h.raw, 4133 h.h2->tp_len, 4134 h.h2->tp_mac, 4135 h.h2->tp_snaplen, 4136 h.h2->tp_sec, 4137 handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? h.h2->tp_nsec : h.h2->tp_nsec / 1000, 4138 VLAN_VALID(h.h2, h.h2), 4139 h.h2->tp_vlan_tci, 4140 VLAN_TPID(h.h2, h.h2)); 4141 if (ret == 1) { 4142 pkts++; 4143 } else if (ret < 0) { 4144 return ret; 4145 } 4146 4147 /* 4148 * Hand this block back to the kernel, and, if we're 4149 * counting blocks that need to be filtered in userland 4150 * after having been filtered by the kernel, count 4151 * the one we've just processed. 4152 */ 4153 packet_mmap_release(h.h2); 4154 if (handlep->blocks_to_filter_in_userland > 0) { 4155 handlep->blocks_to_filter_in_userland--; 4156 if (handlep->blocks_to_filter_in_userland == 0) { 4157 /* 4158 * No more blocks need to be filtered 4159 * in userland. 
4160 */ 4161 handlep->filter_in_userland = 0; 4162 } 4163 } 4164 4165 /* next block */ 4166 if (++handle->offset >= handle->cc) 4167 handle->offset = 0; 4168 4169 /* check for break loop condition*/ 4170 if (handle->break_loop) { 4171 handle->break_loop = 0; 4172 return PCAP_ERROR_BREAK; 4173 } 4174 } 4175 return pkts; 4176} 4177 4178#ifdef HAVE_TPACKET3 4179static int 4180pcap_read_linux_mmap_v3(pcap_t *handle, int max_packets, pcap_handler callback, 4181 u_char *user) 4182{ 4183 struct pcap_linux *handlep = handle->priv; 4184 union thdr h; 4185 int pkts = 0; 4186 int ret; 4187 4188again: 4189 if (handlep->current_packet == NULL) { 4190 /* wait for frames availability.*/ 4191 h.raw = RING_GET_CURRENT_FRAME(handle); 4192 if (!packet_mmap_v3_acquire(h.h3)) { 4193 /* 4194 * The current frame is owned by the kernel; wait 4195 * for a frame to be handed to us. 4196 */ 4197 ret = pcap_wait_for_frames_mmap(handle); 4198 if (ret) { 4199 return ret; 4200 } 4201 } 4202 } 4203 h.raw = RING_GET_CURRENT_FRAME(handle); 4204 if (!packet_mmap_v3_acquire(h.h3)) { 4205 if (pkts == 0 && handlep->timeout == 0) { 4206 /* Block until we see a packet. */ 4207 goto again; 4208 } 4209 return pkts; 4210 } 4211 4212 /* 4213 * This can conceivably process more than INT_MAX packets, 4214 * which would overflow the packet count, causing it either 4215 * to look like a negative number, and thus cause us to 4216 * return a value that looks like an error, or overflow 4217 * back into positive territory, and thus cause us to 4218 * return a too-low count. 4219 * 4220 * Therefore, if the packet count is unlimited, we clip 4221 * it at INT_MAX; this routine is not expected to 4222 * process packets indefinitely, so that's not an issue. 
4223 */ 4224 if (PACKET_COUNT_IS_UNLIMITED(max_packets)) 4225 max_packets = INT_MAX; 4226 4227 while (pkts < max_packets) { 4228 int packets_to_read; 4229 4230 if (handlep->current_packet == NULL) { 4231 h.raw = RING_GET_CURRENT_FRAME(handle); 4232 if (!packet_mmap_v3_acquire(h.h3)) 4233 break; 4234 4235 handlep->current_packet = h.raw + h.h3->hdr.bh1.offset_to_first_pkt; 4236 handlep->packets_left = h.h3->hdr.bh1.num_pkts; 4237 } 4238 packets_to_read = handlep->packets_left; 4239 4240 if (packets_to_read > (max_packets - pkts)) { 4241 /* 4242 * There are more packets in the buffer than 4243 * the number of packets we have left to 4244 * process to get up to the maximum number 4245 * of packets to process. Only process enough 4246 * of them to get us up to that maximum. 4247 */ 4248 packets_to_read = max_packets - pkts; 4249 } 4250 4251 while (packets_to_read-- && !handle->break_loop) { 4252 struct tpacket3_hdr* tp3_hdr = (struct tpacket3_hdr*) handlep->current_packet; 4253 ret = pcap_handle_packet_mmap( 4254 handle, 4255 callback, 4256 user, 4257 handlep->current_packet, 4258 tp3_hdr->tp_len, 4259 tp3_hdr->tp_mac, 4260 tp3_hdr->tp_snaplen, 4261 tp3_hdr->tp_sec, 4262 handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? tp3_hdr->tp_nsec : tp3_hdr->tp_nsec / 1000, 4263 VLAN_VALID(tp3_hdr, &tp3_hdr->hv1), 4264 tp3_hdr->hv1.tp_vlan_tci, 4265 VLAN_TPID(tp3_hdr, &tp3_hdr->hv1)); 4266 if (ret == 1) { 4267 pkts++; 4268 } else if (ret < 0) { 4269 handlep->current_packet = NULL; 4270 return ret; 4271 } 4272 handlep->current_packet += tp3_hdr->tp_next_offset; 4273 handlep->packets_left--; 4274 } 4275 4276 if (handlep->packets_left <= 0) { 4277 /* 4278 * Hand this block back to the kernel, and, if 4279 * we're counting blocks that need to be 4280 * filtered in userland after having been 4281 * filtered by the kernel, count the one we've 4282 * just processed. 
4283 */ 4284 packet_mmap_v3_release(h.h3); 4285 if (handlep->blocks_to_filter_in_userland > 0) { 4286 handlep->blocks_to_filter_in_userland--; 4287 if (handlep->blocks_to_filter_in_userland == 0) { 4288 /* 4289 * No more blocks need to be filtered 4290 * in userland. 4291 */ 4292 handlep->filter_in_userland = 0; 4293 } 4294 } 4295 4296 /* next block */ 4297 if (++handle->offset >= handle->cc) 4298 handle->offset = 0; 4299 4300 handlep->current_packet = NULL; 4301 } 4302 4303 /* check for break loop condition*/ 4304 if (handle->break_loop) { 4305 handle->break_loop = 0; 4306 return PCAP_ERROR_BREAK; 4307 } 4308 } 4309 if (pkts == 0 && handlep->timeout == 0) { 4310 /* Block until we see a packet. */ 4311 goto again; 4312 } 4313 return pkts; 4314} 4315#endif /* HAVE_TPACKET3 */ 4316 4317/* 4318 * Attach the given BPF code to the packet capture device. 4319 */ 4320static int 4321pcap_setfilter_linux(pcap_t *handle, struct bpf_program *filter) 4322{ 4323 struct pcap_linux *handlep; 4324 struct sock_fprog fcode; 4325 int can_filter_in_kernel; 4326 int err = 0; 4327 int n, offset; 4328 4329 if (!handle) 4330 return -1; 4331 if (!filter) { 4332 pcap_strlcpy(handle->errbuf, "setfilter: No filter specified", 4333 PCAP_ERRBUF_SIZE); 4334 return -1; 4335 } 4336 4337 handlep = handle->priv; 4338 4339 /* Make our private copy of the filter */ 4340 4341 if (install_bpf_program(handle, filter) < 0) 4342 /* install_bpf_program() filled in errbuf */ 4343 return -1; 4344 4345 /* 4346 * Run user level packet filter by default. Will be overridden if 4347 * installing a kernel filter succeeds. 4348 */ 4349 handlep->filter_in_userland = 1; 4350 4351 /* Install kernel level filter if possible */ 4352 4353#ifdef USHRT_MAX 4354 if (handle->fcode.bf_len > USHRT_MAX) { 4355 /* 4356 * fcode.len is an unsigned short for current kernel. 4357 * I have yet to see BPF-Code with that much 4358 * instructions but still it is possible. So for the 4359 * sake of correctness I added this check. 
4360 */ 4361 fprintf(stderr, "Warning: Filter too complex for kernel\n"); 4362 fcode.len = 0; 4363 fcode.filter = NULL; 4364 can_filter_in_kernel = 0; 4365 } else 4366#endif /* USHRT_MAX */ 4367 { 4368 /* 4369 * Oh joy, the Linux kernel uses struct sock_fprog instead 4370 * of struct bpf_program and of course the length field is 4371 * of different size. Pointed out by Sebastian 4372 * 4373 * Oh, and we also need to fix it up so that all "ret" 4374 * instructions with non-zero operands have MAXIMUM_SNAPLEN 4375 * as the operand if we're not capturing in memory-mapped 4376 * mode, and so that, if we're in cooked mode, all memory- 4377 * reference instructions use special magic offsets in 4378 * references to the link-layer header and assume that the 4379 * link-layer payload begins at 0; "fix_program()" will do 4380 * that. 4381 */ 4382 switch (fix_program(handle, &fcode)) { 4383 4384 case -1: 4385 default: 4386 /* 4387 * Fatal error; just quit. 4388 * (The "default" case shouldn't happen; we 4389 * return -1 for that reason.) 4390 */ 4391 return -1; 4392 4393 case 0: 4394 /* 4395 * The program performed checks that we can't make 4396 * work in the kernel. 4397 */ 4398 can_filter_in_kernel = 0; 4399 break; 4400 4401 case 1: 4402 /* 4403 * We have a filter that'll work in the kernel. 4404 */ 4405 can_filter_in_kernel = 1; 4406 break; 4407 } 4408 } 4409 4410 /* 4411 * NOTE: at this point, we've set both the "len" and "filter" 4412 * fields of "fcode". As of the 2.6.32.4 kernel, at least, 4413 * those are the only members of the "sock_fprog" structure, 4414 * so we initialize every member of that structure. 4415 * 4416 * If there is anything in "fcode" that is not initialized, 4417 * it is either a field added in a later kernel, or it's 4418 * padding. 4419 * 4420 * If a new field is added, this code needs to be updated 4421 * to set it correctly. 
4422 * 4423 * If there are no other fields, then: 4424 * 4425 * if the Linux kernel looks at the padding, it's 4426 * buggy; 4427 * 4428 * if the Linux kernel doesn't look at the padding, 4429 * then if some tool complains that we're passing 4430 * uninitialized data to the kernel, then the tool 4431 * is buggy and needs to understand that it's just 4432 * padding. 4433 */ 4434 if (can_filter_in_kernel) { 4435 if ((err = set_kernel_filter(handle, &fcode)) == 0) 4436 { 4437 /* 4438 * Installation succeeded - using kernel filter, 4439 * so userland filtering not needed. 4440 */ 4441 handlep->filter_in_userland = 0; 4442 } 4443 else if (err == -1) /* Non-fatal error */ 4444 { 4445 /* 4446 * Print a warning if we weren't able to install 4447 * the filter for a reason other than "this kernel 4448 * isn't configured to support socket filters. 4449 */ 4450 if (errno == ENOMEM) { 4451 /* 4452 * Either a kernel memory allocation 4453 * failure occurred, or there's too 4454 * much "other/option memory" allocated 4455 * for this socket. Suggest that they 4456 * increase the "other/option memory" 4457 * limit. 4458 */ 4459 fprintf(stderr, 4460 "Warning: Couldn't allocate kernel memory for filter: try increasing net.core.optmem_max with sysctl\n"); 4461 } else if (errno != ENOPROTOOPT && errno != EOPNOTSUPP) { 4462 fprintf(stderr, 4463 "Warning: Kernel filter failed: %s\n", 4464 pcap_strerror(errno)); 4465 } 4466 } 4467 } 4468 4469 /* 4470 * If we're not using the kernel filter, get rid of any kernel 4471 * filter that might've been there before, e.g. because the 4472 * previous filter could work in the kernel, or because some other 4473 * code attached a filter to the socket by some means other than 4474 * calling "pcap_setfilter()". Otherwise, the kernel filter may 4475 * filter out packets that would pass the new userland filter. 
4476 */ 4477 if (handlep->filter_in_userland) { 4478 if (reset_kernel_filter(handle) == -1) { 4479 pcap_fmt_errmsg_for_errno(handle->errbuf, 4480 PCAP_ERRBUF_SIZE, errno, 4481 "can't remove kernel filter"); 4482 err = -2; /* fatal error */ 4483 } 4484 } 4485 4486 /* 4487 * Free up the copy of the filter that was made by "fix_program()". 4488 */ 4489 if (fcode.filter != NULL) 4490 free(fcode.filter); 4491 4492 if (err == -2) 4493 /* Fatal error */ 4494 return -1; 4495 4496 /* 4497 * If we're filtering in userland, there's nothing to do; 4498 * the new filter will be used for the next packet. 4499 */ 4500 if (handlep->filter_in_userland) 4501 return 0; 4502 4503 /* 4504 * We're filtering in the kernel; the packets present in 4505 * all blocks currently in the ring were already filtered 4506 * by the old filter, and so will need to be filtered in 4507 * userland by the new filter. 4508 * 4509 * Get an upper bound for the number of such blocks; first, 4510 * walk the ring backward and count the free blocks. 4511 */ 4512 offset = handle->offset; 4513 if (--offset < 0) 4514 offset = handle->cc - 1; 4515 for (n=0; n < handle->cc; ++n) { 4516 if (--offset < 0) 4517 offset = handle->cc - 1; 4518 if (pcap_get_ring_frame_status(handle, offset) != TP_STATUS_KERNEL) 4519 break; 4520 } 4521 4522 /* 4523 * If we found free blocks, decrement the count of free 4524 * blocks by 1, just in case we lost a race with another 4525 * thread of control that was adding a packet while 4526 * we were counting and that had run the filter before 4527 * we changed it. 4528 * 4529 * XXX - could there be more than one block added in 4530 * this fashion? 4531 * 4532 * XXX - is there a way to avoid that race, e.g. somehow 4533 * wait for all packets that passed the old filter to 4534 * be added to the ring? 
4535 */ 4536 if (n != 0) 4537 n--; 4538 4539 /* 4540 * Set the count of blocks worth of packets to filter 4541 * in userland to the total number of blocks in the 4542 * ring minus the number of free blocks we found, and 4543 * turn on userland filtering. (The count of blocks 4544 * worth of packets to filter in userland is guaranteed 4545 * not to be zero - n, above, couldn't be set to a 4546 * value > handle->cc, and if it were equal to 4547 * handle->cc, it wouldn't be zero, and thus would 4548 * be decremented to handle->cc - 1.) 4549 */ 4550 handlep->blocks_to_filter_in_userland = handle->cc - n; 4551 handlep->filter_in_userland = 1; 4552 4553 return 0; 4554} 4555 4556/* 4557 * Return the index of the given device name. Fill ebuf and return 4558 * -1 on failure. 4559 */ 4560static int 4561iface_get_id(int fd, const char *device, char *ebuf) 4562{ 4563 struct ifreq ifr; 4564 4565 memset(&ifr, 0, sizeof(ifr)); 4566 pcap_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 4567 4568 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) { 4569 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4570 errno, "SIOCGIFINDEX"); 4571 return -1; 4572 } 4573 4574 return ifr.ifr_ifindex; 4575} 4576 4577/* 4578 * Bind the socket associated with FD to the given device. 4579 * Return 0 on success or a PCAP_ERROR_ value on a hard error. 4580 */ 4581static int 4582iface_bind(int fd, int ifindex, char *ebuf, int protocol) 4583{ 4584 struct sockaddr_ll sll; 4585 int ret, err; 4586 socklen_t errlen = sizeof(err); 4587 4588 memset(&sll, 0, sizeof(sll)); 4589 sll.sll_family = AF_PACKET; 4590 sll.sll_ifindex = ifindex < 0 ? 0 : ifindex; 4591 sll.sll_protocol = protocol; 4592 4593 if (bind(fd, (struct sockaddr *) &sll, sizeof(sll)) == -1) { 4594 if (errno == ENETDOWN) { 4595 /* 4596 * Return a "network down" indication, so that 4597 * the application can report that rather than 4598 * saying we had a mysterious failure and 4599 * suggest that they report a problem to the 4600 * libpcap developers. 
4601 */ 4602 return PCAP_ERROR_IFACE_NOT_UP; 4603 } 4604 if (errno == ENODEV) { 4605 /* 4606 * There's nothing more to say, so clear the 4607 * error message. 4608 */ 4609 ebuf[0] = '\0'; 4610 ret = PCAP_ERROR_NO_SUCH_DEVICE; 4611 } else { 4612 ret = PCAP_ERROR; 4613 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4614 errno, "bind"); 4615 } 4616 return ret; 4617 } 4618 4619 /* Any pending errors, e.g., network is down? */ 4620 4621 if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) { 4622 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4623 errno, "getsockopt (SO_ERROR)"); 4624 return PCAP_ERROR; 4625 } 4626 4627 if (err == ENETDOWN) { 4628 /* 4629 * Return a "network down" indication, so that 4630 * the application can report that rather than 4631 * saying we had a mysterious failure and 4632 * suggest that they report a problem to the 4633 * libpcap developers. 4634 */ 4635 return PCAP_ERROR_IFACE_NOT_UP; 4636 } else if (err > 0) { 4637 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4638 err, "bind"); 4639 return PCAP_ERROR; 4640 } 4641 4642 return 0; 4643} 4644 4645/* 4646 * Try to enter monitor mode. 4647 * If we have libnl, try to create a new monitor-mode device and 4648 * capture on that; otherwise, just say "not supported". 4649 */ 4650#ifdef HAVE_LIBNL 4651static int 4652enter_rfmon_mode(pcap_t *handle, int sock_fd, const char *device) 4653{ 4654 struct pcap_linux *handlep = handle->priv; 4655 int ret; 4656 char phydev_path[PATH_MAX+1]; 4657 struct nl80211_state nlstate; 4658 struct ifreq ifr; 4659 u_int n; 4660 4661 /* 4662 * Is this a mac80211 device? 4663 */ 4664 ret = get_mac80211_phydev(handle, device, phydev_path, PATH_MAX); 4665 if (ret < 0) 4666 return ret; /* error */ 4667 if (ret == 0) 4668 return 0; /* no error, but not mac80211 device */ 4669 4670 /* 4671 * XXX - is this already a monN device? 4672 * If so, we're done. 4673 */ 4674 4675 /* 4676 * OK, it's apparently a mac80211 device. 
4677 * Try to find an unused monN device for it. 4678 */ 4679 ret = nl80211_init(handle, &nlstate, device); 4680 if (ret != 0) 4681 return ret; 4682 for (n = 0; n < UINT_MAX; n++) { 4683 /* 4684 * Try mon{n}. 4685 */ 4686 char mondevice[3+10+1]; /* mon{UINT_MAX}\0 */ 4687 4688 snprintf(mondevice, sizeof mondevice, "mon%u", n); 4689 ret = add_mon_if(handle, sock_fd, &nlstate, device, mondevice); 4690 if (ret == 1) { 4691 /* 4692 * Success. We don't clean up the libnl state 4693 * yet, as we'll be using it later. 4694 */ 4695 goto added; 4696 } 4697 if (ret < 0) { 4698 /* 4699 * Hard failure. Just return ret; handle->errbuf 4700 * has already been set. 4701 */ 4702 nl80211_cleanup(&nlstate); 4703 return ret; 4704 } 4705 } 4706 4707 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 4708 "%s: No free monN interfaces", device); 4709 nl80211_cleanup(&nlstate); 4710 return PCAP_ERROR; 4711 4712added: 4713 4714#if 0 4715 /* 4716 * Sleep for .1 seconds. 4717 */ 4718 delay.tv_sec = 0; 4719 delay.tv_nsec = 500000000; 4720 nanosleep(&delay, NULL); 4721#endif 4722 4723 /* 4724 * If we haven't already done so, arrange to have 4725 * "pcap_close_all()" called when we exit. 4726 */ 4727 if (!pcap_do_addexit(handle)) { 4728 /* 4729 * "atexit()" failed; don't put the interface 4730 * in rfmon mode, just give up. 4731 */ 4732 del_mon_if(handle, sock_fd, &nlstate, device, 4733 handlep->mondevice); 4734 nl80211_cleanup(&nlstate); 4735 return PCAP_ERROR; 4736 } 4737 4738 /* 4739 * Now configure the monitor interface up. 
4740 */ 4741 memset(&ifr, 0, sizeof(ifr)); 4742 pcap_strlcpy(ifr.ifr_name, handlep->mondevice, sizeof(ifr.ifr_name)); 4743 if (ioctl(sock_fd, SIOCGIFFLAGS, &ifr) == -1) { 4744 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 4745 errno, "%s: Can't get flags for %s", device, 4746 handlep->mondevice); 4747 del_mon_if(handle, sock_fd, &nlstate, device, 4748 handlep->mondevice); 4749 nl80211_cleanup(&nlstate); 4750 return PCAP_ERROR; 4751 } 4752 ifr.ifr_flags |= IFF_UP|IFF_RUNNING; 4753 if (ioctl(sock_fd, SIOCSIFFLAGS, &ifr) == -1) { 4754 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 4755 errno, "%s: Can't set flags for %s", device, 4756 handlep->mondevice); 4757 del_mon_if(handle, sock_fd, &nlstate, device, 4758 handlep->mondevice); 4759 nl80211_cleanup(&nlstate); 4760 return PCAP_ERROR; 4761 } 4762 4763 /* 4764 * Success. Clean up the libnl state. 4765 */ 4766 nl80211_cleanup(&nlstate); 4767 4768 /* 4769 * Note that we have to delete the monitor device when we close 4770 * the handle. 4771 */ 4772 handlep->must_do_on_close |= MUST_DELETE_MONIF; 4773 4774 /* 4775 * Add this to the list of pcaps to close when we exit. 4776 */ 4777 pcap_add_to_pcaps_to_close(handle); 4778 4779 return 1; 4780} 4781#else /* HAVE_LIBNL */ 4782static int 4783enter_rfmon_mode(pcap_t *handle _U_, int sock_fd _U_, const char *device _U_) 4784{ 4785 /* 4786 * We don't have libnl, so we can't do monitor mode. 4787 */ 4788 return 0; 4789} 4790#endif /* HAVE_LIBNL */ 4791 4792#if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) 4793/* 4794 * Map SOF_TIMESTAMPING_ values to PCAP_TSTAMP_ values. 
4795 */ 4796static const struct { 4797 int soft_timestamping_val; 4798 int pcap_tstamp_val; 4799} sof_ts_type_map[3] = { 4800 { SOF_TIMESTAMPING_SOFTWARE, PCAP_TSTAMP_HOST }, 4801 { SOF_TIMESTAMPING_SYS_HARDWARE, PCAP_TSTAMP_ADAPTER }, 4802 { SOF_TIMESTAMPING_RAW_HARDWARE, PCAP_TSTAMP_ADAPTER_UNSYNCED } 4803}; 4804#define NUM_SOF_TIMESTAMPING_TYPES (sizeof sof_ts_type_map / sizeof sof_ts_type_map[0]) 4805 4806/* 4807 * Set the list of time stamping types to include all types. 4808 */ 4809static int 4810iface_set_all_ts_types(pcap_t *handle, char *ebuf) 4811{ 4812 u_int i; 4813 4814 handle->tstamp_type_list = malloc(NUM_SOF_TIMESTAMPING_TYPES * sizeof(u_int)); 4815 if (handle->tstamp_type_list == NULL) { 4816 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4817 errno, "malloc"); 4818 return -1; 4819 } 4820 for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) 4821 handle->tstamp_type_list[i] = sof_ts_type_map[i].pcap_tstamp_val; 4822 handle->tstamp_type_count = NUM_SOF_TIMESTAMPING_TYPES; 4823 return 0; 4824} 4825 4826/* 4827 * Get a list of time stamp types. 4828 */ 4829#ifdef ETHTOOL_GET_TS_INFO 4830static int 4831iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf) 4832{ 4833 int fd; 4834 struct ifreq ifr; 4835 struct ethtool_ts_info info; 4836 int num_ts_types; 4837 u_int i, j; 4838 4839 /* 4840 * This doesn't apply to the "any" device; you can't say "turn on 4841 * hardware time stamping for all devices that exist now and arrange 4842 * that it be turned on for any device that appears in the future", 4843 * and not all devices even necessarily *support* hardware time 4844 * stamping, so don't report any time stamp types. 4845 */ 4846 if (strcmp(device, "any") == 0) { 4847 handle->tstamp_type_list = NULL; 4848 return 0; 4849 } 4850 4851 /* 4852 * Create a socket from which to fetch time stamping capabilities. 
4853 */ 4854 fd = get_if_ioctl_socket(); 4855 if (fd < 0) { 4856 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4857 errno, "socket for SIOCETHTOOL(ETHTOOL_GET_TS_INFO)"); 4858 return -1; 4859 } 4860 4861 memset(&ifr, 0, sizeof(ifr)); 4862 pcap_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 4863 memset(&info, 0, sizeof(info)); 4864 info.cmd = ETHTOOL_GET_TS_INFO; 4865 ifr.ifr_data = (caddr_t)&info; 4866 if (ioctl(fd, SIOCETHTOOL, &ifr) == -1) { 4867 int save_errno = errno; 4868 4869 close(fd); 4870 switch (save_errno) { 4871 4872 case EOPNOTSUPP: 4873 case EINVAL: 4874 /* 4875 * OK, this OS version or driver doesn't support 4876 * asking for the time stamping types, so let's 4877 * just return all the possible types. 4878 */ 4879 if (iface_set_all_ts_types(handle, ebuf) == -1) 4880 return -1; 4881 return 0; 4882 4883 case ENODEV: 4884 /* 4885 * OK, no such device. 4886 * The user will find that out when they try to 4887 * activate the device; just return an empty 4888 * list of time stamp types. 4889 */ 4890 handle->tstamp_type_list = NULL; 4891 return 0; 4892 4893 default: 4894 /* 4895 * Other error. 4896 */ 4897 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4898 save_errno, 4899 "%s: SIOCETHTOOL(ETHTOOL_GET_TS_INFO) ioctl failed", 4900 device); 4901 return -1; 4902 } 4903 } 4904 close(fd); 4905 4906 /* 4907 * Do we support hardware time stamping of *all* packets? 4908 */ 4909 if (!(info.rx_filters & (1 << HWTSTAMP_FILTER_ALL))) { 4910 /* 4911 * No, so don't report any time stamp types. 4912 * 4913 * XXX - some devices either don't report 4914 * HWTSTAMP_FILTER_ALL when they do support it, or 4915 * report HWTSTAMP_FILTER_ALL but map it to only 4916 * time stamping a few PTP packets. See 4917 * http://marc.info/?l=linux-netdev&m=146318183529571&w=2 4918 * 4919 * Maybe that got fixed later. 
4920 */ 4921 handle->tstamp_type_list = NULL; 4922 return 0; 4923 } 4924 4925 num_ts_types = 0; 4926 for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) { 4927 if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val) 4928 num_ts_types++; 4929 } 4930 if (num_ts_types != 0) { 4931 handle->tstamp_type_list = malloc(num_ts_types * sizeof(u_int)); 4932 if (handle->tstamp_type_list == NULL) { 4933 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4934 errno, "malloc"); 4935 return -1; 4936 } 4937 for (i = 0, j = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) { 4938 if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val) { 4939 handle->tstamp_type_list[j] = sof_ts_type_map[i].pcap_tstamp_val; 4940 j++; 4941 } 4942 } 4943 handle->tstamp_type_count = num_ts_types; 4944 } else 4945 handle->tstamp_type_list = NULL; 4946 4947 return 0; 4948} 4949#else /* ETHTOOL_GET_TS_INFO */ 4950static int 4951iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf) 4952{ 4953 /* 4954 * This doesn't apply to the "any" device; you can't say "turn on 4955 * hardware time stamping for all devices that exist now and arrange 4956 * that it be turned on for any device that appears in the future", 4957 * and not all devices even necessarily *support* hardware time 4958 * stamping, so don't report any time stamp types. 4959 */ 4960 if (strcmp(device, "any") == 0) { 4961 handle->tstamp_type_list = NULL; 4962 return 0; 4963 } 4964 4965 /* 4966 * We don't have an ioctl to use to ask what's supported, 4967 * so say we support everything. 4968 */ 4969 if (iface_set_all_ts_types(handle, ebuf) == -1) 4970 return -1; 4971 return 0; 4972} 4973#endif /* ETHTOOL_GET_TS_INFO */ 4974#else /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */ 4975static int 4976iface_get_ts_types(const char *device _U_, pcap_t *p _U_, char *ebuf _U_) 4977{ 4978 /* 4979 * Nothing to fetch, so it always "succeeds". 
4980 */ 4981 return 0; 4982} 4983#endif /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */ 4984 4985/* 4986 * Find out if we have any form of fragmentation/reassembly offloading. 4987 * 4988 * We do so using SIOCETHTOOL checking for various types of offloading; 4989 * if SIOCETHTOOL isn't defined, or we don't have any #defines for any 4990 * of the types of offloading, there's nothing we can do to check, so 4991 * we just say "no, we don't". 4992 * 4993 * We treat EOPNOTSUPP, EINVAL and, if eperm_ok is true, EPERM as 4994 * indications that the operation isn't supported. We do EPERM 4995 * weirdly because the SIOCETHTOOL code in later kernels 1) doesn't 4996 * support ETHTOOL_GUFO, 2) also doesn't include it in the list 4997 * of ethtool operations that don't require CAP_NET_ADMIN privileges, 4998 * and 3) does the "is this permitted" check before doing the "is 4999 * this even supported" check, so it fails with "this is not permitted" 5000 * rather than "this is not even supported". To work around this 5001 * annoyance, we only treat EPERM as an error for the first feature, 5002 * and assume that they all do the same permission checks, so if the 5003 * first one is allowed all the others are allowed if supported. 
5004 */ 5005#if defined(SIOCETHTOOL) && (defined(ETHTOOL_GTSO) || defined(ETHTOOL_GUFO) || defined(ETHTOOL_GGSO) || defined(ETHTOOL_GFLAGS) || defined(ETHTOOL_GGRO)) 5006static int 5007iface_ethtool_flag_ioctl(pcap_t *handle, int cmd, const char *cmdname, 5008 int eperm_ok) 5009{ 5010 struct ifreq ifr; 5011 struct ethtool_value eval; 5012 5013 memset(&ifr, 0, sizeof(ifr)); 5014 pcap_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name)); 5015 eval.cmd = cmd; 5016 eval.data = 0; 5017 ifr.ifr_data = (caddr_t)&eval; 5018 if (ioctl(handle->fd, SIOCETHTOOL, &ifr) == -1) { 5019 if (errno == EOPNOTSUPP || errno == EINVAL || 5020 (errno == EPERM && eperm_ok)) { 5021 /* 5022 * OK, let's just return 0, which, in our 5023 * case, either means "no, what we're asking 5024 * about is not enabled" or "all the flags 5025 * are clear (i.e., nothing is enabled)". 5026 */ 5027 return 0; 5028 } 5029 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5030 errno, "%s: SIOCETHTOOL(%s) ioctl failed", 5031 handle->opt.device, cmdname); 5032 return -1; 5033 } 5034 return eval.data; 5035} 5036 5037/* 5038 * XXX - it's annoying that we have to check for offloading at all, but, 5039 * given that we have to, it's still annoying that we have to check for 5040 * particular types of offloading, especially that shiny new types of 5041 * offloading may be added - and, worse, may not be checkable with 5042 * a particular ETHTOOL_ operation; ETHTOOL_GFEATURES would, in 5043 * theory, give those to you, but the actual flags being used are 5044 * opaque (defined in a non-uapi header), and there doesn't seem to 5045 * be any obvious way to ask the kernel what all the offloading flags 5046 * are - at best, you can ask for a set of strings(!) to get *names* 5047 * for various flags. 
(That whole mechanism appears to have been 5048 * designed for the sole purpose of letting ethtool report flags 5049 * by name and set flags by name, with the names having no semantics 5050 * ethtool understands.) 5051 */ 5052static int 5053iface_get_offload(pcap_t *handle) 5054{ 5055 int ret; 5056 5057#ifdef ETHTOOL_GTSO 5058 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GTSO, "ETHTOOL_GTSO", 0); 5059 if (ret == -1) 5060 return -1; 5061 if (ret) 5062 return 1; /* TCP segmentation offloading on */ 5063#endif 5064 5065#ifdef ETHTOOL_GGSO 5066 /* 5067 * XXX - will this cause large unsegmented packets to be 5068 * handed to PF_PACKET sockets on transmission? If not, 5069 * this need not be checked. 5070 */ 5071 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGSO, "ETHTOOL_GGSO", 0); 5072 if (ret == -1) 5073 return -1; 5074 if (ret) 5075 return 1; /* generic segmentation offloading on */ 5076#endif 5077 5078#ifdef ETHTOOL_GFLAGS 5079 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GFLAGS, "ETHTOOL_GFLAGS", 0); 5080 if (ret == -1) 5081 return -1; 5082 if (ret & ETH_FLAG_LRO) 5083 return 1; /* large receive offloading on */ 5084#endif 5085 5086#ifdef ETHTOOL_GGRO 5087 /* 5088 * XXX - will this cause large reassembled packets to be 5089 * handed to PF_PACKET sockets on receipt? If not, 5090 * this need not be checked. 5091 */ 5092 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGRO, "ETHTOOL_GGRO", 0); 5093 if (ret == -1) 5094 return -1; 5095 if (ret) 5096 return 1; /* generic (large) receive offloading on */ 5097#endif 5098 5099#ifdef ETHTOOL_GUFO 5100 /* 5101 * Do this one last, as support for it was removed in later 5102 * kernels, and it fails with EPERM on those kernels rather 5103 * than with EOPNOTSUPP (see explanation in comment for 5104 * iface_ethtool_flag_ioctl()). 
5105 */ 5106 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GUFO, "ETHTOOL_GUFO", 1); 5107 if (ret == -1) 5108 return -1; 5109 if (ret) 5110 return 1; /* UDP fragmentation offloading on */ 5111#endif 5112 5113 return 0; 5114} 5115#else /* SIOCETHTOOL */ 5116static int 5117iface_get_offload(pcap_t *handle _U_) 5118{ 5119 /* 5120 * XXX - do we need to get this information if we don't 5121 * have the ethtool ioctls? If so, how do we do that? 5122 */ 5123 return 0; 5124} 5125#endif /* SIOCETHTOOL */ 5126 5127static struct dsa_proto { 5128 const char *name; 5129 bpf_u_int32 linktype; 5130} dsa_protos[] = { 5131 /* 5132 * None is special and indicates that the interface does not have 5133 * any tagging protocol configured, and is therefore a standard 5134 * Ethernet interface. 5135 */ 5136 { "none", DLT_EN10MB }, 5137 { "brcm", DLT_DSA_TAG_BRCM }, 5138 { "brcm-prepend", DLT_DSA_TAG_BRCM_PREPEND }, 5139 { "dsa", DLT_DSA_TAG_DSA }, 5140 { "edsa", DLT_DSA_TAG_EDSA }, 5141}; 5142 5143static int 5144iface_dsa_get_proto_info(const char *device, pcap_t *handle) 5145{ 5146 char *pathstr; 5147 unsigned int i; 5148 /* 5149 * Make this significantly smaller than PCAP_ERRBUF_SIZE; 5150 * the tag *shouldn't* have some huge long name, and making 5151 * it smaller keeps newer versions of GCC from whining that 5152 * the error message if we don't support the tag could 5153 * overflow the error message buffer. 
5154 */ 5155 char buf[128]; 5156 ssize_t r; 5157 int fd; 5158 5159 fd = asprintf(&pathstr, "/sys/class/net/%s/dsa/tagging", device); 5160 if (fd < 0) { 5161 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5162 fd, "asprintf"); 5163 return PCAP_ERROR; 5164 } 5165 5166 fd = open(pathstr, O_RDONLY); 5167 free(pathstr); 5168 /* 5169 * This is not fatal, kernel >= 4.20 *might* expose this attribute 5170 */ 5171 if (fd < 0) 5172 return 0; 5173 5174 r = read(fd, buf, sizeof(buf) - 1); 5175 if (r <= 0) { 5176 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5177 errno, "read"); 5178 close(fd); 5179 return PCAP_ERROR; 5180 } 5181 close(fd); 5182 5183 /* 5184 * Buffer should be LF terminated. 5185 */ 5186 if (buf[r - 1] == '\n') 5187 r--; 5188 buf[r] = '\0'; 5189 5190 for (i = 0; i < sizeof(dsa_protos) / sizeof(dsa_protos[0]); i++) { 5191 if (strlen(dsa_protos[i].name) == (size_t)r && 5192 strcmp(buf, dsa_protos[i].name) == 0) { 5193 handle->linktype = dsa_protos[i].linktype; 5194 switch (dsa_protos[i].linktype) { 5195 case DLT_EN10MB: 5196 return 0; 5197 default: 5198 return 1; 5199 } 5200 } 5201 } 5202 5203 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 5204 "unsupported DSA tag: %s", buf); 5205 5206 return PCAP_ERROR; 5207} 5208 5209/* 5210 * Query the kernel for the MTU of the given interface. 5211 */ 5212static int 5213iface_get_mtu(int fd, const char *device, char *ebuf) 5214{ 5215 struct ifreq ifr; 5216 5217 if (!device) 5218 return BIGGER_THAN_ALL_MTUS; 5219 5220 memset(&ifr, 0, sizeof(ifr)); 5221 pcap_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 5222 5223 if (ioctl(fd, SIOCGIFMTU, &ifr) == -1) { 5224 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5225 errno, "SIOCGIFMTU"); 5226 return -1; 5227 } 5228 5229 return ifr.ifr_mtu; 5230} 5231 5232/* 5233 * Get the hardware type of the given interface as ARPHRD_xxx constant. 
5234 */ 5235static int 5236iface_get_arptype(int fd, const char *device, char *ebuf) 5237{ 5238 struct ifreq ifr; 5239 int ret; 5240 5241 memset(&ifr, 0, sizeof(ifr)); 5242 pcap_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 5243 5244 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) { 5245 if (errno == ENODEV) { 5246 /* 5247 * No such device. 5248 * 5249 * There's nothing more to say, so clear 5250 * the error message. 5251 */ 5252 ret = PCAP_ERROR_NO_SUCH_DEVICE; 5253 ebuf[0] = '\0'; 5254 } else { 5255 ret = PCAP_ERROR; 5256 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5257 errno, "SIOCGIFHWADDR"); 5258 } 5259 return ret; 5260 } 5261 5262 return ifr.ifr_hwaddr.sa_family; 5263} 5264 5265static int 5266fix_program(pcap_t *handle, struct sock_fprog *fcode) 5267{ 5268 struct pcap_linux *handlep = handle->priv; 5269 size_t prog_size; 5270 register int i; 5271 register struct bpf_insn *p; 5272 struct bpf_insn *f; 5273 int len; 5274 5275 /* 5276 * Make a copy of the filter, and modify that copy if 5277 * necessary. 5278 */ 5279 prog_size = sizeof(*handle->fcode.bf_insns) * handle->fcode.bf_len; 5280 len = handle->fcode.bf_len; 5281 f = (struct bpf_insn *)malloc(prog_size); 5282 if (f == NULL) { 5283 pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5284 errno, "malloc"); 5285 return -1; 5286 } 5287 memcpy(f, handle->fcode.bf_insns, prog_size); 5288 fcode->len = len; 5289 fcode->filter = (struct sock_filter *) f; 5290 5291 for (i = 0; i < len; ++i) { 5292 p = &f[i]; 5293 /* 5294 * What type of instruction is this? 5295 */ 5296 switch (BPF_CLASS(p->code)) { 5297 5298 case BPF_LD: 5299 case BPF_LDX: 5300 /* 5301 * It's a load instruction; is it loading 5302 * from the packet? 5303 */ 5304 switch (BPF_MODE(p->code)) { 5305 5306 case BPF_ABS: 5307 case BPF_IND: 5308 case BPF_MSH: 5309 /* 5310 * Yes; are we in cooked mode? 5311 */ 5312 if (handlep->cooked) { 5313 /* 5314 * Yes, so we need to fix this 5315 * instruction. 
5316 */ 5317 if (fix_offset(handle, p) < 0) { 5318 /* 5319 * We failed to do so. 5320 * Return 0, so our caller 5321 * knows to punt to userland. 5322 */ 5323 return 0; 5324 } 5325 } 5326 break; 5327 } 5328 break; 5329 } 5330 } 5331 return 1; /* we succeeded */ 5332} 5333 5334static int 5335fix_offset(pcap_t *handle, struct bpf_insn *p) 5336{ 5337 /* 5338 * Existing references to auxiliary data shouldn't be adjusted. 5339 * 5340 * Note that SKF_AD_OFF is negative, but p->k is unsigned, so 5341 * we use >= and cast SKF_AD_OFF to unsigned. 5342 */ 5343 if (p->k >= (bpf_u_int32)SKF_AD_OFF) 5344 return 0; 5345 if (handle->linktype == DLT_LINUX_SLL2) { 5346 /* 5347 * What's the offset? 5348 */ 5349 if (p->k >= SLL2_HDR_LEN) { 5350 /* 5351 * It's within the link-layer payload; that starts 5352 * at an offset of 0, as far as the kernel packet 5353 * filter is concerned, so subtract the length of 5354 * the link-layer header. 5355 */ 5356 p->k -= SLL2_HDR_LEN; 5357 } else if (p->k == 0) { 5358 /* 5359 * It's the protocol field; map it to the 5360 * special magic kernel offset for that field. 5361 */ 5362 p->k = SKF_AD_OFF + SKF_AD_PROTOCOL; 5363 } else if (p->k == 4) { 5364 /* 5365 * It's the ifindex field; map it to the 5366 * special magic kernel offset for that field. 5367 */ 5368 p->k = SKF_AD_OFF + SKF_AD_IFINDEX; 5369 } else if (p->k == 10) { 5370 /* 5371 * It's the packet type field; map it to the 5372 * special magic kernel offset for that field. 5373 */ 5374 p->k = SKF_AD_OFF + SKF_AD_PKTTYPE; 5375 } else if ((bpf_int32)(p->k) > 0) { 5376 /* 5377 * It's within the header, but it's not one of 5378 * those fields; we can't do that in the kernel, 5379 * so punt to userland. 5380 */ 5381 return -1; 5382 } 5383 } else { 5384 /* 5385 * What's the offset? 
5386 */ 5387 if (p->k >= SLL_HDR_LEN) { 5388 /* 5389 * It's within the link-layer payload; that starts 5390 * at an offset of 0, as far as the kernel packet 5391 * filter is concerned, so subtract the length of 5392 * the link-layer header. 5393 */ 5394 p->k -= SLL_HDR_LEN; 5395 } else if (p->k == 0) { 5396 /* 5397 * It's the packet type field; map it to the 5398 * special magic kernel offset for that field. 5399 */ 5400 p->k = SKF_AD_OFF + SKF_AD_PKTTYPE; 5401 } else if (p->k == 14) { 5402 /* 5403 * It's the protocol field; map it to the 5404 * special magic kernel offset for that field. 5405 */ 5406 p->k = SKF_AD_OFF + SKF_AD_PROTOCOL; 5407 } else if ((bpf_int32)(p->k) > 0) { 5408 /* 5409 * It's within the header, but it's not one of 5410 * those fields; we can't do that in the kernel, 5411 * so punt to userland. 5412 */ 5413 return -1; 5414 } 5415 } 5416 return 0; 5417} 5418 5419static int 5420set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode) 5421{ 5422 int total_filter_on = 0; 5423 int save_mode; 5424 int ret; 5425 int save_errno; 5426 5427 /* 5428 * The socket filter code doesn't discard all packets queued 5429 * up on the socket when the filter is changed; this means 5430 * that packets that don't match the new filter may show up 5431 * after the new filter is put onto the socket, if those 5432 * packets haven't yet been read. 5433 * 5434 * This means, for example, that if you do a tcpdump capture 5435 * with a filter, the first few packets in the capture might 5436 * be packets that wouldn't have passed the filter. 5437 * 5438 * We therefore discard all packets queued up on the socket 5439 * when setting a kernel filter. (This isn't an issue for 5440 * userland filters, as the userland filtering is done after 5441 * packets are queued up.) 5442 * 5443 * To flush those packets, we put the socket in read-only mode, 5444 * and read packets from the socket until there are no more to 5445 * read. 
5446 * 5447 * In order to keep that from being an infinite loop - i.e., 5448 * to keep more packets from arriving while we're draining 5449 * the queue - we put the "total filter", which is a filter 5450 * that rejects all packets, onto the socket before draining 5451 * the queue. 5452 * 5453 * This code deliberately ignores any errors, so that you may 5454 * get bogus packets if an error occurs, rather than having 5455 * the filtering done in userland even if it could have been 5456 * done in the kernel. 5457 */ 5458 if (setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER, 5459 &total_fcode, sizeof(total_fcode)) == 0) { 5460 char drain[1]; 5461 5462 /* 5463 * Note that we've put the total filter onto the socket. 5464 */ 5465 total_filter_on = 1; 5466 5467 /* 5468 * Save the socket's current mode, and put it in 5469 * non-blocking mode; we drain it by reading packets 5470 * until we get an error (which is normally a 5471 * "nothing more to be read" error). 5472 */ 5473 save_mode = fcntl(handle->fd, F_GETFL, 0); 5474 if (save_mode == -1) { 5475 pcap_fmt_errmsg_for_errno(handle->errbuf, 5476 PCAP_ERRBUF_SIZE, errno, 5477 "can't get FD flags when changing filter"); 5478 return -2; 5479 } 5480 if (fcntl(handle->fd, F_SETFL, save_mode | O_NONBLOCK) < 0) { 5481 pcap_fmt_errmsg_for_errno(handle->errbuf, 5482 PCAP_ERRBUF_SIZE, errno, 5483 "can't set nonblocking mode when changing filter"); 5484 return -2; 5485 } 5486 while (recv(handle->fd, &drain, sizeof drain, MSG_TRUNC) >= 0) 5487 ; 5488 save_errno = errno; 5489 if (save_errno != EAGAIN) { 5490 /* 5491 * Fatal error. 5492 * 5493 * If we can't restore the mode or reset the 5494 * kernel filter, there's nothing we can do. 
5495 */ 5496 (void)fcntl(handle->fd, F_SETFL, save_mode); 5497 (void)reset_kernel_filter(handle); 5498 pcap_fmt_errmsg_for_errno(handle->errbuf, 5499 PCAP_ERRBUF_SIZE, save_errno, 5500 "recv failed when changing filter"); 5501 return -2; 5502 } 5503 if (fcntl(handle->fd, F_SETFL, save_mode) == -1) { 5504 pcap_fmt_errmsg_for_errno(handle->errbuf, 5505 PCAP_ERRBUF_SIZE, errno, 5506 "can't restore FD flags when changing filter"); 5507 return -2; 5508 } 5509 } 5510 5511 /* 5512 * Now attach the new filter. 5513 */ 5514 ret = setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER, 5515 fcode, sizeof(*fcode)); 5516 if (ret == -1 && total_filter_on) { 5517 /* 5518 * Well, we couldn't set that filter on the socket, 5519 * but we could set the total filter on the socket. 5520 * 5521 * This could, for example, mean that the filter was 5522 * too big to put into the kernel, so we'll have to 5523 * filter in userland; in any case, we'll be doing 5524 * filtering in userland, so we need to remove the 5525 * total filter so we see packets. 5526 */ 5527 save_errno = errno; 5528 5529 /* 5530 * If this fails, we're really screwed; we have the 5531 * total filter on the socket, and it won't come off. 5532 * Report it as a fatal error. 5533 */ 5534 if (reset_kernel_filter(handle) == -1) { 5535 pcap_fmt_errmsg_for_errno(handle->errbuf, 5536 PCAP_ERRBUF_SIZE, errno, 5537 "can't remove kernel total filter"); 5538 return -2; /* fatal error */ 5539 } 5540 5541 errno = save_errno; 5542 } 5543 return ret; 5544} 5545 5546static int 5547reset_kernel_filter(pcap_t *handle) 5548{ 5549 int ret; 5550 /* 5551 * setsockopt() barfs unless it get a dummy parameter. 5552 * valgrind whines unless the value is initialized, 5553 * as it has no idea that setsockopt() ignores its 5554 * parameter. 
5555 */ 5556 int dummy = 0; 5557 5558 ret = setsockopt(handle->fd, SOL_SOCKET, SO_DETACH_FILTER, 5559 &dummy, sizeof(dummy)); 5560 /* 5561 * Ignore ENOENT - it means "we don't have a filter", so there 5562 * was no filter to remove, and there's still no filter. 5563 * 5564 * Also ignore ENONET, as a lot of kernel versions had a 5565 * typo where ENONET, rather than ENOENT, was returned. 5566 */ 5567 if (ret == -1 && errno != ENOENT && errno != ENONET) 5568 return -1; 5569 return 0; 5570} 5571 5572int 5573pcap_set_protocol_linux(pcap_t *p, int protocol) 5574{ 5575 if (pcap_check_activated(p)) 5576 return (PCAP_ERROR_ACTIVATED); 5577 p->opt.protocol = protocol; 5578 return (0); 5579} 5580 5581/* 5582 * Libpcap version string. 5583 */ 5584const char * 5585pcap_lib_version(void) 5586{ 5587#if defined(HAVE_TPACKET3) 5588 return (PCAP_VERSION_STRING " (with TPACKET_V3)"); 5589#else 5590 return (PCAP_VERSION_STRING " (with TPACKET_V2)"); 5591#endif 5592} 5593