1/* 2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1990, 1991, 1993 30 * The Regents of the University of California. All rights reserved. 31 * 32 * This code is derived from the Stanford/CMU enet packet filter, 33 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 34 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 35 * Berkeley Laboratory. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed by the University of 48 * California, Berkeley and its contributors. 49 * 4. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)bpf.c 8.2 (Berkeley) 3/28/94 66 * 67 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $ 68 */ 69/* 70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 71 * support for mandatory and extensible security protections. This notice 72 * is included in support of clause 2.2 (b) of the Apple Public License, 73 * Version 2.0. 74 */ 75 76#include "bpf.h" 77 78#ifndef __GNUC__ 79#define inline 80#else 81#define inline __inline 82#endif 83 84#include <sys/param.h> 85#include <sys/systm.h> 86#include <sys/conf.h> 87#include <sys/malloc.h> 88#include <sys/mbuf.h> 89#include <sys/time.h> 90#include <sys/proc.h> 91#include <sys/signalvar.h> 92#include <sys/filio.h> 93#include <sys/sockio.h> 94#include <sys/ttycom.h> 95#include <sys/filedesc.h> 96#include <sys/uio_internal.h> 97#include <sys/file_internal.h> 98#include <sys/event.h> 99 100#include <sys/poll.h> 101 102#include <sys/socket.h> 103#include <sys/socketvar.h> 104#include <sys/vnode.h> 105 106#include <net/if.h> 107#include <net/bpf.h> 108#include <net/bpfdesc.h> 109 110#include <netinet/in.h> 111#include <netinet/in_pcb.h> 112#include <netinet/in_var.h> 113#include <netinet/ip_var.h> 114#include <netinet/tcp.h> 115#include <netinet/tcp_var.h> 116#include <netinet/udp.h> 117#include <netinet/udp_var.h> 118#include <netinet/if_ether.h> 119#include <sys/kernel.h> 120#include <sys/sysctl.h> 121#include <net/firewire.h> 122 123#include <miscfs/devfs/devfs.h> 124#include <net/dlil.h> 125#include <net/pktap.h> 126 127#include <kern/locks.h> 128#include <kern/thread_call.h> 129 130#if CONFIG_MACF_NET 131#include <security/mac_framework.h> 132#endif /* MAC_NET */ 133 134extern int tvtohz(struct timeval *); 135 136#define BPF_BUFSIZE 4096 137#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio) 138 139 140#define PRINET 26 /* interruptible */ 141 142/* 143 * The default read buffer size is patchable. 144 */ 145static unsigned int bpf_bufsize = BPF_BUFSIZE; 146SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED, 147 &bpf_bufsize, 0, ""); 148__private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE; 149SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW | CTLFLAG_LOCKED, 150 &bpf_maxbufsize, 0, ""); 151static unsigned int bpf_maxdevices = 256; 152SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED, 153 &bpf_maxdevices, 0, ""); 154/* 155 * bpf_wantpktap controls the defaul visibility of DLT_PKTAP 156 * For OS X is off by default so process need to use the ioctl BPF_WANT_PKTAP 157 * explicitly to be able to use DLT_PKTAP. 158 */ 159static unsigned int bpf_wantpktap = 0; 160SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED, 161 &bpf_wantpktap, 0, ""); 162 163/* 164 * bpf_iflist is the list of interfaces; each corresponds to an ifnet 165 * bpf_dtab holds pointer to the descriptors, indexed by minor device # 166 */ 167static struct bpf_if *bpf_iflist; 168#ifdef __APPLE__ 169/* 170 * BSD now stores the bpf_d in the dev_t which is a struct 171 * on their system. Our dev_t is an int, so we still store 172 * the bpf_d in a separate table indexed by minor device #. 173 * 174 * The value stored in bpf_dtab[n] represent three states: 175 * 0: device not opened 176 * 1: device opening or closing 177 * other: device <n> opened with pointer to storage 178 */ 179static struct bpf_d **bpf_dtab = NULL; 180static unsigned int bpf_dtab_size = 0; 181static unsigned int nbpfilter = 0; 182 183decl_lck_mtx_data(static, bpf_mlock_data); 184static lck_mtx_t *bpf_mlock = &bpf_mlock_data; 185static lck_grp_t *bpf_mlock_grp; 186static lck_grp_attr_t *bpf_mlock_grp_attr; 187static lck_attr_t *bpf_mlock_attr; 188 189static mbuf_tag_id_t bpf_mtag_id; 190#endif /* __APPLE__ */ 191 192static int bpf_allocbufs(struct bpf_d *); 193static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp); 194static void bpf_detachd(struct bpf_d *d); 195static void bpf_freed(struct bpf_d *); 196static void bpf_mcopy(const void *, void *, size_t); 197static int bpf_movein(struct uio *, int, 198 struct mbuf **, struct sockaddr *, int *); 199static int bpf_setif(struct bpf_d *, ifnet_t ifp, u_int32_t dlt, dev_t); 200static void bpf_timed_out(void *, void *); 201static void bpf_wakeup(struct bpf_d *); 202static void catchpacket(struct bpf_d *, u_char *, struct mbuf *, u_int, 203 u_int, int, void (*)(const void *, void *, size_t)); 204static void reset_d(struct bpf_d *); 205static int bpf_setf(struct bpf_d *, u_int , user_addr_t , dev_t, u_long); 206static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *); 207static int bpf_setdlt(struct bpf_d *, u_int, dev_t); 208static int bpf_set_traffic_class(struct bpf_d *, int); 209static void bpf_set_packet_service_class(struct mbuf *, int); 210 211/*static void *bpf_devfs_token[MAXBPFILTER];*/ 212 213static int bpf_devsw_installed; 214 215void bpf_init(void *unused); 216static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m); 217 218/* 219 * Darwin differs from BSD here, the following are static 220 * on BSD and not static on Darwin. 221 */ 222 d_open_t bpfopen; 223 d_close_t bpfclose; 224 d_read_t bpfread; 225 d_write_t bpfwrite; 226 ioctl_fcn_t bpfioctl; 227 select_fcn_t bpfselect; 228 229 230/* Darwin's cdevsw struct differs slightly from BSDs */ 231#define CDEV_MAJOR 23 232static struct cdevsw bpf_cdevsw = { 233 /* open */ bpfopen, 234 /* close */ bpfclose, 235 /* read */ bpfread, 236 /* write */ bpfwrite, 237 /* ioctl */ bpfioctl, 238 /* stop */ eno_stop, 239 /* reset */ eno_reset, 240 /* tty */ NULL, 241 /* select */ bpfselect, 242 /* mmap */ eno_mmap, 243 /* strategy*/ eno_strat, 244 /* getc */ eno_getc, 245 /* putc */ eno_putc, 246 /* type */ 0 247}; 248 249#define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data) 250 251static int 252bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *sockp, int *datlen) 253{ 254 struct mbuf *m; 255 int error; 256 int len; 257 uint8_t sa_family; 258 int hlen; 259 260 switch (linktype) { 261 262#if SLIP 263 case DLT_SLIP: 264 sa_family = AF_INET; 265 hlen = 0; 266 break; 267#endif /* SLIP */ 268 269 case DLT_EN10MB: 270 sa_family = AF_UNSPEC; 271 /* XXX Would MAXLINKHDR be better? */ 272 hlen = sizeof(struct ether_header); 273 break; 274 275#if FDDI 276 case DLT_FDDI: 277 #if defined(__FreeBSD__) || defined(__bsdi__) 278 sa_family = AF_IMPLINK; 279 hlen = 0; 280 #else 281 sa_family = AF_UNSPEC; 282 /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */ 283 hlen = 24; 284 #endif 285 break; 286#endif /* FDDI */ 287 288 case DLT_RAW: 289 case DLT_NULL: 290 sa_family = AF_UNSPEC; 291 hlen = 0; 292 break; 293 294 #ifdef __FreeBSD__ 295 case DLT_ATM_RFC1483: 296 /* 297 * en atm driver requires 4-byte atm pseudo header. 298 * though it isn't standard, vpi:vci needs to be 299 * specified anyway. 300 */ 301 sa_family = AF_UNSPEC; 302 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */ 303 break; 304 #endif 305 306 case DLT_PPP: 307 sa_family = AF_UNSPEC; 308 hlen = 4; /* This should match PPP_HDRLEN */ 309 break; 310 311 case DLT_APPLE_IP_OVER_IEEE1394: 312 sa_family = AF_UNSPEC; 313 hlen = sizeof(struct firewire_header); 314 break; 315 316 case DLT_IEEE802_11: /* IEEE 802.11 wireless */ 317 sa_family = AF_IEEE80211; 318 hlen = 0; 319 break; 320 321 case DLT_IEEE802_11_RADIO: 322 sa_family = AF_IEEE80211; 323 hlen = 0; 324 break; 325 326 default: 327 return (EIO); 328 } 329 330 // LP64todo - fix this! 331 len = uio_resid(uio); 332 *datlen = len - hlen; 333 if ((unsigned)len > MCLBYTES) 334 return (EIO); 335 336 if (sockp) { 337 /* 338 * Build a sockaddr based on the data link layer type. 339 * We do this at this level because the ethernet header 340 * is copied directly into the data field of the sockaddr. 341 * In the case of SLIP, there is no header and the packet 342 * is forwarded as is. 343 * Also, we are careful to leave room at the front of the mbuf 344 * for the link level header. 345 */ 346 if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) { 347 return (EIO); 348 } 349 sockp->sa_family = sa_family; 350 } else { 351 /* 352 * We're directly sending the packet data supplied by 353 * the user; we don't need to make room for the link 354 * header, and don't need the header length value any 355 * more, so set it to 0. 356 */ 357 hlen = 0; 358 } 359 360 MGETHDR(m, M_WAIT, MT_DATA); 361 if (m == 0) 362 return (ENOBUFS); 363 if ((unsigned)len > MHLEN) { 364 MCLGET(m, M_WAIT); 365 if ((m->m_flags & M_EXT) == 0) { 366 error = ENOBUFS; 367 goto bad; 368 } 369 } 370 m->m_pkthdr.len = m->m_len = len; 371 m->m_pkthdr.rcvif = NULL; 372 *mp = m; 373 374 /* 375 * Make room for link header. 376 */ 377 if (hlen != 0) { 378 m->m_pkthdr.len -= hlen; 379 m->m_len -= hlen; 380 m->m_data += hlen; /* XXX */ 381 error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio); 382 if (error) 383 goto bad; 384 } 385 error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio); 386 if (error) 387 goto bad; 388 389 /* Check for multicast destination */ 390 switch (linktype) { 391 case DLT_EN10MB: { 392 struct ether_header *eh = mtod(m, struct ether_header *); 393 394 if (ETHER_IS_MULTICAST(eh->ether_dhost)) { 395 if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0) 396 m->m_flags |= M_BCAST; 397 else 398 m->m_flags |= M_MCAST; 399 } 400 break; 401 } 402 } 403 404 return 0; 405 bad: 406 m_freem(m); 407 return (error); 408} 409 410#ifdef __APPLE__ 411 412/* 413 * The dynamic addition of a new device node must block all processes that 414 * are opening the last device so that no process will get an unexpected 415 * ENOENT 416 */ 417static void 418bpf_make_dev_t(int maj) 419{ 420 static int bpf_growing = 0; 421 unsigned int cur_size = nbpfilter, i; 422 423 if (nbpfilter >= bpf_maxdevices) 424 return; 425 426 while (bpf_growing) { 427 /* Wait until new device has been created */ 428 (void)tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0); 429 } 430 if (nbpfilter > cur_size) { 431 /* other thread grew it already */ 432 return; 433 } 434 bpf_growing = 1; 435 436 /* need to grow bpf_dtab first */ 437 if (nbpfilter == bpf_dtab_size) { 438 int new_dtab_size; 439 struct bpf_d **new_dtab = NULL; 440 struct bpf_d **old_dtab = NULL; 441 442 new_dtab_size = bpf_dtab_size + NBPFILTER; 443 new_dtab = (struct bpf_d **)_MALLOC(sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT); 444 if (new_dtab == 0) { 445 printf("bpf_make_dev_t: malloc bpf_dtab failed\n"); 446 goto done; 447 } 448 if (bpf_dtab) { 449 bcopy(bpf_dtab, new_dtab, 450 sizeof(struct bpf_d *) * bpf_dtab_size); 451 } 452 bzero(new_dtab + bpf_dtab_size, 453 sizeof(struct bpf_d *) * NBPFILTER); 454 old_dtab = bpf_dtab; 455 bpf_dtab = new_dtab; 456 bpf_dtab_size = new_dtab_size; 457 if (old_dtab != NULL) 458 _FREE(old_dtab, M_DEVBUF); 459 } 460 i = nbpfilter++; 461 (void) devfs_make_node(makedev(maj, i), 462 DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600, 463 "bpf%d", i); 464done: 465 bpf_growing = 0; 466 wakeup((caddr_t)&bpf_growing); 467} 468 469#endif 470 471/* 472 * Attach file to the bpf interface, i.e. make d listen on bp. 473 */ 474static errno_t 475bpf_attachd(struct bpf_d *d, struct bpf_if *bp) 476{ 477 int first = bp->bif_dlist == NULL; 478 int error = 0; 479 480 /* 481 * Point d at bp, and add d to the interface's list of listeners. 482 * Finally, point the driver's bpf cookie at the interface so 483 * it will divert packets to bpf. 484 */ 485 d->bd_bif = bp; 486 d->bd_next = bp->bif_dlist; 487 bp->bif_dlist = d; 488 489 if (first) { 490 /* Find the default bpf entry for this ifp */ 491 if (bp->bif_ifp->if_bpf == NULL) { 492 struct bpf_if *tmp, *primary = NULL; 493 494 for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) { 495 if (tmp->bif_ifp != bp->bif_ifp) 496 continue; 497 primary = tmp; 498 /* 499 * Make DLT_PKTAP only if process knows how 500 * to deal with it, otherwise find another one 501 */ 502 if (tmp->bif_dlt == DLT_PKTAP && 503 !(d->bd_flags & BPF_WANT_PKTAP)) 504 continue; 505 break; 506 } 507 bp->bif_ifp->if_bpf = primary; 508 } 509 510 /* Only call dlil_set_bpf_tap for primary dlt */ 511 if (bp->bif_ifp->if_bpf == bp) 512 dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT, bpf_tap_callback); 513 514 if (bp->bif_tap) 515 error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt, BPF_TAP_INPUT_OUTPUT); 516 } 517 518 if (bp->bif_ifp->if_bpf != NULL && 519 bp->bif_ifp->if_bpf->bif_dlt == DLT_PKTAP) 520 d->bd_flags |= BPF_FINALIZE_PKTAP; 521 else 522 d->bd_flags &= ~BPF_FINALIZE_PKTAP; 523 524 return error; 525} 526 527/* 528 * Detach a file from its interface. 529 */ 530static void 531bpf_detachd(struct bpf_d *d) 532{ 533 struct bpf_d **p; 534 struct bpf_if *bp; 535 struct ifnet *ifp; 536 537 ifp = d->bd_bif->bif_ifp; 538 bp = d->bd_bif; 539 540 /* Remove d from the interface's descriptor list. */ 541 p = &bp->bif_dlist; 542 while (*p != d) { 543 p = &(*p)->bd_next; 544 if (*p == 0) 545 panic("bpf_detachd: descriptor not in list"); 546 } 547 *p = (*p)->bd_next; 548 if (bp->bif_dlist == 0) { 549 /* 550 * Let the driver know that there are no more listeners. 551 */ 552 /* Only call dlil_set_bpf_tap for primary dlt */ 553 if (bp->bif_ifp->if_bpf == bp) 554 dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL); 555 if (bp->bif_tap) 556 bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE); 557 558 for (bp = bpf_iflist; bp; bp = bp->bif_next) 559 if (bp->bif_ifp == ifp && bp->bif_dlist != 0) 560 break; 561 if (bp == NULL) 562 ifp->if_bpf = NULL; 563 } 564 d->bd_bif = NULL; 565 /* 566 * Check if this descriptor had requested promiscuous mode. 567 * If so, turn it off. 568 */ 569 if (d->bd_promisc) { 570 d->bd_promisc = 0; 571 lck_mtx_unlock(bpf_mlock); 572 if (ifnet_set_promiscuous(ifp, 0)) { 573 /* 574 * Something is really wrong if we were able to put 575 * the driver into promiscuous mode, but can't 576 * take it out. 577 * Most likely the network interface is gone. 578 */ 579 printf("bpf: ifnet_set_promiscuous failed"); 580 } 581 lck_mtx_lock(bpf_mlock); 582 } 583} 584 585 586/* 587 * Start asynchronous timer, if necessary. 588 * Must be called with bpf_mlock held. 589 */ 590static void 591bpf_start_timer(struct bpf_d *d) 592{ 593 uint64_t deadline; 594 struct timeval tv; 595 596 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 597 tv.tv_sec = d->bd_rtout / hz; 598 tv.tv_usec = (d->bd_rtout % hz) * tick; 599 600 clock_interval_to_deadline( 601 (uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec, 602 NSEC_PER_USEC, &deadline); 603 /* 604 * The state is BPF_IDLE, so the timer hasn't 605 * been started yet, and hasn't gone off yet; 606 * there is no thread call scheduled, so this 607 * won't change the schedule. 608 * 609 * XXX - what if, by the time it gets entered, 610 * the deadline has already passed? 611 */ 612 thread_call_enter_delayed(d->bd_thread_call, deadline); 613 d->bd_state = BPF_WAITING; 614 } 615} 616 617/* 618 * Cancel asynchronous timer. 619 * Must be called with bpf_mlock held. 620 */ 621static boolean_t 622bpf_stop_timer(struct bpf_d *d) 623{ 624 /* 625 * If the timer has already gone off, this does nothing. 626 * Our caller is expected to set d->bd_state to BPF_IDLE, 627 * with the bpf_mlock, after we are called. bpf_timed_out() 628 * also grabs bpf_mlock, so, if the timer has gone off and 629 * bpf_timed_out() hasn't finished, it's waiting for the 630 * lock; when this thread releases the lock, it will 631 * find the state is BPF_IDLE, and just release the 632 * lock and return. 633 */ 634 return (thread_call_cancel(d->bd_thread_call)); 635} 636 637 638 639/* 640 * Open ethernet device. Returns ENXIO for illegal minor device number, 641 * EBUSY if file is open by another process. 642 */ 643/* ARGSUSED */ 644int 645bpfopen(dev_t dev, int flags, __unused int fmt, 646 __unused struct proc *p) 647{ 648 struct bpf_d *d; 649 650 lck_mtx_lock(bpf_mlock); 651 if ((unsigned int) minor(dev) >= nbpfilter) { 652 lck_mtx_unlock(bpf_mlock); 653 return (ENXIO); 654 } 655 /* 656 * New device nodes are created on demand when opening the last one. 657 * The programming model is for processes to loop on the minor starting at 0 658 * as long as EBUSY is returned. The loop stops when either the open succeeds or 659 * an error other that EBUSY is returned. That means that bpf_make_dev_t() must 660 * block all processes that are opening the last node. If not all 661 * processes are blocked, they could unexpectedly get ENOENT and abort their 662 * opening loop. 663 */ 664 if ((unsigned int) minor(dev) == (nbpfilter - 1)) 665 bpf_make_dev_t(major(dev)); 666 667 /* 668 * Each minor can be opened by only one process. If the requested 669 * minor is in use, return EBUSY. 670 * 671 * Important: bpfopen() and bpfclose() have to check and set the status of a device 672 * in the same lockin context otherwise the device may be leaked because the vnode use count 673 * will be unpextectly greater than 1 when close() is called. 674 */ 675 if (bpf_dtab[minor(dev)] == 0) { 676 bpf_dtab[minor(dev)] = (void *)1; /* Mark opening */ 677 } else { 678 lck_mtx_unlock(bpf_mlock); 679 return (EBUSY); 680 } 681 d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF, M_WAIT); 682 if (d == NULL) { 683 /* this really is a catastrophic failure */ 684 printf("bpfopen: malloc bpf_d failed\n"); 685 bpf_dtab[minor(dev)] = NULL; 686 lck_mtx_unlock(bpf_mlock); 687 return ENOMEM; 688 } 689 bzero(d, sizeof(struct bpf_d)); 690 691 /* 692 * It is not necessary to take the BPF lock here because no other 693 * thread can access the device until it is marked opened... 694 */ 695 696 /* Mark "in use" and do most initialization. */ 697 d->bd_bufsize = bpf_bufsize; 698 d->bd_sig = SIGIO; 699 d->bd_seesent = 1; 700 d->bd_oflags = flags; 701 d->bd_state = BPF_IDLE; 702 d->bd_thread_call = thread_call_allocate(bpf_timed_out, d); 703 d->bd_traffic_class = SO_TC_BE; 704 if (bpf_wantpktap) 705 d->bd_flags |= BPF_WANT_PKTAP; 706 else 707 d->bd_flags &= ~BPF_WANT_PKTAP; 708 709 if (d->bd_thread_call == NULL) { 710 printf("bpfopen: malloc thread call failed\n"); 711 bpf_dtab[minor(dev)] = NULL; 712 lck_mtx_unlock(bpf_mlock); 713 _FREE(d, M_DEVBUF); 714 return ENOMEM; 715 } 716#if CONFIG_MACF_NET 717 mac_bpfdesc_label_init(d); 718 mac_bpfdesc_label_associate(kauth_cred_get(), d); 719#endif 720 bpf_dtab[minor(dev)] = d; /* Mark opened */ 721 lck_mtx_unlock(bpf_mlock); 722 723 return (0); 724} 725 726/* 727 * Close the descriptor by detaching it from its interface, 728 * deallocating its buffers, and marking it free. 729 */ 730/* ARGSUSED */ 731int 732bpfclose(dev_t dev, __unused int flags, __unused int fmt, 733 __unused struct proc *p) 734{ 735 struct bpf_d *d; 736 737 /* Take BPF lock to ensure no other thread is using the device */ 738 lck_mtx_lock(bpf_mlock); 739 740 d = bpf_dtab[minor(dev)]; 741 if (d == 0 || d == (void *)1) { 742 lck_mtx_unlock(bpf_mlock); 743 return (ENXIO); 744 } 745 bpf_dtab[minor(dev)] = (void *)1; /* Mark closing */ 746 747 /* 748 * Deal with any in-progress timeouts. 749 */ 750 switch (d->bd_state) { 751 case BPF_IDLE: 752 /* 753 * Not waiting for a timeout, and no timeout happened. 754 */ 755 break; 756 757 case BPF_WAITING: 758 /* 759 * Waiting for a timeout. 760 * Cancel any timer that has yet to go off, 761 * and mark the state as "closing". 762 * Then drop the lock to allow any timers that 763 * *have* gone off to run to completion, and wait 764 * for them to finish. 765 */ 766 if (!bpf_stop_timer(d)) { 767 /* 768 * There was no pending call, so the call must 769 * have been in progress. Wait for the call to 770 * complete; we have to drop the lock while 771 * waiting. to let the in-progrss call complete 772 */ 773 d->bd_state = BPF_DRAINING; 774 while (d->bd_state == BPF_DRAINING) 775 msleep((caddr_t)d, bpf_mlock, PRINET, 776 "bpfdraining", NULL); 777 } 778 d->bd_state = BPF_IDLE; 779 break; 780 781 case BPF_TIMED_OUT: 782 /* 783 * Timer went off, and the timeout routine finished. 784 */ 785 d->bd_state = BPF_IDLE; 786 break; 787 788 case BPF_DRAINING: 789 /* 790 * Another thread is blocked on a close waiting for 791 * a timeout to finish. 792 * This "shouldn't happen", as the first thread to enter 793 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and 794 * all subsequent threads should see that and fail with 795 * ENXIO. 796 */ 797 panic("Two threads blocked in a BPF close"); 798 break; 799 } 800 801 if (d->bd_bif) 802 bpf_detachd(d); 803 selthreadclear(&d->bd_sel); 804#if CONFIG_MACF_NET 805 mac_bpfdesc_label_destroy(d); 806#endif 807 thread_call_free(d->bd_thread_call); 808 809 while (d->bd_hbuf_read) 810 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); 811 812 bpf_freed(d); 813 814 /* Mark free in same context as bpfopen comes to check */ 815 bpf_dtab[minor(dev)] = NULL; /* Mark closed */ 816 lck_mtx_unlock(bpf_mlock); 817 818 _FREE(d, M_DEVBUF); 819 820 return (0); 821} 822 823 824#define BPF_SLEEP bpf_sleep 825 826static int 827bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo) 828{ 829 u_int64_t abstime = 0; 830 831 if(timo) 832 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime); 833 834 return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime); 835} 836 837/* 838 * Rotate the packet buffers in descriptor d. Move the store buffer 839 * into the hold slot, and the free buffer into the store slot. 840 * Zero the length of the new store buffer. 841 */ 842#define ROTATE_BUFFERS(d) \ 843 if (d->bd_hbuf_read) \ 844 panic("rotating bpf buffers during read"); \ 845 (d)->bd_hbuf = (d)->bd_sbuf; \ 846 (d)->bd_hlen = (d)->bd_slen; \ 847 (d)->bd_sbuf = (d)->bd_fbuf; \ 848 (d)->bd_slen = 0; \ 849 (d)->bd_fbuf = NULL; 850/* 851 * bpfread - read next chunk of packets from buffers 852 */ 853int 854bpfread(dev_t dev, struct uio *uio, int ioflag) 855{ 856 struct bpf_d *d; 857 caddr_t hbuf; 858 int timed_out, hbuf_len; 859 int error; 860 int flags; 861 862 lck_mtx_lock(bpf_mlock); 863 864 d = bpf_dtab[minor(dev)]; 865 if (d == 0 || d == (void *)1) { 866 lck_mtx_unlock(bpf_mlock); 867 return (ENXIO); 868 } 869 870 /* 871 * Restrict application to use a buffer the same size as 872 * as kernel buffers. 873 */ 874 if (uio_resid(uio) != d->bd_bufsize) { 875 lck_mtx_unlock(bpf_mlock); 876 return (EINVAL); 877 } 878 879 if (d->bd_state == BPF_WAITING) 880 bpf_stop_timer(d); 881 882 timed_out = (d->bd_state == BPF_TIMED_OUT); 883 d->bd_state = BPF_IDLE; 884 885 while (d->bd_hbuf_read) 886 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); 887 888 d = bpf_dtab[minor(dev)]; 889 if (d == 0 || d == (void *)1) { 890 lck_mtx_unlock(bpf_mlock); 891 return (ENXIO); 892 } 893 /* 894 * If the hold buffer is empty, then do a timed sleep, which 895 * ends when the timeout expires or when enough packets 896 * have arrived to fill the store buffer. 897 */ 898 while (d->bd_hbuf == 0) { 899 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) 900 && d->bd_slen != 0) { 901 /* 902 * We're in immediate mode, or are reading 903 * in non-blocking mode, or a timer was 904 * started before the read (e.g., by select() 905 * or poll()) and has expired and a packet(s) 906 * either arrived since the previous 907 * read or arrived while we were asleep. 908 * Rotate the buffers and return what's here. 909 */ 910 ROTATE_BUFFERS(d); 911 break; 912 } 913 914 /* 915 * No data is available, check to see if the bpf device 916 * is still pointed at a real interface. If not, return 917 * ENXIO so that the userland process knows to rebind 918 * it before using it again. 919 */ 920 if (d->bd_bif == NULL) { 921 lck_mtx_unlock(bpf_mlock); 922 return (ENXIO); 923 } 924 if (ioflag & IO_NDELAY) { 925 lck_mtx_unlock(bpf_mlock); 926 return (EWOULDBLOCK); 927 } 928 error = BPF_SLEEP(d, PRINET|PCATCH, "bpf", 929 d->bd_rtout); 930 /* 931 * Make sure device is still opened 932 */ 933 d = bpf_dtab[minor(dev)]; 934 if (d == 0 || d == (void *)1) { 935 lck_mtx_unlock(bpf_mlock); 936 return (ENXIO); 937 } 938 939 while (d->bd_hbuf_read) 940 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); 941 942 d = bpf_dtab[minor(dev)]; 943 if (d == 0 || d == (void *)1) { 944 lck_mtx_unlock(bpf_mlock); 945 return (ENXIO); 946 } 947 948 if (error == EINTR || error == ERESTART) { 949 if (d->bd_hbuf) { 950 /* 951 * Because we msleep, the hold buffer might 952 * be filled when we wake up. Avoid rotating 953 * in this case. 954 */ 955 break; 956 } 957 if (d->bd_slen) { 958 /* 959 * Sometimes we may be interrupted often and 960 * the sleep above will not timeout. 961 * Regardless, we should rotate the buffers 962 * if there's any new data pending and 963 * return it. 964 */ 965 ROTATE_BUFFERS(d); 966 break; 967 } 968 lck_mtx_unlock(bpf_mlock); 969 return (error); 970 } 971 if (error == EWOULDBLOCK) { 972 /* 973 * On a timeout, return what's in the buffer, 974 * which may be nothing. If there is something 975 * in the store buffer, we can rotate the buffers. 976 */ 977 if (d->bd_hbuf) 978 /* 979 * We filled up the buffer in between 980 * getting the timeout and arriving 981 * here, so we don't need to rotate. 982 */ 983 break; 984 985 if (d->bd_slen == 0) { 986 lck_mtx_unlock(bpf_mlock); 987 return (0); 988 } 989 ROTATE_BUFFERS(d); 990 break; 991 } 992 } 993 /* 994 * At this point, we know we have something in the hold slot. 995 */ 996 997 /* 998 * Set the hold buffer read. So we do not 999 * rotate the buffers until the hold buffer 1000 * read is complete. Also to avoid issues resulting 1001 * from page faults during disk sleep (<rdar://problem/13436396>). 1002 */ 1003 d->bd_hbuf_read = 1; 1004 hbuf = d->bd_hbuf; 1005 hbuf_len = d->bd_hlen; 1006 flags = d->bd_flags; 1007 lck_mtx_unlock(bpf_mlock); 1008 1009#ifdef __APPLE__ 1010 /* 1011 * Before we move data to userland, we fill out the extended 1012 * header fields. 1013 */ 1014 if (flags & BPF_EXTENDED_HDR) { 1015 char *p; 1016 1017 p = hbuf; 1018 while (p < hbuf + hbuf_len) { 1019 struct bpf_hdr_ext *ehp; 1020 uint32_t flowid; 1021 struct so_procinfo soprocinfo; 1022 int found = 0; 1023 1024 ehp = (struct bpf_hdr_ext *)(void *)p; 1025 if ((flowid = ehp->bh_flowid)) { 1026 if (ehp->bh_proto == IPPROTO_TCP) 1027 found = inp_findinpcb_procinfo(&tcbinfo, 1028 flowid, &soprocinfo); 1029 else if (ehp->bh_proto == IPPROTO_UDP) 1030 found = inp_findinpcb_procinfo(&udbinfo, 1031 flowid, &soprocinfo); 1032 if (found == 1) { 1033 ehp->bh_pid = soprocinfo.spi_pid; 1034 proc_name(ehp->bh_pid, ehp->bh_comm, MAXCOMLEN); 1035 } 1036 ehp->bh_flowid = 0; 1037 } 1038 if (flags & BPF_FINALIZE_PKTAP) { 1039 struct pktap_header *pktaphdr; 1040 1041 pktaphdr = (struct pktap_header *)(void *) 1042 (p + BPF_WORDALIGN(ehp->bh_hdrlen)); 1043 1044 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) 1045 pktap_finalize_proc_info(pktaphdr); 1046 1047 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) { 1048 ehp->bh_tstamp.tv_sec = 1049 pktaphdr->pth_tstamp.tv_sec; 1050 ehp->bh_tstamp.tv_usec = 1051 pktaphdr->pth_tstamp.tv_usec; 1052 } 1053 } 1054 p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen); 1055 } 1056 } else if (flags & BPF_FINALIZE_PKTAP) { 1057 char *p; 1058 1059 p = hbuf; 1060 while (p < hbuf + hbuf_len) { 1061 struct bpf_hdr *hp; 1062 struct pktap_header *pktaphdr; 1063 1064 hp = (struct bpf_hdr *)(void *)p; 1065 pktaphdr = (struct pktap_header *)(void *) 1066 (p + BPF_WORDALIGN(hp->bh_hdrlen)); 1067 1068 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) 1069 pktap_finalize_proc_info(pktaphdr); 1070 1071 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) { 1072 hp->bh_tstamp.tv_sec = 1073 pktaphdr->pth_tstamp.tv_sec; 1074 hp->bh_tstamp.tv_usec = 1075 pktaphdr->pth_tstamp.tv_usec; 1076 } 1077 1078 p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen); 1079 } 1080 } 1081#endif 1082 1083 /* 1084 * Move data from hold buffer into user space. 1085 * We know the entire buffer is transferred since 1086 * we checked above that the read buffer is bpf_bufsize bytes. 1087 */ 1088 error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio); 1089 1090 lck_mtx_lock(bpf_mlock); 1091 /* 1092 * Make sure device is still opened 1093 */ 1094 d = bpf_dtab[minor(dev)]; 1095 if (d == 0 || d == (void *)1) { 1096 lck_mtx_unlock(bpf_mlock); 1097 return (ENXIO); 1098 } 1099 1100 d->bd_hbuf_read = 0; 1101 d->bd_fbuf = d->bd_hbuf; 1102 d->bd_hbuf = NULL; 1103 d->bd_hlen = 0; 1104 wakeup((caddr_t)d); 1105 lck_mtx_unlock(bpf_mlock); 1106 return (error); 1107 1108} 1109 1110 1111/* 1112 * If there are processes sleeping on this descriptor, wake them up. 1113 */ 1114static void 1115bpf_wakeup(struct bpf_d *d) 1116{ 1117 if (d->bd_state == BPF_WAITING) { 1118 bpf_stop_timer(d); 1119 d->bd_state = BPF_IDLE; 1120 } 1121 wakeup((caddr_t)d); 1122 if (d->bd_async && d->bd_sig && d->bd_sigio) 1123 pgsigio(d->bd_sigio, d->bd_sig); 1124 1125 selwakeup(&d->bd_sel); 1126 KNOTE(&d->bd_sel.si_note, 1); 1127#ifndef __APPLE__ 1128 /* XXX */ 1129 d->bd_sel.si_pid = 0; 1130#endif 1131} 1132 1133 1134static void 1135bpf_timed_out(void *arg, __unused void *dummy) 1136{ 1137 struct bpf_d *d = (struct bpf_d *)arg; 1138 1139 lck_mtx_lock(bpf_mlock); 1140 if (d->bd_state == BPF_WAITING) { 1141 /* 1142 * There's a select or kqueue waiting for this; if there's 1143 * now stuff to read, wake it up. 1144 */ 1145 d->bd_state = BPF_TIMED_OUT; 1146 if (d->bd_slen != 0) 1147 bpf_wakeup(d); 1148 } else if (d->bd_state == BPF_DRAINING) { 1149 /* 1150 * A close is waiting for this to finish. 1151 * Mark it as finished, and wake the close up. 1152 */ 1153 d->bd_state = BPF_IDLE; 1154 bpf_wakeup(d); 1155 } 1156 lck_mtx_unlock(bpf_mlock); 1157} 1158 1159 1160 1161 1162 1163/* keep in sync with bpf_movein above: */ 1164#define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header)) 1165 1166int 1167bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag) 1168{ 1169 struct bpf_d *d; 1170 struct ifnet *ifp; 1171 struct mbuf *m = NULL; 1172 int error; 1173 char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN]; 1174 int datlen = 0; 1175 int bif_dlt; 1176 int bd_hdrcmplt; 1177 1178 lck_mtx_lock(bpf_mlock); 1179 1180 d = bpf_dtab[minor(dev)]; 1181 if (d == 0 || d == (void *)1) { 1182 lck_mtx_unlock(bpf_mlock); 1183 return (ENXIO); 1184 } 1185 if (d->bd_bif == 0) { 1186 lck_mtx_unlock(bpf_mlock); 1187 return (ENXIO); 1188 } 1189 1190 ifp = d->bd_bif->bif_ifp; 1191 1192 if ((ifp->if_flags & IFF_UP) == 0) { 1193 lck_mtx_unlock(bpf_mlock); 1194 return (ENETDOWN); 1195 } 1196 if (uio_resid(uio) == 0) { 1197 lck_mtx_unlock(bpf_mlock); 1198 return (0); 1199 } 1200 ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf); 1201 1202 /* 1203 * fix for PR-6849527 1204 * geting variables onto stack before dropping lock for bpf_movein() 1205 */ 1206 bif_dlt = (int)d->bd_bif->bif_dlt; 1207 bd_hdrcmplt = d->bd_hdrcmplt; 1208 1209 /* bpf_movein allocating mbufs; drop lock */ 1210 lck_mtx_unlock(bpf_mlock); 1211 1212 error = bpf_movein(uio, bif_dlt, &m, 1213 bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf, 1214 &datlen); 1215 1216 if (error) { 1217 return (error); 1218 } 1219 1220 /* taking the lock again and verifying whether device is open */ 1221 lck_mtx_lock(bpf_mlock); 1222 d = bpf_dtab[minor(dev)]; 1223 if (d == 0 || d == (void *)1) { 1224 lck_mtx_unlock(bpf_mlock); 1225 m_freem(m); 1226 return (ENXIO); 1227 } 1228 1229 if (d->bd_bif == NULL) { 1230 lck_mtx_unlock(bpf_mlock); 1231 m_free(m); 1232 return (ENXIO); 1233 } 1234 1235 if ((unsigned)datlen > ifp->if_mtu) { 1236 lck_mtx_unlock(bpf_mlock); 1237 m_freem(m); 1238 return (EMSGSIZE); 1239 } 1240 1241 1242#if CONFIG_MACF_NET 1243 mac_mbuf_label_associate_bpfdesc(d, m); 1244#endif 1245 1246 bpf_set_packet_service_class(m, d->bd_traffic_class); 1247 1248 lck_mtx_unlock(bpf_mlock); 1249 1250 if (d->bd_hdrcmplt) { 1251 if (d->bd_bif->bif_send) 1252 error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m); 1253 else 1254 error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL); 1255 } else { 1256 error = dlil_output(ifp, PF_INET, m, NULL, 1257 (struct sockaddr *)dst_buf, 0, NULL); 1258 } 1259 1260 /* 1261 * The driver frees the mbuf. 1262 */ 1263 return (error); 1264} 1265 1266/* 1267 * Reset a descriptor by flushing its packet buffer and clearing the 1268 * receive and drop counts. 1269 */ 1270static void 1271reset_d(struct bpf_d *d) 1272{ 1273 if (d->bd_hbuf_read) 1274 panic("resetting buffers during read"); 1275 1276 if (d->bd_hbuf) { 1277 /* Free the hold buffer. */ 1278 d->bd_fbuf = d->bd_hbuf; 1279 d->bd_hbuf = NULL; 1280 } 1281 d->bd_slen = 0; 1282 d->bd_hlen = 0; 1283 d->bd_rcount = 0; 1284 d->bd_dcount = 0; 1285} 1286 1287/* 1288 * FIONREAD Check for read packet available. 1289 * SIOCGIFADDR Get interface address - convenient hook to driver. 1290 * BIOCGBLEN Get buffer len [for read()]. 1291 * BIOCSETF Set ethernet read filter. 1292 * BIOCFLUSH Flush read packet buffer. 1293 * BIOCPROMISC Put interface into promiscuous mode. 1294 * BIOCGDLT Get link layer type. 1295 * BIOCGETIF Get interface name. 1296 * BIOCSETIF Set interface. 1297 * BIOCSRTIMEOUT Set read timeout. 1298 * BIOCGRTIMEOUT Get read timeout. 1299 * BIOCGSTATS Get packet stats. 1300 * BIOCIMMEDIATE Set immediate mode. 1301 * BIOCVERSION Get filter language version. 1302 * BIOCGHDRCMPLT Get "header already complete" flag 1303 * BIOCSHDRCMPLT Set "header already complete" flag 1304 * BIOCGSEESENT Get "see packets sent" flag 1305 * BIOCSSEESENT Set "see packets sent" flag 1306 * BIOCSETTC Set traffic class. 1307 * BIOCGETTC Get traffic class. 1308 * BIOCSEXTHDR Set "extended header" flag 1309 */ 1310/* ARGSUSED */ 1311int 1312bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, 1313 struct proc *p) 1314{ 1315 struct bpf_d *d; 1316 int error = 0; 1317 u_int int_arg; 1318 struct ifreq ifr; 1319 1320 lck_mtx_lock(bpf_mlock); 1321 1322 d = bpf_dtab[minor(dev)]; 1323 if (d == 0 || d == (void *)1) { 1324 lck_mtx_unlock(bpf_mlock); 1325 return (ENXIO); 1326 } 1327 1328 if (d->bd_state == BPF_WAITING) 1329 bpf_stop_timer(d); 1330 d->bd_state = BPF_IDLE; 1331 1332 switch (cmd) { 1333 1334 default: 1335 error = EINVAL; 1336 break; 1337 1338 /* 1339 * Check for read packet available. 1340 */ 1341 case FIONREAD: /* int */ 1342 { 1343 int n; 1344 1345 n = d->bd_slen; 1346 if (d->bd_hbuf && d->bd_hbuf_read == 0) 1347 n += d->bd_hlen; 1348 1349 bcopy(&n, addr, sizeof (n)); 1350 break; 1351 } 1352 1353 case SIOCGIFADDR: /* struct ifreq */ 1354 { 1355 struct ifnet *ifp; 1356 1357 if (d->bd_bif == 0) 1358 error = EINVAL; 1359 else { 1360 ifp = d->bd_bif->bif_ifp; 1361 error = ifnet_ioctl(ifp, 0, cmd, addr); 1362 } 1363 break; 1364 } 1365 1366 /* 1367 * Get buffer len [for read()]. 1368 */ 1369 case BIOCGBLEN: /* u_int */ 1370 bcopy(&d->bd_bufsize, addr, sizeof (u_int)); 1371 break; 1372 1373 /* 1374 * Set buffer length. 1375 */ 1376 case BIOCSBLEN: /* u_int */ 1377 if (d->bd_bif != 0) 1378 error = EINVAL; 1379 else { 1380 u_int size; 1381 1382 bcopy(addr, &size, sizeof (size)); 1383 1384 if (size > bpf_maxbufsize) 1385 size = bpf_maxbufsize; 1386 else if (size < BPF_MINBUFSIZE) 1387 size = BPF_MINBUFSIZE; 1388 bcopy(&size, addr, sizeof (size)); 1389 d->bd_bufsize = size; 1390 } 1391 break; 1392 1393 /* 1394 * Set link layer read filter. 1395 */ 1396 case BIOCSETF32: 1397 case BIOCSETFNR32: { /* struct bpf_program32 */ 1398 struct bpf_program32 prg32; 1399 1400 bcopy(addr, &prg32, sizeof (prg32)); 1401 error = bpf_setf(d, prg32.bf_len, 1402 CAST_USER_ADDR_T(prg32.bf_insns), dev, cmd); 1403 break; 1404 } 1405 1406 case BIOCSETF64: 1407 case BIOCSETFNR64: { /* struct bpf_program64 */ 1408 struct bpf_program64 prg64; 1409 1410 bcopy(addr, &prg64, sizeof (prg64)); 1411 error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, dev, cmd); 1412 break; 1413 } 1414 1415 /* 1416 * Flush read packet buffer. 1417 */ 1418 case BIOCFLUSH: 1419 while (d->bd_hbuf_read) { 1420 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); 1421 } 1422 1423 d = bpf_dtab[minor(dev)]; 1424 if (d == 0 || d == (void *)1) 1425 return (ENXIO); 1426 1427 reset_d(d); 1428 break; 1429 1430 /* 1431 * Put interface into promiscuous mode. 1432 */ 1433 case BIOCPROMISC: 1434 if (d->bd_bif == 0) { 1435 /* 1436 * No interface attached yet. 1437 */ 1438 error = EINVAL; 1439 break; 1440 } 1441 if (d->bd_promisc == 0) { 1442 lck_mtx_unlock(bpf_mlock); 1443 error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1); 1444 lck_mtx_lock(bpf_mlock); 1445 if (error == 0) 1446 d->bd_promisc = 1; 1447 } 1448 break; 1449 1450 /* 1451 * Get device parameters. 1452 */ 1453 case BIOCGDLT: /* u_int */ 1454 if (d->bd_bif == 0) 1455 error = EINVAL; 1456 else 1457 bcopy(&d->bd_bif->bif_dlt, addr, sizeof (u_int)); 1458 break; 1459 1460 /* 1461 * Get a list of supported data link types. 1462 */ 1463 case BIOCGDLTLIST: /* struct bpf_dltlist */ 1464 if (d->bd_bif == NULL) { 1465 error = EINVAL; 1466 } else { 1467 error = bpf_getdltlist(d, addr, p); 1468 } 1469 break; 1470 1471 /* 1472 * Set data link type. 1473 */ 1474 case BIOCSDLT: /* u_int */ 1475 if (d->bd_bif == NULL) { 1476 error = EINVAL; 1477 } else { 1478 u_int dlt; 1479 1480 bcopy(addr, &dlt, sizeof (dlt)); 1481 error = bpf_setdlt(d, dlt, dev); 1482 } 1483 break; 1484 1485 /* 1486 * Get interface name. 1487 */ 1488 case BIOCGETIF: /* struct ifreq */ 1489 if (d->bd_bif == 0) 1490 error = EINVAL; 1491 else { 1492 struct ifnet *const ifp = d->bd_bif->bif_ifp; 1493 1494 snprintf(((struct ifreq *)(void *)addr)->ifr_name, 1495 sizeof (ifr.ifr_name), "%s", if_name(ifp)); 1496 } 1497 break; 1498 1499 /* 1500 * Set interface. 1501 */ 1502 case BIOCSETIF: { /* struct ifreq */ 1503 ifnet_t ifp; 1504 1505 bcopy(addr, &ifr, sizeof (ifr)); 1506 ifr.ifr_name[IFNAMSIZ - 1] = '\0'; 1507 ifp = ifunit(ifr.ifr_name); 1508 if (ifp == NULL) 1509 error = ENXIO; 1510 else 1511 error = bpf_setif(d, ifp, 0, dev); 1512 break; 1513 } 1514 1515 /* 1516 * Set read timeout. 1517 */ 1518 case BIOCSRTIMEOUT32: { /* struct user32_timeval */ 1519 struct user32_timeval _tv; 1520 struct timeval tv; 1521 1522 bcopy(addr, &_tv, sizeof (_tv)); 1523 tv.tv_sec = _tv.tv_sec; 1524 tv.tv_usec = _tv.tv_usec; 1525 1526 /* 1527 * Subtract 1 tick from tvtohz() since this isn't 1528 * a one-shot timer. 1529 */ 1530 if ((error = itimerfix(&tv)) == 0) 1531 d->bd_rtout = tvtohz(&tv) - 1; 1532 break; 1533 } 1534 1535 case BIOCSRTIMEOUT64: { /* struct user64_timeval */ 1536 struct user64_timeval _tv; 1537 struct timeval tv; 1538 1539 bcopy(addr, &_tv, sizeof (_tv)); 1540 tv.tv_sec = _tv.tv_sec; 1541 tv.tv_usec = _tv.tv_usec; 1542 1543 /* 1544 * Subtract 1 tick from tvtohz() since this isn't 1545 * a one-shot timer. 1546 */ 1547 if ((error = itimerfix(&tv)) == 0) 1548 d->bd_rtout = tvtohz(&tv) - 1; 1549 break; 1550 } 1551 1552 /* 1553 * Get read timeout. 1554 */ 1555 case BIOCGRTIMEOUT32: { /* struct user32_timeval */ 1556 struct user32_timeval tv; 1557 1558 bzero(&tv, sizeof (tv)); 1559 tv.tv_sec = d->bd_rtout / hz; 1560 tv.tv_usec = (d->bd_rtout % hz) * tick; 1561 bcopy(&tv, addr, sizeof (tv)); 1562 break; 1563 } 1564 1565 case BIOCGRTIMEOUT64: { /* struct user64_timeval */ 1566 struct user64_timeval tv; 1567 1568 bzero(&tv, sizeof (tv)); 1569 tv.tv_sec = d->bd_rtout / hz; 1570 tv.tv_usec = (d->bd_rtout % hz) * tick; 1571 bcopy(&tv, addr, sizeof (tv)); 1572 break; 1573 } 1574 1575 /* 1576 * Get packet stats. 1577 */ 1578 case BIOCGSTATS: { /* struct bpf_stat */ 1579 struct bpf_stat bs; 1580 1581 bzero(&bs, sizeof (bs)); 1582 bs.bs_recv = d->bd_rcount; 1583 bs.bs_drop = d->bd_dcount; 1584 bcopy(&bs, addr, sizeof (bs)); 1585 break; 1586 } 1587 1588 /* 1589 * Set immediate mode. 1590 */ 1591 case BIOCIMMEDIATE: /* u_int */ 1592 bcopy(addr, &d->bd_immediate, sizeof (u_int)); 1593 break; 1594 1595 case BIOCVERSION: { /* struct bpf_version */ 1596 struct bpf_version bv; 1597 1598 bzero(&bv, sizeof (bv)); 1599 bv.bv_major = BPF_MAJOR_VERSION; 1600 bv.bv_minor = BPF_MINOR_VERSION; 1601 bcopy(&bv, addr, sizeof (bv)); 1602 break; 1603 } 1604 1605 /* 1606 * Get "header already complete" flag 1607 */ 1608 case BIOCGHDRCMPLT: /* u_int */ 1609 bcopy(&d->bd_hdrcmplt, addr, sizeof (u_int)); 1610 break; 1611 1612 /* 1613 * Set "header already complete" flag 1614 */ 1615 case BIOCSHDRCMPLT: /* u_int */ 1616 bcopy(addr, &int_arg, sizeof (int_arg)); 1617 d->bd_hdrcmplt = int_arg ? 1 : 0; 1618 break; 1619 1620 /* 1621 * Get "see sent packets" flag 1622 */ 1623 case BIOCGSEESENT: /* u_int */ 1624 bcopy(&d->bd_seesent, addr, sizeof (u_int)); 1625 break; 1626 1627 /* 1628 * Set "see sent packets" flag 1629 */ 1630 case BIOCSSEESENT: /* u_int */ 1631 bcopy(addr, &d->bd_seesent, sizeof (u_int)); 1632 break; 1633 1634 /* 1635 * Set traffic service class 1636 */ 1637 case BIOCSETTC: { /* int */ 1638 int tc; 1639 1640 bcopy(addr, &tc, sizeof (int)); 1641 error = bpf_set_traffic_class(d, tc); 1642 break; 1643 } 1644 1645 /* 1646 * Get traffic service class 1647 */ 1648 case BIOCGETTC: /* int */ 1649 bcopy(&d->bd_traffic_class, addr, sizeof (int)); 1650 break; 1651 1652 case FIONBIO: /* Non-blocking I/O; int */ 1653 break; 1654 1655 case FIOASYNC: /* Send signal on receive packets; int */ 1656 bcopy(addr, &d->bd_async, sizeof (int)); 1657 break; 1658#ifndef __APPLE__ 1659 case FIOSETOWN: 1660 error = fsetown(*(int *)addr, &d->bd_sigio); 1661 break; 1662 1663 case FIOGETOWN: 1664 *(int *)addr = fgetown(d->bd_sigio); 1665 break; 1666 1667 /* This is deprecated, FIOSETOWN should be used instead. */ 1668 case TIOCSPGRP: 1669 error = fsetown(-(*(int *)addr), &d->bd_sigio); 1670 break; 1671 1672 /* This is deprecated, FIOGETOWN should be used instead. */ 1673 case TIOCGPGRP: 1674 *(int *)addr = -fgetown(d->bd_sigio); 1675 break; 1676#endif 1677 case BIOCSRSIG: { /* Set receive signal; u_int */ 1678 u_int sig; 1679 1680 bcopy(addr, &sig, sizeof (u_int)); 1681 1682 if (sig >= NSIG) 1683 error = EINVAL; 1684 else 1685 d->bd_sig = sig; 1686 break; 1687 } 1688 case BIOCGRSIG: /* u_int */ 1689 bcopy(&d->bd_sig, addr, sizeof (u_int)); 1690 break; 1691#ifdef __APPLE__ 1692 case BIOCSEXTHDR: /* u_int */ 1693 bcopy(addr, &int_arg, sizeof (int_arg)); 1694 if (int_arg) 1695 d->bd_flags |= BPF_EXTENDED_HDR; 1696 else 1697 d->bd_flags &= ~BPF_EXTENDED_HDR; 1698 break; 1699 1700 case BIOCGIFATTACHCOUNT: { /* struct ifreq */ 1701 ifnet_t ifp; 1702 struct bpf_if *bp; 1703 1704 bcopy(addr, &ifr, sizeof (ifr)); 1705 ifr.ifr_name[IFNAMSIZ - 1] = '\0'; 1706 ifp = ifunit(ifr.ifr_name); 1707 if (ifp == NULL) { 1708 error = ENXIO; 1709 break; 1710 } 1711 ifr.ifr_intval = 0; 1712 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) { 1713 struct bpf_d *bpf_d; 1714 1715 if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) 1716 continue; 1717 for (bpf_d = bp->bif_dlist; bpf_d; bpf_d = bpf_d->bd_next) { 1718 ifr.ifr_intval += 1; 1719 } 1720 } 1721 bcopy(&ifr, addr, sizeof (ifr)); 1722 break; 1723 } 1724 case BIOCGWANTPKTAP: /* u_int */ 1725 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0; 1726 bcopy(&int_arg, addr, sizeof (int_arg)); 1727 break; 1728 1729 case BIOCSWANTPKTAP: /* u_int */ 1730 bcopy(addr, &int_arg, sizeof (int_arg)); 1731 if (int_arg) 1732 d->bd_flags |= BPF_WANT_PKTAP; 1733 else 1734 d->bd_flags &= ~BPF_WANT_PKTAP; 1735 break; 1736#endif 1737 } 1738 1739 lck_mtx_unlock(bpf_mlock); 1740 1741 return (error); 1742} 1743 1744/* 1745 * Set d's packet filter program to fp. If this file already has a filter, 1746 * free it and replace it. Returns EINVAL for bogus requests. 1747 */ 1748static int 1749bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns, dev_t dev, u_long cmd) 1750{ 1751 struct bpf_insn *fcode, *old; 1752 u_int flen, size; 1753 1754 while (d->bd_hbuf_read) 1755 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); 1756 1757 d = bpf_dtab[minor(dev)]; 1758 if (d == 0 || d == (void *)1) 1759 return (ENXIO); 1760 1761 old = d->bd_filter; 1762 if (bf_insns == USER_ADDR_NULL) { 1763 if (bf_len != 0) 1764 return (EINVAL); 1765 d->bd_filter = NULL; 1766 reset_d(d); 1767 if (old != 0) 1768 FREE((caddr_t)old, M_DEVBUF); 1769 return (0); 1770 } 1771 flen = bf_len; 1772 if (flen > BPF_MAXINSNS) 1773 return (EINVAL); 1774 1775 size = flen * sizeof(struct bpf_insn); 1776 fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT); 1777#ifdef __APPLE__ 1778 if (fcode == NULL) 1779 return (ENOBUFS); 1780#endif 1781 if (copyin(bf_insns, (caddr_t)fcode, size) == 0 && 1782 bpf_validate(fcode, (int)flen)) { 1783 d->bd_filter = fcode; 1784 1785 if (cmd == BIOCSETF32 || cmd == BIOCSETF64) 1786 reset_d(d); 1787 1788 if (old != 0) 1789 FREE((caddr_t)old, M_DEVBUF); 1790 1791 return (0); 1792 } 1793 FREE((caddr_t)fcode, M_DEVBUF); 1794 return (EINVAL); 1795} 1796 1797/* 1798 * Detach a file from its current interface (if attached at all) and attach 1799 * to the interface indicated by the name stored in ifr. 1800 * Return an errno or 0. 1801 */ 1802static int 1803bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt, dev_t dev) 1804{ 1805 struct bpf_if *bp; 1806 int error; 1807 1808 while (d->bd_hbuf_read) 1809 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); 1810 1811 d = bpf_dtab[minor(dev)]; 1812 if (d == 0 || d == (void *)1) 1813 return (ENXIO); 1814 1815 /* 1816 * Look through attached interfaces for the named one. 1817 */ 1818 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) { 1819 struct ifnet *ifp = bp->bif_ifp; 1820 1821 if (ifp == 0 || ifp != theywant || (dlt != 0 && dlt != bp->bif_dlt)) 1822 continue; 1823 /* 1824 * If the process knows how to deal with DLT_PKTAP, use it 1825 * by default 1826 */ 1827 if (dlt == 0 && bp->bif_dlt == DLT_PKTAP && 1828 !(d->bd_flags & BPF_WANT_PKTAP)) 1829 continue; 1830 /* 1831 * We found the requested interface. 1832 * Allocate the packet buffers if we need to. 1833 * If we're already attached to requested interface, 1834 * just flush the buffer. 1835 */ 1836 if (d->bd_sbuf == 0) { 1837 error = bpf_allocbufs(d); 1838 if (error != 0) 1839 return (error); 1840 } 1841 if (bp != d->bd_bif) { 1842 if (d->bd_bif) 1843 /* 1844 * Detach if attached to something else. 1845 */ 1846 bpf_detachd(d); 1847 1848 if (bpf_attachd(d, bp) != 0) { 1849 return ENXIO; 1850 } 1851 } 1852 reset_d(d); 1853 return (0); 1854 } 1855 /* Not found. */ 1856 return (ENXIO); 1857} 1858 1859 1860 1861/* 1862 * Get a list of available data link type of the interface. 1863 */ 1864static int 1865bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p) 1866{ 1867 u_int n; 1868 int error; 1869 struct ifnet *ifp; 1870 struct bpf_if *bp; 1871 user_addr_t dlist; 1872 struct bpf_dltlist bfl; 1873 1874 bcopy(addr, &bfl, sizeof (bfl)); 1875 if (proc_is64bit(p)) { 1876 dlist = (user_addr_t)bfl.bfl_u.bflu_pad; 1877 } else { 1878 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list); 1879 } 1880 1881 ifp = d->bd_bif->bif_ifp; 1882 n = 0; 1883 error = 0; 1884 1885 for (bp = bpf_iflist; bp; bp = bp->bif_next) { 1886 if (bp->bif_ifp != ifp) 1887 continue; 1888 /* 1889 * Return DLT_PKTAP only to processes that know how to handle it 1890 */ 1891 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) 1892 continue; 1893 if (dlist != USER_ADDR_NULL) { 1894 if (n >= bfl.bfl_len) { 1895 return (ENOMEM); 1896 } 1897 error = copyout(&bp->bif_dlt, dlist, 1898 sizeof (bp->bif_dlt)); 1899 if (error != 0) 1900 break; 1901 dlist += sizeof (bp->bif_dlt); 1902 } 1903 n++; 1904 } 1905 bfl.bfl_len = n; 1906 bcopy(&bfl, addr, sizeof (bfl)); 1907 1908 return (error); 1909} 1910 1911/* 1912 * Set the data link type of a BPF instance. 1913 */ 1914static int 1915bpf_setdlt(struct bpf_d *d, uint32_t dlt, dev_t dev) 1916{ 1917 int error, opromisc; 1918 struct ifnet *ifp; 1919 struct bpf_if *bp; 1920 1921 if (d->bd_bif->bif_dlt == dlt) 1922 return (0); 1923 1924 while (d->bd_hbuf_read) 1925 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); 1926 1927 d = bpf_dtab[minor(dev)]; 1928 if (d == 0 || d == (void *)1) 1929 return (ENXIO); 1930 1931 ifp = d->bd_bif->bif_ifp; 1932 for (bp = bpf_iflist; bp; bp = bp->bif_next) { 1933 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) 1934 break; 1935 } 1936 if (bp != NULL) { 1937 opromisc = d->bd_promisc; 1938 bpf_detachd(d); 1939 error = bpf_attachd(d, bp); 1940 if (error) { 1941 printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n", 1942 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp), error); 1943 return error; 1944 } 1945 reset_d(d); 1946 if (opromisc) { 1947 lck_mtx_unlock(bpf_mlock); 1948 error = ifnet_set_promiscuous(bp->bif_ifp, 1); 1949 lck_mtx_lock(bpf_mlock); 1950 if (error) 1951 printf("bpf_setdlt: ifpromisc %s%d failed (%d)\n", 1952 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp), error); 1953 else 1954 d->bd_promisc = 1; 1955 } 1956 } 1957 return (bp == NULL ? EINVAL : 0); 1958} 1959 1960static int 1961bpf_set_traffic_class(struct bpf_d *d, int tc) 1962{ 1963 int error = 0; 1964 1965 if (!SO_VALID_TC(tc)) 1966 error = EINVAL; 1967 else 1968 d->bd_traffic_class = tc; 1969 1970 return (error); 1971} 1972 1973static void 1974bpf_set_packet_service_class(struct mbuf *m, int tc) 1975{ 1976 if (!(m->m_flags & M_PKTHDR)) 1977 return; 1978 1979 VERIFY(SO_VALID_TC(tc)); 1980 (void) m_set_service_class(m, so_tc2msc(tc)); 1981} 1982 1983/* 1984 * Support for select() 1985 * 1986 * Return true iff the specific operation will not block indefinitely. 1987 * Otherwise, return false but make a note that a selwakeup() must be done. 1988 */ 1989int 1990bpfselect(dev_t dev, int which, void * wql, struct proc *p) 1991{ 1992 struct bpf_d *d; 1993 int ret = 0; 1994 1995 lck_mtx_lock(bpf_mlock); 1996 1997 d = bpf_dtab[minor(dev)]; 1998 if (d == 0 || d == (void *)1) { 1999 lck_mtx_unlock(bpf_mlock); 2000 return (ENXIO); 2001 } 2002 2003 if (d->bd_bif == NULL) { 2004 lck_mtx_unlock(bpf_mlock); 2005 return (ENXIO); 2006 } 2007 2008 while (d->bd_hbuf_read) 2009 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); 2010 2011 d = bpf_dtab[minor(dev)]; 2012 if (d == 0 || d == (void *)1) { 2013 lck_mtx_unlock(bpf_mlock); 2014 return (ENXIO); 2015 } 2016 2017 switch (which) { 2018 case FREAD: 2019 if (d->bd_hlen != 0 || 2020 ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) && 2021 d->bd_slen != 0)) 2022 ret = 1; /* read has data to return */ 2023 else { 2024 /* 2025 * Read has no data to return. 2026 * Make the select wait, and start a timer if 2027 * necessary. 2028 */ 2029 selrecord(p, &d->bd_sel, wql); 2030 bpf_start_timer(d); 2031 } 2032 break; 2033 2034 case FWRITE: 2035 ret = 1; /* can't determine whether a write would block */ 2036 break; 2037 } 2038 2039 lck_mtx_unlock(bpf_mlock); 2040 return (ret); 2041} 2042 2043 2044/* 2045 * Support for kevent() system call. Register EVFILT_READ filters and 2046 * reject all others. 2047 */ 2048int bpfkqfilter(dev_t dev, struct knote *kn); 2049static void filt_bpfdetach(struct knote *); 2050static int filt_bpfread(struct knote *, long); 2051 2052static struct filterops bpfread_filtops = { 2053 .f_isfd = 1, 2054 .f_detach = filt_bpfdetach, 2055 .f_event = filt_bpfread, 2056}; 2057 2058int 2059bpfkqfilter(dev_t dev, struct knote *kn) 2060{ 2061 struct bpf_d *d; 2062 2063 /* 2064 * Is this device a bpf? 2065 */ 2066 if (major(dev) != CDEV_MAJOR) { 2067 return (EINVAL); 2068 } 2069 2070 if (kn->kn_filter != EVFILT_READ) { 2071 return (EINVAL); 2072 } 2073 2074 lck_mtx_lock(bpf_mlock); 2075 2076 d = bpf_dtab[minor(dev)]; 2077 if (d == 0 || d == (void *)1) { 2078 lck_mtx_unlock(bpf_mlock); 2079 return (ENXIO); 2080 } 2081 2082 if (d->bd_bif == NULL) { 2083 lck_mtx_unlock(bpf_mlock); 2084 return (ENXIO); 2085 } 2086 2087 kn->kn_hook = d; 2088 kn->kn_fop = &bpfread_filtops; 2089 KNOTE_ATTACH(&d->bd_sel.si_note, kn); 2090 lck_mtx_unlock(bpf_mlock); 2091 return 0; 2092} 2093 2094static void 2095filt_bpfdetach(struct knote *kn) 2096{ 2097 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 2098 2099 lck_mtx_lock(bpf_mlock); 2100 KNOTE_DETACH(&d->bd_sel.si_note, kn); 2101 lck_mtx_unlock(bpf_mlock); 2102} 2103 2104static int 2105filt_bpfread(struct knote *kn, long hint) 2106{ 2107 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 2108 int ready = 0; 2109 2110 if (hint == 0) 2111 lck_mtx_lock(bpf_mlock); 2112 2113 if (d->bd_immediate) { 2114 /* 2115 * If there's data in the hold buffer, it's the 2116 * amount of data a read will return. 2117 * 2118 * If there's no data in the hold buffer, but 2119 * there's data in the store buffer, a read will 2120 * immediately rotate the store buffer to the 2121 * hold buffer, the amount of data in the store 2122 * buffer is the amount of data a read will 2123 * return. 2124 * 2125 * If there's no data in either buffer, we're not 2126 * ready to read. 2127 */ 2128 kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read) 2129 ? d->bd_slen : d->bd_hlen); 2130 int64_t lowwat = 1; 2131 if (kn->kn_sfflags & NOTE_LOWAT) 2132 { 2133 if (kn->kn_sdata > d->bd_bufsize) 2134 lowwat = d->bd_bufsize; 2135 else if (kn->kn_sdata > lowwat) 2136 lowwat = kn->kn_sdata; 2137 } 2138 ready = (kn->kn_data >= lowwat); 2139 } else { 2140 /* 2141 * If there's data in the hold buffer, it's the 2142 * amount of data a read will return. 2143 * 2144 * If there's no data in the hold buffer, but 2145 * there's data in the store buffer, if the 2146 * timer has expired a read will immediately 2147 * rotate the store buffer to the hold buffer, 2148 * so the amount of data in the store buffer is 2149 * the amount of data a read will return. 2150 * 2151 * If there's no data in either buffer, or there's 2152 * no data in the hold buffer and the timer hasn't 2153 * expired, we're not ready to read. 2154 */ 2155 kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read) && d->bd_state == BPF_TIMED_OUT ? 2156 d->bd_slen : d->bd_hlen); 2157 ready = (kn->kn_data > 0); 2158 } 2159 if (!ready) 2160 bpf_start_timer(d); 2161 2162 if (hint == 0) 2163 lck_mtx_unlock(bpf_mlock); 2164 return (ready); 2165} 2166 2167/* 2168 * Copy data from an mbuf chain into a buffer. This code is derived 2169 * from m_copydata in sys/uipc_mbuf.c. 2170 */ 2171static void 2172bpf_mcopy(const void *src_arg, void *dst_arg, size_t len) 2173{ 2174 struct mbuf *m = (struct mbuf *)(uintptr_t)(src_arg); 2175 u_int count; 2176 u_char *dst; 2177 2178 dst = dst_arg; 2179 while (len > 0) { 2180 if (m == 0) 2181 panic("bpf_mcopy"); 2182 count = min(m->m_len, len); 2183 bcopy(mbuf_data(m), dst, count); 2184 m = m->m_next; 2185 dst += count; 2186 len -= count; 2187 } 2188} 2189 2190static inline void 2191bpf_tap_imp( 2192 ifnet_t ifp, 2193 u_int32_t dlt, 2194 mbuf_t m, 2195 void* hdr, 2196 size_t hlen, 2197 int outbound) 2198{ 2199 struct bpf_if *bp; 2200 struct mbuf *savedm = m; 2201 2202 /* 2203 * It's possible that we get here after the bpf descriptor has been 2204 * detached from the interface; in such a case we simply return. 2205 * Lock ordering is important since we can be called asynchronously 2206 * (from the IOKit) to process an inbound packet; when that happens 2207 * we would have been holding its "gateLock" and will be acquiring 2208 * "bpf_mlock" upon entering this routine. Due to that, we release 2209 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will 2210 * acquire "gateLock" in the IOKit), in order to avoid a deadlock 2211 * when a ifnet_set_promiscuous request simultaneously collides with 2212 * an inbound packet being passed into the tap callback. 2213 */ 2214 lck_mtx_lock(bpf_mlock); 2215 if (ifp->if_bpf == NULL) { 2216 lck_mtx_unlock(bpf_mlock); 2217 return; 2218 } 2219 bp = ifp->if_bpf; 2220 for (bp = ifp->if_bpf; bp && bp->bif_ifp == ifp && 2221 (dlt != 0 && bp->bif_dlt != dlt); bp = bp->bif_next) 2222 ; 2223 if (bp && bp->bif_ifp == ifp && bp->bif_dlist != NULL) { 2224 struct bpf_d *d; 2225 struct m_hdr hack_hdr; 2226 u_int pktlen = 0; 2227 u_int slen = 0; 2228 struct mbuf *m0; 2229 2230 if (hdr) { 2231 /* 2232 * This is gross. We mock up an mbuf that points to the 2233 * header buffer. This means we don't have to copy the 2234 * header. A number of interfaces prepended headers just 2235 * for bpf by allocating an mbuf on the stack. We want to 2236 * give developers an easy way to prepend a header for bpf. 2237 * Since a developer allocating an mbuf on the stack is bad, 2238 * we do even worse here, allocating only a header to point 2239 * to a buffer the developer supplied. This makes assumptions 2240 * that bpf_filter and catchpacket will not look at anything 2241 * in the mbuf other than the header. This was true at the 2242 * time this code was written. 2243 */ 2244 hack_hdr.mh_next = m; 2245 hack_hdr.mh_nextpkt = NULL; 2246 hack_hdr.mh_len = hlen; 2247 hack_hdr.mh_data = hdr; 2248 hack_hdr.mh_type = m->m_type; 2249 hack_hdr.mh_flags = 0; 2250 2251 m = (mbuf_t)&hack_hdr; 2252 } 2253 2254 for (m0 = m; m0 != 0; m0 = m0->m_next) 2255 pktlen += m0->m_len; 2256 2257 for (d = bp->bif_dlist; d; d = d->bd_next) { 2258 if (outbound && !d->bd_seesent) 2259 continue; 2260 ++d->bd_rcount; 2261 slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0); 2262 if (slen != 0) { 2263#if CONFIG_MACF_NET 2264 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0) 2265 continue; 2266#endif 2267 catchpacket(d, (u_char *)m, savedm, pktlen, 2268 slen, outbound, bpf_mcopy); 2269 } 2270 } 2271 } 2272 lck_mtx_unlock(bpf_mlock); 2273} 2274 2275void 2276bpf_tap_out( 2277 ifnet_t ifp, 2278 u_int32_t dlt, 2279 mbuf_t m, 2280 void* hdr, 2281 size_t hlen) 2282{ 2283 bpf_tap_imp(ifp, dlt, m, hdr, hlen, 1); 2284} 2285 2286void 2287bpf_tap_in( 2288 ifnet_t ifp, 2289 u_int32_t dlt, 2290 mbuf_t m, 2291 void* hdr, 2292 size_t hlen) 2293{ 2294 bpf_tap_imp(ifp, dlt, m, hdr, hlen, 0); 2295} 2296 2297/* Callback registered with Ethernet driver. */ 2298static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m) 2299{ 2300 bpf_tap_imp(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL); 2301 2302 return 0; 2303} 2304 2305/* 2306 * Move the packet data from interface memory (pkt) into the 2307 * store buffer. Return 1 if it's time to wakeup a listener (buffer full), 2308 * otherwise 0. "copy" is the routine called to do the actual data 2309 * transfer. bcopy is passed in to copy contiguous chunks, while 2310 * bpf_mcopy is passed in to copy mbuf chains. In the latter case, 2311 * pkt is really an mbuf. 2312 */ 2313static void 2314catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen, 2315 u_int snaplen, int outbound, 2316 void (*cpfn)(const void *, void *, size_t)) 2317{ 2318 struct bpf_hdr *hp; 2319 struct bpf_hdr_ext *ehp; 2320 int totlen, curlen; 2321 int hdrlen, caplen; 2322 int do_wakeup = 0; 2323 u_char *payload; 2324 struct timeval tv; 2325 struct m_tag *mt = NULL; 2326 struct bpf_mtag *bt = NULL; 2327 2328 hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen : 2329 d->bd_bif->bif_hdrlen; 2330 /* 2331 * Figure out how many bytes to move. If the packet is 2332 * greater or equal to the snapshot length, transfer that 2333 * much. Otherwise, transfer the whole packet (unless 2334 * we hit the buffer size limit). 2335 */ 2336 totlen = hdrlen + min(snaplen, pktlen); 2337 if (totlen > d->bd_bufsize) 2338 totlen = d->bd_bufsize; 2339 2340 /* 2341 * Round up the end of the previous packet to the next longword. 2342 */ 2343 curlen = BPF_WORDALIGN(d->bd_slen); 2344 if (curlen + totlen > d->bd_bufsize) { 2345 /* 2346 * This packet will overflow the storage buffer. 2347 * Rotate the buffers if we can, then wakeup any 2348 * pending reads. 2349 */ 2350 if (d->bd_fbuf == NULL) { 2351 /* 2352 * We haven't completed the previous read yet, 2353 * so drop the packet. 2354 */ 2355 ++d->bd_dcount; 2356 return; 2357 } 2358 ROTATE_BUFFERS(d); 2359 do_wakeup = 1; 2360 curlen = 0; 2361 } 2362 else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) 2363 /* 2364 * Immediate mode is set, or the read timeout has 2365 * already expired during a select call. A packet 2366 * arrived, so the reader should be woken up. 2367 */ 2368 do_wakeup = 1; 2369 2370 /* 2371 * Append the bpf header. 2372 */ 2373 microtime(&tv); 2374 if (d->bd_flags & BPF_EXTENDED_HDR) { 2375 ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen); 2376 memset(ehp, 0, sizeof(*ehp)); 2377 ehp->bh_tstamp.tv_sec = tv.tv_sec; 2378 ehp->bh_tstamp.tv_usec = tv.tv_usec; 2379 ehp->bh_datalen = pktlen; 2380 ehp->bh_hdrlen = hdrlen; 2381 ehp->bh_caplen = totlen - hdrlen; 2382 mt = m_tag_locate(m, bpf_mtag_id, 0, NULL); 2383 if (mt && mt->m_tag_len >= sizeof(*bt)) { 2384 bt = (struct bpf_mtag *)(mt + 1); 2385 ehp->bh_pid = bt->bt_pid; 2386 strlcpy(ehp->bh_comm, bt->bt_comm, 2387 sizeof(ehp->bh_comm)); 2388 ehp->bh_svc = so_svc2tc(bt->bt_svc); 2389 if (bt->bt_direction == BPF_MTAG_DIR_OUT) 2390 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT; 2391 else 2392 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN; 2393 m_tag_delete(m, mt); 2394 } else if (outbound) { 2395 /* only do lookups on non-raw INPCB */ 2396 if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID| 2397 PKTF_FLOW_LOCALSRC|PKTF_FLOW_RAWSOCK)) == 2398 (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) && 2399 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) { 2400 ehp->bh_flowid = m->m_pkthdr.pkt_flowid; 2401 ehp->bh_proto = m->m_pkthdr.pkt_proto; 2402 } 2403 ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc); 2404 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT; 2405 } else 2406 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN; 2407 payload = (u_char *)ehp + hdrlen; 2408 caplen = ehp->bh_caplen; 2409 } else { 2410 hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen); 2411 hp->bh_tstamp.tv_sec = tv.tv_sec; 2412 hp->bh_tstamp.tv_usec = tv.tv_usec; 2413 hp->bh_datalen = pktlen; 2414 hp->bh_hdrlen = hdrlen; 2415 hp->bh_caplen = totlen - hdrlen; 2416 payload = (u_char *)hp + hdrlen; 2417 caplen = hp->bh_caplen; 2418 } 2419 /* 2420 * Copy the packet data into the store buffer and update its length. 2421 */ 2422 (*cpfn)(pkt, payload, caplen); 2423 d->bd_slen = curlen + totlen; 2424 2425 if (do_wakeup) 2426 bpf_wakeup(d); 2427} 2428 2429/* 2430 * Initialize all nonzero fields of a descriptor. 2431 */ 2432static int 2433bpf_allocbufs(struct bpf_d *d) 2434{ 2435 d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT); 2436 if (d->bd_fbuf == 0) 2437 return (ENOBUFS); 2438 2439 d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT); 2440 if (d->bd_sbuf == 0) { 2441 FREE(d->bd_fbuf, M_DEVBUF); 2442 return (ENOBUFS); 2443 } 2444 d->bd_slen = 0; 2445 d->bd_hlen = 0; 2446 return (0); 2447} 2448 2449/* 2450 * Free buffers currently in use by a descriptor. 2451 * Called on close. 2452 */ 2453static void 2454bpf_freed(struct bpf_d *d) 2455{ 2456 /* 2457 * We don't need to lock out interrupts since this descriptor has 2458 * been detached from its interface and it yet hasn't been marked 2459 * free. 2460 */ 2461 if (d->bd_hbuf_read) 2462 panic("bpf buffer freed during read"); 2463 2464 if (d->bd_sbuf != 0) { 2465 FREE(d->bd_sbuf, M_DEVBUF); 2466 if (d->bd_hbuf != 0) 2467 FREE(d->bd_hbuf, M_DEVBUF); 2468 if (d->bd_fbuf != 0) 2469 FREE(d->bd_fbuf, M_DEVBUF); 2470 } 2471 if (d->bd_filter) 2472 FREE((caddr_t)d->bd_filter, M_DEVBUF); 2473} 2474 2475/* 2476 * Attach an interface to bpf. driverp is a pointer to a (struct bpf_if *) 2477 * in the driver's softc; dlt is the link layer type; hdrlen is the fixed 2478 * size of the link header (variable length headers not yet supported). 2479 */ 2480void 2481bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) 2482{ 2483 bpf_attach(ifp, dlt, hdrlen, NULL, NULL); 2484} 2485 2486errno_t 2487bpf_attach( 2488 ifnet_t ifp, 2489 u_int32_t dlt, 2490 u_int32_t hdrlen, 2491 bpf_send_func send, 2492 bpf_tap_func tap) 2493{ 2494 struct bpf_if *bp_new; 2495 struct bpf_if *bp_temp; 2496 struct bpf_if *bp_first = NULL; 2497 2498 bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF, M_WAIT); 2499 if (bp_new == 0) 2500 panic("bpfattach"); 2501 2502 lck_mtx_lock(bpf_mlock); 2503 2504 /* 2505 * Check if this interface/dlt is already attached, record first 2506 * attachment for this interface. 2507 */ 2508 for (bp_temp = bpf_iflist; bp_temp && (bp_temp->bif_ifp != ifp || 2509 bp_temp->bif_dlt != dlt); bp_temp = bp_temp->bif_next) { 2510 if (bp_temp->bif_ifp == ifp && bp_first == NULL) 2511 bp_first = bp_temp; 2512 } 2513 2514 if (bp_temp != NULL) { 2515 printf("bpfattach - %s with dlt %d is already attached\n", 2516 if_name(ifp), dlt); 2517 FREE(bp_new, M_DEVBUF); 2518 lck_mtx_unlock(bpf_mlock); 2519 return EEXIST; 2520 } 2521 2522 bzero(bp_new, sizeof(*bp_new)); 2523 bp_new->bif_ifp = ifp; 2524 bp_new->bif_dlt = dlt; 2525 bp_new->bif_send = send; 2526 bp_new->bif_tap = tap; 2527 2528 if (bp_first == NULL) { 2529 /* No other entries for this ifp */ 2530 bp_new->bif_next = bpf_iflist; 2531 bpf_iflist = bp_new; 2532 } 2533 else { 2534 /* Add this after the first entry for this interface */ 2535 bp_new->bif_next = bp_first->bif_next; 2536 bp_first->bif_next = bp_new; 2537 } 2538 2539 /* 2540 * Compute the length of the bpf header. This is not necessarily 2541 * equal to SIZEOF_BPF_HDR because we want to insert spacing such 2542 * that the network layer header begins on a longword boundary (for 2543 * performance reasons and to alleviate alignment restrictions). 2544 */ 2545 bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; 2546 bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen + 2547 sizeof(struct bpf_hdr_ext)) - hdrlen; 2548 2549 /* Take a reference on the interface */ 2550 ifnet_reference(ifp); 2551 2552 lck_mtx_unlock(bpf_mlock); 2553 2554#ifndef __APPLE__ 2555 if (bootverbose) 2556 printf("bpf: %s attached\n", if_name(ifp)); 2557#endif 2558 2559 return 0; 2560} 2561 2562/* 2563 * Detach bpf from an interface. This involves detaching each descriptor 2564 * associated with the interface, and leaving bd_bif NULL. Notify each 2565 * descriptor as it's detached so that any sleepers wake up and get 2566 * ENXIO. 2567 */ 2568void 2569bpfdetach(struct ifnet *ifp) 2570{ 2571 struct bpf_if *bp, *bp_prev, *bp_next; 2572 struct bpf_if *bp_free_list = NULL; 2573 struct bpf_d *d; 2574 2575 lck_mtx_lock(bpf_mlock); 2576 2577 /* 2578 * Build the list of devices attached to that interface 2579 * that we need to free while keeping the lock to maintain 2580 * the integrity of the interface list 2581 */ 2582 bp_prev = NULL; 2583 for (bp = bpf_iflist; bp != NULL; bp = bp_next) { 2584 bp_next = bp->bif_next; 2585 2586 if (ifp != bp->bif_ifp) { 2587 bp_prev = bp; 2588 continue; 2589 } 2590 /* Unlink from the interface list */ 2591 if (bp_prev) 2592 bp_prev->bif_next = bp->bif_next; 2593 else 2594 bpf_iflist = bp->bif_next; 2595 2596 /* Add to the list to be freed */ 2597 bp->bif_next = bp_free_list; 2598 bp_free_list = bp; 2599 } 2600 2601 /* 2602 * Detach the bpf devices attached to the interface 2603 * Now we do not care if we lose the bpf_mlock in bpf_detachd 2604 */ 2605 for (bp = bp_free_list; bp != NULL; bp = bp->bif_next) { 2606 while ((d = bp->bif_dlist) != NULL) { 2607 bpf_detachd(d); 2608 bpf_wakeup(d); 2609 } 2610 ifnet_release(ifp); 2611 } 2612 2613 lck_mtx_unlock(bpf_mlock); 2614 2615 /* 2616 * Free the list 2617 */ 2618 while ((bp = bp_free_list) != NULL) { 2619 bp_free_list = bp->bif_next; 2620 FREE(bp, M_DEVBUF); 2621 } 2622} 2623 2624void 2625bpf_init(__unused void *unused) 2626{ 2627#ifdef __APPLE__ 2628 int i; 2629 int maj; 2630 2631 if (bpf_devsw_installed == 0) { 2632 bpf_devsw_installed = 1; 2633 bpf_mlock_grp_attr = lck_grp_attr_alloc_init(); 2634 bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr); 2635 bpf_mlock_attr = lck_attr_alloc_init(); 2636 lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr); 2637 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw); 2638 if (maj == -1) { 2639 if (bpf_mlock_attr) 2640 lck_attr_free(bpf_mlock_attr); 2641 if (bpf_mlock_grp) 2642 lck_grp_free(bpf_mlock_grp); 2643 if (bpf_mlock_grp_attr) 2644 lck_grp_attr_free(bpf_mlock_grp_attr); 2645 2646 bpf_mlock = NULL; 2647 bpf_mlock_attr = NULL; 2648 bpf_mlock_grp = NULL; 2649 bpf_mlock_grp_attr = NULL; 2650 bpf_devsw_installed = 0; 2651 printf("bpf_init: failed to allocate a major number!\n"); 2652 return; 2653 } 2654 2655 for (i = 0 ; i < NBPFILTER; i++) 2656 bpf_make_dev_t(maj); 2657 2658 VERIFY(mbuf_tag_id_find(BPF_CONTROL_NAME, &bpf_mtag_id) == 0); 2659 } 2660#else 2661 cdevsw_add(&bpf_cdevsw); 2662#endif 2663} 2664 2665#ifndef __APPLE__ 2666SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL) 2667#endif 2668 2669#if CONFIG_MACF_NET 2670struct label * 2671mac_bpfdesc_label_get(struct bpf_d *d) 2672{ 2673 2674 return (d->bd_label); 2675} 2676 2677void 2678mac_bpfdesc_label_set(struct bpf_d *d, struct label *label) 2679{ 2680 2681 d->bd_label = label; 2682} 2683#endif 2684