bpf.c revision 177596
1/*- 2 * Copyright (c) 1990, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from the Stanford/CMU enet packet filter, 6 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 7 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 8 * Berkeley Laboratory. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)bpf.c 8.4 (Berkeley) 1/9/95 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: head/sys/net/bpf.c 177596 2008-03-25 07:41:33Z rwatson $"); 39 40#include "opt_bpf.h" 41#include "opt_mac.h" 42#include "opt_netgraph.h" 43 44#include <sys/types.h> 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/conf.h> 48#include <sys/fcntl.h> 49#include <sys/malloc.h> 50#include <sys/mbuf.h> 51#include <sys/time.h> 52#include <sys/priv.h> 53#include <sys/proc.h> 54#include <sys/signalvar.h> 55#include <sys/filio.h> 56#include <sys/sockio.h> 57#include <sys/ttycom.h> 58#include <sys/uio.h> 59 60#include <sys/event.h> 61#include <sys/file.h> 62#include <sys/poll.h> 63#include <sys/proc.h> 64 65#include <sys/socket.h> 66 67#include <net/if.h> 68#include <net/bpf.h> 69#include <net/bpf_buffer.h> 70#ifdef BPF_JITTER 71#include <net/bpf_jitter.h> 72#endif 73#include <net/bpf_zerocopy.h> 74#include <net/bpfdesc.h> 75 76#include <netinet/in.h> 77#include <netinet/if_ether.h> 78#include <sys/kernel.h> 79#include <sys/sysctl.h> 80 81#include <net80211/ieee80211_freebsd.h> 82 83#include <security/mac/mac_framework.h> 84 85MALLOC_DEFINE(M_BPF, "BPF", "BPF data"); 86 87#if defined(DEV_BPF) || defined(NETGRAPH_BPF) 88 89#define PRINET 26 /* interruptible */ 90 91#define M_SKIP_BPF M_SKIP_FIREWALL 92 93/* 94 * bpf_iflist is a list of BPF interface structures, each corresponding to a 95 * specific DLT. The same network interface might have several BPF interface 96 * structures registered by different layers in the stack (i.e., 802.11 97 * frames, ethernet frames, etc). 
 */
static LIST_HEAD(, bpf_if)	bpf_iflist;
static struct mtx	bpf_mtx;		/* bpf global lock */
static int	bpf_bpfd_cnt;			/* number of attached descriptors */

/* Forward declarations for the file-local implementation below. */
static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static void	bpf_freed(struct bpf_d *);
static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
		    struct sockaddr *, int *, struct bpf_insn *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
		    struct timeval *);
static void	reset_d(struct bpf_d *);
static int	 bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static void	bpf_drvinit(void *);
static void	bpf_clone(void *, struct ucred *, char *, int, struct cdev **);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

/* Tunables and statistics exported under the net.bpf sysctl tree. */
SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
static int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
static int bpf_zerocopy_enable = 0;
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
    &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");

static	d_open_t	bpfopen;
static	d_close_t	bpfclose;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

/* Character-device entry points for /dev/bpf* nodes. */
static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	bpfopen,
	.d_close =	bpfclose,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};

static struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfdetach, filt_bpfread };

/*
 * Wrapper functions for various buffering methods.  If the set of buffer
 * modes expands, we will probably want to introduce a switch data structure
 * similar to protosw, et.
 */

/*
 * Append raw bytes to the store buffer, dispatching on the descriptor's
 * buffer mode.  Caller must hold the descriptor lock.
 */
static void
bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_bytes(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		/* Count zero-copy appends for BIOCGSTATS-style accounting. */
		d->bd_zcopy++;
		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_bytes");
	}
}

/*
 * Append data held in an mbuf chain to the store buffer, dispatching on the
 * descriptor's buffer mode.  Caller must hold the descriptor lock.
 */
static void
bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		d->bd_zcopy++;
		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_mbuf");
	}
}

/*
 * If the buffer mechanism has a way to decide that a held buffer can be made
 * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
 * returned if the buffer can be discarded, (0) is returned if it cannot.
 */
static int
bpf_canfreebuf(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canfreebuf(d));
	}
	/* Classic kernel buffers are never reclaimable from here. */
	return (0);
}

/*
 * Notify the buffer model that a hold buffer is now owned by the kernel
 * (i.e. it is being handed to userspace).  Only zero-copy needs to act.
 * Caller must hold the descriptor lock.
 */
void
bpf_bufheld(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_bufheld(d);
		break;
	}
}

/*
 * Release all buffer storage owned by the descriptor, dispatching on the
 * buffer mode.  Called from descriptor teardown.
 */
static void
bpf_free(struct bpf_d *d)
{

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_free(d));

	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_free(d));

	default:
		panic("bpf_buf_free");
	}
}

/*
 * Copy a completed buffer out to userspace via uio.  Only meaningful for
 * the classic kernel-buffer mode; zero-copy consumers read shared memory
 * directly, so anything else gets EOPNOTSUPP.
 */
static int
bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_uiomove(d, buf, len, uio));
}

/*
 * BIOCSBLEN handler: buffer length is only adjustable in kernel-buffer
 * mode.
 */
static int
bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_ioctl_sblen(d, i));
}

/* BIOCGETZMAX handler: valid only once the descriptor is in zero-copy mode. */
static int
bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
}

/* BIOCROTZBUF handler: force rotation of the zero-copy buffers. */
static int
bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
}

/* BIOCSETZBUF handler: register userspace-supplied zero-copy buffers. */
static int
bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
}

/*
 * General BPF functions.
 */
/*
 * Copy a packet written by userspace into an mbuf chain for transmission,
 * building a link-layer sockaddr for the interface output routine and
 * running the descriptor's write filter over it.  On success *mp holds the
 * new mbuf and *hdrlen the number of link-header bytes copied into sockp;
 * on failure the mbuf is freed and an errno is returned.
 */
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
    struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
{
	const struct ieee80211_bpf_params *p;
	struct ether_header *eh;
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * null interface types require a 4 byte pseudo header which
		 * corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
		sockp->sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
		sockp->sa_family = AF_IEEE80211;
		sockp->sa_len = 12;	/* XXX != 0 */
		hlen = sizeof(struct ieee80211_bpf_params);
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;

	/* The link header does not count against the interface MTU. */
	if (len - hlen > ifp->if_mtu)
		return (EMSGSIZE);

	if ((unsigned)len > MCLBYTES)
		return (EIO);

	/* Cluster for large packets, plain header mbuf otherwise. */
	if (len > MHLEN) {
		m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
	} else {
		MGETHDR(m, M_TRYWAIT, MT_DATA);
	}
	if (m == NULL)
		return (ENOBUFS);
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	/* A write filter that rejects the packet yields EPERM to the writer. */
	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB:
		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
			    ETHER_ADDR_LEN) == 0)
				m->m_flags |= M_BCAST;
			else
				m->m_flags |= M_MCAST;
		}
		break;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (sockp->sa_family == AF_IEEE80211) {
			/*
			 * Collect true length from the parameter header
			 * NB: sockp is known to be zero'd so if we do a
			 *     short copy unspecified parameters will be
			 *     zero.
			 * NB: packet may not be aligned after stripping
			 *     bpf params
			 * XXX check ibp_vers
			 */
			p = mtod(m, const struct ieee80211_bpf_params *);
			hlen = p->ibp_len;
			if (hlen > sizeof(sockp->sa_data)) {
				error = EINVAL;
				goto bad;
			}
		}
		bcopy(m->m_data, sockp->sa_data, hlen);
	}
	*hdrlen = hlen;

	return (0);
bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	bpf_bpfd_cnt++;
	BPFIF_UNLOCK(bp);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	int error;
	struct bpf_if *bp;
	struct ifnet *ifp;

	bp = d->bd_bif;
	/* Lock order here is interface lock before descriptor lock. */
	BPFIF_LOCK(bp);
	BPFD_LOCK(d);
	ifp = d->bd_bif->bif_ifp;

	/*
	 * Remove d from the interface's descriptor list.
	 */
	LIST_REMOVE(d, bd_next);

	bpf_bpfd_cnt--;
	d->bd_bif = NULL;
	BPFD_UNLOCK(d);
	BPFIF_UNLOCK(bp);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 * NOTE(review): bd_promisc is read after the locks are dropped —
	 * presumably safe because detach is single-threaded per descriptor;
	 * confirm against callers.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		error = ifpromisc(ifp, 0);
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
				"bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
}

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
static	int
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d;

	mtx_lock(&bpf_mtx);
	d = dev->si_drv1;
	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 */
	if (d != NULL) {
		mtx_unlock(&bpf_mtx);
		return (EBUSY);
	}
	/*
	 * Reserve the minor before dropping the global lock so a racing
	 * open sees it as busy while we allocate below.
	 */
	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
	mtx_unlock(&bpf_mtx);

	/* Create the device node if it was cloned rather than pre-named. */
	if ((dev->si_flags & SI_NAMED) == 0)
		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
		    "bpf%d", dev2unit(dev));
	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	dev->si_drv1 = d;

	/*
	 * For historical reasons, perform a one-time initialization call to
	 * the buffer routines, even though we're not yet committed to a
	 * particular buffer method.
	 */
	bpf_buffer_init(d);
	d->bd_bufmode = BPF_BUFMODE_BUFFER;
	d->bd_sig = SIGIO;
	d->bd_direction = BPF_D_INOUT;
	d->bd_pid = td->td_proc->p_pid;
#ifdef MAC
	mac_bpfdesc_init(d);
	mac_bpfdesc_create(td->td_ucred, d);
#endif
	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init(&d->bd_callout, CALLOUT_MPSAFE);
	knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static	int
bpfclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d = dev->si_drv1;

	/* Cancel any pending read timeout before tearing down. */
	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
	selwakeuppri(&d->bd_sel, PRINET);
#ifdef MAC
	mac_bpfdesc_destroy(d);
#endif /* MAC */
	knlist_destroy(&d->bd_sel.si_note);
	bpf_freed(d);
	dev->si_drv1 = NULL;
	free(d, M_BPF);

	return (0);
}

/*
 * bpfread - read next chunk of packets from buffers
 */
static	int
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d = dev->si_drv1;
	int timed_out;
	int error;

	/*
	 * Restrict application to use a buffer the same size as
	 * as kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	/* read(2) is only meaningful for the classic kernel-buffer mode. */
	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
		BPFD_UNLOCK(d);
		return (EOPNOTSUPP);
	}
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (ioflag & O_NONBLOCK) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		/* bd_rtout == 0 means sleep until explicitly woken. */
		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
		     "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 *
	 * XXXRW: More synchronization needed here: what if a second thread
	 * issues a read on the same fd at the same time?  Don't want this
	 * getting invalidated.
	 */
	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	BPFD_UNLOCK(d);

	return (error);
}

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	/* Wake sleeping readers, async signal listeners, select/poll, kqueue. */
	wakeup(d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}

/*
 * Callout handler for the read timeout armed by bpfpoll(): if data has
 * accumulated in the store buffer, wake the waiter.
 */
static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	BPFD_UNLOCK(d);
}

/*
 * Return non-zero if a read on this descriptor would not block: either a
 * hold buffer is pending (and not reclaimable), or immediate/timed-out
 * mode applies and the store buffer has data.  Caller holds the lock.
 */
static int
bpf_ready(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
		return (1);
	if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
	    d->bd_slen != 0)
		return (1);
	return (0);
}

/*
 * Inject a packet written by userspace onto the attached interface.
 * The write counters (bd_wcount/bd_wfcount/bd_wdcount) track attempts,
 * filter passes, and drops respectively.
 */
static int
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d = dev->si_drv1;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	struct sockaddr dst;
	int error, hlen;

	d->bd_pid = curthread->td_proc->p_pid;
	d->bd_wcount++;
	if (d->bd_bif == NULL) {
		d->bd_wdcount++;
		return (ENXIO);
	}

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0) {
		d->bd_wdcount++;
		return (ENETDOWN);
	}

	if (uio->uio_resid == 0) {
		d->bd_wdcount++;
		return (0);
	}

	bzero(&dst, sizeof(dst));
	m = NULL;
	hlen = 0;
	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
	    &m, &dst, &hlen, d->bd_wfilter);
	if (error) {
		d->bd_wdcount++;
		return (error);
	}
	d->bd_wfcount++;
	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		/* Duplicate the packet so it can be fed back as input. */
		mc = m_dup(m, M_DONTWAIT);
		if (mc != NULL)
			mc->m_pkthdr.rcvif = ifp;
		/* XXX Do not return the same packet twice. */
		if (d->bd_direction == BPF_D_INOUT)
			m->m_flags |= M_SKIP_BPF;
	} else
		mc = NULL;

	/* Strip the link header; it travels separately in dst. */
	m->m_pkthdr.len -= hlen;
	m->m_len -= hlen;
	m->m_data += hlen;	/* XXX */

#ifdef MAC
	BPFD_LOCK(d);
	mac_bpfdesc_create_mbuf(d, m);
	if (mc != NULL)
		mac_bpfdesc_create_mbuf(d, mc);
	BPFD_UNLOCK(d);
#endif

	error = (*ifp->if_output)(ifp, m, &dst, NULL);
	if (error)
		d->bd_wdcount++;

	if (mc != NULL) {
		if (error == 0)
			(*ifp->if_input)(ifp, mc);
		else
			m_freem(mc);
	}

	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
static void
reset_d(struct bpf_d *d)
{

	mtx_assert(&d->bd_mtx, MA_OWNED);
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_fcount = 0;
	d->bd_wcount = 0;
	d->bd_wfcount = 0;
	d->bd_wdcount = 0;
	d->bd_zcopy = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCSETWF		Set ethernet write filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 *  BIOCGDIRECTION	Get packet direction flag
 *  BIOCSDIRECTION	Set packet direction flag
 *  BIOCLOCK		Set "locked" flag
 *  BIOCFEEDBACK	Set packet feedback mode.
 *  BIOCSETZBUF		Set current zero-copy buffer locations.
 *  BIOCGETZMAX		Get maximum zero-copy buffer size.
 *  BIOCROTZBUF		Force rotation of zero-copy buffer
 *  BIOCSETBUFMODE	Set buffer mode.
 *  BIOCGETBUFMODE	Get current buffer mode.
 */
/* ARGSUSED */
static	int
bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	struct bpf_d *d = dev->si_drv1;
	int error = 0;

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	/*
	 * A locked descriptor (BIOCLOCK) may only issue the read-only /
	 * harmless subset of commands below; everything else is EPERM.
	 */
	if (d->bd_locked == 1) {
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case BIOCFEEDBACK:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCROTZBUF:
			break;
		default:
			return (EPERM);
		}
	}
	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			if (d->bd_bif == NULL)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		error = bpf_ioctl_sblen(d, (u_int *)addr);
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get current data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			/* XXXCSJP overflow */
			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get packet direction flag
	 */
	case BIOCGDIRECTION:
		*(u_int *)addr = d->bd_direction;
		break;

	/*
	 * Set packet direction flag
	 */
	case BIOCSDIRECTION:
		{
			u_int	direction;

			direction = *(u_int *)addr;
			switch (direction) {
			case BPF_D_IN:
			case BPF_D_INOUT:
			case BPF_D_OUT:
				d->bd_direction = direction;
				break;
			default:
				error = EINVAL;
			}
		}
		break;

	case BIOCFEEDBACK:
		d->bd_feedback = *(u_int *)addr;
		break;

	case BIOCLOCK:
		d->bd_locked = 1;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(&d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;

	case BIOCGETBUFMODE:
		*(u_int *)addr = d->bd_bufmode;
		break;

	case BIOCSETBUFMODE:
		/*
		 * Allow the buffering mode to be changed as long as we
		 * haven't yet committed to a particular mode.  Our
		 * definition of commitment, for now, is whether or not a
		 * buffer has been allocated or an interface attached, since
		 * that's the point where things get tricky.
		 */
		switch (*(u_int *)addr) {
		case BPF_BUFMODE_BUFFER:
			break;

		case BPF_BUFMODE_ZBUF:
			if (bpf_zerocopy_enable)
				break;
			/* FALLSTHROUGH */

		default:
			return (EINVAL);
		}

		BPFD_LOCK(d);
		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
			BPFD_UNLOCK(d);
			return (EBUSY);
		}
		d->bd_bufmode = *(u_int *)addr;
		BPFD_UNLOCK(d);
		break;

	case BIOCGETZMAX:
		return (bpf_ioctl_getzmax(td, d, (size_t *)addr));

	case BIOCSETZBUF:
		return (bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr));

	case BIOCROTZBUF:
		return (bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr));
	}
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
	struct bpf_insn *fcode, *old;
	u_int wfilter, flen, size;
#ifdef BPF_JITTER
	bpf_jit_filter *ofunc;
#endif

	/* Select which filter slot (write vs. read) is being replaced. */
	if (cmd == BIOCSETWF) {
		old = d->bd_wfilter;
		wfilter = 1;
#ifdef BPF_JITTER
		ofunc = NULL;
#endif
	} else {
		wfilter = 0;
		old = d->bd_rfilter;
#ifdef BPF_JITTER
		ofunc = d->bd_bfilter;
#endif
	}
	/* A NULL program with zero length clears the filter. */
	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = NULL;
		else {
			d->bd_rfilter = NULL;
#ifdef BPF_JITTER
			d->bd_bfilter = NULL;
#endif
		}
		reset_d(d);
		BPFD_UNLOCK(d);
		/* Free the old filter outside the descriptor lock. */
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif
		return (0);
	}
	flen = fp->bf_len;
	if (flen > bpf_maxinsns)
		return (EINVAL);

	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	/* Validate the program before it is allowed to run on packets. */
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = fcode;
		else {
			d->bd_rfilter = fcode;
#ifdef BPF_JITTER
			d->bd_bfilter = bpf_jitter(fcode, flen);
#endif
		}
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif

		return (0);
	}
	free((caddr_t)fcode, M_BPF);
	return (EINVAL);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	struct ifnet *theywant;

	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL || theywant->if_bpf == NULL)
		return (ENXIO);

	bp = theywant->if_bpf;

	/*
	 * Behavior here depends on the buffering model.  If we're using
	 * kernel memory buffers, then we can allocate them here.  If we're
	 * using zero-copy, then the user process must have registered
	 * buffers by the time we get here.  If not, return an error.
	 *
	 * XXXRW: There are locking issues here with multi-threaded use: what
	 * if two threads try to set the interface at once?
	 */
	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		if (d->bd_sbuf == NULL)
			bpf_buffer_alloc(d);
		KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
		break;

	case BPF_BUFMODE_ZBUF:
		if (d->bd_sbuf == NULL)
			return (EINVAL);
		break;

	default:
		panic("bpf_setif: bufmode %d", d->bd_bufmode);
	}
	if (bp != d->bd_bif) {
		if (d->bd_bif)
			/*
			 * Detach if attached to something else.
			 */
			bpf_detachd(d);

		bpf_attachd(d, bp);
	}
	BPFD_LOCK(d);
	reset_d(d);
	BPFD_UNLOCK(d);
	return (0);
}

/*
 * Support for select() and poll() system calls
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
static int
bpfpoll(struct cdev *dev, int events, struct thread *td)
{
	struct bpf_d *d;
	int revents;

	d = dev->si_drv1;
	if (d->bd_bif == NULL)
		return (ENXIO);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	/* Writes never block on a bpf descriptor, so always report writable. */
	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (events & (POLLIN | POLLRDNORM)) {
		if (bpf_ready(d))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}

/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
int
bpfkqfilter(struct cdev *dev, struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)dev->si_drv1;

	if (kn->kn_filter != EVFILT_READ)
		return (1);

	/*
	 * Refresh PID associated with this descriptor.
1457 */ 1458 BPFD_LOCK(d); 1459 d->bd_pid = curthread->td_proc->p_pid; 1460 kn->kn_fop = &bpfread_filtops; 1461 kn->kn_hook = d; 1462 knlist_add(&d->bd_sel.si_note, kn, 1); 1463 BPFD_UNLOCK(d); 1464 1465 return (0); 1466} 1467 1468static void 1469filt_bpfdetach(struct knote *kn) 1470{ 1471 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 1472 1473 knlist_remove(&d->bd_sel.si_note, kn, 0); 1474} 1475 1476static int 1477filt_bpfread(struct knote *kn, long hint) 1478{ 1479 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 1480 int ready; 1481 1482 BPFD_LOCK_ASSERT(d); 1483 ready = bpf_ready(d); 1484 if (ready) { 1485 kn->kn_data = d->bd_slen; 1486 if (d->bd_hbuf) 1487 kn->kn_data += d->bd_hlen; 1488 } 1489 else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 1490 callout_reset(&d->bd_callout, d->bd_rtout, 1491 bpf_timed_out, d); 1492 d->bd_state = BPF_WAITING; 1493 } 1494 1495 return (ready); 1496} 1497 1498/* 1499 * Incoming linkage from device drivers. Process the packet pkt, of length 1500 * pktlen, which is stored in a contiguous buffer. The packet is parsed 1501 * by each process' filter, and if accepted, stashed into the corresponding 1502 * buffer. 
 */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct bpf_d *d;
	u_int slen;
	int gottime;
	struct timeval tv;

	/* Take the timestamp lazily, once, only if some filter matches. */
	gottime = 0;
	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL)
			slen = (*(d->bd_bfilter->func))(pkt, pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen,
				    bpf_append_bytes, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Skip packets whose direction does not match the descriptor's direction
 * filter: BPF_D_IN wants only packets with a receive interface set,
 * BPF_D_OUT only packets without one.
 */
#define	BPF_CHECK_DIRECTION(d, m) \
	if (((d)->bd_direction == BPF_D_IN && (m)->m_pkthdr.rcvif == NULL) || \
	    ((d)->bd_direction == BPF_D_OUT && (m)->m_pkthdr.rcvif != NULL))

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	/* The sender asked that this packet not be tapped; clear and skip. */
	if (m->m_flags & M_SKIP_BPF) {
		m->m_flags &= ~M_SKIP_BPF;
		return;
	}

	gottime = 0;

	pktlen = m_length(m, NULL);

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPF_CHECK_DIRECTION(d, m)
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		/* XXX We cannot handle multiple mbufs. */
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL &&
		    m->m_next == NULL)
			slen = (*(d->bd_bfilter->func))(mtod(m, u_char *),
			    pktlen, pktlen);
		else
#endif
		/*
		 * NOTE(review): buflen 0 appears to tell bpf_filter that
		 * "pkt" is really an mbuf chain (cf. catchpacket's header
		 * comment) — confirm against bpf_filter's contract.
		 */
		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_append_mbuf, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Incoming linkage from device drivers, when packet is in
 * an mbuf chain and to be prepended by a contiguous header.
 */
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	if (m->m_flags & M_SKIP_BPF) {
		m->m_flags &= ~M_SKIP_BPF;
		return;
	}

	gottime = 0;

	pktlen = m_length(m, NULL);
	/*
	 * Craft on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only setup what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPF_CHECK_DIRECTION(d, m)
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_append_mbuf, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

#undef	BPF_CHECK_DIRECTION

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_append_mbuf is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
    struct timeval *tv)
{
	struct bpf_hdr hdr;
	int totlen, curlen;
	int hdrlen = d->bd_bif->bif_hdrlen;
	int do_wakeup = 0;

	BPFD_LOCK_ASSERT(d);

	/*
	 * Detect whether user space has released a buffer back to us, and if
	 * so, move it from being a hold buffer to a free buffer.  This may
	 * not be the best place to do it (for example, we might only want to
	 * run this check if we need the space), but for now it's a reliable
	 * spot to do it.
	 */
	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
	}

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}
	else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has already
		 * expired during a select call.  A packet arrived, so the
		 * reader should be woken up.
		 */
		do_wakeup = 1;

	/*
	 * Append the bpf header.  Note we append the actual header size, but
	 * move forward the length of the header plus padding.
	 */
	bzero(&hdr, sizeof(hdr));
	hdr.bh_tstamp = *tv;
	hdr.bh_datalen = pktlen;
	hdr.bh_hdrlen = hdrlen;
	hdr.bh_caplen = totlen - hdrlen;
	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen);
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{

	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it yet hasn't been marked
	 * free.
	 */
	bpf_free(d);
	if (d->bd_rfilter) {
		free((caddr_t)d->bd_rfilter, M_BPF);
#ifdef BPF_JITTER
		bpf_destroy_jit_filter(d->bd_bfilter);
#endif
	}
	if (d->bd_wfilter)
		free((caddr_t)d->bd_wfilter, M_BPF);
	mtx_destroy(&d->bd_mtx);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
 */
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;

	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
	if (bp == NULL)
		panic("bpfattach");

	LIST_INIT(&bp->bif_dlist);
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
	*driverp = bp;

	mtx_lock(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
	mtx_unlock(&bpf_mtx);

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	if (bootverbose)
		if_printf(ifp, "bpf attached\n");
}

/*
 * Detach bpf from an interface.
 * This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL.  Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp;
	struct bpf_d *d;

	/* Locate BPF interface information */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (ifp == bp->bif_ifp)
			break;
	}

	/* Interface wasn't attached */
	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
		mtx_unlock(&bpf_mtx);
		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
		return;
	}

	LIST_REMOVE(bp, bif_next);
	mtx_unlock(&bpf_mtx);

	/* Detach and wake every descriptor still bound to this interface. */
	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
		bpf_detachd(d);
		BPFD_LOCK(d);
		bpf_wakeup(d);
		BPFD_UNLOCK(d);
	}

	mtx_destroy(&bp->bif_mtx);
	free(bp, M_BPF);
}

/*
 * Get a list of available data link type of the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		/*
		 * With a NULL list the caller is only asking how many
		 * DLTs are available; otherwise copy each one out.
		 */
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len) {
				mtx_unlock(&bpf_mtx);
				return (ENOMEM);
			}
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	mtx_unlock(&bpf_mtx);
	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
1894 */ 1895static int 1896bpf_setdlt(struct bpf_d *d, u_int dlt) 1897{ 1898 int error, opromisc; 1899 struct ifnet *ifp; 1900 struct bpf_if *bp; 1901 1902 if (d->bd_bif->bif_dlt == dlt) 1903 return (0); 1904 ifp = d->bd_bif->bif_ifp; 1905 mtx_lock(&bpf_mtx); 1906 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 1907 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) 1908 break; 1909 } 1910 mtx_unlock(&bpf_mtx); 1911 if (bp != NULL) { 1912 opromisc = d->bd_promisc; 1913 bpf_detachd(d); 1914 bpf_attachd(d, bp); 1915 BPFD_LOCK(d); 1916 reset_d(d); 1917 BPFD_UNLOCK(d); 1918 if (opromisc) { 1919 error = ifpromisc(bp->bif_ifp, 1); 1920 if (error) 1921 if_printf(bp->bif_ifp, 1922 "bpf_setdlt: ifpromisc failed (%d)\n", 1923 error); 1924 else 1925 d->bd_promisc = 1; 1926 } 1927 } 1928 return (bp == NULL ? EINVAL : 0); 1929} 1930 1931static void 1932bpf_clone(void *arg, struct ucred *cred, char *name, int namelen, 1933 struct cdev **dev) 1934{ 1935 int u; 1936 1937 if (*dev != NULL) 1938 return; 1939 if (dev_stdclone(name, NULL, "bpf", &u) != 1) 1940 return; 1941 *dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600, 1942 "bpf%d", u); 1943 dev_ref(*dev); 1944 (*dev)->si_flags |= SI_CHEAPCLONE; 1945 return; 1946} 1947 1948static void 1949bpf_drvinit(void *unused) 1950{ 1951 1952 mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF); 1953 LIST_INIT(&bpf_iflist); 1954 EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000); 1955} 1956 1957static void 1958bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd) 1959{ 1960 1961 bzero(d, sizeof(*d)); 1962 BPFD_LOCK_ASSERT(bd); 1963 d->bd_structsize = sizeof(*d); 1964 d->bd_immediate = bd->bd_immediate; 1965 d->bd_promisc = bd->bd_promisc; 1966 d->bd_hdrcmplt = bd->bd_hdrcmplt; 1967 d->bd_direction = bd->bd_direction; 1968 d->bd_feedback = bd->bd_feedback; 1969 d->bd_async = bd->bd_async; 1970 d->bd_rcount = bd->bd_rcount; 1971 d->bd_dcount = bd->bd_dcount; 1972 d->bd_fcount = bd->bd_fcount; 1973 d->bd_sig = bd->bd_sig; 1974 
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname,
	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	d->bd_locked = bd->bd_locked;
	d->bd_wcount = bd->bd_wcount;
	d->bd_wdcount = bd->bd_wdcount;
	d->bd_wfcount = bd->bd_wfcount;
	d->bd_zcopy = bd->bd_zcopy;
	d->bd_bufmode = bd->bd_bufmode;
}

/*
 * Sysctl handler exporting a snapshot of every BPF descriptor as an
 * array of struct xbpf_d.
 */
static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct xbpf_d *xbdbuf, *xbd;
	int index, error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct. It is possible for non
	 * privileged users to open bpf devices. It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = priv_check(req->td, PRIV_NET_BPF);
	if (error)
		return (error);
	/* A NULL oldptr is a size probe only. */
	if (req->oldptr == NULL)
		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
	if (bpf_bpfd_cnt == 0)
		return (SYSCTL_OUT(req, 0, 0));
	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
	mtx_lock(&bpf_mtx);
	/* Re-check the needed size under the lock; the count may have grown. */
	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
		mtx_unlock(&bpf_mtx);
		free(xbdbuf, M_BPF);
		return (ENOMEM);
	}
	index = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_LOCK(bp);
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
		BPFIF_UNLOCK(bp);
	}
	mtx_unlock(&bpf_mtx);
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}

SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL);

#else /* !DEV_BPF && !NETGRAPH_BPF */
/*
 * NOP stubs to allow bpf-using drivers to load and function.
2038 * 2039 * A 'better' implementation would allow the core bpf functionality 2040 * to be loaded at runtime. 2041 */ 2042static struct bpf_if bp_null; 2043 2044void 2045bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 2046{ 2047} 2048 2049void 2050bpf_mtap(struct bpf_if *bp, struct mbuf *m) 2051{ 2052} 2053 2054void 2055bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m) 2056{ 2057} 2058 2059void 2060bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) 2061{ 2062 2063 bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf); 2064} 2065 2066void 2067bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) 2068{ 2069 2070 *driverp = &bp_null; 2071} 2072 2073void 2074bpfdetach(struct ifnet *ifp) 2075{ 2076} 2077 2078u_int 2079bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen) 2080{ 2081 return -1; /* "no filter" behaviour */ 2082} 2083 2084int 2085bpf_validate(const struct bpf_insn *f, int len) 2086{ 2087 return 0; /* false */ 2088} 2089 2090#endif /* !DEV_BPF && !NETGRAPH_BPF */ 2091