bpf.c revision 180310
1/*- 2 * Copyright (c) 1990, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from the Stanford/CMU enet packet filter, 6 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 7 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 8 * Berkeley Laboratory. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)bpf.c 8.4 (Berkeley) 1/9/95 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: head/sys/net/bpf.c 180310 2008-07-05 20:11:28Z csjp $"); 39 40#include "opt_bpf.h" 41#include "opt_mac.h" 42#include "opt_netgraph.h" 43 44#include <sys/types.h> 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/conf.h> 48#include <sys/fcntl.h> 49#include <sys/malloc.h> 50#include <sys/mbuf.h> 51#include <sys/time.h> 52#include <sys/priv.h> 53#include <sys/proc.h> 54#include <sys/signalvar.h> 55#include <sys/filio.h> 56#include <sys/sockio.h> 57#include <sys/ttycom.h> 58#include <sys/uio.h> 59 60#include <sys/event.h> 61#include <sys/file.h> 62#include <sys/poll.h> 63#include <sys/proc.h> 64 65#include <sys/socket.h> 66 67#include <net/if.h> 68#include <net/bpf.h> 69#include <net/bpf_buffer.h> 70#ifdef BPF_JITTER 71#include <net/bpf_jitter.h> 72#endif 73#include <net/bpf_zerocopy.h> 74#include <net/bpfdesc.h> 75 76#include <netinet/in.h> 77#include <netinet/if_ether.h> 78#include <sys/kernel.h> 79#include <sys/sysctl.h> 80 81#include <net80211/ieee80211_freebsd.h> 82 83#include <security/mac/mac_framework.h> 84 85MALLOC_DEFINE(M_BPF, "BPF", "BPF data"); 86 87#if defined(DEV_BPF) || defined(NETGRAPH_BPF) 88 89#define PRINET 26 /* interruptible */ 90 91/* 92 * bpf_iflist is a list of BPF interface structures, each corresponding to a 93 * specific DLT. The same network interface might have several BPF interface 94 * structures registered by different layers in the stack (i.e., 802.11 95 * frames, ethernet frames, etc). 96 */ 97static LIST_HEAD(, bpf_if) bpf_iflist; 98static struct mtx bpf_mtx; /* bpf global lock */ 99static int bpf_bpfd_cnt; 100 101static void bpf_attachd(struct bpf_d *, struct bpf_if *); 102static void bpf_detachd(struct bpf_d *); 103static void bpf_freed(struct bpf_d *); 104static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **, 105 struct sockaddr *, int *, struct bpf_insn *); 106static int bpf_setif(struct bpf_d *, struct ifreq *); 107static void bpf_timed_out(void *); 108static __inline void 109 bpf_wakeup(struct bpf_d *); 110static void catchpacket(struct bpf_d *, u_char *, u_int, u_int, 111 void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int), 112 struct timeval *); 113static void reset_d(struct bpf_d *); 114static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd); 115static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); 116static int bpf_setdlt(struct bpf_d *, u_int); 117static void filt_bpfdetach(struct knote *); 118static int filt_bpfread(struct knote *, long); 119static void bpf_drvinit(void *); 120static void bpf_clone(void *, struct ucred *, char *, int, struct cdev **); 121static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS); 122 123SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl"); 124static int bpf_maxinsns = BPF_MAXINSNS; 125SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW, 126 &bpf_maxinsns, 0, "Maximum bpf program instructions"); 127static int bpf_zerocopy_enable = 0; 128SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW, 129 &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions"); 130SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW, 131 bpf_stats_sysctl, "bpf statistics portal"); 132 133static d_open_t bpfopen; 134static d_close_t bpfclose; 135static d_read_t bpfread; 136static d_write_t bpfwrite; 137static d_ioctl_t bpfioctl; 138static d_poll_t bpfpoll; 139static d_kqfilter_t bpfkqfilter; 140 141static struct cdevsw bpf_cdevsw = { 142 .d_version = D_VERSION, 143 .d_flags = D_TRACKCLOSE, 144 .d_open = bpfopen, 145 .d_close = bpfclose, 146 .d_read = bpfread, 147 .d_write = bpfwrite, 148 .d_ioctl = bpfioctl, 149 .d_poll = bpfpoll, 150 .d_name = "bpf", 151 .d_kqfilter = bpfkqfilter, 152}; 153 154static struct filterops bpfread_filtops = 155 { 1, NULL, filt_bpfdetach, filt_bpfread }; 156 157/* 158 * Wrapper functions for various buffering methods. If the set of buffer 159 * modes expands, we will probably want to introduce a switch data structure 160 * similar to protosw, et. 161 */ 162static void 163bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src, 164 u_int len) 165{ 166 167 BPFD_LOCK_ASSERT(d); 168 169 switch (d->bd_bufmode) { 170 case BPF_BUFMODE_BUFFER: 171 return (bpf_buffer_append_bytes(d, buf, offset, src, len)); 172 173 case BPF_BUFMODE_ZBUF: 174 d->bd_zcopy++; 175 return (bpf_zerocopy_append_bytes(d, buf, offset, src, len)); 176 177 default: 178 panic("bpf_buf_append_bytes"); 179 } 180} 181 182static void 183bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src, 184 u_int len) 185{ 186 187 BPFD_LOCK_ASSERT(d); 188 189 switch (d->bd_bufmode) { 190 case BPF_BUFMODE_BUFFER: 191 return (bpf_buffer_append_mbuf(d, buf, offset, src, len)); 192 193 case BPF_BUFMODE_ZBUF: 194 d->bd_zcopy++; 195 return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len)); 196 197 default: 198 panic("bpf_buf_append_mbuf"); 199 } 200} 201 202/* 203 * This function gets called when the free buffer is re-assigned. 204 */ 205static void 206bpf_buf_reclaimed(struct bpf_d *d) 207{ 208 209 BPFD_LOCK_ASSERT(d); 210 211 switch (d->bd_bufmode) { 212 case BPF_BUFMODE_BUFFER: 213 return; 214 215 case BPF_BUFMODE_ZBUF: 216 bpf_zerocopy_buf_reclaimed(d); 217 return; 218 219 default: 220 panic("bpf_buf_reclaimed"); 221 } 222} 223 224/* 225 * If the buffer mechanism has a way to decide that a held buffer can be made 226 * free, then it is exposed via the bpf_canfreebuf() interface. (1) is 227 * returned if the buffer can be discarded, (0) is returned if it cannot. 228 */ 229static int 230bpf_canfreebuf(struct bpf_d *d) 231{ 232 233 BPFD_LOCK_ASSERT(d); 234 235 switch (d->bd_bufmode) { 236 case BPF_BUFMODE_ZBUF: 237 return (bpf_zerocopy_canfreebuf(d)); 238 } 239 return (0); 240} 241 242/* 243 * Allow the buffer model to indicate that the current store buffer is 244 * immutable, regardless of the appearance of space. Return (1) if the 245 * buffer is writable, and (0) if not. 246 */ 247static int 248bpf_canwritebuf(struct bpf_d *d) 249{ 250 251 BPFD_LOCK_ASSERT(d); 252 253 switch (d->bd_bufmode) { 254 case BPF_BUFMODE_ZBUF: 255 return (bpf_zerocopy_canwritebuf(d)); 256 } 257 return (1); 258} 259 260/* 261 * Notify buffer model that an attempt to write to the store buffer has 262 * resulted in a dropped packet, in which case the buffer may be considered 263 * full. 264 */ 265static void 266bpf_buffull(struct bpf_d *d) 267{ 268 269 BPFD_LOCK_ASSERT(d); 270 271 switch (d->bd_bufmode) { 272 case BPF_BUFMODE_ZBUF: 273 bpf_zerocopy_buffull(d); 274 break; 275 } 276} 277 278/* 279 * Notify the buffer model that a buffer has moved into the hold position. 280 */ 281void 282bpf_bufheld(struct bpf_d *d) 283{ 284 285 BPFD_LOCK_ASSERT(d); 286 287 switch (d->bd_bufmode) { 288 case BPF_BUFMODE_ZBUF: 289 bpf_zerocopy_bufheld(d); 290 break; 291 } 292} 293 294static void 295bpf_free(struct bpf_d *d) 296{ 297 298 switch (d->bd_bufmode) { 299 case BPF_BUFMODE_BUFFER: 300 return (bpf_buffer_free(d)); 301 302 case BPF_BUFMODE_ZBUF: 303 return (bpf_zerocopy_free(d)); 304 305 default: 306 panic("bpf_buf_free"); 307 } 308} 309 310static int 311bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio) 312{ 313 314 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) 315 return (EOPNOTSUPP); 316 return (bpf_buffer_uiomove(d, buf, len, uio)); 317} 318 319static int 320bpf_ioctl_sblen(struct bpf_d *d, u_int *i) 321{ 322 323 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) 324 return (EOPNOTSUPP); 325 return (bpf_buffer_ioctl_sblen(d, i)); 326} 327 328static int 329bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i) 330{ 331 332 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 333 return (EOPNOTSUPP); 334 return (bpf_zerocopy_ioctl_getzmax(td, d, i)); 335} 336 337static int 338bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) 339{ 340 341 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 342 return (EOPNOTSUPP); 343 return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz)); 344} 345 346static int 347bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) 348{ 349 350 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 351 return (EOPNOTSUPP); 352 return (bpf_zerocopy_ioctl_setzbuf(td, d, bz)); 353} 354 355/* 356 * General BPF functions. 357 */ 358static int 359bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp, 360 struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter) 361{ 362 const struct ieee80211_bpf_params *p; 363 struct ether_header *eh; 364 struct mbuf *m; 365 int error; 366 int len; 367 int hlen; 368 int slen; 369 370 /* 371 * Build a sockaddr based on the data link layer type. 372 * We do this at this level because the ethernet header 373 * is copied directly into the data field of the sockaddr. 374 * In the case of SLIP, there is no header and the packet 375 * is forwarded as is. 376 * Also, we are careful to leave room at the front of the mbuf 377 * for the link level header. 378 */ 379 switch (linktype) { 380 381 case DLT_SLIP: 382 sockp->sa_family = AF_INET; 383 hlen = 0; 384 break; 385 386 case DLT_EN10MB: 387 sockp->sa_family = AF_UNSPEC; 388 /* XXX Would MAXLINKHDR be better? */ 389 hlen = ETHER_HDR_LEN; 390 break; 391 392 case DLT_FDDI: 393 sockp->sa_family = AF_IMPLINK; 394 hlen = 0; 395 break; 396 397 case DLT_RAW: 398 sockp->sa_family = AF_UNSPEC; 399 hlen = 0; 400 break; 401 402 case DLT_NULL: 403 /* 404 * null interface types require a 4 byte pseudo header which 405 * corresponds to the address family of the packet. 406 */ 407 sockp->sa_family = AF_UNSPEC; 408 hlen = 4; 409 break; 410 411 case DLT_ATM_RFC1483: 412 /* 413 * en atm driver requires 4-byte atm pseudo header. 414 * though it isn't standard, vpi:vci needs to be 415 * specified anyway. 416 */ 417 sockp->sa_family = AF_UNSPEC; 418 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */ 419 break; 420 421 case DLT_PPP: 422 sockp->sa_family = AF_UNSPEC; 423 hlen = 4; /* This should match PPP_HDRLEN */ 424 break; 425 426 case DLT_IEEE802_11: /* IEEE 802.11 wireless */ 427 sockp->sa_family = AF_IEEE80211; 428 hlen = 0; 429 break; 430 431 case DLT_IEEE802_11_RADIO: /* IEEE 802.11 wireless w/ phy params */ 432 sockp->sa_family = AF_IEEE80211; 433 sockp->sa_len = 12; /* XXX != 0 */ 434 hlen = sizeof(struct ieee80211_bpf_params); 435 break; 436 437 default: 438 return (EIO); 439 } 440 441 len = uio->uio_resid; 442 443 if (len - hlen > ifp->if_mtu) 444 return (EMSGSIZE); 445 446 if ((unsigned)len > MCLBYTES) 447 return (EIO); 448 449 if (len > MHLEN) 450 m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR); 451 else 452 MGETHDR(m, M_WAIT, MT_DATA); 453 m->m_pkthdr.len = m->m_len = len; 454 m->m_pkthdr.rcvif = NULL; 455 *mp = m; 456 457 if (m->m_len < hlen) { 458 error = EPERM; 459 goto bad; 460 } 461 462 error = uiomove(mtod(m, u_char *), len, uio); 463 if (error) 464 goto bad; 465 466 slen = bpf_filter(wfilter, mtod(m, u_char *), len, len); 467 if (slen == 0) { 468 error = EPERM; 469 goto bad; 470 } 471 472 /* Check for multicast destination */ 473 switch (linktype) { 474 case DLT_EN10MB: 475 eh = mtod(m, struct ether_header *); 476 if (ETHER_IS_MULTICAST(eh->ether_dhost)) { 477 if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost, 478 ETHER_ADDR_LEN) == 0) 479 m->m_flags |= M_BCAST; 480 else 481 m->m_flags |= M_MCAST; 482 } 483 break; 484 } 485 486 /* 487 * Make room for link header, and copy it to sockaddr 488 */ 489 if (hlen != 0) { 490 if (sockp->sa_family == AF_IEEE80211) { 491 /* 492 * Collect true length from the parameter header 493 * NB: sockp is known to be zero'd so if we do a 494 * short copy unspecified parameters will be 495 * zero. 496 * NB: packet may not be aligned after stripping 497 * bpf params 498 * XXX check ibp_vers 499 */ 500 p = mtod(m, const struct ieee80211_bpf_params *); 501 hlen = p->ibp_len; 502 if (hlen > sizeof(sockp->sa_data)) { 503 error = EINVAL; 504 goto bad; 505 } 506 } 507 bcopy(m->m_data, sockp->sa_data, hlen); 508 } 509 *hdrlen = hlen; 510 511 return (0); 512bad: 513 m_freem(m); 514 return (error); 515} 516 517/* 518 * Attach file to the bpf interface, i.e. make d listen on bp. 519 */ 520static void 521bpf_attachd(struct bpf_d *d, struct bpf_if *bp) 522{ 523 /* 524 * Point d at bp, and add d to the interface's list of listeners. 525 * Finally, point the driver's bpf cookie at the interface so 526 * it will divert packets to bpf. 527 */ 528 BPFIF_LOCK(bp); 529 d->bd_bif = bp; 530 LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next); 531 532 bpf_bpfd_cnt++; 533 BPFIF_UNLOCK(bp); 534} 535 536/* 537 * Detach a file from its interface. 538 */ 539static void 540bpf_detachd(struct bpf_d *d) 541{ 542 int error; 543 struct bpf_if *bp; 544 struct ifnet *ifp; 545 546 bp = d->bd_bif; 547 BPFIF_LOCK(bp); 548 BPFD_LOCK(d); 549 ifp = d->bd_bif->bif_ifp; 550 551 /* 552 * Remove d from the interface's descriptor list. 553 */ 554 LIST_REMOVE(d, bd_next); 555 556 bpf_bpfd_cnt--; 557 d->bd_bif = NULL; 558 BPFD_UNLOCK(d); 559 BPFIF_UNLOCK(bp); 560 561 /* 562 * Check if this descriptor had requested promiscuous mode. 563 * If so, turn it off. 564 */ 565 if (d->bd_promisc) { 566 d->bd_promisc = 0; 567 error = ifpromisc(ifp, 0); 568 if (error != 0 && error != ENXIO) { 569 /* 570 * ENXIO can happen if a pccard is unplugged 571 * Something is really wrong if we were able to put 572 * the driver into promiscuous mode, but can't 573 * take it out. 574 */ 575 if_printf(bp->bif_ifp, 576 "bpf_detach: ifpromisc failed (%d)\n", error); 577 } 578 } 579} 580 581/* 582 * Open ethernet device. Returns ENXIO for illegal minor device number, 583 * EBUSY if file is open by another process. 584 */ 585/* ARGSUSED */ 586static int 587bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td) 588{ 589 struct bpf_d *d; 590 591 mtx_lock(&bpf_mtx); 592 d = dev->si_drv1; 593 /* 594 * Each minor can be opened by only one process. If the requested 595 * minor is in use, return EBUSY. 596 */ 597 if (d != NULL) { 598 mtx_unlock(&bpf_mtx); 599 return (EBUSY); 600 } 601 dev->si_drv1 = (struct bpf_d *)~0; /* mark device in use */ 602 mtx_unlock(&bpf_mtx); 603 604 if ((dev->si_flags & SI_NAMED) == 0) 605 make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600, 606 "bpf%d", dev2unit(dev)); 607 MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO); 608 dev->si_drv1 = d; 609 610 /* 611 * For historical reasons, perform a one-time initialization call to 612 * the buffer routines, even though we're not yet committed to a 613 * particular buffer method. 614 */ 615 bpf_buffer_init(d); 616 d->bd_bufmode = BPF_BUFMODE_BUFFER; 617 d->bd_sig = SIGIO; 618 d->bd_direction = BPF_D_INOUT; 619 d->bd_pid = td->td_proc->p_pid; 620#ifdef MAC 621 mac_bpfdesc_init(d); 622 mac_bpfdesc_create(td->td_ucred, d); 623#endif 624 mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF); 625 callout_init(&d->bd_callout, CALLOUT_MPSAFE); 626 knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL); 627 628 return (0); 629} 630 631/* 632 * Close the descriptor by detaching it from its interface, 633 * deallocating its buffers, and marking it free. 634 */ 635/* ARGSUSED */ 636static int 637bpfclose(struct cdev *dev, int flags, int fmt, struct thread *td) 638{ 639 struct bpf_d *d = dev->si_drv1; 640 641 BPFD_LOCK(d); 642 if (d->bd_state == BPF_WAITING) 643 callout_stop(&d->bd_callout); 644 d->bd_state = BPF_IDLE; 645 BPFD_UNLOCK(d); 646 funsetown(&d->bd_sigio); 647 mtx_lock(&bpf_mtx); 648 if (d->bd_bif) 649 bpf_detachd(d); 650 mtx_unlock(&bpf_mtx); 651 selwakeuppri(&d->bd_sel, PRINET); 652#ifdef MAC 653 mac_bpfdesc_destroy(d); 654#endif /* MAC */ 655 knlist_destroy(&d->bd_sel.si_note); 656 bpf_freed(d); 657 dev->si_drv1 = NULL; 658 free(d, M_BPF); 659 660 return (0); 661} 662 663/* 664 * bpfread - read next chunk of packets from buffers 665 */ 666static int 667bpfread(struct cdev *dev, struct uio *uio, int ioflag) 668{ 669 struct bpf_d *d = dev->si_drv1; 670 int timed_out; 671 int error; 672 673 /* 674 * Restrict application to use a buffer the same size as 675 * as kernel buffers. 676 */ 677 if (uio->uio_resid != d->bd_bufsize) 678 return (EINVAL); 679 680 BPFD_LOCK(d); 681 d->bd_pid = curthread->td_proc->p_pid; 682 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) { 683 BPFD_UNLOCK(d); 684 return (EOPNOTSUPP); 685 } 686 if (d->bd_state == BPF_WAITING) 687 callout_stop(&d->bd_callout); 688 timed_out = (d->bd_state == BPF_TIMED_OUT); 689 d->bd_state = BPF_IDLE; 690 /* 691 * If the hold buffer is empty, then do a timed sleep, which 692 * ends when the timeout expires or when enough packets 693 * have arrived to fill the store buffer. 694 */ 695 while (d->bd_hbuf == NULL) { 696 if ((d->bd_immediate || timed_out) && d->bd_slen != 0) { 697 /* 698 * A packet(s) either arrived since the previous 699 * read or arrived while we were asleep. 700 * Rotate the buffers and return what's here. 701 */ 702 ROTATE_BUFFERS(d); 703 break; 704 } 705 706 /* 707 * No data is available, check to see if the bpf device 708 * is still pointed at a real interface. If not, return 709 * ENXIO so that the userland process knows to rebind 710 * it before using it again. 711 */ 712 if (d->bd_bif == NULL) { 713 BPFD_UNLOCK(d); 714 return (ENXIO); 715 } 716 717 if (ioflag & O_NONBLOCK) { 718 BPFD_UNLOCK(d); 719 return (EWOULDBLOCK); 720 } 721 error = msleep(d, &d->bd_mtx, PRINET|PCATCH, 722 "bpf", d->bd_rtout); 723 if (error == EINTR || error == ERESTART) { 724 BPFD_UNLOCK(d); 725 return (error); 726 } 727 if (error == EWOULDBLOCK) { 728 /* 729 * On a timeout, return what's in the buffer, 730 * which may be nothing. If there is something 731 * in the store buffer, we can rotate the buffers. 732 */ 733 if (d->bd_hbuf) 734 /* 735 * We filled up the buffer in between 736 * getting the timeout and arriving 737 * here, so we don't need to rotate. 738 */ 739 break; 740 741 if (d->bd_slen == 0) { 742 BPFD_UNLOCK(d); 743 return (0); 744 } 745 ROTATE_BUFFERS(d); 746 break; 747 } 748 } 749 /* 750 * At this point, we know we have something in the hold slot. 751 */ 752 BPFD_UNLOCK(d); 753 754 /* 755 * Move data from hold buffer into user space. 756 * We know the entire buffer is transferred since 757 * we checked above that the read buffer is bpf_bufsize bytes. 758 * 759 * XXXRW: More synchronization needed here: what if a second thread 760 * issues a read on the same fd at the same time? Don't want this 761 * getting invalidated. 762 */ 763 error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio); 764 765 BPFD_LOCK(d); 766 d->bd_fbuf = d->bd_hbuf; 767 d->bd_hbuf = NULL; 768 d->bd_hlen = 0; 769 bpf_buf_reclaimed(d); 770 BPFD_UNLOCK(d); 771 772 return (error); 773} 774 775/* 776 * If there are processes sleeping on this descriptor, wake them up. 777 */ 778static __inline void 779bpf_wakeup(struct bpf_d *d) 780{ 781 782 BPFD_LOCK_ASSERT(d); 783 if (d->bd_state == BPF_WAITING) { 784 callout_stop(&d->bd_callout); 785 d->bd_state = BPF_IDLE; 786 } 787 wakeup(d); 788 if (d->bd_async && d->bd_sig && d->bd_sigio) 789 pgsigio(&d->bd_sigio, d->bd_sig, 0); 790 791 selwakeuppri(&d->bd_sel, PRINET); 792 KNOTE_LOCKED(&d->bd_sel.si_note, 0); 793} 794 795static void 796bpf_timed_out(void *arg) 797{ 798 struct bpf_d *d = (struct bpf_d *)arg; 799 800 BPFD_LOCK(d); 801 if (d->bd_state == BPF_WAITING) { 802 d->bd_state = BPF_TIMED_OUT; 803 if (d->bd_slen != 0) 804 bpf_wakeup(d); 805 } 806 BPFD_UNLOCK(d); 807} 808 809static int 810bpf_ready(struct bpf_d *d) 811{ 812 813 BPFD_LOCK_ASSERT(d); 814 815 if (!bpf_canfreebuf(d) && d->bd_hlen != 0) 816 return (1); 817 if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) && 818 d->bd_slen != 0) 819 return (1); 820 return (0); 821} 822 823static int 824bpfwrite(struct cdev *dev, struct uio *uio, int ioflag) 825{ 826 struct bpf_d *d = dev->si_drv1; 827 struct ifnet *ifp; 828 struct mbuf *m, *mc; 829 struct sockaddr dst; 830 int error, hlen; 831 832 d->bd_pid = curthread->td_proc->p_pid; 833 d->bd_wcount++; 834 if (d->bd_bif == NULL) { 835 d->bd_wdcount++; 836 return (ENXIO); 837 } 838 839 ifp = d->bd_bif->bif_ifp; 840 841 if ((ifp->if_flags & IFF_UP) == 0) { 842 d->bd_wdcount++; 843 return (ENETDOWN); 844 } 845 846 if (uio->uio_resid == 0) { 847 d->bd_wdcount++; 848 return (0); 849 } 850 851 bzero(&dst, sizeof(dst)); 852 m = NULL; 853 hlen = 0; 854 error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp, 855 &m, &dst, &hlen, d->bd_wfilter); 856 if (error) { 857 d->bd_wdcount++; 858 return (error); 859 } 860 d->bd_wfcount++; 861 if (d->bd_hdrcmplt) 862 dst.sa_family = pseudo_AF_HDRCMPLT; 863 864 if (d->bd_feedback) { 865 mc = m_dup(m, M_DONTWAIT); 866 if (mc != NULL) 867 mc->m_pkthdr.rcvif = ifp; 868 /* Set M_PROMISC for outgoing packets to be discarded. */ 869 if (d->bd_direction == BPF_D_INOUT) 870 m->m_flags |= M_PROMISC; 871 } else 872 mc = NULL; 873 874 m->m_pkthdr.len -= hlen; 875 m->m_len -= hlen; 876 m->m_data += hlen; /* XXX */ 877 878#ifdef MAC 879 BPFD_LOCK(d); 880 mac_bpfdesc_create_mbuf(d, m); 881 if (mc != NULL) 882 mac_bpfdesc_create_mbuf(d, mc); 883 BPFD_UNLOCK(d); 884#endif 885 886 error = (*ifp->if_output)(ifp, m, &dst, NULL); 887 if (error) 888 d->bd_wdcount++; 889 890 if (mc != NULL) { 891 if (error == 0) 892 (*ifp->if_input)(ifp, mc); 893 else 894 m_freem(mc); 895 } 896 897 return (error); 898} 899 900/* 901 * Reset a descriptor by flushing its packet buffer and clearing the 902 * receive and drop counts. 903 */ 904static void 905reset_d(struct bpf_d *d) 906{ 907 908 mtx_assert(&d->bd_mtx, MA_OWNED); 909 if (d->bd_hbuf) { 910 /* Free the hold buffer. */ 911 d->bd_fbuf = d->bd_hbuf; 912 d->bd_hbuf = NULL; 913 bpf_buf_reclaimed(d); 914 } 915 d->bd_slen = 0; 916 d->bd_hlen = 0; 917 d->bd_rcount = 0; 918 d->bd_dcount = 0; 919 d->bd_fcount = 0; 920 d->bd_wcount = 0; 921 d->bd_wfcount = 0; 922 d->bd_wdcount = 0; 923 d->bd_zcopy = 0; 924} 925 926/* 927 * FIONREAD Check for read packet available. 928 * SIOCGIFADDR Get interface address - convenient hook to driver. 929 * BIOCGBLEN Get buffer len [for read()]. 930 * BIOCSETF Set ethernet read filter. 931 * BIOCSETWF Set ethernet write filter. 932 * BIOCFLUSH Flush read packet buffer. 933 * BIOCPROMISC Put interface into promiscuous mode. 934 * BIOCGDLT Get link layer type. 935 * BIOCGETIF Get interface name. 936 * BIOCSETIF Set interface. 937 * BIOCSRTIMEOUT Set read timeout. 938 * BIOCGRTIMEOUT Get read timeout. 939 * BIOCGSTATS Get packet stats. 940 * BIOCIMMEDIATE Set immediate mode. 941 * BIOCVERSION Get filter language version. 942 * BIOCGHDRCMPLT Get "header already complete" flag 943 * BIOCSHDRCMPLT Set "header already complete" flag 944 * BIOCGDIRECTION Get packet direction flag 945 * BIOCSDIRECTION Set packet direction flag 946 * BIOCLOCK Set "locked" flag 947 * BIOCFEEDBACK Set packet feedback mode. 948 * BIOCSETZBUF Set current zero-copy buffer locations. 949 * BIOCGETZMAX Get maximum zero-copy buffer size. 950 * BIOCROTZBUF Force rotation of zero-copy buffer 951 * BIOCSETBUFMODE Set buffer mode. 952 * BIOCGETBUFMODE Get current buffer mode. 953 */ 954/* ARGSUSED */ 955static int 956bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, 957 struct thread *td) 958{ 959 struct bpf_d *d = dev->si_drv1; 960 int error = 0; 961 962 /* 963 * Refresh PID associated with this descriptor. 964 */ 965 BPFD_LOCK(d); 966 d->bd_pid = td->td_proc->p_pid; 967 if (d->bd_state == BPF_WAITING) 968 callout_stop(&d->bd_callout); 969 d->bd_state = BPF_IDLE; 970 BPFD_UNLOCK(d); 971 972 if (d->bd_locked == 1) { 973 switch (cmd) { 974 case BIOCGBLEN: 975 case BIOCFLUSH: 976 case BIOCGDLT: 977 case BIOCGDLTLIST: 978 case BIOCGETIF: 979 case BIOCGRTIMEOUT: 980 case BIOCGSTATS: 981 case BIOCVERSION: 982 case BIOCGRSIG: 983 case BIOCGHDRCMPLT: 984 case BIOCFEEDBACK: 985 case FIONREAD: 986 case BIOCLOCK: 987 case BIOCSRTIMEOUT: 988 case BIOCIMMEDIATE: 989 case TIOCGPGRP: 990 case BIOCROTZBUF: 991 break; 992 default: 993 return (EPERM); 994 } 995 } 996 switch (cmd) { 997 998 default: 999 error = EINVAL; 1000 break; 1001 1002 /* 1003 * Check for read packet available. 1004 */ 1005 case FIONREAD: 1006 { 1007 int n; 1008 1009 BPFD_LOCK(d); 1010 n = d->bd_slen; 1011 if (d->bd_hbuf) 1012 n += d->bd_hlen; 1013 BPFD_UNLOCK(d); 1014 1015 *(int *)addr = n; 1016 break; 1017 } 1018 1019 case SIOCGIFADDR: 1020 { 1021 struct ifnet *ifp; 1022 1023 if (d->bd_bif == NULL) 1024 error = EINVAL; 1025 else { 1026 ifp = d->bd_bif->bif_ifp; 1027 error = (*ifp->if_ioctl)(ifp, cmd, addr); 1028 } 1029 break; 1030 } 1031 1032 /* 1033 * Get buffer len [for read()]. 1034 */ 1035 case BIOCGBLEN: 1036 *(u_int *)addr = d->bd_bufsize; 1037 break; 1038 1039 /* 1040 * Set buffer length. 1041 */ 1042 case BIOCSBLEN: 1043 error = bpf_ioctl_sblen(d, (u_int *)addr); 1044 break; 1045 1046 /* 1047 * Set link layer read filter. 1048 */ 1049 case BIOCSETF: 1050 case BIOCSETWF: 1051 error = bpf_setf(d, (struct bpf_program *)addr, cmd); 1052 break; 1053 1054 /* 1055 * Flush read packet buffer. 1056 */ 1057 case BIOCFLUSH: 1058 BPFD_LOCK(d); 1059 reset_d(d); 1060 BPFD_UNLOCK(d); 1061 break; 1062 1063 /* 1064 * Put interface into promiscuous mode. 1065 */ 1066 case BIOCPROMISC: 1067 if (d->bd_bif == NULL) { 1068 /* 1069 * No interface attached yet. 1070 */ 1071 error = EINVAL; 1072 break; 1073 } 1074 if (d->bd_promisc == 0) { 1075 error = ifpromisc(d->bd_bif->bif_ifp, 1); 1076 if (error == 0) 1077 d->bd_promisc = 1; 1078 } 1079 break; 1080 1081 /* 1082 * Get current data link type. 1083 */ 1084 case BIOCGDLT: 1085 if (d->bd_bif == NULL) 1086 error = EINVAL; 1087 else 1088 *(u_int *)addr = d->bd_bif->bif_dlt; 1089 break; 1090 1091 /* 1092 * Get a list of supported data link types. 1093 */ 1094 case BIOCGDLTLIST: 1095 if (d->bd_bif == NULL) 1096 error = EINVAL; 1097 else 1098 error = bpf_getdltlist(d, (struct bpf_dltlist *)addr); 1099 break; 1100 1101 /* 1102 * Set data link type. 1103 */ 1104 case BIOCSDLT: 1105 if (d->bd_bif == NULL) 1106 error = EINVAL; 1107 else 1108 error = bpf_setdlt(d, *(u_int *)addr); 1109 break; 1110 1111 /* 1112 * Get interface name. 1113 */ 1114 case BIOCGETIF: 1115 if (d->bd_bif == NULL) 1116 error = EINVAL; 1117 else { 1118 struct ifnet *const ifp = d->bd_bif->bif_ifp; 1119 struct ifreq *const ifr = (struct ifreq *)addr; 1120 1121 strlcpy(ifr->ifr_name, ifp->if_xname, 1122 sizeof(ifr->ifr_name)); 1123 } 1124 break; 1125 1126 /* 1127 * Set interface. 1128 */ 1129 case BIOCSETIF: 1130 error = bpf_setif(d, (struct ifreq *)addr); 1131 break; 1132 1133 /* 1134 * Set read timeout. 1135 */ 1136 case BIOCSRTIMEOUT: 1137 { 1138 struct timeval *tv = (struct timeval *)addr; 1139 1140 /* 1141 * Subtract 1 tick from tvtohz() since this isn't 1142 * a one-shot timer. 1143 */ 1144 if ((error = itimerfix(tv)) == 0) 1145 d->bd_rtout = tvtohz(tv) - 1; 1146 break; 1147 } 1148 1149 /* 1150 * Get read timeout. 1151 */ 1152 case BIOCGRTIMEOUT: 1153 { 1154 struct timeval *tv = (struct timeval *)addr; 1155 1156 tv->tv_sec = d->bd_rtout / hz; 1157 tv->tv_usec = (d->bd_rtout % hz) * tick; 1158 break; 1159 } 1160 1161 /* 1162 * Get packet stats. 1163 */ 1164 case BIOCGSTATS: 1165 { 1166 struct bpf_stat *bs = (struct bpf_stat *)addr; 1167 1168 /* XXXCSJP overflow */ 1169 bs->bs_recv = d->bd_rcount; 1170 bs->bs_drop = d->bd_dcount; 1171 break; 1172 } 1173 1174 /* 1175 * Set immediate mode. 1176 */ 1177 case BIOCIMMEDIATE: 1178 d->bd_immediate = *(u_int *)addr; 1179 break; 1180 1181 case BIOCVERSION: 1182 { 1183 struct bpf_version *bv = (struct bpf_version *)addr; 1184 1185 bv->bv_major = BPF_MAJOR_VERSION; 1186 bv->bv_minor = BPF_MINOR_VERSION; 1187 break; 1188 } 1189 1190 /* 1191 * Get "header already complete" flag 1192 */ 1193 case BIOCGHDRCMPLT: 1194 *(u_int *)addr = d->bd_hdrcmplt; 1195 break; 1196 1197 /* 1198 * Set "header already complete" flag 1199 */ 1200 case BIOCSHDRCMPLT: 1201 d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0; 1202 break; 1203 1204 /* 1205 * Get packet direction flag 1206 */ 1207 case BIOCGDIRECTION: 1208 *(u_int *)addr = d->bd_direction; 1209 break; 1210 1211 /* 1212 * Set packet direction flag 1213 */ 1214 case BIOCSDIRECTION: 1215 { 1216 u_int direction; 1217 1218 direction = *(u_int *)addr; 1219 switch (direction) { 1220 case BPF_D_IN: 1221 case BPF_D_INOUT: 1222 case BPF_D_OUT: 1223 d->bd_direction = direction; 1224 break; 1225 default: 1226 error = EINVAL; 1227 } 1228 } 1229 break; 1230 1231 case BIOCFEEDBACK: 1232 d->bd_feedback = *(u_int *)addr; 1233 break; 1234 1235 case BIOCLOCK: 1236 d->bd_locked = 1; 1237 break; 1238 1239 case FIONBIO: /* Non-blocking I/O */ 1240 break; 1241 1242 case FIOASYNC: /* Send signal on receive packets */ 1243 d->bd_async = *(int *)addr; 1244 break; 1245 1246 case FIOSETOWN: 1247 error = fsetown(*(int *)addr, &d->bd_sigio); 1248 break; 1249 1250 case FIOGETOWN: 1251 *(int *)addr = fgetown(&d->bd_sigio); 1252 break; 1253 1254 /* This is deprecated, FIOSETOWN should be used instead. */ 1255 case TIOCSPGRP: 1256 error = fsetown(-(*(int *)addr), &d->bd_sigio); 1257 break; 1258 1259 /* This is deprecated, FIOGETOWN should be used instead. */ 1260 case TIOCGPGRP: 1261 *(int *)addr = -fgetown(&d->bd_sigio); 1262 break; 1263 1264 case BIOCSRSIG: /* Set receive signal */ 1265 { 1266 u_int sig; 1267 1268 sig = *(u_int *)addr; 1269 1270 if (sig >= NSIG) 1271 error = EINVAL; 1272 else 1273 d->bd_sig = sig; 1274 break; 1275 } 1276 case BIOCGRSIG: 1277 *(u_int *)addr = d->bd_sig; 1278 break; 1279 1280 case BIOCGETBUFMODE: 1281 *(u_int *)addr = d->bd_bufmode; 1282 break; 1283 1284 case BIOCSETBUFMODE: 1285 /* 1286 * Allow the buffering mode to be changed as long as we 1287 * haven't yet committed to a particular mode. Our 1288 * definition of commitment, for now, is whether or not a 1289 * buffer has been allocated or an interface attached, since 1290 * that's the point where things get tricky. 1291 */ 1292 switch (*(u_int *)addr) { 1293 case BPF_BUFMODE_BUFFER: 1294 break; 1295 1296 case BPF_BUFMODE_ZBUF: 1297 if (bpf_zerocopy_enable) 1298 break; 1299 /* FALLSTHROUGH */ 1300 1301 default: 1302 return (EINVAL); 1303 } 1304 1305 BPFD_LOCK(d); 1306 if (d->bd_sbuf != NULL || d->bd_hbuf != NULL || 1307 d->bd_fbuf != NULL || d->bd_bif != NULL) { 1308 BPFD_UNLOCK(d); 1309 return (EBUSY); 1310 } 1311 d->bd_bufmode = *(u_int *)addr; 1312 BPFD_UNLOCK(d); 1313 break; 1314 1315 case BIOCGETZMAX: 1316 return (bpf_ioctl_getzmax(td, d, (size_t *)addr)); 1317 1318 case BIOCSETZBUF: 1319 return (bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr)); 1320 1321 case BIOCROTZBUF: 1322 return (bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr)); 1323 } 1324 return (error); 1325} 1326 1327/* 1328 * Set d's packet filter program to fp. If this file already has a filter, 1329 * free it and replace it. Returns EINVAL for bogus requests. 1330 */ 1331static int 1332bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd) 1333{ 1334 struct bpf_insn *fcode, *old; 1335 u_int wfilter, flen, size; 1336#ifdef BPF_JITTER 1337 bpf_jit_filter *ofunc; 1338#endif 1339 1340 if (cmd == BIOCSETWF) { 1341 old = d->bd_wfilter; 1342 wfilter = 1; 1343#ifdef BPF_JITTER 1344 ofunc = NULL; 1345#endif 1346 } else { 1347 wfilter = 0; 1348 old = d->bd_rfilter; 1349#ifdef BPF_JITTER 1350 ofunc = d->bd_bfilter; 1351#endif 1352 } 1353 if (fp->bf_insns == NULL) { 1354 if (fp->bf_len != 0) 1355 return (EINVAL); 1356 BPFD_LOCK(d); 1357 if (wfilter) 1358 d->bd_wfilter = NULL; 1359 else { 1360 d->bd_rfilter = NULL; 1361#ifdef BPF_JITTER 1362 d->bd_bfilter = NULL; 1363#endif 1364 } 1365 reset_d(d); 1366 BPFD_UNLOCK(d); 1367 if (old != NULL) 1368 free((caddr_t)old, M_BPF); 1369#ifdef BPF_JITTER 1370 if (ofunc != NULL) 1371 bpf_destroy_jit_filter(ofunc); 1372#endif 1373 return (0); 1374 } 1375 flen = fp->bf_len; 1376 if (flen > bpf_maxinsns) 1377 return (EINVAL); 1378 1379 size = flen * sizeof(*fp->bf_insns); 1380 fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK); 1381 if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 && 1382 bpf_validate(fcode, (int)flen)) { 1383 BPFD_LOCK(d); 1384 if (wfilter) 1385 d->bd_wfilter = fcode; 1386 else { 1387 d->bd_rfilter = fcode; 1388#ifdef BPF_JITTER 1389 d->bd_bfilter = bpf_jitter(fcode, flen); 1390#endif 1391 } 1392 reset_d(d); 1393 BPFD_UNLOCK(d); 1394 if (old != NULL) 1395 free((caddr_t)old, M_BPF); 1396#ifdef BPF_JITTER 1397 if (ofunc != NULL) 1398 bpf_destroy_jit_filter(ofunc); 1399#endif 1400 1401 return (0); 1402 } 1403 free((caddr_t)fcode, M_BPF); 1404 return (EINVAL); 1405} 1406 1407/* 1408 * Detach a file from its current interface (if attached at all) and attach 1409 * to the interface indicated by the name stored in ifr. 1410 * Return an errno or 0. 1411 */ 1412static int 1413bpf_setif(struct bpf_d *d, struct ifreq *ifr) 1414{ 1415 struct bpf_if *bp; 1416 struct ifnet *theywant; 1417 1418 theywant = ifunit(ifr->ifr_name); 1419 if (theywant == NULL || theywant->if_bpf == NULL) 1420 return (ENXIO); 1421 1422 bp = theywant->if_bpf; 1423 1424 /* 1425 * Behavior here depends on the buffering model. If we're using 1426 * kernel memory buffers, then we can allocate them here. If we're 1427 * using zero-copy, then the user process must have registered 1428 * buffers by the time we get here. If not, return an error. 1429 * 1430 * XXXRW: There are locking issues here with multi-threaded use: what 1431 * if two threads try to set the interface at once? 1432 */ 1433 switch (d->bd_bufmode) { 1434 case BPF_BUFMODE_BUFFER: 1435 if (d->bd_sbuf == NULL) 1436 bpf_buffer_alloc(d); 1437 KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL")); 1438 break; 1439 1440 case BPF_BUFMODE_ZBUF: 1441 if (d->bd_sbuf == NULL) 1442 return (EINVAL); 1443 break; 1444 1445 default: 1446 panic("bpf_setif: bufmode %d", d->bd_bufmode); 1447 } 1448 if (bp != d->bd_bif) { 1449 if (d->bd_bif) 1450 /* 1451 * Detach if attached to something else. 1452 */ 1453 bpf_detachd(d); 1454 1455 bpf_attachd(d, bp); 1456 } 1457 BPFD_LOCK(d); 1458 reset_d(d); 1459 BPFD_UNLOCK(d); 1460 return (0); 1461} 1462 1463/* 1464 * Support for select() and poll() system calls 1465 * 1466 * Return true iff the specific operation will not block indefinitely. 1467 * Otherwise, return false but make a note that a selwakeup() must be done. 1468 */ 1469static int 1470bpfpoll(struct cdev *dev, int events, struct thread *td) 1471{ 1472 struct bpf_d *d; 1473 int revents; 1474 1475 d = dev->si_drv1; 1476 if (d->bd_bif == NULL) 1477 return (ENXIO); 1478 1479 /* 1480 * Refresh PID associated with this descriptor. 1481 */ 1482 revents = events & (POLLOUT | POLLWRNORM); 1483 BPFD_LOCK(d); 1484 d->bd_pid = td->td_proc->p_pid; 1485 if (events & (POLLIN | POLLRDNORM)) { 1486 if (bpf_ready(d)) 1487 revents |= events & (POLLIN | POLLRDNORM); 1488 else { 1489 selrecord(td, &d->bd_sel); 1490 /* Start the read timeout if necessary. */ 1491 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 1492 callout_reset(&d->bd_callout, d->bd_rtout, 1493 bpf_timed_out, d); 1494 d->bd_state = BPF_WAITING; 1495 } 1496 } 1497 } 1498 BPFD_UNLOCK(d); 1499 return (revents); 1500} 1501 1502/* 1503 * Support for kevent() system call. Register EVFILT_READ filters and 1504 * reject all others. 1505 */ 1506int 1507bpfkqfilter(struct cdev *dev, struct knote *kn) 1508{ 1509 struct bpf_d *d = (struct bpf_d *)dev->si_drv1; 1510 1511 if (kn->kn_filter != EVFILT_READ) 1512 return (1); 1513 1514 /* 1515 * Refresh PID associated with this descriptor. 1516 */ 1517 BPFD_LOCK(d); 1518 d->bd_pid = curthread->td_proc->p_pid; 1519 kn->kn_fop = &bpfread_filtops; 1520 kn->kn_hook = d; 1521 knlist_add(&d->bd_sel.si_note, kn, 1); 1522 BPFD_UNLOCK(d); 1523 1524 return (0); 1525} 1526 1527static void 1528filt_bpfdetach(struct knote *kn) 1529{ 1530 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 1531 1532 knlist_remove(&d->bd_sel.si_note, kn, 0); 1533} 1534 1535static int 1536filt_bpfread(struct knote *kn, long hint) 1537{ 1538 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 1539 int ready; 1540 1541 BPFD_LOCK_ASSERT(d); 1542 ready = bpf_ready(d); 1543 if (ready) { 1544 kn->kn_data = d->bd_slen; 1545 if (d->bd_hbuf) 1546 kn->kn_data += d->bd_hlen; 1547 } 1548 else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 1549 callout_reset(&d->bd_callout, d->bd_rtout, 1550 bpf_timed_out, d); 1551 d->bd_state = BPF_WAITING; 1552 } 1553 1554 return (ready); 1555} 1556 1557/* 1558 * Incoming linkage from device drivers. Process the packet pkt, of length 1559 * pktlen, which is stored in a contiguous buffer. The packet is parsed 1560 * by each process' filter, and if accepted, stashed into the corresponding 1561 * buffer. 1562 */ 1563void 1564bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 1565{ 1566 struct bpf_d *d; 1567 u_int slen; 1568 int gottime; 1569 struct timeval tv; 1570 1571 gottime = 0; 1572 BPFIF_LOCK(bp); 1573 LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 1574 BPFD_LOCK(d); 1575 ++d->bd_rcount; 1576#ifdef BPF_JITTER 1577 if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL) 1578 slen = (*(d->bd_bfilter->func))(pkt, pktlen, pktlen); 1579 else 1580#endif 1581 slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen); 1582 if (slen != 0) { 1583 d->bd_fcount++; 1584 if (!gottime) { 1585 microtime(&tv); 1586 gottime = 1; 1587 } 1588#ifdef MAC 1589 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 1590#endif 1591 catchpacket(d, pkt, pktlen, slen, 1592 bpf_append_bytes, &tv); 1593 } 1594 BPFD_UNLOCK(d); 1595 } 1596 BPFIF_UNLOCK(bp); 1597} 1598 1599#define BPF_CHECK_DIRECTION(d, r, i) \ 1600 (((d)->bd_direction == BPF_D_IN && (r) != (i)) || \ 1601 ((d)->bd_direction == BPF_D_OUT && (r) == (i))) 1602 1603/* 1604 * Incoming linkage from device drivers, when packet is in an mbuf chain. 1605 */ 1606void 1607bpf_mtap(struct bpf_if *bp, struct mbuf *m) 1608{ 1609 struct bpf_d *d; 1610 u_int pktlen, slen; 1611 int gottime; 1612 struct timeval tv; 1613 1614 /* Skip outgoing duplicate packets. */ 1615 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { 1616 m->m_flags &= ~M_PROMISC; 1617 return; 1618 } 1619 1620 gottime = 0; 1621 1622 pktlen = m_length(m, NULL); 1623 1624 BPFIF_LOCK(bp); 1625 LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 1626 if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp)) 1627 continue; 1628 BPFD_LOCK(d); 1629 ++d->bd_rcount; 1630#ifdef BPF_JITTER 1631 /* XXX We cannot handle multiple mbufs. */ 1632 if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL && 1633 m->m_next == NULL) 1634 slen = (*(d->bd_bfilter->func))(mtod(m, u_char *), 1635 pktlen, pktlen); 1636 else 1637#endif 1638 slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0); 1639 if (slen != 0) { 1640 d->bd_fcount++; 1641 if (!gottime) { 1642 microtime(&tv); 1643 gottime = 1; 1644 } 1645#ifdef MAC 1646 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 1647#endif 1648 catchpacket(d, (u_char *)m, pktlen, slen, 1649 bpf_append_mbuf, &tv); 1650 } 1651 BPFD_UNLOCK(d); 1652 } 1653 BPFIF_UNLOCK(bp); 1654} 1655 1656/* 1657 * Incoming linkage from device drivers, when packet is in 1658 * an mbuf chain and to be prepended by a contiguous header. 1659 */ 1660void 1661bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) 1662{ 1663 struct mbuf mb; 1664 struct bpf_d *d; 1665 u_int pktlen, slen; 1666 int gottime; 1667 struct timeval tv; 1668 1669 /* Skip outgoing duplicate packets. */ 1670 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { 1671 m->m_flags &= ~M_PROMISC; 1672 return; 1673 } 1674 1675 gottime = 0; 1676 1677 pktlen = m_length(m, NULL); 1678 /* 1679 * Craft on-stack mbuf suitable for passing to bpf_filter. 1680 * Note that we cut corners here; we only setup what's 1681 * absolutely needed--this mbuf should never go anywhere else. 1682 */ 1683 mb.m_next = m; 1684 mb.m_data = data; 1685 mb.m_len = dlen; 1686 pktlen += dlen; 1687 1688 BPFIF_LOCK(bp); 1689 LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 1690 if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp)) 1691 continue; 1692 BPFD_LOCK(d); 1693 ++d->bd_rcount; 1694 slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0); 1695 if (slen != 0) { 1696 d->bd_fcount++; 1697 if (!gottime) { 1698 microtime(&tv); 1699 gottime = 1; 1700 } 1701#ifdef MAC 1702 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 1703#endif 1704 catchpacket(d, (u_char *)&mb, pktlen, slen, 1705 bpf_append_mbuf, &tv); 1706 } 1707 BPFD_UNLOCK(d); 1708 } 1709 BPFIF_UNLOCK(bp); 1710} 1711 1712#undef BPF_CHECK_DIRECTION 1713 1714/* 1715 * Move the packet data from interface memory (pkt) into the 1716 * store buffer. "cpfn" is the routine called to do the actual data 1717 * transfer. bcopy is passed in to copy contiguous chunks, while 1718 * bpf_append_mbuf is passed in to copy mbuf chains. In the latter case, 1719 * pkt is really an mbuf. 1720 */ 1721static void 1722catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, 1723 void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int), 1724 struct timeval *tv) 1725{ 1726 struct bpf_hdr hdr; 1727 int totlen, curlen; 1728 int hdrlen = d->bd_bif->bif_hdrlen; 1729 int do_wakeup = 0; 1730 1731 BPFD_LOCK_ASSERT(d); 1732 1733 /* 1734 * Detect whether user space has released a buffer back to us, and if 1735 * so, move it from being a hold buffer to a free buffer. This may 1736 * not be the best place to do it (for example, we might only want to 1737 * run this check if we need the space), but for now it's a reliable 1738 * spot to do it. 1739 */ 1740 if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) { 1741 d->bd_fbuf = d->bd_hbuf; 1742 d->bd_hbuf = NULL; 1743 d->bd_hlen = 0; 1744 bpf_buf_reclaimed(d); 1745 } 1746 1747 /* 1748 * Figure out how many bytes to move. If the packet is 1749 * greater or equal to the snapshot length, transfer that 1750 * much. Otherwise, transfer the whole packet (unless 1751 * we hit the buffer size limit). 1752 */ 1753 totlen = hdrlen + min(snaplen, pktlen); 1754 if (totlen > d->bd_bufsize) 1755 totlen = d->bd_bufsize; 1756 1757 /* 1758 * Round up the end of the previous packet to the next longword. 1759 * 1760 * Drop the packet if there's no room and no hope of room 1761 * If the packet would overflow the storage buffer or the storage 1762 * buffer is considered immutable by the buffer model, try to rotate 1763 * the buffer and wakeup pending processes. 1764 */ 1765 curlen = BPF_WORDALIGN(d->bd_slen); 1766 if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) { 1767 if (d->bd_fbuf == NULL) { 1768 /* 1769 * There's no room in the store buffer, and no 1770 * prospect of room, so drop the packet. Notify the 1771 * buffer model. 1772 */ 1773 bpf_buffull(d); 1774 ++d->bd_dcount; 1775 return; 1776 } 1777 ROTATE_BUFFERS(d); 1778 do_wakeup = 1; 1779 curlen = 0; 1780 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) 1781 /* 1782 * Immediate mode is set, or the read timeout has already 1783 * expired during a select call. A packet arrived, so the 1784 * reader should be woken up. 1785 */ 1786 do_wakeup = 1; 1787 1788 /* 1789 * Append the bpf header. Note we append the actual header size, but 1790 * move forward the length of the header plus padding. 1791 */ 1792 bzero(&hdr, sizeof(hdr)); 1793 hdr.bh_tstamp = *tv; 1794 hdr.bh_datalen = pktlen; 1795 hdr.bh_hdrlen = hdrlen; 1796 hdr.bh_caplen = totlen - hdrlen; 1797 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr)); 1798 1799 /* 1800 * Copy the packet data into the store buffer and update its length. 1801 */ 1802 (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen); 1803 d->bd_slen = curlen + totlen; 1804 1805 if (do_wakeup) 1806 bpf_wakeup(d); 1807} 1808 1809/* 1810 * Free buffers currently in use by a descriptor. 1811 * Called on close. 1812 */ 1813static void 1814bpf_freed(struct bpf_d *d) 1815{ 1816 1817 /* 1818 * We don't need to lock out interrupts since this descriptor has 1819 * been detached from its interface and it yet hasn't been marked 1820 * free. 1821 */ 1822 bpf_free(d); 1823 if (d->bd_rfilter) { 1824 free((caddr_t)d->bd_rfilter, M_BPF); 1825#ifdef BPF_JITTER 1826 bpf_destroy_jit_filter(d->bd_bfilter); 1827#endif 1828 } 1829 if (d->bd_wfilter) 1830 free((caddr_t)d->bd_wfilter, M_BPF); 1831 mtx_destroy(&d->bd_mtx); 1832} 1833 1834/* 1835 * Attach an interface to bpf. dlt is the link layer type; hdrlen is the 1836 * fixed size of the link header (variable length headers not yet supported). 1837 */ 1838void 1839bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) 1840{ 1841 1842 bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf); 1843} 1844 1845/* 1846 * Attach an interface to bpf. ifp is a pointer to the structure 1847 * defining the interface to be attached, dlt is the link layer type, 1848 * and hdrlen is the fixed size of the link header (variable length 1849 * headers are not yet supporrted). 1850 */ 1851void 1852bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) 1853{ 1854 struct bpf_if *bp; 1855 1856 bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO); 1857 if (bp == NULL) 1858 panic("bpfattach"); 1859 1860 LIST_INIT(&bp->bif_dlist); 1861 bp->bif_ifp = ifp; 1862 bp->bif_dlt = dlt; 1863 mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF); 1864 KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized")); 1865 *driverp = bp; 1866 1867 mtx_lock(&bpf_mtx); 1868 LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next); 1869 mtx_unlock(&bpf_mtx); 1870 1871 /* 1872 * Compute the length of the bpf header. This is not necessarily 1873 * equal to SIZEOF_BPF_HDR because we want to insert spacing such 1874 * that the network layer header begins on a longword boundary (for 1875 * performance reasons and to alleviate alignment restrictions). 1876 */ 1877 bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; 1878 1879 if (bootverbose) 1880 if_printf(ifp, "bpf attached\n"); 1881} 1882 1883/* 1884 * Detach bpf from an interface. This involves detaching each descriptor 1885 * associated with the interface, and leaving bd_bif NULL. Notify each 1886 * descriptor as it's detached so that any sleepers wake up and get 1887 * ENXIO. 1888 */ 1889void 1890bpfdetach(struct ifnet *ifp) 1891{ 1892 struct bpf_if *bp; 1893 struct bpf_d *d; 1894 1895 /* Locate BPF interface information */ 1896 mtx_lock(&bpf_mtx); 1897 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 1898 if (ifp == bp->bif_ifp) 1899 break; 1900 } 1901 1902 /* Interface wasn't attached */ 1903 if ((bp == NULL) || (bp->bif_ifp == NULL)) { 1904 mtx_unlock(&bpf_mtx); 1905 printf("bpfdetach: %s was not attached\n", ifp->if_xname); 1906 return; 1907 } 1908 1909 LIST_REMOVE(bp, bif_next); 1910 mtx_unlock(&bpf_mtx); 1911 1912 while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) { 1913 bpf_detachd(d); 1914 BPFD_LOCK(d); 1915 bpf_wakeup(d); 1916 BPFD_UNLOCK(d); 1917 } 1918 1919 mtx_destroy(&bp->bif_mtx); 1920 free(bp, M_BPF); 1921} 1922 1923/* 1924 * Get a list of available data link type of the interface. 1925 */ 1926static int 1927bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) 1928{ 1929 int n, error; 1930 struct ifnet *ifp; 1931 struct bpf_if *bp; 1932 1933 ifp = d->bd_bif->bif_ifp; 1934 n = 0; 1935 error = 0; 1936 mtx_lock(&bpf_mtx); 1937 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 1938 if (bp->bif_ifp != ifp) 1939 continue; 1940 if (bfl->bfl_list != NULL) { 1941 if (n >= bfl->bfl_len) { 1942 mtx_unlock(&bpf_mtx); 1943 return (ENOMEM); 1944 } 1945 error = copyout(&bp->bif_dlt, 1946 bfl->bfl_list + n, sizeof(u_int)); 1947 } 1948 n++; 1949 } 1950 mtx_unlock(&bpf_mtx); 1951 bfl->bfl_len = n; 1952 return (error); 1953} 1954 1955/* 1956 * Set the data link type of a BPF instance. 1957 */ 1958static int 1959bpf_setdlt(struct bpf_d *d, u_int dlt) 1960{ 1961 int error, opromisc; 1962 struct ifnet *ifp; 1963 struct bpf_if *bp; 1964 1965 if (d->bd_bif->bif_dlt == dlt) 1966 return (0); 1967 ifp = d->bd_bif->bif_ifp; 1968 mtx_lock(&bpf_mtx); 1969 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 1970 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) 1971 break; 1972 } 1973 mtx_unlock(&bpf_mtx); 1974 if (bp != NULL) { 1975 opromisc = d->bd_promisc; 1976 bpf_detachd(d); 1977 bpf_attachd(d, bp); 1978 BPFD_LOCK(d); 1979 reset_d(d); 1980 BPFD_UNLOCK(d); 1981 if (opromisc) { 1982 error = ifpromisc(bp->bif_ifp, 1); 1983 if (error) 1984 if_printf(bp->bif_ifp, 1985 "bpf_setdlt: ifpromisc failed (%d)\n", 1986 error); 1987 else 1988 d->bd_promisc = 1; 1989 } 1990 } 1991 return (bp == NULL ? EINVAL : 0); 1992} 1993 1994static void 1995bpf_clone(void *arg, struct ucred *cred, char *name, int namelen, 1996 struct cdev **dev) 1997{ 1998 int u; 1999 2000 if (*dev != NULL) 2001 return; 2002 if (dev_stdclone(name, NULL, "bpf", &u) != 1) 2003 return; 2004 *dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600, 2005 "bpf%d", u); 2006 dev_ref(*dev); 2007 (*dev)->si_flags |= SI_CHEAPCLONE; 2008 return; 2009} 2010 2011static void 2012bpf_drvinit(void *unused) 2013{ 2014 2015 mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF); 2016 LIST_INIT(&bpf_iflist); 2017 EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000); 2018} 2019 2020static void 2021bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd) 2022{ 2023 2024 bzero(d, sizeof(*d)); 2025 BPFD_LOCK_ASSERT(bd); 2026 d->bd_structsize = sizeof(*d); 2027 d->bd_immediate = bd->bd_immediate; 2028 d->bd_promisc = bd->bd_promisc; 2029 d->bd_hdrcmplt = bd->bd_hdrcmplt; 2030 d->bd_direction = bd->bd_direction; 2031 d->bd_feedback = bd->bd_feedback; 2032 d->bd_async = bd->bd_async; 2033 d->bd_rcount = bd->bd_rcount; 2034 d->bd_dcount = bd->bd_dcount; 2035 d->bd_fcount = bd->bd_fcount; 2036 d->bd_sig = bd->bd_sig; 2037 d->bd_slen = bd->bd_slen; 2038 d->bd_hlen = bd->bd_hlen; 2039 d->bd_bufsize = bd->bd_bufsize; 2040 d->bd_pid = bd->bd_pid; 2041 strlcpy(d->bd_ifname, 2042 bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ); 2043 d->bd_locked = bd->bd_locked; 2044 d->bd_wcount = bd->bd_wcount; 2045 d->bd_wdcount = bd->bd_wdcount; 2046 d->bd_wfcount = bd->bd_wfcount; 2047 d->bd_zcopy = bd->bd_zcopy; 2048 d->bd_bufmode = bd->bd_bufmode; 2049} 2050 2051static int 2052bpf_stats_sysctl(SYSCTL_HANDLER_ARGS) 2053{ 2054 struct xbpf_d *xbdbuf, *xbd; 2055 int index, error; 2056 struct bpf_if *bp; 2057 struct bpf_d *bd; 2058 2059 /* 2060 * XXX This is not technically correct. It is possible for non 2061 * privileged users to open bpf devices. It would make sense 2062 * if the users who opened the devices were able to retrieve 2063 * the statistics for them, too. 2064 */ 2065 error = priv_check(req->td, PRIV_NET_BPF); 2066 if (error) 2067 return (error); 2068 if (req->oldptr == NULL) 2069 return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd))); 2070 if (bpf_bpfd_cnt == 0) 2071 return (SYSCTL_OUT(req, 0, 0)); 2072 xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK); 2073 mtx_lock(&bpf_mtx); 2074 if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) { 2075 mtx_unlock(&bpf_mtx); 2076 free(xbdbuf, M_BPF); 2077 return (ENOMEM); 2078 } 2079 index = 0; 2080 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2081 BPFIF_LOCK(bp); 2082 LIST_FOREACH(bd, &bp->bif_dlist, bd_next) { 2083 xbd = &xbdbuf[index++]; 2084 BPFD_LOCK(bd); 2085 bpfstats_fill_xbpf(xbd, bd); 2086 BPFD_UNLOCK(bd); 2087 } 2088 BPFIF_UNLOCK(bp); 2089 } 2090 mtx_unlock(&bpf_mtx); 2091 error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd)); 2092 free(xbdbuf, M_BPF); 2093 return (error); 2094} 2095 2096SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL); 2097 2098#else /* !DEV_BPF && !NETGRAPH_BPF */ 2099/* 2100 * NOP stubs to allow bpf-using drivers to load and function. 2101 * 2102 * A 'better' implementation would allow the core bpf functionality 2103 * to be loaded at runtime. 2104 */ 2105static struct bpf_if bp_null; 2106 2107void 2108bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 2109{ 2110} 2111 2112void 2113bpf_mtap(struct bpf_if *bp, struct mbuf *m) 2114{ 2115} 2116 2117void 2118bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m) 2119{ 2120} 2121 2122void 2123bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) 2124{ 2125 2126 bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf); 2127} 2128 2129void 2130bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) 2131{ 2132 2133 *driverp = &bp_null; 2134} 2135 2136void 2137bpfdetach(struct ifnet *ifp) 2138{ 2139} 2140 2141u_int 2142bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen) 2143{ 2144 return -1; /* "no filter" behaviour */ 2145} 2146 2147int 2148bpf_validate(const struct bpf_insn *f, int len) 2149{ 2150 return 0; /* false */ 2151} 2152 2153#endif /* !DEV_BPF && !NETGRAPH_BPF */ 2154