/* bpf.c revision 178208 */
1/*- 2 * Copyright (c) 1990, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from the Stanford/CMU enet packet filter, 6 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 7 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 8 * Berkeley Laboratory. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)bpf.c 8.4 (Berkeley) 1/9/95 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: head/sys/net/bpf.c 178208 2008-04-15 00:50:01Z jkim $"); 39 40#include "opt_bpf.h" 41#include "opt_mac.h" 42#include "opt_netgraph.h" 43 44#include <sys/types.h> 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/conf.h> 48#include <sys/fcntl.h> 49#include <sys/malloc.h> 50#include <sys/mbuf.h> 51#include <sys/time.h> 52#include <sys/priv.h> 53#include <sys/proc.h> 54#include <sys/signalvar.h> 55#include <sys/filio.h> 56#include <sys/sockio.h> 57#include <sys/ttycom.h> 58#include <sys/uio.h> 59 60#include <sys/event.h> 61#include <sys/file.h> 62#include <sys/poll.h> 63#include <sys/proc.h> 64 65#include <sys/socket.h> 66 67#include <net/if.h> 68#include <net/bpf.h> 69#include <net/bpf_buffer.h> 70#ifdef BPF_JITTER 71#include <net/bpf_jitter.h> 72#endif 73#include <net/bpf_zerocopy.h> 74#include <net/bpfdesc.h> 75 76#include <netinet/in.h> 77#include <netinet/if_ether.h> 78#include <sys/kernel.h> 79#include <sys/sysctl.h> 80 81#include <net80211/ieee80211_freebsd.h> 82 83#include <security/mac/mac_framework.h> 84 85MALLOC_DEFINE(M_BPF, "BPF", "BPF data"); 86 87#if defined(DEV_BPF) || defined(NETGRAPH_BPF) 88 89#define PRINET 26 /* interruptible */ 90 91/* 92 * bpf_iflist is a list of BPF interface structures, each corresponding to a 93 * specific DLT. The same network interface might have several BPF interface 94 * structures registered by different layers in the stack (i.e., 802.11 95 * frames, ethernet frames, etc). 
96 */ 97static LIST_HEAD(, bpf_if) bpf_iflist; 98static struct mtx bpf_mtx; /* bpf global lock */ 99static int bpf_bpfd_cnt; 100 101static void bpf_attachd(struct bpf_d *, struct bpf_if *); 102static void bpf_detachd(struct bpf_d *); 103static void bpf_freed(struct bpf_d *); 104static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **, 105 struct sockaddr *, int *, struct bpf_insn *); 106static int bpf_setif(struct bpf_d *, struct ifreq *); 107static void bpf_timed_out(void *); 108static __inline void 109 bpf_wakeup(struct bpf_d *); 110static void catchpacket(struct bpf_d *, u_char *, u_int, u_int, 111 void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int), 112 struct timeval *); 113static void reset_d(struct bpf_d *); 114static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd); 115static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); 116static int bpf_setdlt(struct bpf_d *, u_int); 117static void filt_bpfdetach(struct knote *); 118static int filt_bpfread(struct knote *, long); 119static void bpf_drvinit(void *); 120static void bpf_clone(void *, struct ucred *, char *, int, struct cdev **); 121static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS); 122 123SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl"); 124static int bpf_maxinsns = BPF_MAXINSNS; 125SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW, 126 &bpf_maxinsns, 0, "Maximum bpf program instructions"); 127static int bpf_zerocopy_enable = 0; 128SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW, 129 &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions"); 130SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW, 131 bpf_stats_sysctl, "bpf statistics portal"); 132 133static d_open_t bpfopen; 134static d_close_t bpfclose; 135static d_read_t bpfread; 136static d_write_t bpfwrite; 137static d_ioctl_t bpfioctl; 138static d_poll_t bpfpoll; 139static d_kqfilter_t bpfkqfilter; 140 141static struct cdevsw bpf_cdevsw = { 142 .d_version = D_VERSION, 143 
.d_open = bpfopen, 144 .d_close = bpfclose, 145 .d_read = bpfread, 146 .d_write = bpfwrite, 147 .d_ioctl = bpfioctl, 148 .d_poll = bpfpoll, 149 .d_name = "bpf", 150 .d_kqfilter = bpfkqfilter, 151}; 152 153static struct filterops bpfread_filtops = 154 { 1, NULL, filt_bpfdetach, filt_bpfread }; 155 156/* 157 * Wrapper functions for various buffering methods. If the set of buffer 158 * modes expands, we will probably want to introduce a switch data structure 159 * similar to protosw, et. 160 */ 161static void 162bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src, 163 u_int len) 164{ 165 166 BPFD_LOCK_ASSERT(d); 167 168 switch (d->bd_bufmode) { 169 case BPF_BUFMODE_BUFFER: 170 return (bpf_buffer_append_bytes(d, buf, offset, src, len)); 171 172 case BPF_BUFMODE_ZBUF: 173 d->bd_zcopy++; 174 return (bpf_zerocopy_append_bytes(d, buf, offset, src, len)); 175 176 default: 177 panic("bpf_buf_append_bytes"); 178 } 179} 180 181static void 182bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src, 183 u_int len) 184{ 185 186 BPFD_LOCK_ASSERT(d); 187 188 switch (d->bd_bufmode) { 189 case BPF_BUFMODE_BUFFER: 190 return (bpf_buffer_append_mbuf(d, buf, offset, src, len)); 191 192 case BPF_BUFMODE_ZBUF: 193 d->bd_zcopy++; 194 return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len)); 195 196 default: 197 panic("bpf_buf_append_mbuf"); 198 } 199} 200 201/* 202 * If the buffer mechanism has a way to decide that a held buffer can be made 203 * free, then it is exposed via the bpf_canfreebuf() interface. (1) is 204 * returned if the buffer can be discarded, (0) is returned if it cannot. 205 */ 206static int 207bpf_canfreebuf(struct bpf_d *d) 208{ 209 210 BPFD_LOCK_ASSERT(d); 211 212 switch (d->bd_bufmode) { 213 case BPF_BUFMODE_ZBUF: 214 return (bpf_zerocopy_canfreebuf(d)); 215 } 216 return (0); 217} 218 219/* 220 * Allow the buffer model to indicate that the current store buffer is 221 * immutable, regardless of the appearance of space. 
Return (1) if the 222 * buffer is writable, and (0) if not. 223 */ 224static int 225bpf_canwritebuf(struct bpf_d *d) 226{ 227 228 BPFD_LOCK_ASSERT(d); 229 230 switch (d->bd_bufmode) { 231 case BPF_BUFMODE_ZBUF: 232 return (bpf_zerocopy_canwritebuf(d)); 233 } 234 return (1); 235} 236 237/* 238 * Notify buffer model that an attempt to write to the store buffer has 239 * resulted in a dropped packet, in which case the buffer may be considered 240 * full. 241 */ 242static void 243bpf_buffull(struct bpf_d *d) 244{ 245 246 BPFD_LOCK_ASSERT(d); 247 248 switch (d->bd_bufmode) { 249 case BPF_BUFMODE_ZBUF: 250 bpf_zerocopy_buffull(d); 251 break; 252 } 253} 254 255/* 256 * Notify the buffer model that a buffer has moved into the hold position. 257 */ 258void 259bpf_bufheld(struct bpf_d *d) 260{ 261 262 BPFD_LOCK_ASSERT(d); 263 264 switch (d->bd_bufmode) { 265 case BPF_BUFMODE_ZBUF: 266 bpf_zerocopy_bufheld(d); 267 break; 268 } 269} 270 271static void 272bpf_free(struct bpf_d *d) 273{ 274 275 switch (d->bd_bufmode) { 276 case BPF_BUFMODE_BUFFER: 277 return (bpf_buffer_free(d)); 278 279 case BPF_BUFMODE_ZBUF: 280 return (bpf_zerocopy_free(d)); 281 282 default: 283 panic("bpf_buf_free"); 284 } 285} 286 287static int 288bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio) 289{ 290 291 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) 292 return (EOPNOTSUPP); 293 return (bpf_buffer_uiomove(d, buf, len, uio)); 294} 295 296static int 297bpf_ioctl_sblen(struct bpf_d *d, u_int *i) 298{ 299 300 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) 301 return (EOPNOTSUPP); 302 return (bpf_buffer_ioctl_sblen(d, i)); 303} 304 305static int 306bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i) 307{ 308 309 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 310 return (EOPNOTSUPP); 311 return (bpf_zerocopy_ioctl_getzmax(td, d, i)); 312} 313 314static int 315bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) 316{ 317 318 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 319 return 
(EOPNOTSUPP); 320 return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz)); 321} 322 323static int 324bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) 325{ 326 327 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 328 return (EOPNOTSUPP); 329 return (bpf_zerocopy_ioctl_setzbuf(td, d, bz)); 330} 331 332/* 333 * General BPF functions. 334 */ 335static int 336bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp, 337 struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter) 338{ 339 const struct ieee80211_bpf_params *p; 340 struct ether_header *eh; 341 struct mbuf *m; 342 int error; 343 int len; 344 int hlen; 345 int slen; 346 347 /* 348 * Build a sockaddr based on the data link layer type. 349 * We do this at this level because the ethernet header 350 * is copied directly into the data field of the sockaddr. 351 * In the case of SLIP, there is no header and the packet 352 * is forwarded as is. 353 * Also, we are careful to leave room at the front of the mbuf 354 * for the link level header. 355 */ 356 switch (linktype) { 357 358 case DLT_SLIP: 359 sockp->sa_family = AF_INET; 360 hlen = 0; 361 break; 362 363 case DLT_EN10MB: 364 sockp->sa_family = AF_UNSPEC; 365 /* XXX Would MAXLINKHDR be better? */ 366 hlen = ETHER_HDR_LEN; 367 break; 368 369 case DLT_FDDI: 370 sockp->sa_family = AF_IMPLINK; 371 hlen = 0; 372 break; 373 374 case DLT_RAW: 375 sockp->sa_family = AF_UNSPEC; 376 hlen = 0; 377 break; 378 379 case DLT_NULL: 380 /* 381 * null interface types require a 4 byte pseudo header which 382 * corresponds to the address family of the packet. 383 */ 384 sockp->sa_family = AF_UNSPEC; 385 hlen = 4; 386 break; 387 388 case DLT_ATM_RFC1483: 389 /* 390 * en atm driver requires 4-byte atm pseudo header. 391 * though it isn't standard, vpi:vci needs to be 392 * specified anyway. 
393 */ 394 sockp->sa_family = AF_UNSPEC; 395 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */ 396 break; 397 398 case DLT_PPP: 399 sockp->sa_family = AF_UNSPEC; 400 hlen = 4; /* This should match PPP_HDRLEN */ 401 break; 402 403 case DLT_IEEE802_11: /* IEEE 802.11 wireless */ 404 sockp->sa_family = AF_IEEE80211; 405 hlen = 0; 406 break; 407 408 case DLT_IEEE802_11_RADIO: /* IEEE 802.11 wireless w/ phy params */ 409 sockp->sa_family = AF_IEEE80211; 410 sockp->sa_len = 12; /* XXX != 0 */ 411 hlen = sizeof(struct ieee80211_bpf_params); 412 break; 413 414 default: 415 return (EIO); 416 } 417 418 len = uio->uio_resid; 419 420 if (len - hlen > ifp->if_mtu) 421 return (EMSGSIZE); 422 423 if ((unsigned)len > MCLBYTES) 424 return (EIO); 425 426 if (len > MHLEN) 427 m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR); 428 else 429 MGETHDR(m, M_WAIT, MT_DATA); 430 m->m_pkthdr.len = m->m_len = len; 431 m->m_pkthdr.rcvif = NULL; 432 *mp = m; 433 434 if (m->m_len < hlen) { 435 error = EPERM; 436 goto bad; 437 } 438 439 error = uiomove(mtod(m, u_char *), len, uio); 440 if (error) 441 goto bad; 442 443 slen = bpf_filter(wfilter, mtod(m, u_char *), len, len); 444 if (slen == 0) { 445 error = EPERM; 446 goto bad; 447 } 448 449 /* Check for multicast destination */ 450 switch (linktype) { 451 case DLT_EN10MB: 452 eh = mtod(m, struct ether_header *); 453 if (ETHER_IS_MULTICAST(eh->ether_dhost)) { 454 if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost, 455 ETHER_ADDR_LEN) == 0) 456 m->m_flags |= M_BCAST; 457 else 458 m->m_flags |= M_MCAST; 459 } 460 break; 461 } 462 463 /* 464 * Make room for link header, and copy it to sockaddr 465 */ 466 if (hlen != 0) { 467 if (sockp->sa_family == AF_IEEE80211) { 468 /* 469 * Collect true length from the parameter header 470 * NB: sockp is known to be zero'd so if we do a 471 * short copy unspecified parameters will be 472 * zero. 
473 * NB: packet may not be aligned after stripping 474 * bpf params 475 * XXX check ibp_vers 476 */ 477 p = mtod(m, const struct ieee80211_bpf_params *); 478 hlen = p->ibp_len; 479 if (hlen > sizeof(sockp->sa_data)) { 480 error = EINVAL; 481 goto bad; 482 } 483 } 484 bcopy(m->m_data, sockp->sa_data, hlen); 485 } 486 *hdrlen = hlen; 487 488 return (0); 489bad: 490 m_freem(m); 491 return (error); 492} 493 494/* 495 * Attach file to the bpf interface, i.e. make d listen on bp. 496 */ 497static void 498bpf_attachd(struct bpf_d *d, struct bpf_if *bp) 499{ 500 /* 501 * Point d at bp, and add d to the interface's list of listeners. 502 * Finally, point the driver's bpf cookie at the interface so 503 * it will divert packets to bpf. 504 */ 505 BPFIF_LOCK(bp); 506 d->bd_bif = bp; 507 LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next); 508 509 bpf_bpfd_cnt++; 510 BPFIF_UNLOCK(bp); 511} 512 513/* 514 * Detach a file from its interface. 515 */ 516static void 517bpf_detachd(struct bpf_d *d) 518{ 519 int error; 520 struct bpf_if *bp; 521 struct ifnet *ifp; 522 523 bp = d->bd_bif; 524 BPFIF_LOCK(bp); 525 BPFD_LOCK(d); 526 ifp = d->bd_bif->bif_ifp; 527 528 /* 529 * Remove d from the interface's descriptor list. 530 */ 531 LIST_REMOVE(d, bd_next); 532 533 bpf_bpfd_cnt--; 534 d->bd_bif = NULL; 535 BPFD_UNLOCK(d); 536 BPFIF_UNLOCK(bp); 537 538 /* 539 * Check if this descriptor had requested promiscuous mode. 540 * If so, turn it off. 541 */ 542 if (d->bd_promisc) { 543 d->bd_promisc = 0; 544 error = ifpromisc(ifp, 0); 545 if (error != 0 && error != ENXIO) { 546 /* 547 * ENXIO can happen if a pccard is unplugged 548 * Something is really wrong if we were able to put 549 * the driver into promiscuous mode, but can't 550 * take it out. 551 */ 552 if_printf(bp->bif_ifp, 553 "bpf_detach: ifpromisc failed (%d)\n", error); 554 } 555 } 556} 557 558/* 559 * Open ethernet device. Returns ENXIO for illegal minor device number, 560 * EBUSY if file is open by another process. 
561 */ 562/* ARGSUSED */ 563static int 564bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td) 565{ 566 struct bpf_d *d; 567 568 mtx_lock(&bpf_mtx); 569 d = dev->si_drv1; 570 /* 571 * Each minor can be opened by only one process. If the requested 572 * minor is in use, return EBUSY. 573 */ 574 if (d != NULL) { 575 mtx_unlock(&bpf_mtx); 576 return (EBUSY); 577 } 578 dev->si_drv1 = (struct bpf_d *)~0; /* mark device in use */ 579 mtx_unlock(&bpf_mtx); 580 581 if ((dev->si_flags & SI_NAMED) == 0) 582 make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600, 583 "bpf%d", dev2unit(dev)); 584 MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO); 585 dev->si_drv1 = d; 586 587 /* 588 * For historical reasons, perform a one-time initialization call to 589 * the buffer routines, even though we're not yet committed to a 590 * particular buffer method. 591 */ 592 bpf_buffer_init(d); 593 d->bd_bufmode = BPF_BUFMODE_BUFFER; 594 d->bd_sig = SIGIO; 595 d->bd_direction = BPF_D_INOUT; 596 d->bd_pid = td->td_proc->p_pid; 597#ifdef MAC 598 mac_bpfdesc_init(d); 599 mac_bpfdesc_create(td->td_ucred, d); 600#endif 601 mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF); 602 callout_init(&d->bd_callout, CALLOUT_MPSAFE); 603 knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL); 604 605 return (0); 606} 607 608/* 609 * Close the descriptor by detaching it from its interface, 610 * deallocating its buffers, and marking it free. 
611 */ 612/* ARGSUSED */ 613static int 614bpfclose(struct cdev *dev, int flags, int fmt, struct thread *td) 615{ 616 struct bpf_d *d = dev->si_drv1; 617 618 BPFD_LOCK(d); 619 if (d->bd_state == BPF_WAITING) 620 callout_stop(&d->bd_callout); 621 d->bd_state = BPF_IDLE; 622 BPFD_UNLOCK(d); 623 funsetown(&d->bd_sigio); 624 mtx_lock(&bpf_mtx); 625 if (d->bd_bif) 626 bpf_detachd(d); 627 mtx_unlock(&bpf_mtx); 628 selwakeuppri(&d->bd_sel, PRINET); 629#ifdef MAC 630 mac_bpfdesc_destroy(d); 631#endif /* MAC */ 632 knlist_destroy(&d->bd_sel.si_note); 633 bpf_freed(d); 634 dev->si_drv1 = NULL; 635 free(d, M_BPF); 636 637 return (0); 638} 639 640/* 641 * bpfread - read next chunk of packets from buffers 642 */ 643static int 644bpfread(struct cdev *dev, struct uio *uio, int ioflag) 645{ 646 struct bpf_d *d = dev->si_drv1; 647 int timed_out; 648 int error; 649 650 /* 651 * Restrict application to use a buffer the same size as 652 * as kernel buffers. 653 */ 654 if (uio->uio_resid != d->bd_bufsize) 655 return (EINVAL); 656 657 BPFD_LOCK(d); 658 d->bd_pid = curthread->td_proc->p_pid; 659 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) { 660 BPFD_UNLOCK(d); 661 return (EOPNOTSUPP); 662 } 663 if (d->bd_state == BPF_WAITING) 664 callout_stop(&d->bd_callout); 665 timed_out = (d->bd_state == BPF_TIMED_OUT); 666 d->bd_state = BPF_IDLE; 667 /* 668 * If the hold buffer is empty, then do a timed sleep, which 669 * ends when the timeout expires or when enough packets 670 * have arrived to fill the store buffer. 671 */ 672 while (d->bd_hbuf == NULL) { 673 if ((d->bd_immediate || timed_out) && d->bd_slen != 0) { 674 /* 675 * A packet(s) either arrived since the previous 676 * read or arrived while we were asleep. 677 * Rotate the buffers and return what's here. 678 */ 679 ROTATE_BUFFERS(d); 680 break; 681 } 682 683 /* 684 * No data is available, check to see if the bpf device 685 * is still pointed at a real interface. 
If not, return 686 * ENXIO so that the userland process knows to rebind 687 * it before using it again. 688 */ 689 if (d->bd_bif == NULL) { 690 BPFD_UNLOCK(d); 691 return (ENXIO); 692 } 693 694 if (ioflag & O_NONBLOCK) { 695 BPFD_UNLOCK(d); 696 return (EWOULDBLOCK); 697 } 698 error = msleep(d, &d->bd_mtx, PRINET|PCATCH, 699 "bpf", d->bd_rtout); 700 if (error == EINTR || error == ERESTART) { 701 BPFD_UNLOCK(d); 702 return (error); 703 } 704 if (error == EWOULDBLOCK) { 705 /* 706 * On a timeout, return what's in the buffer, 707 * which may be nothing. If there is something 708 * in the store buffer, we can rotate the buffers. 709 */ 710 if (d->bd_hbuf) 711 /* 712 * We filled up the buffer in between 713 * getting the timeout and arriving 714 * here, so we don't need to rotate. 715 */ 716 break; 717 718 if (d->bd_slen == 0) { 719 BPFD_UNLOCK(d); 720 return (0); 721 } 722 ROTATE_BUFFERS(d); 723 break; 724 } 725 } 726 /* 727 * At this point, we know we have something in the hold slot. 728 */ 729 BPFD_UNLOCK(d); 730 731 /* 732 * Move data from hold buffer into user space. 733 * We know the entire buffer is transferred since 734 * we checked above that the read buffer is bpf_bufsize bytes. 735 * 736 * XXXRW: More synchronization needed here: what if a second thread 737 * issues a read on the same fd at the same time? Don't want this 738 * getting invalidated. 739 */ 740 error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio); 741 742 BPFD_LOCK(d); 743 d->bd_fbuf = d->bd_hbuf; 744 d->bd_hbuf = NULL; 745 d->bd_hlen = 0; 746 BPFD_UNLOCK(d); 747 748 return (error); 749} 750 751/* 752 * If there are processes sleeping on this descriptor, wake them up. 
753 */ 754static __inline void 755bpf_wakeup(struct bpf_d *d) 756{ 757 758 BPFD_LOCK_ASSERT(d); 759 if (d->bd_state == BPF_WAITING) { 760 callout_stop(&d->bd_callout); 761 d->bd_state = BPF_IDLE; 762 } 763 wakeup(d); 764 if (d->bd_async && d->bd_sig && d->bd_sigio) 765 pgsigio(&d->bd_sigio, d->bd_sig, 0); 766 767 selwakeuppri(&d->bd_sel, PRINET); 768 KNOTE_LOCKED(&d->bd_sel.si_note, 0); 769} 770 771static void 772bpf_timed_out(void *arg) 773{ 774 struct bpf_d *d = (struct bpf_d *)arg; 775 776 BPFD_LOCK(d); 777 if (d->bd_state == BPF_WAITING) { 778 d->bd_state = BPF_TIMED_OUT; 779 if (d->bd_slen != 0) 780 bpf_wakeup(d); 781 } 782 BPFD_UNLOCK(d); 783} 784 785static int 786bpf_ready(struct bpf_d *d) 787{ 788 789 BPFD_LOCK_ASSERT(d); 790 791 if (!bpf_canfreebuf(d) && d->bd_hlen != 0) 792 return (1); 793 if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) && 794 d->bd_slen != 0) 795 return (1); 796 return (0); 797} 798 799static int 800bpfwrite(struct cdev *dev, struct uio *uio, int ioflag) 801{ 802 struct bpf_d *d = dev->si_drv1; 803 struct ifnet *ifp; 804 struct mbuf *m, *mc; 805 struct sockaddr dst; 806 int error, hlen; 807 808 d->bd_pid = curthread->td_proc->p_pid; 809 d->bd_wcount++; 810 if (d->bd_bif == NULL) { 811 d->bd_wdcount++; 812 return (ENXIO); 813 } 814 815 ifp = d->bd_bif->bif_ifp; 816 817 if ((ifp->if_flags & IFF_UP) == 0) { 818 d->bd_wdcount++; 819 return (ENETDOWN); 820 } 821 822 if (uio->uio_resid == 0) { 823 d->bd_wdcount++; 824 return (0); 825 } 826 827 bzero(&dst, sizeof(dst)); 828 m = NULL; 829 hlen = 0; 830 error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp, 831 &m, &dst, &hlen, d->bd_wfilter); 832 if (error) { 833 d->bd_wdcount++; 834 return (error); 835 } 836 d->bd_wfcount++; 837 if (d->bd_hdrcmplt) 838 dst.sa_family = pseudo_AF_HDRCMPLT; 839 840 if (d->bd_feedback) { 841 mc = m_dup(m, M_DONTWAIT); 842 if (mc != NULL) 843 mc->m_pkthdr.rcvif = ifp; 844 } else 845 mc = NULL; 846 847 m->m_pkthdr.len -= hlen; 848 m->m_len -= hlen; 849 m->m_data 
+= hlen; /* XXX */ 850 851#ifdef MAC 852 BPFD_LOCK(d); 853 mac_bpfdesc_create_mbuf(d, m); 854 if (mc != NULL) 855 mac_bpfdesc_create_mbuf(d, mc); 856 BPFD_UNLOCK(d); 857#endif 858 859 error = (*ifp->if_output)(ifp, m, &dst, NULL); 860 if (error) 861 d->bd_wdcount++; 862 863 if (mc != NULL) { 864 if (error == 0) 865 (*ifp->if_input)(ifp, mc); 866 else 867 m_freem(mc); 868 } 869 870 return (error); 871} 872 873/* 874 * Reset a descriptor by flushing its packet buffer and clearing the 875 * receive and drop counts. 876 */ 877static void 878reset_d(struct bpf_d *d) 879{ 880 881 mtx_assert(&d->bd_mtx, MA_OWNED); 882 if (d->bd_hbuf) { 883 /* Free the hold buffer. */ 884 d->bd_fbuf = d->bd_hbuf; 885 d->bd_hbuf = NULL; 886 } 887 d->bd_slen = 0; 888 d->bd_hlen = 0; 889 d->bd_rcount = 0; 890 d->bd_dcount = 0; 891 d->bd_fcount = 0; 892 d->bd_wcount = 0; 893 d->bd_wfcount = 0; 894 d->bd_wdcount = 0; 895 d->bd_zcopy = 0; 896} 897 898/* 899 * FIONREAD Check for read packet available. 900 * SIOCGIFADDR Get interface address - convenient hook to driver. 901 * BIOCGBLEN Get buffer len [for read()]. 902 * BIOCSETF Set ethernet read filter. 903 * BIOCSETWF Set ethernet write filter. 904 * BIOCFLUSH Flush read packet buffer. 905 * BIOCPROMISC Put interface into promiscuous mode. 906 * BIOCGDLT Get link layer type. 907 * BIOCGETIF Get interface name. 908 * BIOCSETIF Set interface. 909 * BIOCSRTIMEOUT Set read timeout. 910 * BIOCGRTIMEOUT Get read timeout. 911 * BIOCGSTATS Get packet stats. 912 * BIOCIMMEDIATE Set immediate mode. 913 * BIOCVERSION Get filter language version. 914 * BIOCGHDRCMPLT Get "header already complete" flag 915 * BIOCSHDRCMPLT Set "header already complete" flag 916 * BIOCGDIRECTION Get packet direction flag 917 * BIOCSDIRECTION Set packet direction flag 918 * BIOCLOCK Set "locked" flag 919 * BIOCFEEDBACK Set packet feedback mode. 920 * BIOCSETZBUF Set current zero-copy buffer locations. 921 * BIOCGETZMAX Get maximum zero-copy buffer size. 
922 * BIOCROTZBUF Force rotation of zero-copy buffer 923 * BIOCSETBUFMODE Set buffer mode. 924 * BIOCGETBUFMODE Get current buffer mode. 925 */ 926/* ARGSUSED */ 927static int 928bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, 929 struct thread *td) 930{ 931 struct bpf_d *d = dev->si_drv1; 932 int error = 0; 933 934 /* 935 * Refresh PID associated with this descriptor. 936 */ 937 BPFD_LOCK(d); 938 d->bd_pid = td->td_proc->p_pid; 939 if (d->bd_state == BPF_WAITING) 940 callout_stop(&d->bd_callout); 941 d->bd_state = BPF_IDLE; 942 BPFD_UNLOCK(d); 943 944 if (d->bd_locked == 1) { 945 switch (cmd) { 946 case BIOCGBLEN: 947 case BIOCFLUSH: 948 case BIOCGDLT: 949 case BIOCGDLTLIST: 950 case BIOCGETIF: 951 case BIOCGRTIMEOUT: 952 case BIOCGSTATS: 953 case BIOCVERSION: 954 case BIOCGRSIG: 955 case BIOCGHDRCMPLT: 956 case BIOCFEEDBACK: 957 case FIONREAD: 958 case BIOCLOCK: 959 case BIOCSRTIMEOUT: 960 case BIOCIMMEDIATE: 961 case TIOCGPGRP: 962 case BIOCROTZBUF: 963 break; 964 default: 965 return (EPERM); 966 } 967 } 968 switch (cmd) { 969 970 default: 971 error = EINVAL; 972 break; 973 974 /* 975 * Check for read packet available. 976 */ 977 case FIONREAD: 978 { 979 int n; 980 981 BPFD_LOCK(d); 982 n = d->bd_slen; 983 if (d->bd_hbuf) 984 n += d->bd_hlen; 985 BPFD_UNLOCK(d); 986 987 *(int *)addr = n; 988 break; 989 } 990 991 case SIOCGIFADDR: 992 { 993 struct ifnet *ifp; 994 995 if (d->bd_bif == NULL) 996 error = EINVAL; 997 else { 998 ifp = d->bd_bif->bif_ifp; 999 error = (*ifp->if_ioctl)(ifp, cmd, addr); 1000 } 1001 break; 1002 } 1003 1004 /* 1005 * Get buffer len [for read()]. 1006 */ 1007 case BIOCGBLEN: 1008 *(u_int *)addr = d->bd_bufsize; 1009 break; 1010 1011 /* 1012 * Set buffer length. 1013 */ 1014 case BIOCSBLEN: 1015 error = bpf_ioctl_sblen(d, (u_int *)addr); 1016 break; 1017 1018 /* 1019 * Set link layer read filter. 
1020 */ 1021 case BIOCSETF: 1022 case BIOCSETWF: 1023 error = bpf_setf(d, (struct bpf_program *)addr, cmd); 1024 break; 1025 1026 /* 1027 * Flush read packet buffer. 1028 */ 1029 case BIOCFLUSH: 1030 BPFD_LOCK(d); 1031 reset_d(d); 1032 BPFD_UNLOCK(d); 1033 break; 1034 1035 /* 1036 * Put interface into promiscuous mode. 1037 */ 1038 case BIOCPROMISC: 1039 if (d->bd_bif == NULL) { 1040 /* 1041 * No interface attached yet. 1042 */ 1043 error = EINVAL; 1044 break; 1045 } 1046 if (d->bd_promisc == 0) { 1047 error = ifpromisc(d->bd_bif->bif_ifp, 1); 1048 if (error == 0) 1049 d->bd_promisc = 1; 1050 } 1051 break; 1052 1053 /* 1054 * Get current data link type. 1055 */ 1056 case BIOCGDLT: 1057 if (d->bd_bif == NULL) 1058 error = EINVAL; 1059 else 1060 *(u_int *)addr = d->bd_bif->bif_dlt; 1061 break; 1062 1063 /* 1064 * Get a list of supported data link types. 1065 */ 1066 case BIOCGDLTLIST: 1067 if (d->bd_bif == NULL) 1068 error = EINVAL; 1069 else 1070 error = bpf_getdltlist(d, (struct bpf_dltlist *)addr); 1071 break; 1072 1073 /* 1074 * Set data link type. 1075 */ 1076 case BIOCSDLT: 1077 if (d->bd_bif == NULL) 1078 error = EINVAL; 1079 else 1080 error = bpf_setdlt(d, *(u_int *)addr); 1081 break; 1082 1083 /* 1084 * Get interface name. 1085 */ 1086 case BIOCGETIF: 1087 if (d->bd_bif == NULL) 1088 error = EINVAL; 1089 else { 1090 struct ifnet *const ifp = d->bd_bif->bif_ifp; 1091 struct ifreq *const ifr = (struct ifreq *)addr; 1092 1093 strlcpy(ifr->ifr_name, ifp->if_xname, 1094 sizeof(ifr->ifr_name)); 1095 } 1096 break; 1097 1098 /* 1099 * Set interface. 1100 */ 1101 case BIOCSETIF: 1102 error = bpf_setif(d, (struct ifreq *)addr); 1103 break; 1104 1105 /* 1106 * Set read timeout. 1107 */ 1108 case BIOCSRTIMEOUT: 1109 { 1110 struct timeval *tv = (struct timeval *)addr; 1111 1112 /* 1113 * Subtract 1 tick from tvtohz() since this isn't 1114 * a one-shot timer. 
1115 */ 1116 if ((error = itimerfix(tv)) == 0) 1117 d->bd_rtout = tvtohz(tv) - 1; 1118 break; 1119 } 1120 1121 /* 1122 * Get read timeout. 1123 */ 1124 case BIOCGRTIMEOUT: 1125 { 1126 struct timeval *tv = (struct timeval *)addr; 1127 1128 tv->tv_sec = d->bd_rtout / hz; 1129 tv->tv_usec = (d->bd_rtout % hz) * tick; 1130 break; 1131 } 1132 1133 /* 1134 * Get packet stats. 1135 */ 1136 case BIOCGSTATS: 1137 { 1138 struct bpf_stat *bs = (struct bpf_stat *)addr; 1139 1140 /* XXXCSJP overflow */ 1141 bs->bs_recv = d->bd_rcount; 1142 bs->bs_drop = d->bd_dcount; 1143 break; 1144 } 1145 1146 /* 1147 * Set immediate mode. 1148 */ 1149 case BIOCIMMEDIATE: 1150 d->bd_immediate = *(u_int *)addr; 1151 break; 1152 1153 case BIOCVERSION: 1154 { 1155 struct bpf_version *bv = (struct bpf_version *)addr; 1156 1157 bv->bv_major = BPF_MAJOR_VERSION; 1158 bv->bv_minor = BPF_MINOR_VERSION; 1159 break; 1160 } 1161 1162 /* 1163 * Get "header already complete" flag 1164 */ 1165 case BIOCGHDRCMPLT: 1166 *(u_int *)addr = d->bd_hdrcmplt; 1167 break; 1168 1169 /* 1170 * Set "header already complete" flag 1171 */ 1172 case BIOCSHDRCMPLT: 1173 d->bd_hdrcmplt = *(u_int *)addr ? 
1 : 0; 1174 break; 1175 1176 /* 1177 * Get packet direction flag 1178 */ 1179 case BIOCGDIRECTION: 1180 *(u_int *)addr = d->bd_direction; 1181 break; 1182 1183 /* 1184 * Set packet direction flag 1185 */ 1186 case BIOCSDIRECTION: 1187 { 1188 u_int direction; 1189 1190 direction = *(u_int *)addr; 1191 switch (direction) { 1192 case BPF_D_IN: 1193 case BPF_D_INOUT: 1194 case BPF_D_OUT: 1195 d->bd_direction = direction; 1196 break; 1197 default: 1198 error = EINVAL; 1199 } 1200 } 1201 break; 1202 1203 case BIOCFEEDBACK: 1204 d->bd_feedback = *(u_int *)addr; 1205 break; 1206 1207 case BIOCLOCK: 1208 d->bd_locked = 1; 1209 break; 1210 1211 case FIONBIO: /* Non-blocking I/O */ 1212 break; 1213 1214 case FIOASYNC: /* Send signal on receive packets */ 1215 d->bd_async = *(int *)addr; 1216 break; 1217 1218 case FIOSETOWN: 1219 error = fsetown(*(int *)addr, &d->bd_sigio); 1220 break; 1221 1222 case FIOGETOWN: 1223 *(int *)addr = fgetown(&d->bd_sigio); 1224 break; 1225 1226 /* This is deprecated, FIOSETOWN should be used instead. */ 1227 case TIOCSPGRP: 1228 error = fsetown(-(*(int *)addr), &d->bd_sigio); 1229 break; 1230 1231 /* This is deprecated, FIOGETOWN should be used instead. */ 1232 case TIOCGPGRP: 1233 *(int *)addr = -fgetown(&d->bd_sigio); 1234 break; 1235 1236 case BIOCSRSIG: /* Set receive signal */ 1237 { 1238 u_int sig; 1239 1240 sig = *(u_int *)addr; 1241 1242 if (sig >= NSIG) 1243 error = EINVAL; 1244 else 1245 d->bd_sig = sig; 1246 break; 1247 } 1248 case BIOCGRSIG: 1249 *(u_int *)addr = d->bd_sig; 1250 break; 1251 1252 case BIOCGETBUFMODE: 1253 *(u_int *)addr = d->bd_bufmode; 1254 break; 1255 1256 case BIOCSETBUFMODE: 1257 /* 1258 * Allow the buffering mode to be changed as long as we 1259 * haven't yet committed to a particular mode. Our 1260 * definition of commitment, for now, is whether or not a 1261 * buffer has been allocated or an interface attached, since 1262 * that's the point where things get tricky. 
1263 */ 1264 switch (*(u_int *)addr) { 1265 case BPF_BUFMODE_BUFFER: 1266 break; 1267 1268 case BPF_BUFMODE_ZBUF: 1269 if (bpf_zerocopy_enable) 1270 break; 1271 /* FALLSTHROUGH */ 1272 1273 default: 1274 return (EINVAL); 1275 } 1276 1277 BPFD_LOCK(d); 1278 if (d->bd_sbuf != NULL || d->bd_hbuf != NULL || 1279 d->bd_fbuf != NULL || d->bd_bif != NULL) { 1280 BPFD_UNLOCK(d); 1281 return (EBUSY); 1282 } 1283 d->bd_bufmode = *(u_int *)addr; 1284 BPFD_UNLOCK(d); 1285 break; 1286 1287 case BIOCGETZMAX: 1288 return (bpf_ioctl_getzmax(td, d, (size_t *)addr)); 1289 1290 case BIOCSETZBUF: 1291 return (bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr)); 1292 1293 case BIOCROTZBUF: 1294 return (bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr)); 1295 } 1296 return (error); 1297} 1298 1299/* 1300 * Set d's packet filter program to fp. If this file already has a filter, 1301 * free it and replace it. Returns EINVAL for bogus requests. 1302 */ 1303static int 1304bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd) 1305{ 1306 struct bpf_insn *fcode, *old; 1307 u_int wfilter, flen, size; 1308#ifdef BPF_JITTER 1309 bpf_jit_filter *ofunc; 1310#endif 1311 1312 if (cmd == BIOCSETWF) { 1313 old = d->bd_wfilter; 1314 wfilter = 1; 1315#ifdef BPF_JITTER 1316 ofunc = NULL; 1317#endif 1318 } else { 1319 wfilter = 0; 1320 old = d->bd_rfilter; 1321#ifdef BPF_JITTER 1322 ofunc = d->bd_bfilter; 1323#endif 1324 } 1325 if (fp->bf_insns == NULL) { 1326 if (fp->bf_len != 0) 1327 return (EINVAL); 1328 BPFD_LOCK(d); 1329 if (wfilter) 1330 d->bd_wfilter = NULL; 1331 else { 1332 d->bd_rfilter = NULL; 1333#ifdef BPF_JITTER 1334 d->bd_bfilter = NULL; 1335#endif 1336 } 1337 reset_d(d); 1338 BPFD_UNLOCK(d); 1339 if (old != NULL) 1340 free((caddr_t)old, M_BPF); 1341#ifdef BPF_JITTER 1342 if (ofunc != NULL) 1343 bpf_destroy_jit_filter(ofunc); 1344#endif 1345 return (0); 1346 } 1347 flen = fp->bf_len; 1348 if (flen > bpf_maxinsns) 1349 return (EINVAL); 1350 1351 size = flen * sizeof(*fp->bf_insns); 1352 
fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK); 1353 if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 && 1354 bpf_validate(fcode, (int)flen)) { 1355 BPFD_LOCK(d); 1356 if (wfilter) 1357 d->bd_wfilter = fcode; 1358 else { 1359 d->bd_rfilter = fcode; 1360#ifdef BPF_JITTER 1361 d->bd_bfilter = bpf_jitter(fcode, flen); 1362#endif 1363 } 1364 reset_d(d); 1365 BPFD_UNLOCK(d); 1366 if (old != NULL) 1367 free((caddr_t)old, M_BPF); 1368#ifdef BPF_JITTER 1369 if (ofunc != NULL) 1370 bpf_destroy_jit_filter(ofunc); 1371#endif 1372 1373 return (0); 1374 } 1375 free((caddr_t)fcode, M_BPF); 1376 return (EINVAL); 1377} 1378 1379/* 1380 * Detach a file from its current interface (if attached at all) and attach 1381 * to the interface indicated by the name stored in ifr. 1382 * Return an errno or 0. 1383 */ 1384static int 1385bpf_setif(struct bpf_d *d, struct ifreq *ifr) 1386{ 1387 struct bpf_if *bp; 1388 struct ifnet *theywant; 1389 1390 theywant = ifunit(ifr->ifr_name); 1391 if (theywant == NULL || theywant->if_bpf == NULL) 1392 return (ENXIO); 1393 1394 bp = theywant->if_bpf; 1395 1396 /* 1397 * Behavior here depends on the buffering model. If we're using 1398 * kernel memory buffers, then we can allocate them here. If we're 1399 * using zero-copy, then the user process must have registered 1400 * buffers by the time we get here. If not, return an error. 1401 * 1402 * XXXRW: There are locking issues here with multi-threaded use: what 1403 * if two threads try to set the interface at once? 1404 */ 1405 switch (d->bd_bufmode) { 1406 case BPF_BUFMODE_BUFFER: 1407 if (d->bd_sbuf == NULL) 1408 bpf_buffer_alloc(d); 1409 KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL")); 1410 break; 1411 1412 case BPF_BUFMODE_ZBUF: 1413 if (d->bd_sbuf == NULL) 1414 return (EINVAL); 1415 break; 1416 1417 default: 1418 panic("bpf_setif: bufmode %d", d->bd_bufmode); 1419 } 1420 if (bp != d->bd_bif) { 1421 if (d->bd_bif) 1422 /* 1423 * Detach if attached to something else. 
1424 */ 1425 bpf_detachd(d); 1426 1427 bpf_attachd(d, bp); 1428 } 1429 BPFD_LOCK(d); 1430 reset_d(d); 1431 BPFD_UNLOCK(d); 1432 return (0); 1433} 1434 1435/* 1436 * Support for select() and poll() system calls 1437 * 1438 * Return true iff the specific operation will not block indefinitely. 1439 * Otherwise, return false but make a note that a selwakeup() must be done. 1440 */ 1441static int 1442bpfpoll(struct cdev *dev, int events, struct thread *td) 1443{ 1444 struct bpf_d *d; 1445 int revents; 1446 1447 d = dev->si_drv1; 1448 if (d->bd_bif == NULL) 1449 return (ENXIO); 1450 1451 /* 1452 * Refresh PID associated with this descriptor. 1453 */ 1454 revents = events & (POLLOUT | POLLWRNORM); 1455 BPFD_LOCK(d); 1456 d->bd_pid = td->td_proc->p_pid; 1457 if (events & (POLLIN | POLLRDNORM)) { 1458 if (bpf_ready(d)) 1459 revents |= events & (POLLIN | POLLRDNORM); 1460 else { 1461 selrecord(td, &d->bd_sel); 1462 /* Start the read timeout if necessary. */ 1463 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 1464 callout_reset(&d->bd_callout, d->bd_rtout, 1465 bpf_timed_out, d); 1466 d->bd_state = BPF_WAITING; 1467 } 1468 } 1469 } 1470 BPFD_UNLOCK(d); 1471 return (revents); 1472} 1473 1474/* 1475 * Support for kevent() system call. Register EVFILT_READ filters and 1476 * reject all others. 1477 */ 1478int 1479bpfkqfilter(struct cdev *dev, struct knote *kn) 1480{ 1481 struct bpf_d *d = (struct bpf_d *)dev->si_drv1; 1482 1483 if (kn->kn_filter != EVFILT_READ) 1484 return (1); 1485 1486 /* 1487 * Refresh PID associated with this descriptor. 
1488 */ 1489 BPFD_LOCK(d); 1490 d->bd_pid = curthread->td_proc->p_pid; 1491 kn->kn_fop = &bpfread_filtops; 1492 kn->kn_hook = d; 1493 knlist_add(&d->bd_sel.si_note, kn, 1); 1494 BPFD_UNLOCK(d); 1495 1496 return (0); 1497} 1498 1499static void 1500filt_bpfdetach(struct knote *kn) 1501{ 1502 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 1503 1504 knlist_remove(&d->bd_sel.si_note, kn, 0); 1505} 1506 1507static int 1508filt_bpfread(struct knote *kn, long hint) 1509{ 1510 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 1511 int ready; 1512 1513 BPFD_LOCK_ASSERT(d); 1514 ready = bpf_ready(d); 1515 if (ready) { 1516 kn->kn_data = d->bd_slen; 1517 if (d->bd_hbuf) 1518 kn->kn_data += d->bd_hlen; 1519 } 1520 else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 1521 callout_reset(&d->bd_callout, d->bd_rtout, 1522 bpf_timed_out, d); 1523 d->bd_state = BPF_WAITING; 1524 } 1525 1526 return (ready); 1527} 1528 1529/* 1530 * Incoming linkage from device drivers. Process the packet pkt, of length 1531 * pktlen, which is stored in a contiguous buffer. The packet is parsed 1532 * by each process' filter, and if accepted, stashed into the corresponding 1533 * buffer. 
1534 */ 1535void 1536bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 1537{ 1538 struct bpf_d *d; 1539 u_int slen; 1540 int gottime; 1541 struct timeval tv; 1542 1543 gottime = 0; 1544 BPFIF_LOCK(bp); 1545 LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 1546 BPFD_LOCK(d); 1547 ++d->bd_rcount; 1548#ifdef BPF_JITTER 1549 if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL) 1550 slen = (*(d->bd_bfilter->func))(pkt, pktlen, pktlen); 1551 else 1552#endif 1553 slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen); 1554 if (slen != 0) { 1555 d->bd_fcount++; 1556 if (!gottime) { 1557 microtime(&tv); 1558 gottime = 1; 1559 } 1560#ifdef MAC 1561 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 1562#endif 1563 catchpacket(d, pkt, pktlen, slen, 1564 bpf_append_bytes, &tv); 1565 } 1566 BPFD_UNLOCK(d); 1567 } 1568 BPFIF_UNLOCK(bp); 1569} 1570 1571#define BPF_CHECK_DIRECTION(d, i) \ 1572 (((d)->bd_direction == BPF_D_IN && (i) == NULL) || \ 1573 ((d)->bd_direction == BPF_D_OUT && (i) != NULL)) 1574#define BPF_CHECK_DUPLICATE(d, i) \ 1575 ((d)->bd_feedback && \ 1576 (d)->bd_direction == BPF_D_INOUT && (i) == NULL) 1577 1578/* 1579 * Incoming linkage from device drivers, when packet is in an mbuf chain. 1580 */ 1581void 1582bpf_mtap(struct bpf_if *bp, struct mbuf *m) 1583{ 1584 struct bpf_d *d; 1585 u_int pktlen, slen; 1586 int gottime; 1587 struct timeval tv; 1588 1589 gottime = 0; 1590 1591 pktlen = m_length(m, NULL); 1592 1593 BPFIF_LOCK(bp); 1594 LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 1595 if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif) || 1596 BPF_CHECK_DUPLICATE(d, m->m_pkthdr.rcvif)) 1597 continue; 1598 BPFD_LOCK(d); 1599 ++d->bd_rcount; 1600#ifdef BPF_JITTER 1601 /* XXX We cannot handle multiple mbufs. 
*/ 1602 if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL && 1603 m->m_next == NULL) 1604 slen = (*(d->bd_bfilter->func))(mtod(m, u_char *), 1605 pktlen, pktlen); 1606 else 1607#endif 1608 slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0); 1609 if (slen != 0) { 1610 d->bd_fcount++; 1611 if (!gottime) { 1612 microtime(&tv); 1613 gottime = 1; 1614 } 1615#ifdef MAC 1616 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 1617#endif 1618 catchpacket(d, (u_char *)m, pktlen, slen, 1619 bpf_append_mbuf, &tv); 1620 } 1621 BPFD_UNLOCK(d); 1622 } 1623 BPFIF_UNLOCK(bp); 1624} 1625 1626/* 1627 * Incoming linkage from device drivers, when packet is in 1628 * an mbuf chain and to be prepended by a contiguous header. 1629 */ 1630void 1631bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) 1632{ 1633 struct mbuf mb; 1634 struct bpf_d *d; 1635 u_int pktlen, slen; 1636 int gottime; 1637 struct timeval tv; 1638 1639 gottime = 0; 1640 1641 pktlen = m_length(m, NULL); 1642 /* 1643 * Craft on-stack mbuf suitable for passing to bpf_filter. 1644 * Note that we cut corners here; we only setup what's 1645 * absolutely needed--this mbuf should never go anywhere else. 
1646 */ 1647 mb.m_next = m; 1648 mb.m_data = data; 1649 mb.m_len = dlen; 1650 pktlen += dlen; 1651 1652 BPFIF_LOCK(bp); 1653 LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 1654 if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif) || 1655 BPF_CHECK_DUPLICATE(d, m->m_pkthdr.rcvif)) 1656 continue; 1657 BPFD_LOCK(d); 1658 ++d->bd_rcount; 1659 slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0); 1660 if (slen != 0) { 1661 d->bd_fcount++; 1662 if (!gottime) { 1663 microtime(&tv); 1664 gottime = 1; 1665 } 1666#ifdef MAC 1667 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 1668#endif 1669 catchpacket(d, (u_char *)&mb, pktlen, slen, 1670 bpf_append_mbuf, &tv); 1671 } 1672 BPFD_UNLOCK(d); 1673 } 1674 BPFIF_UNLOCK(bp); 1675} 1676 1677#undef BPF_CHECK_DIRECTION 1678#undef BPF_CHECK_DUPLICATE 1679 1680/* 1681 * Move the packet data from interface memory (pkt) into the 1682 * store buffer. "cpfn" is the routine called to do the actual data 1683 * transfer. bcopy is passed in to copy contiguous chunks, while 1684 * bpf_append_mbuf is passed in to copy mbuf chains. In the latter case, 1685 * pkt is really an mbuf. 1686 */ 1687static void 1688catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, 1689 void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int), 1690 struct timeval *tv) 1691{ 1692 struct bpf_hdr hdr; 1693 int totlen, curlen; 1694 int hdrlen = d->bd_bif->bif_hdrlen; 1695 int do_wakeup = 0; 1696 1697 BPFD_LOCK_ASSERT(d); 1698 1699 /* 1700 * Detect whether user space has released a buffer back to us, and if 1701 * so, move it from being a hold buffer to a free buffer. This may 1702 * not be the best place to do it (for example, we might only want to 1703 * run this check if we need the space), but for now it's a reliable 1704 * spot to do it. 1705 */ 1706 if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) { 1707 d->bd_fbuf = d->bd_hbuf; 1708 d->bd_hbuf = NULL; 1709 d->bd_hlen = 0; 1710 } 1711 1712 /* 1713 * Figure out how many bytes to move. 
If the packet is 1714 * greater or equal to the snapshot length, transfer that 1715 * much. Otherwise, transfer the whole packet (unless 1716 * we hit the buffer size limit). 1717 */ 1718 totlen = hdrlen + min(snaplen, pktlen); 1719 if (totlen > d->bd_bufsize) 1720 totlen = d->bd_bufsize; 1721 1722 /* 1723 * Round up the end of the previous packet to the next longword. 1724 * 1725 * Drop the packet if there's no room and no hope of room 1726 * If the packet would overflow the storage buffer or the storage 1727 * buffer is considered immutable by the buffer model, try to rotate 1728 * the buffer and wakeup pending processes. 1729 */ 1730 curlen = BPF_WORDALIGN(d->bd_slen); 1731 if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) { 1732 if (d->bd_fbuf == NULL) { 1733 /* 1734 * There's no room in the store buffer, and no 1735 * prospect of room, so drop the packet. Notify the 1736 * buffer model. 1737 */ 1738 bpf_buffull(d); 1739 ++d->bd_dcount; 1740 return; 1741 } 1742 ROTATE_BUFFERS(d); 1743 do_wakeup = 1; 1744 curlen = 0; 1745 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) 1746 /* 1747 * Immediate mode is set, or the read timeout has already 1748 * expired during a select call. A packet arrived, so the 1749 * reader should be woken up. 1750 */ 1751 do_wakeup = 1; 1752 1753 /* 1754 * Append the bpf header. Note we append the actual header size, but 1755 * move forward the length of the header plus padding. 1756 */ 1757 bzero(&hdr, sizeof(hdr)); 1758 hdr.bh_tstamp = *tv; 1759 hdr.bh_datalen = pktlen; 1760 hdr.bh_hdrlen = hdrlen; 1761 hdr.bh_caplen = totlen - hdrlen; 1762 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr)); 1763 1764 /* 1765 * Copy the packet data into the store buffer and update its length. 1766 */ 1767 (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen); 1768 d->bd_slen = curlen + totlen; 1769 1770 if (do_wakeup) 1771 bpf_wakeup(d); 1772} 1773 1774/* 1775 * Free buffers currently in use by a descriptor. 
1776 * Called on close. 1777 */ 1778static void 1779bpf_freed(struct bpf_d *d) 1780{ 1781 1782 /* 1783 * We don't need to lock out interrupts since this descriptor has 1784 * been detached from its interface and it yet hasn't been marked 1785 * free. 1786 */ 1787 bpf_free(d); 1788 if (d->bd_rfilter) { 1789 free((caddr_t)d->bd_rfilter, M_BPF); 1790#ifdef BPF_JITTER 1791 bpf_destroy_jit_filter(d->bd_bfilter); 1792#endif 1793 } 1794 if (d->bd_wfilter) 1795 free((caddr_t)d->bd_wfilter, M_BPF); 1796 mtx_destroy(&d->bd_mtx); 1797} 1798 1799/* 1800 * Attach an interface to bpf. dlt is the link layer type; hdrlen is the 1801 * fixed size of the link header (variable length headers not yet supported). 1802 */ 1803void 1804bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) 1805{ 1806 1807 bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf); 1808} 1809 1810/* 1811 * Attach an interface to bpf. ifp is a pointer to the structure 1812 * defining the interface to be attached, dlt is the link layer type, 1813 * and hdrlen is the fixed size of the link header (variable length 1814 * headers are not yet supporrted). 1815 */ 1816void 1817bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) 1818{ 1819 struct bpf_if *bp; 1820 1821 bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO); 1822 if (bp == NULL) 1823 panic("bpfattach"); 1824 1825 LIST_INIT(&bp->bif_dlist); 1826 bp->bif_ifp = ifp; 1827 bp->bif_dlt = dlt; 1828 mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF); 1829 KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized")); 1830 *driverp = bp; 1831 1832 mtx_lock(&bpf_mtx); 1833 LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next); 1834 mtx_unlock(&bpf_mtx); 1835 1836 /* 1837 * Compute the length of the bpf header. This is not necessarily 1838 * equal to SIZEOF_BPF_HDR because we want to insert spacing such 1839 * that the network layer header begins on a longword boundary (for 1840 * performance reasons and to alleviate alignment restrictions). 
1841 */ 1842 bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; 1843 1844 if (bootverbose) 1845 if_printf(ifp, "bpf attached\n"); 1846} 1847 1848/* 1849 * Detach bpf from an interface. This involves detaching each descriptor 1850 * associated with the interface, and leaving bd_bif NULL. Notify each 1851 * descriptor as it's detached so that any sleepers wake up and get 1852 * ENXIO. 1853 */ 1854void 1855bpfdetach(struct ifnet *ifp) 1856{ 1857 struct bpf_if *bp; 1858 struct bpf_d *d; 1859 1860 /* Locate BPF interface information */ 1861 mtx_lock(&bpf_mtx); 1862 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 1863 if (ifp == bp->bif_ifp) 1864 break; 1865 } 1866 1867 /* Interface wasn't attached */ 1868 if ((bp == NULL) || (bp->bif_ifp == NULL)) { 1869 mtx_unlock(&bpf_mtx); 1870 printf("bpfdetach: %s was not attached\n", ifp->if_xname); 1871 return; 1872 } 1873 1874 LIST_REMOVE(bp, bif_next); 1875 mtx_unlock(&bpf_mtx); 1876 1877 while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) { 1878 bpf_detachd(d); 1879 BPFD_LOCK(d); 1880 bpf_wakeup(d); 1881 BPFD_UNLOCK(d); 1882 } 1883 1884 mtx_destroy(&bp->bif_mtx); 1885 free(bp, M_BPF); 1886} 1887 1888/* 1889 * Get a list of available data link type of the interface. 1890 */ 1891static int 1892bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) 1893{ 1894 int n, error; 1895 struct ifnet *ifp; 1896 struct bpf_if *bp; 1897 1898 ifp = d->bd_bif->bif_ifp; 1899 n = 0; 1900 error = 0; 1901 mtx_lock(&bpf_mtx); 1902 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 1903 if (bp->bif_ifp != ifp) 1904 continue; 1905 if (bfl->bfl_list != NULL) { 1906 if (n >= bfl->bfl_len) { 1907 mtx_unlock(&bpf_mtx); 1908 return (ENOMEM); 1909 } 1910 error = copyout(&bp->bif_dlt, 1911 bfl->bfl_list + n, sizeof(u_int)); 1912 } 1913 n++; 1914 } 1915 mtx_unlock(&bpf_mtx); 1916 bfl->bfl_len = n; 1917 return (error); 1918} 1919 1920/* 1921 * Set the data link type of a BPF instance. 
1922 */ 1923static int 1924bpf_setdlt(struct bpf_d *d, u_int dlt) 1925{ 1926 int error, opromisc; 1927 struct ifnet *ifp; 1928 struct bpf_if *bp; 1929 1930 if (d->bd_bif->bif_dlt == dlt) 1931 return (0); 1932 ifp = d->bd_bif->bif_ifp; 1933 mtx_lock(&bpf_mtx); 1934 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 1935 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) 1936 break; 1937 } 1938 mtx_unlock(&bpf_mtx); 1939 if (bp != NULL) { 1940 opromisc = d->bd_promisc; 1941 bpf_detachd(d); 1942 bpf_attachd(d, bp); 1943 BPFD_LOCK(d); 1944 reset_d(d); 1945 BPFD_UNLOCK(d); 1946 if (opromisc) { 1947 error = ifpromisc(bp->bif_ifp, 1); 1948 if (error) 1949 if_printf(bp->bif_ifp, 1950 "bpf_setdlt: ifpromisc failed (%d)\n", 1951 error); 1952 else 1953 d->bd_promisc = 1; 1954 } 1955 } 1956 return (bp == NULL ? EINVAL : 0); 1957} 1958 1959static void 1960bpf_clone(void *arg, struct ucred *cred, char *name, int namelen, 1961 struct cdev **dev) 1962{ 1963 int u; 1964 1965 if (*dev != NULL) 1966 return; 1967 if (dev_stdclone(name, NULL, "bpf", &u) != 1) 1968 return; 1969 *dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600, 1970 "bpf%d", u); 1971 dev_ref(*dev); 1972 (*dev)->si_flags |= SI_CHEAPCLONE; 1973 return; 1974} 1975 1976static void 1977bpf_drvinit(void *unused) 1978{ 1979 1980 mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF); 1981 LIST_INIT(&bpf_iflist); 1982 EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000); 1983} 1984 1985static void 1986bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd) 1987{ 1988 1989 bzero(d, sizeof(*d)); 1990 BPFD_LOCK_ASSERT(bd); 1991 d->bd_structsize = sizeof(*d); 1992 d->bd_immediate = bd->bd_immediate; 1993 d->bd_promisc = bd->bd_promisc; 1994 d->bd_hdrcmplt = bd->bd_hdrcmplt; 1995 d->bd_direction = bd->bd_direction; 1996 d->bd_feedback = bd->bd_feedback; 1997 d->bd_async = bd->bd_async; 1998 d->bd_rcount = bd->bd_rcount; 1999 d->bd_dcount = bd->bd_dcount; 2000 d->bd_fcount = bd->bd_fcount; 2001 d->bd_sig = bd->bd_sig; 2002 
d->bd_slen = bd->bd_slen; 2003 d->bd_hlen = bd->bd_hlen; 2004 d->bd_bufsize = bd->bd_bufsize; 2005 d->bd_pid = bd->bd_pid; 2006 strlcpy(d->bd_ifname, 2007 bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ); 2008 d->bd_locked = bd->bd_locked; 2009 d->bd_wcount = bd->bd_wcount; 2010 d->bd_wdcount = bd->bd_wdcount; 2011 d->bd_wfcount = bd->bd_wfcount; 2012 d->bd_zcopy = bd->bd_zcopy; 2013 d->bd_bufmode = bd->bd_bufmode; 2014} 2015 2016static int 2017bpf_stats_sysctl(SYSCTL_HANDLER_ARGS) 2018{ 2019 struct xbpf_d *xbdbuf, *xbd; 2020 int index, error; 2021 struct bpf_if *bp; 2022 struct bpf_d *bd; 2023 2024 /* 2025 * XXX This is not technically correct. It is possible for non 2026 * privileged users to open bpf devices. It would make sense 2027 * if the users who opened the devices were able to retrieve 2028 * the statistics for them, too. 2029 */ 2030 error = priv_check(req->td, PRIV_NET_BPF); 2031 if (error) 2032 return (error); 2033 if (req->oldptr == NULL) 2034 return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd))); 2035 if (bpf_bpfd_cnt == 0) 2036 return (SYSCTL_OUT(req, 0, 0)); 2037 xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK); 2038 mtx_lock(&bpf_mtx); 2039 if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) { 2040 mtx_unlock(&bpf_mtx); 2041 free(xbdbuf, M_BPF); 2042 return (ENOMEM); 2043 } 2044 index = 0; 2045 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2046 BPFIF_LOCK(bp); 2047 LIST_FOREACH(bd, &bp->bif_dlist, bd_next) { 2048 xbd = &xbdbuf[index++]; 2049 BPFD_LOCK(bd); 2050 bpfstats_fill_xbpf(xbd, bd); 2051 BPFD_UNLOCK(bd); 2052 } 2053 BPFIF_UNLOCK(bp); 2054 } 2055 mtx_unlock(&bpf_mtx); 2056 error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd)); 2057 free(xbdbuf, M_BPF); 2058 return (error); 2059} 2060 2061SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL); 2062 2063#else /* !DEV_BPF && !NETGRAPH_BPF */ 2064/* 2065 * NOP stubs to allow bpf-using drivers to load and function. 
2066 * 2067 * A 'better' implementation would allow the core bpf functionality 2068 * to be loaded at runtime. 2069 */ 2070static struct bpf_if bp_null; 2071 2072void 2073bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 2074{ 2075} 2076 2077void 2078bpf_mtap(struct bpf_if *bp, struct mbuf *m) 2079{ 2080} 2081 2082void 2083bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m) 2084{ 2085} 2086 2087void 2088bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) 2089{ 2090 2091 bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf); 2092} 2093 2094void 2095bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) 2096{ 2097 2098 *driverp = &bp_null; 2099} 2100 2101void 2102bpfdetach(struct ifnet *ifp) 2103{ 2104} 2105 2106u_int 2107bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen) 2108{ 2109 return -1; /* "no filter" behaviour */ 2110} 2111 2112int 2113bpf_validate(const struct bpf_insn *f, int len) 2114{ 2115 return 0; /* false */ 2116} 2117 2118#endif /* !DEV_BPF && !NETGRAPH_BPF */ 2119