sys/net/bpf.c

139823Simp/*-
11150Swollman * Copyright (c) 1990, 1991, 1993
1541Srgrimes *	The Regents of the University of California.  All rights reserved.
1541Srgrimes *
1541Srgrimes * This code is derived from the Stanford/CMU enet packet filter,
1541Srgrimes * (net/enet.c) distributed as part of 4.3BSD, and code contributed
1541Srgrimes * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
1541Srgrimes * Berkeley Laboratory.
1541Srgrimes *
1541Srgrimes * Redistribution and use in source and binary forms, with or without
1541Srgrimes * modification, are permitted provided that the following conditions
1541Srgrimes * are met:
1541Srgrimes * 1. Redistributions of source code must retain the above copyright
1541Srgrimes *    notice, this list of conditions and the following disclaimer.
1541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
1541Srgrimes *    notice, this list of conditions and the following disclaimer in the
1541Srgrimes *    documentation and/or other materials provided with the distribution.
1541Srgrimes * 4. Neither the name of the University nor the names of its contributors
1541Srgrimes *    may be used to endorse or promote products derived from this software
1541Srgrimes *    without specific prior written permission.
1541Srgrimes *
1541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
1541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
1541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
1541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
11150Swollman * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
1541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
1541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
172467Ssilby * SUCH DAMAGE.
172467Ssilby *
172467Ssilby *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
32752Seivind */
125680Sbms
54263Sshin#include <sys/cdefs.h>
56041Sshin__FBSDID("$FreeBSD: stable/10/sys/net/bpf.c 250945 2013-05-23 21:33:10Z ghelmer $");
29514Sjoerg
29514Sjoerg#include "opt_bpf.h"
1541Srgrimes#include "opt_compat.h"
1541Srgrimes#include "opt_netgraph.h"
50673Sjlemon
216758Slstewart#include <sys/types.h>
12172Sphk#include <sys/param.h>
216758Slstewart#include <sys/lock.h>
12172Sphk#include <sys/rwlock.h>
194368Sbz#include <sys/systm.h>
1541Srgrimes#include <sys/conf.h>
1541Srgrimes#include <sys/fcntl.h>
55679Sshin#include <sys/jail.h>
55679Sshin#include <sys/malloc.h>
55679Sshin#include <sys/mbuf.h>
164033Srwatson#include <sys/time.h>
48758Sgreen#include <sys/priv.h>
1541Srgrimes#include <sys/proc.h>
1541Srgrimes#include <sys/signalvar.h>
1541Srgrimes#include <sys/filio.h>
75619Skris#include <sys/sockio.h>
34923Sbde#include <sys/ttycom.h>
92760Sjeff#include <sys/uio.h>
1541Srgrimes
1541Srgrimes#include <sys/event.h>
1541Srgrimes#include <sys/file.h>
196019Srwatson#include <sys/poll.h>
1541Srgrimes#include <sys/proc.h>
215166Slstewart
1541Srgrimes#include <sys/socket.h>
221250Sbz
1541Srgrimes#include <net/if.h>
221250Sbz#define	BPF_INTERNAL
1541Srgrimes#include <net/bpf.h>
221250Sbz#include <net/bpf_buffer.h>
221250Sbz#ifdef BPF_JITTER
55679Sshin#include <net/bpf_jitter.h>
55679Sshin#endif
55679Sshin#include <net/bpf_zerocopy.h>
55679Sshin#include <net/bpfdesc.h>
148385Sume#include <net/vnet.h>
122922Sandre
55679Sshin#include <netinet/in.h>
221250Sbz#include <netinet/if_ether.h>
1541Srgrimes#include <sys/kernel.h>
1541Srgrimes#include <sys/sysctl.h>
1541Srgrimes
1541Srgrimes#include <net80211/ieee80211_freebsd.h>
171605Ssilby
55679Sshin#include <security/mac/mac_framework.h>
55679Sshin
55679SshinMALLOC_DEFINE(M_BPF, "BPF", "BPF data");
1541Srgrimes
6283Swollman#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
6283Swollman
6283Swollman#define PRINET  26			/* interruptible */
221250Sbz
55679Sshin#define	SIZEOF_BPF_HDR(type)	\
221250Sbz    (offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen))
252555Snp
252555Snp#ifdef COMPAT_FREEBSD32
252555Snp#include <sys/mount.h>
1541Srgrimes#include <compat/freebsd32/freebsd32.h>
171167Sgnn#define BPF_ALIGNMENT32 sizeof(int32_t)
105199Ssam#define BPF_WORDALIGN32(x) (((x)+(BPF_ALIGNMENT32-1))&~(BPF_ALIGNMENT32-1))
125680Sbms
105199Ssam#ifndef BURN_BRIDGES
105199Ssam/*
105199Ssam * 32-bit version of structure prepended to each packet.  We use this header
125680Sbms * instead of the standard one for 32-bit streams.  We mark the a stream as
183001Sbz * 32-bit the first time we see a 32-bit compat ioctl request.
171167Sgnn */
105199Ssamstruct bpf_hdr32 {
58698Sjlemon	struct timeval32 bh_tstamp;	/* time stamp */
82122Ssilby	uint32_t	bh_caplen;	/* length of captured portion */
58698Sjlemon	uint32_t	bh_datalen;	/* original length of packet */
163606Srwatson	uint16_t	bh_hdrlen;	/* length of bpf header (this struct
163606Srwatson					   plus alignment padding) */
207369Sbz};
52904Sshin#endif
207369Sbz
52904Sshinstruct bpf_program32 {
52904Sshin	u_int bf_len;
182846Sbz	uint32_t bf_insns;
182846Sbz};
182846Sbz
182846Sbzstruct bpf_dltlist32 {
182846Sbz	u_int	bfl_len;
182848Sbz	u_int	bfl_list;
182846Sbz};
182846Sbz
182846Sbz#define	BIOCSETF32	_IOW('B', 103, struct bpf_program32)
182846Sbz#define	BIOCSRTIMEOUT32	_IOW('B', 109, struct timeval32)
182846Sbz#define	BIOCGRTIMEOUT32	_IOR('B', 110, struct timeval32)
182848Sbz#define	BIOCGDLTLIST32	_IOWR('B', 121, struct bpf_dltlist32)
182846Sbz#define	BIOCSETWF32	_IOW('B', 123, struct bpf_program32)
182846Sbz#define	BIOCSETFNR32	_IOW('B', 130, struct bpf_program32)
182846Sbz#endif
182846Sbz
195699Srwatson/*
195699Srwatson * bpf_iflist is a list of BPF interface structures, each corresponding to a
185348Szec * specific DLT.  The same network interface might have several BPF interface
185348Szec * structures registered by different layers in the stack (i.e., 802.11
182846Sbz * frames, ethernet frames, etc).
182846Sbz */
182846Sbzstatic LIST_HEAD(, bpf_if)	bpf_iflist, bpf_freelist;
182846Sbzstatic struct mtx	bpf_mtx;		/* bpf global lock */
182846Sbzstatic int		bpf_bpfd_cnt;
182846Sbz
182846Sbzstatic void	bpf_attachd(struct bpf_d *, struct bpf_if *);
182848Sbzstatic void	bpf_detachd(struct bpf_d *);
182846Sbzstatic void	bpf_detachd_locked(struct bpf_d *);
182846Sbzstatic void	bpf_freed(struct bpf_d *);
182846Sbzstatic int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
182846Sbz		    struct sockaddr *, int *, struct bpf_insn *);
182846Sbzstatic int	bpf_setif(struct bpf_d *, struct ifreq *);
182848Sbzstatic void	bpf_timed_out(void *);
182846Sbzstatic __inline void
182846Sbz		bpf_wakeup(struct bpf_d *);
182846Sbzstatic void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
182846Sbz		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
195699Srwatson		    struct bintime *);
195699Srwatsonstatic void	reset_d(struct bpf_d *);
185348Szecstatic int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
185348Szecstatic int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
221250Sbzstatic int	bpf_setdlt(struct bpf_d *, u_int);
182846Sbzstatic void	filt_bpfdetach(struct knote *);
124258Sandrestatic int	filt_bpfread(struct knote *, long);
124258Sandrestatic void	bpf_drvinit(void *);
124258Sandrestatic int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
124258Sandre
124258SandreSYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
124258Sandreint bpf_maxinsns = BPF_MAXINSNS;
124258SandreSYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
124258Sandre    &bpf_maxinsns, 0, "Maximum bpf program instructions");
207369Sbzstatic int bpf_zerocopy_enable = 0;
195699SrwatsonSYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
195699Srwatson    &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
252285Sjimharrisstatic SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
124258Sandre    bpf_stats_sysctl, "bpf statistics portal");
207369Sbz
195699Srwatsonstatic VNET_DEFINE(int, bpf_optimize_writers) = 0;
195699Srwatson#define	V_bpf_optimize_writers VNET(bpf_optimize_writers)
183550SzecSYSCTL_VNET_INT(_net_bpf, OID_AUTO, optimize_writers,
12296Sphk    CTLFLAG_RW, &VNET_NAME(bpf_optimize_writers), 0,
172454Srwatson    "Do not send packets until BPF program is set");
171640Sandre
171640Sandrestatic	d_open_t	bpfopen;
171640Sandrestatic	d_read_t	bpfread;
50426Sjlemonstatic	d_write_t	bpfwrite;
121307Ssilbystatic	d_ioctl_t	bpfioctl;
167721Sandrestatic	d_poll_t	bpfpoll;
50426Sjlemonstatic	d_kqfilter_t	bpfkqfilter;
55198Smsmith
183550Szecstatic struct cdevsw bpf_cdevsw = {
167721Sandre	.d_version =	D_VERSION,
55198Smsmith	.d_open =	bpfopen,
217322Smdf	.d_read =	bpfread,
195699Srwatson	.d_write =	bpfwrite,
36079Swollman	.d_ioctl =	bpfioctl,
215701Sdim	.d_poll =	bpfpoll,
207369Sbz	.d_name =	"bpf",
195699Srwatson	.d_kqfilter =	bpfkqfilter,
195699Srwatson};
72959Sjlemon
70103Sphkstatic struct filterops bpfread_filtops = {
215701Sdim	.f_isfd = 1,
207369Sbz	.f_detach = filt_bpfdetach,
195699Srwatson	.f_event = filt_bpfread,
195699Srwatson};
183550Szec
82122Ssilbyeventhandler_tag	bpf_ifdetach_cookie = NULL;
197236Sandre
197236Sandre/*
197236Sandre * LOCKING MODEL USED BY BPF:
197236Sandre * Locks:
221023Sattilio * 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal,
221023Sattilio * some global counters and every bpf_if reference.
221023Sattilio * 2) Interface lock. Rwlock, used to protect list of BPF descriptors and their filters.
221023Sattilio * 3) Descriptor lock. Mutex, used to protect BPF buffers and various structure fields
221023Sattilio *   used by bpf_mtap code.
221023Sattilio *
195699Srwatson * Lock order:
195727Srwatson *
130989Sps * Global lock, interface lock, descriptor lock
216758Slstewart *
216758Slstewart * We have to acquire interface lock before descriptor main lock due to BPF_MTAP[2]
98211Shsu * working model. In many places (like bpf_detachd) we start with BPF descriptor
235051Sglebius * (and we need to at least rlock it to get reliable interface pointer). This
211462Sandre * gives us potential LOR. As a result, we use global lock to protect from bpf_if
211462Sandre * change in every such place.
12296Sphk *
7684Sdg * Changing d->bd_bif is protected by 1) global lock, 2) interface lock and
32821Sdg * 3) descriptor main wlock.
43562Smsmith * Reading bd_bif can be protected by any of these locks, typically global lock.
43562Smsmith *
43562Smsmith * Changing read/write BPF filter is protected by the same three locks,
7684Sdg * the same applies for reading.
7684Sdg *
32821Sdg * Sleeping in global lock is not allowed due to bpfdetach() using it.
7684Sdg */
1541Srgrimes
1541Srgrimes/*
111145Sjlemon * Wrapper functions for various buffering methods.  If the set of buffer
111145Sjlemon * modes expands, we will probably want to introduce a switch data structure
123608Sjhb * similar to protosw, et.
111145Sjlemon */
34881Swollmanstatic void
168615Sandrebpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
168615Sandre    u_int len)
172309Ssilby{
216749Slstewart
216758Slstewart	BPFD_LOCK_ASSERT(d);
34881Swollman
34881Swollman	switch (d->bd_bufmode) {
215701Sdim	case BPF_BUFMODE_BUFFER:
195727Srwatson		return (bpf_buffer_append_bytes(d, buf, offset, src, len));
195699Srwatson
169683Sandre	case BPF_BUFMODE_ZBUF:
157967Srwatson		d->bd_zcopy++;
111145Sjlemon		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
157977Srwatson
157977Srwatson	default:
157977Srwatson		panic("bpf_buf_append_bytes");
157977Srwatson	}
34881Swollman}
157431Srwatson
1541Srgrimesstatic void
157927Spsbpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
157927Sps    u_int len)
157927Sps{
157927Sps
181803Sbz	BPFD_LOCK_ASSERT(d);
190787Szec
169541Sandre	switch (d->bd_bufmode) {
157927Sps	case BPF_BUFMODE_BUFFER:
157927Sps		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
160491Sups
160491Sups	case BPF_BUFMODE_ZBUF:
160491Sups		d->bd_zcopy++;
165657Sjhb		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
165657Sjhb
160491Sups	default:
160491Sups		panic("bpf_buf_append_mbuf");
160491Sups	}
160491Sups}
1541Srgrimes
157431Srwatson/*
1541Srgrimes * This function gets called when the free buffer is re-assigned.
185088Szec */
169454Srwatsonstatic void
216758Slstewartbpf_buf_reclaimed(struct bpf_d *d)
216758Slstewart{
216758Slstewart
216758Slstewart	BPFD_LOCK_ASSERT(d);
216758Slstewart
216758Slstewart	switch (d->bd_bufmode) {
216758Slstewart	case BPF_BUFMODE_BUFFER:
185088Szec		return;
77900Speter
43576Smsmith	case BPF_BUFMODE_ZBUF:
43562Smsmith		bpf_zerocopy_buf_reclaimed(d);
43562Smsmith		return;
43562Smsmith
205157Srwatson	default:
222748Srwatson		panic("bpf_buf_reclaimed");
222748Srwatson	}
205157Srwatson}
190787Szec
190787Szec/*
190787Szec * If the buffer mechanism has a way to decide that a held buffer can be made
190787Szec * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
190787Szec * returned if the buffer can be discarded, (0) is returned if it cannot.
190787Szec */
207369Sbzstatic int
190787Szecbpf_canfreebuf(struct bpf_d *d)
190787Szec{
190787Szec
207369Sbz	BPFD_LOCK_ASSERT(d);
207369Sbz
190787Szec	switch (d->bd_bufmode) {
190787Szec	case BPF_BUFMODE_ZBUF:
190787Szec		return (bpf_zerocopy_canfreebuf(d));
190787Szec	}
190787Szec	return (0);
190787Szec}
190787Szec
285980Sdelphij/*
285980Sdelphij * Allow the buffer model to indicate that the current store buffer is
190787Szec * immutable, regardless of the appearance of space.  Return (1) if the
190787Szec * buffer is writable, and (0) if not.
190787Szec */
190787Szecstatic int
190787Szecbpf_canwritebuf(struct bpf_d *d)
190787Szec{
190787Szec	BPFD_LOCK_ASSERT(d);
190787Szec
190787Szec	switch (d->bd_bufmode) {
190787Szec	case BPF_BUFMODE_ZBUF:
190787Szec		return (bpf_zerocopy_canwritebuf(d));
190787Szec	}
190787Szec	return (1);
190787Szec}
197236Sandre
197236Sandre/*
224010Sbz * Notify buffer model that an attempt to write to the store buffer has
197236Sandre * resulted in a dropped packet, in which case the buffer may be considered
224010Sbz * full.
223839Sandre */
197236Sandrestatic void
223839Sandrebpf_buffull(struct bpf_d *d)
197236Sandre{
197236Sandre
55679Sshin	BPFD_LOCK_ASSERT(d);
55679Sshin
55679Sshin	switch (d->bd_bufmode) {
55679Sshin	case BPF_BUFMODE_ZBUF:
55679Sshin		bpf_zerocopy_buffull(d);
55679Sshin		break;
55679Sshin	}
55679Sshin}
1541Srgrimes
55679Sshin/*
190787Szec * Notify the buffer model that a buffer has moved into the hold position.
157977Srwatson */
128452Ssilbyvoid
128452Ssilbybpf_bufheld(struct bpf_d *d)
157927Sps{
157927Sps
1541Srgrimes	BPFD_LOCK_ASSERT(d);
1541Srgrimes
193731Szec	switch (d->bd_bufmode) {
128452Ssilby	case BPF_BUFMODE_ZBUF:
193731Szec		bpf_zerocopy_bufheld(d);
193731Szec		break;
193731Szec	}
193731Szec}
193731Szec
204838Sbzstatic void
205157Srwatsonbpf_free(struct bpf_d *d)
204838Sbz{
204838Sbz
193731Szec	switch (d->bd_bufmode) {
193731Szec	case BPF_BUFMODE_BUFFER:
193731Szec		return (bpf_buffer_free(d));
193731Szec
157431Srwatson	case BPF_BUFMODE_ZBUF:
128452Ssilby		return (bpf_zerocopy_free(d));
157431Srwatson
128452Ssilby	default:
128452Ssilby		panic("bpf_buf_free");
1541Srgrimes	}
78642Ssilby}
78642Ssilby
78642Ssilbystatic int
1541Srgrimesbpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
78642Ssilby{
157431Srwatson
1541Srgrimes	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
111144Sjlemon		return (EOPNOTSUPP);
1541Srgrimes	return (bpf_buffer_uiomove(d, buf, len, uio));
178285Srwatson}
138410Srwatson
55679Sshinstatic int
55679Sshinbpf_ioctl_sblen(struct bpf_d *d, u_int *i)
78642Ssilby{
55679Sshin
78642Ssilby	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
55679Sshin		return (EOPNOTSUPP);
186141Sbz	return (bpf_buffer_ioctl_sblen(d, i));
55679Sshin}
55679Sshin
55679Sshinstatic int
182855Sbzbpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
55679Sshin{
55679Sshin
221250Sbz	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
221250Sbz		return (EOPNOTSUPP);
221250Sbz	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
221250Sbz}
55679Sshin
221250Sbzstatic int
78642Ssilbybpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
111144Sjlemon{
55679Sshin
111144Sjlemon	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
111144Sjlemon		return (EOPNOTSUPP);
111144Sjlemon	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
111144Sjlemon}
111144Sjlemon
111144Sjlemonstatic int
111144Sjlemonbpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
111144Sjlemon{
111144Sjlemon
111144Sjlemon	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
111144Sjlemon		return (EOPNOTSUPP);
111144Sjlemon	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
78642Ssilby}
221250Sbz
111144Sjlemon/*
111144Sjlemon * General BPF functions.
111144Sjlemon */
111144Sjlemonstatic int
111144Sjlemonbpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
111144Sjlemon    struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
111144Sjlemon{
111144Sjlemon	const struct ieee80211_bpf_params *p;
111144Sjlemon	struct ether_header *eh;
111144Sjlemon	struct mbuf *m;
78642Ssilby	int error;
78642Ssilby	int len;
78642Ssilby	int hlen;
78642Ssilby	int slen;
78642Ssilby
78642Ssilby	/*
78642Ssilby	 * Build a sockaddr based on the data link layer type.
78642Ssilby	 * We do this at this level because the ethernet header
157431Srwatson	 * is copied directly into the data field of the sockaddr.
78642Ssilby	 * In the case of SLIP, there is no header and the packet
179487Srwatson	 * is forwarded as is.
78642Ssilby	 * Also, we are careful to leave room at the front of the mbuf
179487Srwatson	 * for the link level header.
179487Srwatson	 */
179487Srwatson	switch (linktype) {
179487Srwatson
179487Srwatson	case DLT_SLIP:
1541Srgrimes		sockp->sa_family = AF_INET;
1541Srgrimes		hlen = 0;
1541Srgrimes		break;
1541Srgrimes
127870Srwatson	case DLT_EN10MB:
1541Srgrimes		sockp->sa_family = AF_UNSPEC;
1541Srgrimes		/* XXX Would MAXLINKHDR be better? */
78642Ssilby		hlen = ETHER_HDR_LEN;
78642Ssilby		break;
78642Ssilby
1541Srgrimes	case DLT_FDDI:
1541Srgrimes		sockp->sa_family = AF_IMPLINK;
1541Srgrimes		hlen = 0;
31848Sjulian		break;
31848Sjulian
1541Srgrimes	case DLT_RAW:
1541Srgrimes		sockp->sa_family = AF_UNSPEC;
169454Srwatson		hlen = 0;
169454Srwatson		break;
1541Srgrimes
167785Sandre	case DLT_NULL:
1541Srgrimes		/*
55679Sshin		 * null interface types require a 4 byte pseudo header which
55679Sshin		 * corresponds to the address family of the packet.
55679Sshin		 */
55679Sshin		sockp->sa_family = AF_UNSPEC;
55679Sshin		hlen = 4;
55679Sshin		break;
55679Sshin
128905Srwatson	case DLT_ATM_RFC1483:
1541Srgrimes		/*
101137Srwatson		 * en atm driver requires 4-byte atm pseudo header.
101137Srwatson		 * though it isn't standard, vpi:vci needs to be
55679Sshin		 * specified anyway.
221250Sbz		 */
55679Sshin		sockp->sa_family = AF_UNSPEC;
55679Sshin		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
55679Sshin		break;
55679Sshin
127871Srwatson	case DLT_PPP:
122327Ssam		sockp->sa_family = AF_UNSPEC;
122327Ssam		hlen = 4;	/* This should match PPP_HDRLEN */
178285Srwatson		break;
128905Srwatson
128905Srwatson	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
128905Srwatson		sockp->sa_family = AF_IEEE80211;
128905Srwatson		hlen = 0;
57576Sps		break;
122327Ssam
57576Sps	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
57576Sps		sockp->sa_family = AF_IEEE80211;
57576Sps		sockp->sa_len = 12;	/* XXX != 0 */
1541Srgrimes		hlen = sizeof(struct ieee80211_bpf_params);
127870Srwatson		break;
151967Sandre
1541Srgrimes	default:
1541Srgrimes		return (EIO);
1541Srgrimes	}
1541Srgrimes
55679Sshin	len = uio->uio_resid;
55679Sshin	if (len < hlen || len - hlen > ifp->if_mtu)
133874Srwatson		return (EMSGSIZE);
55679Sshin
55679Sshin	m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR);
55679Sshin	if (m == NULL)
55679Sshin		return (EIO);
55679Sshin	m->m_pkthdr.len = m->m_len = len;
55679Sshin	*mp = m;
55679Sshin
55679Sshin	error = uiomove(mtod(m, u_char *), len, uio);
55679Sshin	if (error)
55679Sshin		goto bad;
55679Sshin
1541Srgrimes	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
1541Srgrimes	if (slen == 0) {
178888Sjulian		error = EPERM;
178888Sjulian		goto bad;
178888Sjulian	}
178888Sjulian
1541Srgrimes	/* Check for multicast destination */
127870Srwatson	switch (linktype) {
55679Sshin	case DLT_EN10MB:
223765Seri		eh = mtod(m, struct ether_header *);
55679Sshin		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
1541Srgrimes			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
1541Srgrimes			    ETHER_ADDR_LEN) == 0)
55679Sshin				m->m_flags |= M_BCAST;
55679Sshin			else
55679Sshin				m->m_flags |= M_MCAST;
55679Sshin		}
55679Sshin		break;
55679Sshin	}
55679Sshin
188578Sluigi	/*
55679Sshin	 * Make room for link header, and copy it to sockaddr
55679Sshin	 */
55679Sshin	if (hlen != 0) {
55679Sshin		if (sockp->sa_family == AF_IEEE80211) {
55679Sshin			/*
55679Sshin			 * Collect true length from the parameter header
55679Sshin			 * NB: sockp is known to be zero'd so if we do a
55679Sshin			 *     short copy unspecified parameters will be
55679Sshin			 *     zero.
55679Sshin			 * NB: packet may not be aligned after stripping
55679Sshin			 *     bpf params
188578Sluigi			 * XXX check ibp_vers
1541Srgrimes			 */
1541Srgrimes			p = mtod(m, const struct ieee80211_bpf_params *);
55679Sshin			hlen = p->ibp_len;
55679Sshin			if (hlen > sizeof(sockp->sa_data)) {
90198Sume				error = EINVAL;
90198Sume				goto bad;
90198Sume			}
238237Sbz		}
55679Sshin		bcopy(m->m_data, sockp->sa_data, hlen);
221250Sbz	}
55679Sshin	*hdrlen = hlen;
221250Sbz
221250Sbz	return (0);
221250Sbzbad:
221250Sbz	m_freem(m);
133874Srwatson	return (error);
133874Srwatson}
133874Srwatson
181803Sbz/*
181803Sbz * Attach file to the bpf interface, i.e. make d listen on bp.
133874Srwatson */
133874Srwatsonstatic void
221250Sbzbpf_attachd(struct bpf_d *d, struct bpf_if *bp)
1541Srgrimes{
1541Srgrimes	int op_w;
127870Srwatson
101106Srwatson	BPF_LOCK_ASSERT();
122327Ssam
101106Srwatson	/*
101106Srwatson	 * Save sysctl value to protect from sysctl change
101106Srwatson	 * between reads
101106Srwatson	 */
178285Srwatson	op_w = V_bpf_optimize_writers;
172930Srwatson
101106Srwatson	if (d->bd_bif != NULL)
101106Srwatson		bpf_detachd_locked(d);
119245Srwatson	/*
119245Srwatson	 * Point d at bp, and add d to the interface's list.
101106Srwatson	 * Since there are many applicaiotns using BPF for
172930Srwatson	 * sending raw packets only (dhcpd, cdpd are good examples)
101106Srwatson	 * we can delay adding d to the list of active listeners until
101106Srwatson	 * some filter is configured.
55679Sshin	 */
55679Sshin
55679Sshin	BPFIF_WLOCK(bp);
55679Sshin	BPFD_LOCK(d);
55679Sshin
127870Srwatson	d->bd_bif = bp;
55679Sshin
1541Srgrimes	if (op_w != 0) {
55679Sshin		/* Add to writers-only list */
55679Sshin		LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
238237Sbz		/*
238237Sbz		 * We decrement bd_writer on every filter set operation.
55679Sshin		 * First BIOCSETF is done by pcap_open_live() to set up
55679Sshin		 * snap length. After that appliation usually sets its own filter
238247Sbz		 */
238237Sbz		d->bd_writer = 2;
238237Sbz	} else
127870Srwatson		LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
127870Srwatson
221250Sbz	BPFD_UNLOCK(d);
55679Sshin	BPFIF_WUNLOCK(bp);
221250Sbz
221250Sbz	bpf_bpfd_cnt++;
221250Sbz
221250Sbz	CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
133874Srwatson	    __func__, d->bd_pid, d->bd_writer ? "writer" : "active");
238247Sbz
133874Srwatson	if (op_w == 0)
133874Srwatson		EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
133874Srwatson}
221250Sbz
6283Swollman/*
122327Ssam * Add d to the list of active bp filters.
55679Sshin * Reuqires bpf_attachd() to be called before
6283Swollman */
55679Sshinstatic void
122922Sandrebpf_upgraded(struct bpf_d *d)
122922Sandre{
221250Sbz	struct bpf_if *bp;
221250Sbz
122922Sandre	BPF_LOCK_ASSERT();
221250Sbz
221250Sbz	bp = d->bd_bif;
221250Sbz
221250Sbz	/*
1541Srgrimes	 * Filter can be set several times without specifying interface.
1541Srgrimes	 * Mark d as reader and exit.
1541Srgrimes	 */
1541Srgrimes	if (bp == NULL) {
1541Srgrimes		BPFD_LOCK(d);
34881Swollman		d->bd_writer = 0;
34881Swollman		BPFD_UNLOCK(d);
1541Srgrimes		return;
1541Srgrimes	}
157431Srwatson
1541Srgrimes	BPFIF_WLOCK(bp);
111145Sjlemon	BPFD_LOCK(d);
111145Sjlemon
55679Sshin	/* Remove from writers-only list */
55679Sshin	LIST_REMOVE(d, bd_next);
55679Sshin	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
1541Srgrimes	/* Mark d as reader */
190787Szec	d->bd_writer = 0;
111145Sjlemon
111145Sjlemon	BPFD_UNLOCK(d);
111145Sjlemon	BPFIF_WUNLOCK(bp);
215166Slstewart
215166Slstewart	CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid);
215166Slstewart
215166Slstewart	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
215166Slstewart}
215166Slstewart
215166Slstewart/*
215166Slstewart * Detach a file from its interface.
215166Slstewart */
215166Slstewartstatic void
215166Slstewartbpf_detachd(struct bpf_d *d)
215166Slstewart{
215166Slstewart	BPF_LOCK();
215166Slstewart	bpf_detachd_locked(d);
215166Slstewart	BPF_UNLOCK();
215166Slstewart}
215166Slstewart
215166Slstewartstatic void
215166Slstewartbpf_detachd_locked(struct bpf_d *d)
215166Slstewart{
216758Slstewart	int error;
216758Slstewart	struct bpf_if *bp;
216758Slstewart	struct ifnet *ifp;
216758Slstewart
216758Slstewart	CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
216758Slstewart
191688Szec	BPF_LOCK_ASSERT();
191688Szec
191688Szec	/* Check if descriptor is attached */
172309Ssilby	if ((bp = d->bd_bif) == NULL)
111145Sjlemon		return;
55679Sshin
55679Sshin	BPFIF_WLOCK(bp);
181803Sbz	BPFD_LOCK(d);
55679Sshin
181803Sbz	/* Save bd_writer value */
1541Srgrimes	error = d->bd_writer;
50673Sjlemon
172309Ssilby	/*
172309Ssilby	 * Remove d from the interface's descriptor list.
172309Ssilby	 */
172309Ssilby	LIST_REMOVE(d, bd_next);
172309Ssilby
50673Sjlemon	ifp = bp->bif_ifp;
181803Sbz	d->bd_bif = NULL;
6283Swollman	BPFD_UNLOCK(d);
181803Sbz	BPFIF_WUNLOCK(bp);
169317Sandre
147735Sps	bpf_bpfd_cnt--;
34881Swollman
1541Srgrimes	/* Call event handler iff d is attached */
1541Srgrimes	if (error == 0)
16367Swollman		EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
1541Srgrimes
1541Srgrimes	/*
1541Srgrimes	 * Check if this descriptor had requested promiscuous mode.
16367Swollman	 * If so, turn it off.
100335Sdillon	 */
16367Swollman	if (d->bd_promisc) {
1541Srgrimes		d->bd_promisc = 0;
1541Srgrimes		CURVNET_SET(ifp->if_vnet);
50673Sjlemon		error = ifpromisc(ifp, 0);
133874Srwatson		CURVNET_RESTORE();
56564Sshin		if (error != 0 && error != ENXIO) {
56564Sshin			/*
56564Sshin			 * ENXIO can happen if a pccard is unplugged
56564Sshin			 * Something is really wrong if we were able to put
181803Sbz			 * the driver into promiscuous mode, but can't
157432Srwatson			 * take it out.
34881Swollman			 */
1541Srgrimes			if_printf(bp->bif_ifp,
1541Srgrimes				"bpf_detach: ifpromisc failed (%d)\n", error);
1541Srgrimes		}
215392Slstewart	}
215392Slstewart}
215392Slstewart
215392Slstewart/*
215392Slstewart * Close the descriptor by detaching it from its interface,
215392Slstewart * deallocating its buffers, and marking it free.
215392Slstewart */
215392Slstewartstatic void
215392Slstewartbpf_dtor(void *data)
215392Slstewart{
215392Slstewart	struct bpf_d *d = data;
215392Slstewart
215392Slstewart	BPFD_LOCK(d);
215392Slstewart	if (d->bd_state == BPF_WAITING)
215392Slstewart		callout_stop(&d->bd_callout);
215392Slstewart	d->bd_state = BPF_IDLE;
215392Slstewart	BPFD_UNLOCK(d);
215392Slstewart	funsetown(&d->bd_sigio);
215392Slstewart	bpf_detachd(d);
215392Slstewart#ifdef MAC
215392Slstewart	mac_bpfdesc_destroy(d);
215392Slstewart#endif /* MAC */
215392Slstewart	seldrain(&d->bd_sel);
215392Slstewart	knlist_destroy(&d->bd_sel.si_note);
215392Slstewart	callout_drain(&d->bd_callout);
215392Slstewart	bpf_freed(d);
215392Slstewart	free(d, M_BPF);
215392Slstewart}
215392Slstewart
215392Slstewart/*
215392Slstewart * Open ethernet device.  Returns ENXIO for illegal minor device number,
215392Slstewart * EBUSY if file is open by another process.
215392Slstewart */
215392Slstewart/* ARGSUSED */
215392Slstewartstatic	int
215392Slstewartbpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
215392Slstewart{
215392Slstewart	struct bpf_d *d;
215392Slstewart	int error, size;
215392Slstewart
215392Slstewart	d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
215392Slstewart	error = devfs_set_cdevpriv(d, bpf_dtor);
215392Slstewart	if (error != 0) {
215392Slstewart		free(d, M_BPF);
215392Slstewart		return (error);
215392Slstewart	}
215392Slstewart
215392Slstewart	/*
215392Slstewart	 * For historical reasons, perform a one-time initialization call to
215392Slstewart	 * the buffer routines, even though we're not yet committed to a
215392Slstewart	 * particular buffer method.
215392Slstewart	 */
215392Slstewart	bpf_buffer_init(d);
215392Slstewart	d->bd_hbuf_in_use = 0;
215392Slstewart	d->bd_bufmode = BPF_BUFMODE_BUFFER;
215392Slstewart	d->bd_sig = SIGIO;
215392Slstewart	d->bd_direction = BPF_D_INOUT;
215392Slstewart	BPF_PID_REFRESH(d, td);
215392Slstewart#ifdef MAC
215392Slstewart	mac_bpfdesc_init(d);
215392Slstewart	mac_bpfdesc_create(td->td_ucred, d);
215392Slstewart#endif
215392Slstewart	mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
1541Srgrimes	callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
1541Srgrimes	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
1541Srgrimes
1541Srgrimes	/* Allocate default buffers */
1541Srgrimes	size = d->bd_bufsize;
157431Srwatson	bpf_buffer_ioctl_sblen(d, &size);
1541Srgrimes
1541Srgrimes	return (0);
1541Srgrimes}
181803Sbz
178285Srwatson/*
146864Srwatson *  bpfread - read next chunk of packets from buffers
1541Srgrimes */
1541Srgrimesstatic	int
252555Snpbpfread(struct cdev *dev, struct uio *uio, int ioflag)
190948Srwatson{
1541Srgrimes	struct bpf_d *d;
190948Srwatson	int error;
1541Srgrimes	int non_block;
1541Srgrimes	int timed_out;
1541Srgrimes
1541Srgrimes	error = devfs_get_cdevpriv((void **)&d);
1541Srgrimes	if (error != 0)
1541Srgrimes		return (error);
157376Srwatson
157431Srwatson	/*
1541Srgrimes	 * Restrict application to use a buffer the same size as
1541Srgrimes	 * as kernel buffers.
1541Srgrimes	 */
55679Sshin	if (uio->uio_resid != d->bd_bufsize)
55679Sshin		return (EINVAL);
55679Sshin
1541Srgrimes	non_block = ((ioflag & O_NONBLOCK) != 0);
178285Srwatson
138410Srwatson	BPFD_LOCK(d);
1541Srgrimes	BPF_PID_REFRESH_CUR(d);
204829Srwatson	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
204829Srwatson		BPFD_UNLOCK(d);
204829Srwatson		return (EOPNOTSUPP);
204829Srwatson	}
204829Srwatson	if (d->bd_state == BPF_WAITING)
204829Srwatson		callout_stop(&d->bd_callout);
204829Srwatson	timed_out = (d->bd_state == BPF_TIMED_OUT);
204829Srwatson	d->bd_state = BPF_IDLE;
204829Srwatson	while (d->bd_hbuf_in_use) {
204829Srwatson		error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
204829Srwatson		    PRINET|PCATCH, "bd_hbuf", 0);
204829Srwatson		if (error != 0) {
204829Srwatson			BPFD_UNLOCK(d);
50673Sjlemon			return (error);
172309Ssilby		}
172309Ssilby	}
172309Ssilby	/*
172309Ssilby	 * If the hold buffer is empty, then do a timed sleep, which
172309Ssilby	 * ends when the timeout expires or when enough packets
50673Sjlemon	 * have arrived to fill the store buffer.
50673Sjlemon	 */
9373Swollman	while (d->bd_hbuf == NULL) {
9373Swollman		if (d->bd_slen != 0) {
122922Sandre			/*
122922Sandre			 * A packet(s) either arrived since the previous
122922Sandre			 * read or arrived while we were asleep.
122922Sandre			 */
122922Sandre			if (d->bd_immediate || non_block || timed_out) {
122922Sandre				/*
1541Srgrimes				 * Rotate the buffers and return what's here
122922Sandre				 * if we are in immediate mode, non-blocking
122922Sandre				 * flag is set, or this descriptor timed out.
122922Sandre				 */
1541Srgrimes				ROTATE_BUFFERS(d);
122922Sandre				break;
1541Srgrimes			}
122922Sandre		}
122922Sandre
122922Sandre		/*
122922Sandre		 * No data is available, check to see if the bpf device
160925Srwatson		 * is still pointed at a real interface.  If not, return
160925Srwatson		 * ENXIO so that the userland process knows to rebind
160925Srwatson		 * it before using it again.
1541Srgrimes		 */
122922Sandre		if (d->bd_bif == NULL) {
122922Sandre			BPFD_UNLOCK(d);
1541Srgrimes			return (ENXIO);
1541Srgrimes		}
1541Srgrimes
1541Srgrimes		if (non_block) {
122922Sandre			BPFD_UNLOCK(d);
122922Sandre			return (EWOULDBLOCK);
122922Sandre		}
122922Sandre		error = msleep(d, &d->bd_lock, PRINET|PCATCH,
55679Sshin		     "bpf", d->bd_rtout);
55679Sshin		if (error == EINTR || error == ERESTART) {
55679Sshin			BPFD_UNLOCK(d);
55679Sshin			return (error);
55679Sshin		}
55679Sshin		if (error == EWOULDBLOCK) {
55679Sshin			/*
55679Sshin			 * On a timeout, return what's in the buffer,
55679Sshin			 * which may be nothing.  If there is something
122922Sandre			 * in the store buffer, we can rotate the buffers.
122922Sandre			 */
122922Sandre			if (d->bd_hbuf)
122922Sandre				/*
122922Sandre				 * We filled up the buffer in between
122922Sandre				 * getting the timeout and arriving
122922Sandre				 * here, so we don't need to rotate.
133874Srwatson				 */
122922Sandre				break;
122922Sandre
122922Sandre			if (d->bd_slen == 0) {
1541Srgrimes				BPFD_UNLOCK(d);
122922Sandre				return (0);
1541Srgrimes			}
213158Slstewart			ROTATE_BUFFERS(d);
252555Snp			break;
252555Snp		}
174757Skmacy	}
252555Snp	/*
252555Snp	 * At this point, we know we have something in the hold slot.
252555Snp	 */
174757Skmacy	d->bd_hbuf_in_use = 1;
130989Sps	BPFD_UNLOCK(d);
215166Slstewart
215166Slstewart	/*
215166Slstewart	 * Move data from hold buffer into user space.
215166Slstewart	 * We know the entire buffer is transferred since
215166Slstewart	 * we checked above that the read buffer is bpf_bufsize bytes.
216758Slstewart  	 *
216758Slstewart	 * We do not have to worry about simultaneous reads because
215166Slstewart	 * we waited for sole access to the hold buffer above.
32821Sdg	 */
108265Shsu	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
190787Szec
111145Sjlemon	BPFD_LOCK(d);
111145Sjlemon	KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
111145Sjlemon	d->bd_fbuf = d->bd_hbuf;
157430Srwatson	d->bd_hbuf = NULL;
157430Srwatson	d->bd_hlen = 0;
111145Sjlemon	bpf_buf_reclaimed(d);
111145Sjlemon	d->bd_hbuf_in_use = 0;
157431Srwatson	wakeup(&d->bd_hbuf_in_use);
111145Sjlemon	BPFD_UNLOCK(d);
111145Sjlemon
157376Srwatson	return (error);
111145Sjlemon}
181803Sbz
178285Srwatson/*
138410Srwatson * If there are processes sleeping on this descriptor, wake them up.
252555Snp */
174757Skmacystatic __inline void
252555Snpbpf_wakeup(struct bpf_d *d)
252555Snp{
158009Srwatson
190948Srwatson	BPFD_LOCK_ASSERT(d);
157376Srwatson	if (d->bd_state == BPF_WAITING) {
157376Srwatson		callout_stop(&d->bd_callout);
157376Srwatson		d->bd_state = BPF_IDLE;
189848Srwatson	}
157376Srwatson	wakeup(d);
157376Srwatson	if (d->bd_async && d->bd_sig && d->bd_sigio)
189848Srwatson		pgsigio(&d->bd_sigio, d->bd_sig, 0);
178285Srwatson
157376Srwatson	selwakeuppri(&d->bd_sel, PRINET);
157376Srwatson	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
157376Srwatson}
157376Srwatson
157376Srwatsonstatic void
157376Srwatsonbpf_timed_out(void *arg)
157376Srwatson{
1541Srgrimes	struct bpf_d *d = (struct bpf_d *)arg;
1541Srgrimes
1541Srgrimes	BPFD_LOCK_ASSERT(d);
157431Srwatson
1541Srgrimes	if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout))
183550Szec		return;
157431Srwatson	if (d->bd_state == BPF_WAITING) {
183550Szec		d->bd_state = BPF_TIMED_OUT;
183550Szec		if (d->bd_slen != 0)
183550Szec			bpf_wakeup(d);
195760Srwatson	}
183550Szec}
183550Szec
55198Smsmithstatic int
55198Smsmithbpf_ready(struct bpf_d *d)
1541Srgrimes{
55198Smsmith
55198Smsmith	BPFD_LOCK_ASSERT(d);
55198Smsmith
55198Smsmith	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
55198Smsmith		return (1);
133874Srwatson	if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
133874Srwatson	    d->bd_slen != 0)
55198Smsmith		return (1);
181803Sbz	return (0);
181803Sbz}
189848Srwatson
111145Sjlemonstatic int
178285Srwatsonbpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
127870Srwatson{
213158Slstewart	struct bpf_d *d;
144857Sps	struct ifnet *ifp;
55198Smsmith	struct mbuf *m, *mc;
178285Srwatson	struct sockaddr dst;
55198Smsmith	int error, hlen;
181803Sbz
183550Szec	error = devfs_get_cdevpriv((void **)&d);
55198Smsmith	if (error != 0)
195760Srwatson		return (error);
1541Srgrimes
1541Srgrimes	BPF_PID_REFRESH_CUR(d);
1541Srgrimes	d->bd_wcount++;
1541Srgrimes	/* XXX: locking required */
1541Srgrimes	if (d->bd_bif == NULL) {
1541Srgrimes		d->bd_wdcount++;
72960Sjlemon		return (ENXIO);
72960Sjlemon	}
72960Sjlemon
1541Srgrimes	ifp = d->bd_bif->bif_ifp;
98211Shsu
157431Srwatson	if ((ifp->if_flags & IFF_UP) == 0) {
1541Srgrimes		d->bd_wdcount++;
157432Srwatson		return (ENETDOWN);
1541Srgrimes	}
181803Sbz
178285Srwatson	if (uio->uio_resid == 0) {
157433Srwatson		d->bd_wdcount++;
189848Srwatson		return (0);
189848Srwatson	}
157433Srwatson
157433Srwatson	bzero(&dst, sizeof(dst));
157432Srwatson	m = NULL;
157433Srwatson	hlen = 0;
138410Srwatson	/* XXX: bpf_movein() can sleep */
1541Srgrimes	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
1541Srgrimes	    &m, &dst, &hlen, d->bd_wfilter);
1541Srgrimes	if (error) {
1541Srgrimes		d->bd_wdcount++;
1541Srgrimes		return (error);
1541Srgrimes	}
1541Srgrimes	d->bd_wfcount++;
1541Srgrimes	if (d->bd_hdrcmplt)
110896Shsu		dst.sa_family = pseudo_AF_HDRCMPLT;
110896Shsu
139222Srwatson	if (d->bd_feedback) {
1541Srgrimes		mc = m_dup(m, M_NOWAIT);
98211Shsu		if (mc != NULL)
157376Srwatson			mc->m_pkthdr.rcvif = ifp;
157376Srwatson		/* Set M_PROMISC for outgoing packets to be discarded. */
157376Srwatson		if (d->bd_direction == BPF_D_INOUT)
157376Srwatson			m->m_flags |= M_PROMISC;
157376Srwatson	} else
98211Shsu		mc = NULL;
1541Srgrimes
139222Srwatson	m->m_pkthdr.len -= hlen;
98211Shsu	m->m_len -= hlen;
72960Sjlemon	m->m_data += hlen;	/* XXX */
111748Sdes
1541Srgrimes	CURVNET_SET(ifp->if_vnet);
1541Srgrimes#ifdef MAC
72960Sjlemon	BPFD_LOCK(d);
1541Srgrimes	mac_bpfdesc_create_mbuf(d, m);
1541Srgrimes	if (mc != NULL)
36079Swollman		mac_bpfdesc_create_mbuf(d, mc);
62573Sphk	BPFD_UNLOCK(d);
36079Swollman#endif
171605Ssilby
36079Swollman	error = (*ifp->if_output)(ifp, m, &dst, NULL);
36079Swollman	if (error)
36079Swollman		d->bd_wdcount++;
36079Swollman
36079Swollman	if (mc != NULL) {
36079Swollman		if (error == 0)
36079Swollman			(*ifp->if_input)(ifp, mc);
36079Swollman		else
127870Srwatson			m_freem(mc);
211888Sjhb	}
211888Sjhb	CURVNET_RESTORE();
211888Sjhb
139222Srwatson	return (error);
36079Swollman}
36079Swollman
127870Srwatson/*
139222Srwatson * Reset a descriptor by flushing its packet buffer and clearing the receive
36079Swollman * and drop counts.  This is doable for kernel-only buffers, but with
36079Swollman * zero-copy buffers, we can't write to (or rotate) buffers that are
36079Swollman * currently owned by userspace.  It would be nice if we could encapsulate
36079Swollman * this logic in the buffer code rather than here.
181803Sbz */
181803Sbzstatic void
181803Sbzreset_d(struct bpf_d *d)
181803Sbz{
36079Swollman
171605Ssilby	BPFD_LOCK_ASSERT(d);
171605Ssilby
126253Struckman	while (d->bd_hbuf_in_use)
171605Ssilby		mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET,
126253Struckman		    "bd_hbuf", 0);
126253Struckman	if ((d->bd_hbuf != NULL) &&
100831Struckman	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
36079Swollman		/* Free the hold buffer. */
171605Ssilby		d->bd_fbuf = d->bd_hbuf;
36079Swollman		d->bd_hbuf = NULL;
36079Swollman		d->bd_hlen = 0;
36079Swollman		bpf_buf_reclaimed(d);
36079Swollman	}
139222Srwatson	if (bpf_canwritebuf(d))
36079Swollman		d->bd_slen = 0;
171605Ssilby	d->bd_rcount = 0;
171605Ssilby	d->bd_dcount = 0;
171605Ssilby	d->bd_fcount = 0;
171605Ssilby	d->bd_wcount = 0;
111119Simp	d->bd_wfcount = 0;
127870Srwatson	d->bd_wdcount = 0;
139222Srwatson	d->bd_zcopy = 0;
133874Srwatson}
181803Sbz
181887Sjulian/*
181888Sjulian *  FIONREAD		Check for read packet available.
205251Sbz *  SIOCGIFADDR		Get interface address - convenient hook to driver.
113345Srwatson *  BIOCGBLEN		Get buffer len [for read()].
113345Srwatson *  BIOCSETF		Set read filter.
113345Srwatson *  BIOCSETFNR		Set read filter without resetting descriptor.
113345Srwatson *  BIOCSETWF		Set write filter.
113345Srwatson *  BIOCFLUSH		Flush read packet buffer.
113345Srwatson *  BIOCPROMISC		Put interface into promiscuous mode.
189848Srwatson *  BIOCGDLT		Get link layer type.
157474Srwatson *  BIOCGETIF		Get interface name.
157474Srwatson *  BIOCSETIF		Set interface.
157474Srwatson *  BIOCSRTIMEOUT	Set read timeout.
157474Srwatson *  BIOCGRTIMEOUT	Get read timeout.
157474Srwatson *  BIOCGSTATS		Get packet stats.
157474Srwatson *  BIOCIMMEDIATE	Set immediate mode.
183982Sbz *  BIOCVERSION		Get filter language version.
205251Sbz *  BIOCGHDRCMPLT	Get "header already complete" flag
205251Sbz *  BIOCSHDRCMPLT	Set "header already complete" flag
113345Srwatson *  BIOCGDIRECTION	Get packet direction flag
205251Sbz *  BIOCSDIRECTION	Set packet direction flag
113345Srwatson *  BIOCGTSTAMP		Get time stamp format and resolution.
205251Sbz *  BIOCSTSTAMP		Set time stamp format and resolution.
36079Swollman *  BIOCLOCK		Set "locked" flag
181803Sbz *  BIOCFEEDBACK	Set packet feedback mode.
36079Swollman *  BIOCSETZBUF		Set current zero-copy buffer locations.
36079Swollman *  BIOCGETZMAX		Get maximum zero-copy buffer size.
36079Swollman *  BIOCROTZBUF		Force rotation of zero-copy buffer
36079Swollman *  BIOCSETBUFMODE	Set buffer mode.
36079Swollman *  BIOCGETBUFMODE	Get current buffer mode.
179414Srwatson */
36079Swollman/* ARGSUSED */
36079Swollmanstatic	int
157432Srwatsonbpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
145978Scperciva    struct thread *td)
145978Scperciva{
36079Swollman	struct bpf_d *d;
36079Swollman	int error;
36079Swollman
47960Stegge	error = devfs_get_cdevpriv((void **)&d);
111145Sjlemon	if (error != 0)
111145Sjlemon		return (error);
189848Srwatson
111145Sjlemon	/*
111145Sjlemon	 * Refresh PID associated with this descriptor.
197244Ssilby	 */
47960Stegge	BPFD_LOCK(d);
197244Ssilby	BPF_PID_REFRESH(d, td);
197244Ssilby	if (d->bd_state == BPF_WAITING)
197244Ssilby		callout_stop(&d->bd_callout);
127870Srwatson	d->bd_state = BPF_IDLE;
36079Swollman	BPFD_UNLOCK(d);
111145Sjlemon
111145Sjlemon	if (d->bd_locked == 1) {
111145Sjlemon		switch (cmd) {
111145Sjlemon		case BIOCGBLEN:
110896Shsu		case BIOCFLUSH:
179414Srwatson		case BIOCGDLT:
36079Swollman		case BIOCGDLTLIST:
160491Sups#ifdef COMPAT_FREEBSD32
179414Srwatson		case BIOCGDLTLIST32:
36079Swollman#endif
205251Sbz		case BIOCGETIF:
205251Sbz		case BIOCGRTIMEOUT:
205251Sbz#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
222488Srwatson		case BIOCGRTIMEOUT32:
222488Srwatson#endif
222488Srwatson		case BIOCGSTATS:
205251Sbz		case BIOCVERSION:
205251Sbz		case BIOCGRSIG:
205251Sbz		case BIOCGHDRCMPLT:
36079Swollman		case BIOCSTSTAMP:
36079Swollman		case BIOCFEEDBACK:
36079Swollman		case FIONREAD:
36079Swollman		case BIOCLOCK:
36079Swollman		case BIOCSRTIMEOUT:
36079Swollman#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
36079Swollman		case BIOCSRTIMEOUT32:
36079Swollman#endif
181803Sbz		case BIOCIMMEDIATE:
181803Sbz		case TIOCGPGRP:
36079Swollman		case BIOCROTZBUF:
181803Sbz			break;
181803Sbz		default:
36079Swollman			return (EPERM);
36079Swollman		}
36079Swollman	}
139222Srwatson#ifdef COMPAT_FREEBSD32
36079Swollman	/*
36079Swollman	 * If we see a 32-bit compat ioctl, mark the stream as 32-bit so
217554Smdf	 * that it will get 32-bit packet headers.
217554Smdf	 */
167785Sandre	switch (cmd) {
36079Swollman	case BIOCSETF32:
221250Sbz	case BIOCSETFNR32:
48758Sgreen	case BIOCSETWF32:
62573Sphk	case BIOCGDLTLIST32:
48758Sgreen	case BIOCGRTIMEOUT32:
72650Sgreen	case BIOCSRTIMEOUT32:
48758Sgreen		BPFD_LOCK(d);
48758Sgreen		d->bd_compat32 = 1;
148156Srwatson		BPFD_UNLOCK(d);
48758Sgreen	}
170587Srwatson#endif
48758Sgreen
48758Sgreen	CURVNET_SET(TD_TO_VNET(td));
48758Sgreen	switch (cmd) {
48758Sgreen
48758Sgreen	default:
222488Srwatson		error = EINVAL;
222488Srwatson		break;
179414Srwatson
179414Srwatson	/*
179414Srwatson	 * Check for read packet available.
179414Srwatson	 */
183982Sbz	case FIONREAD:
179414Srwatson		{
183606Sbz			int n;
179414Srwatson
222488Srwatson			BPFD_LOCK(d);
48758Sgreen			n = d->bd_slen;
99838Struckman			while (d->bd_hbuf_in_use)
99838Struckman				mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
48758Sgreen				    PRINET, "bd_hbuf", 0);
48758Sgreen			if (d->bd_hbuf)
48758Sgreen				n += d->bd_hlen;
78697Sdwmalone			BPFD_UNLOCK(d);
78697Sdwmalone
78697Sdwmalone			*(int *)addr = n;
221250Sbz			break;
48758Sgreen		}
55679Sshin
55679Sshin	case SIOCGIFADDR:
62573Sphk		{
55679Sshin			struct ifnet *ifp;
72650Sgreen
55679Sshin			if (d->bd_bif == NULL)
55679Sshin				error = EINVAL;
221250Sbz			else {
221250Sbz				ifp = d->bd_bif->bif_ifp;
221250Sbz				error = (*ifp->if_ioctl)(ifp, cmd, addr);
221250Sbz			}
55679Sshin			break;
170587Srwatson		}
55679Sshin
55679Sshin	/*
55679Sshin	 * Get buffer len [for read()].
55679Sshin	 */
55679Sshin	case BIOCGBLEN:
181803Sbz		BPFD_LOCK(d);
181803Sbz		*(u_int *)addr = d->bd_bufsize;
148385Sume		BPFD_UNLOCK(d);
148385Sume		break;
55679Sshin
221250Sbz	/*
55679Sshin	 * Set buffer length.
55679Sshin	 */
55679Sshin	case BIOCSBLEN:
221250Sbz		error = bpf_ioctl_sblen(d, (u_int *)addr);
55679Sshin		break;
55679Sshin
148385Sume	/*
221250Sbz	 * Set link layer read filter.
55679Sshin	 */
222488Srwatson	case BIOCSETF:
55679Sshin	case BIOCSETFNR:
55679Sshin	case BIOCSETWF:
55679Sshin#ifdef COMPAT_FREEBSD32
222488Srwatson	case BIOCSETF32:
55679Sshin	case BIOCSETFNR32:
221250Sbz	case BIOCSETWF32:
222488Srwatson#endif
151254Sphilip		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
222488Srwatson		break;
222488Srwatson
179414Srwatson	/*
179414Srwatson	 * Flush read packet buffer.
179414Srwatson	 */
179414Srwatson	case BIOCFLUSH:
183982Sbz		BPFD_LOCK(d);
179414Srwatson		reset_d(d);
183606Sbz		BPFD_UNLOCK(d);
179414Srwatson		break;
222488Srwatson
55679Sshin	/*
99838Struckman	 * Put interface into promiscuous mode.
99838Struckman	 */
55679Sshin	case BIOCPROMISC:
55679Sshin		if (d->bd_bif == NULL) {
55679Sshin			/*
78697Sdwmalone			 * No interface attached yet.
78697Sdwmalone			 */
78697Sdwmalone			error = EINVAL;
221250Sbz			break;
55679Sshin		}
55679Sshin		if (d->bd_promisc == 0) {
221250Sbz			error = ifpromisc(d->bd_bif->bif_ifp, 1);
1541Srgrimes			if (error == 0)
157431Srwatson				d->bd_promisc = 1;
1541Srgrimes		}
72959Sjlemon		break;
72959Sjlemon
73109Sjlemon	/*
73109Sjlemon	 * Get current data link type.
73109Sjlemon	 */
98211Shsu	case BIOCGDLT:
145360Sandre		BPF_LOCK();
145360Sandre		if (d->bd_bif == NULL)
145360Sandre			error = EINVAL;
148156Srwatson		else
1541Srgrimes			*(u_int *)addr = d->bd_bif->bif_dlt;
73109Sjlemon		BPF_UNLOCK();
73109Sjlemon		break;
73109Sjlemon
73109Sjlemon	/*
145355Sandre	 * Get a list of supported data link types.
235051Sglebius	 */
181803Sbz#ifdef COMPAT_FREEBSD32
99156Sjesper	case BIOCGDLTLIST32:
72959Sjlemon		{
122922Sandre			struct bpf_dltlist32 *list32;
122922Sandre			struct bpf_dltlist dltlist;
122922Sandre
122922Sandre			list32 = (struct bpf_dltlist32 *)addr;
122922Sandre			dltlist.bfl_len = list32->bfl_len;
122922Sandre			dltlist.bfl_list = PTRIN(list32->bfl_list);
145355Sandre			BPF_LOCK();
145355Sandre			if (d->bd_bif == NULL)
145355Sandre				error = EINVAL;
145355Sandre			else {
145355Sandre				error = bpf_getdltlist(d, &dltlist);
122922Sandre				if (error == 0)
122922Sandre					list32->bfl_len = dltlist.bfl_len;
122922Sandre			}
122922Sandre			BPF_UNLOCK();
122922Sandre			break;
127870Srwatson		}
119995Sru#endif
1541Srgrimes
127870Srwatson	case BIOCGDLTLIST:
145360Sandre		BPF_LOCK();
145360Sandre		if (d->bd_bif == NULL)
133874Srwatson			error = EINVAL;
105586Sphk		else
181803Sbz			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
222488Srwatson		BPF_UNLOCK();
222488Srwatson		break;
98102Shsu
189848Srwatson	/*
189848Srwatson	 * Set data link type.
157433Srwatson	 */
145360Sandre	case BIOCSDLT:
98102Shsu		BPF_LOCK();
145360Sandre		if (d->bd_bif == NULL)
145360Sandre			error = EINVAL;
145360Sandre		else
145360Sandre			error = bpf_setdlt(d, *(u_int *)addr);
145360Sandre		BPF_UNLOCK();
145360Sandre		break;
145360Sandre
145360Sandre	/*
145360Sandre	 * Get interface name.
145360Sandre	 */
145360Sandre	case BIOCGETIF:
178888Sjulian		BPF_LOCK();
178888Sjulian		if (d->bd_bif == NULL)
145360Sandre			error = EINVAL;
145360Sandre		else {
145869Sandre			struct ifnet *const ifp = d->bd_bif->bif_ifp;
145869Sandre			struct ifreq *const ifr = (struct ifreq *)addr;
145869Sandre
149929Sandre			strlcpy(ifr->ifr_name, ifp->if_xname,
149929Sandre			    sizeof(ifr->ifr_name));
145869Sandre		}
145360Sandre		BPF_UNLOCK();
149929Sandre		break;
145869Sandre
211333Sandre	/*
211333Sandre	 * Set interface.
211333Sandre	 */
145869Sandre	case BIOCSETIF:
145869Sandre		BPF_LOCK();
218909Sbrucec		error = bpf_setif(d, (struct ifreq *)addr);
145869Sandre		BPF_UNLOCK();
145869Sandre		break;
145869Sandre
145869Sandre	/*
162084Sandre	 * Set read timeout.
145360Sandre	 */
235051Sglebius	case BIOCSRTIMEOUT:
235051Sglebius#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
235051Sglebius	case BIOCSRTIMEOUT32:
235051Sglebius#endif
145360Sandre		{
98102Shsu			struct timeval *tv = (struct timeval *)addr;
127870Srwatson#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
178285Srwatson			struct timeval32 *tv32;
86764Sjlemon			struct timeval tv64;
186222Sbz
86764Sjlemon			if (cmd == BIOCSRTIMEOUT32) {
86764Sjlemon				tv32 = (struct timeval32 *)addr;
86764Sjlemon				tv = &tv64;
86764Sjlemon				tv->tv_sec = tv32->tv_sec;
86764Sjlemon				tv->tv_usec = tv32->tv_usec;
73109Sjlemon			} else
181803Sbz#endif
1541Srgrimes				tv = (struct timeval *)addr;
181803Sbz
1541Srgrimes			/*
221250Sbz			 * Subtract 1 tick from tvtohz() since this isn't
1541Srgrimes			 * a one-shot timer.
55679Sshin			 */
55679Sshin			if ((error = itimerfix(tv)) == 0)
157431Srwatson				d->bd_rtout = tvtohz(tv) - 1;
55679Sshin			break;
55679Sshin		}
98211Shsu
55679Sshin	/*
55679Sshin	 * Get read timeout.
78064Sume	 */
78064Sume	case BIOCGRTIMEOUT:
55679Sshin#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
78064Sume	case BIOCGRTIMEOUT32:
78064Sume#endif
78064Sume		{
78064Sume			struct timeval *tv;
55679Sshin#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
55679Sshin			struct timeval32 *tv32;
55679Sshin			struct timeval tv64;
55679Sshin
55679Sshin			if (cmd == BIOCGRTIMEOUT32)
145355Sandre				tv = &tv64;
235051Sglebius			else
55679Sshin#endif
119995Sru				tv = (struct timeval *)addr;
55679Sshin
145355Sandre			tv->tv_sec = d->bd_rtout / hz;
145355Sandre			tv->tv_usec = (d->bd_rtout % hz) * tick;
145355Sandre#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
55679Sshin			if (cmd == BIOCGRTIMEOUT32) {
55679Sshin				tv32 = (struct timeval32 *)addr;
55679Sshin				tv32->tv_sec = tv->tv_sec;
78064Sume				tv32->tv_usec = tv->tv_usec;
55679Sshin			}
55679Sshin#endif
55679Sshin
78064Sume			break;
55679Sshin		}
55679Sshin
55679Sshin	/*
67456Sitojun	 * Get packet stats.
78064Sume	 */
55679Sshin	case BIOCGSTATS:
55679Sshin		{
127870Srwatson			struct bpf_stat *bs = (struct bpf_stat *)addr;
86764Sjlemon
55679Sshin			/* XXXCSJP overflow */
55679Sshin			bs->bs_recv = d->bd_rcount;
55679Sshin			bs->bs_drop = d->bd_dcount;
55679Sshin			break;
55679Sshin		}
67456Sitojun
78064Sume	/*
67456Sitojun	 * Set immediate mode.
67456Sitojun	 */
78064Sume	case BIOCIMMEDIATE:
78064Sume		BPFD_LOCK(d);
78064Sume		d->bd_immediate = *(u_int *)addr;
181803Sbz		BPFD_UNLOCK(d);
78064Sume		break;
125776Sume
86764Sjlemon	case BIOCVERSION:
186222Sbz		{
86764Sjlemon			struct bpf_version *bv = (struct bpf_version *)addr;
86764Sjlemon
86764Sjlemon			bv->bv_major = BPF_MAJOR_VERSION;
86764Sjlemon			bv->bv_minor = BPF_MINOR_VERSION;
186222Sbz			break;
181803Sbz		}
86764Sjlemon
181803Sbz	/*
55679Sshin	 * Get "header already complete" flag
181803Sbz	 */
125776Sume	case BIOCGHDRCMPLT:
55679Sshin		BPFD_LOCK(d);
55679Sshin		*(u_int *)addr = d->bd_hdrcmplt;
55679Sshin		BPFD_UNLOCK(d);
80428Speter		break;
82122Ssilby
82122Ssilby	/*
82122Ssilby	 * Set "header already complete" flag
82122Ssilby	 */
82122Ssilby	case BIOCSHDRCMPLT:
82122Ssilby		BPFD_LOCK(d);
82122Ssilby		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
94390Ssilby		BPFD_UNLOCK(d);
94390Ssilby		break;
82122Ssilby
82122Ssilby	/*
82122Ssilby	 * Get packet direction flag
82122Ssilby	 */
82122Ssilby	case BIOCGDIRECTION:
133874Srwatson		BPFD_LOCK(d);
82122Ssilby		*(u_int *)addr = d->bd_direction;
82122Ssilby		BPFD_UNLOCK(d);
82122Ssilby		break;
82122Ssilby
82122Ssilby	/*
82122Ssilby	 * Set packet direction flag
82122Ssilby	 */
82122Ssilby	case BIOCSDIRECTION:
128452Ssilby		{
128452Ssilby			u_int	direction;
128452Ssilby
128452Ssilby			direction = *(u_int *)addr;
128452Ssilby			switch (direction) {
128452Ssilby			case BPF_D_IN:
128452Ssilby			case BPF_D_INOUT:
128452Ssilby			case BPF_D_OUT:
128452Ssilby				BPFD_LOCK(d);
128452Ssilby				d->bd_direction = direction;
128452Ssilby				BPFD_UNLOCK(d);
128452Ssilby				break;
82122Ssilby			default:
82122Ssilby				error = EINVAL;
82122Ssilby			}
82122Ssilby		}
138018Srwatson		break;
138018Srwatson
138018Srwatson	/*
82122Ssilby	 * Get packet timestamp format and resolution.
79413Ssilby	 */
82122Ssilby	case BIOCGTSTAMP:
128452Ssilby		BPFD_LOCK(d);
128452Ssilby		*(u_int *)addr = d->bd_tstamp;
79413Ssilby		BPFD_UNLOCK(d);
215701Sdim		break;
221690Smav
215701Sdim	/*
215701Sdim	 * Set packet timestamp format and resolution.
215701Sdim	 */
75619Skris	case BIOCSTSTAMP:
195727Srwatson		{
221690Smav			u_int	func;
195727Srwatson
195727Srwatson			func = *(u_int *)addr;
195727Srwatson			if (BPF_T_VALID(func))
195699Srwatson				d->bd_tstamp = func;
75619Skris			else
157431Srwatson				error = EINVAL;
75619Skris		}
186057Sbz		break;
82122Ssilby
82122Ssilby	case BIOCFEEDBACK:
221690Smav		BPFD_LOCK(d);
75619Skris		d->bd_feedback = *(u_int *)addr;
178285Srwatson		BPFD_UNLOCK(d);
138018Srwatson		break;
157977Srwatson
82122Ssilby	case BIOCLOCK:
181803Sbz		BPFD_LOCK(d);
181803Sbz		d->bd_locked = 1;
82122Ssilby		BPFD_UNLOCK(d);
181803Sbz		break;
181803Sbz
82122Ssilby	case FIONBIO:		/* Non-blocking I/O */
133874Srwatson		break;
82122Ssilby
186057Sbz	case FIOASYNC:		/* Send signal on receive packets */
186057Sbz		BPFD_LOCK(d);
186057Sbz		d->bd_async = *(int *)addr;
82122Ssilby		BPFD_UNLOCK(d);
82122Ssilby		break;
186057Sbz
82122Ssilby	case FIOSETOWN:
186057Sbz		/*
82122Ssilby		 * XXX: Add some sort of locking here?
82122Ssilby		 * fsetown() can sleep.
82122Ssilby		 */
82122Ssilby		error = fsetown(*(int *)addr, &d->bd_sigio);
186057Sbz		break;
82122Ssilby
186057Sbz	case FIOGETOWN:
82122Ssilby		BPFD_LOCK(d);
82122Ssilby		*(int *)addr = fgetown(&d->bd_sigio);
186057Sbz		BPFD_UNLOCK(d);
186057Sbz		break;
82122Ssilby
181803Sbz	/* This is deprecated, FIOSETOWN should be used instead. */
128452Ssilby	case TIOCSPGRP:
221690Smav		error = fsetown(-(*(int *)addr), &d->bd_sigio);
221690Smav		break;
221690Smav
183550Szec	/* This is deprecated, FIOGETOWN should be used instead. */
183550Szec	case TIOCGPGRP:
183550Szec		*(int *)addr = -fgetown(&d->bd_sigio);
221690Smav		break;
183550Szec
221690Smav	case BIOCSRSIG:		/* Set receive signal */
190787Szec		{
221690Smav			u_int sig;
128452Ssilby
128452Ssilby			sig = *(u_int *)addr;
128452Ssilby
72959Sjlemon			if (sig >= NSIG)
72959Sjlemon				error = EINVAL;
72959Sjlemon			else {
70103Sphk				BPFD_LOCK(d);
98211Shsu				d->bd_sig = sig;
157431Srwatson				BPFD_UNLOCK(d);
70103Sphk			}
157432Srwatson			break;
70103Sphk		}
181803Sbz	case BIOCGRSIG:
178285Srwatson		BPFD_LOCK(d);
157433Srwatson		*(u_int *)addr = d->bd_sig;
189848Srwatson		BPFD_UNLOCK(d);
189848Srwatson		break;
157433Srwatson
157433Srwatson	case BIOCGETBUFMODE:
157432Srwatson		BPFD_LOCK(d);
157433Srwatson		*(u_int *)addr = d->bd_bufmode;
157433Srwatson		BPFD_UNLOCK(d);
146864Srwatson		break;
157433Srwatson
157433Srwatson	case BIOCSETBUFMODE:
157433Srwatson		/*
157433Srwatson		 * Allow the buffering mode to be changed as long as we
157433Srwatson		 * haven't yet committed to a particular mode.  Our
72638Sphk		 * definition of commitment, for now, is whether or not a
72638Sphk		 * buffer has been allocated or an interface attached, since
72638Sphk		 * that's the point where things get tricky.
10881Swollman		 */
235051Sglebius		switch (*(u_int *)addr) {
235051Sglebius		case BPF_BUFMODE_BUFFER:
10930Swollman			break;
10881Swollman
235051Sglebius		case BPF_BUFMODE_ZBUF:
235051Sglebius			if (bpf_zerocopy_enable)
235051Sglebius				break;
235051Sglebius			/* FALLSTHROUGH */
235051Sglebius
235051Sglebius		default:
235051Sglebius			CURVNET_RESTORE();
98211Shsu			return (EINVAL);
235051Sglebius		}
10881Swollman
157432Srwatson		BPFD_LOCK(d);
182851Sbz		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
10881Swollman		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
178285Srwatson			BPFD_UNLOCK(d);
189848Srwatson			CURVNET_RESTORE();
189848Srwatson			return (EBUSY);
157433Srwatson		}
157433Srwatson		d->bd_bufmode = *(u_int *)addr;
157432Srwatson		BPFD_UNLOCK(d);
157433Srwatson		break;
157433Srwatson
235051Sglebius	case BIOCGETZMAX:
182851Sbz		error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
182851Sbz		break;
182851Sbz
182851Sbz	case BIOCSETZBUF:
182851Sbz		error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
182851Sbz		break;
182851Sbz
55679Sshin	case BIOCROTZBUF:
190948Srwatson		error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
157433Srwatson		break;
157433Srwatson	}
161645Smohans	CURVNET_RESTORE();
161645Smohans	return (error);
169317Sandre}
215166Slstewart
252555Snp/*
139222Srwatson * Set d's packet filter program to fp.  If this file already has a filter,
10881Swollman * free it and replace it.  Returns EINVAL for bogus requests.
10881Swollman *
221250Sbz * Note we need global lock here to serialize bpf_setf() and bpf_setif() calls
10881Swollman * since reading d->bd_bif can't be protected by d or interface lock due to
6283Swollman * lock order.
184721Sbz *
184721Sbz * Additionally, we have to acquire interface write lock due to bpf_mtap() uses
184721Sbz * interface read lock to read all filers.
6283Swollman *
133874Srwatson */
252781Sandrestatic int
6283Swollmanbpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
122922Sandre{
122922Sandre#ifdef COMPAT_FREEBSD32
122922Sandre	struct bpf_program fp_swab;
122922Sandre	struct bpf_program32 *fp32;
6283Swollman#endif
122922Sandre	struct bpf_insn *fcode, *old;
122922Sandre#ifdef BPF_JITTER
122996Sandre	bpf_jit_filter *jfunc, *ofunc;
122922Sandre#endif
122922Sandre	size_t size;
122922Sandre	u_int flen;
122922Sandre	int need_upgrade;
122922Sandre
186119Sqingli#ifdef COMPAT_FREEBSD32
6283Swollman	switch (cmd) {
122922Sandre	case BIOCSETF32:
122922Sandre	case BIOCSETWF32:
122922Sandre	case BIOCSETFNR32:
122922Sandre		fp32 = (struct bpf_program32 *)fp;
122922Sandre		fp_swab.bf_len = fp32->bf_len;
122922Sandre		fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
162084Sandre		fp = &fp_swab;
162084Sandre		switch (cmd) {
252781Sandre		case BIOCSETF32:
162084Sandre			cmd = BIOCSETF;
265357Srmacklem			break;
252781Sandre		case BIOCSETWF32:
252781Sandre			cmd = BIOCSETWF;
265357Srmacklem			break;
162084Sandre		}
122922Sandre		break;
122922Sandre	}
122922Sandre#endif
6283Swollman
221250Sbz	fcode = NULL;
6283Swollman#ifdef BPF_JITTER
55679Sshin	jfunc = ofunc = NULL;
122922Sandre#endif
252781Sandre	need_upgrade = 0;
55679Sshin
122922Sandre	/*
122922Sandre	 * Check new filter validness before acquiring any locks.
122922Sandre	 * Allocate memory for new filter, if needed.
55679Sshin	 */
122922Sandre	flen = fp->bf_len;
122922Sandre	if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
122996Sandre		return (EINVAL);
122922Sandre	size = flen * sizeof(*fp->bf_insns);
122922Sandre	if (size > 0) {
122922Sandre		/* We're setting up new filter.  Copy and check actual data. */
122922Sandre		fcode = malloc(size, M_BPF, M_WAITOK);
232292Sbz		if (copyin(fp->bf_insns, fcode, size) != 0 ||
55679Sshin		    !bpf_validate(fcode, flen)) {
122922Sandre			free(fcode, M_BPF);
122922Sandre			return (EINVAL);
122922Sandre		}
122922Sandre#ifdef BPF_JITTER
122922Sandre		/* Filter is copied inside fcode and is perfectly valid. */
122922Sandre		jfunc = bpf_jitter(fcode, flen);
122922Sandre#endif
162084Sandre	}
162084Sandre
252781Sandre	BPF_LOCK();
162084Sandre
265357Srmacklem	/*
252781Sandre	 * Set up new filter.
252781Sandre	 * Protect filter change by interface lock.
265357Srmacklem	 * Additionally, we are protected by global lock here.
162084Sandre	 */
122922Sandre	if (d->bd_bif != NULL)
122922Sandre		BPFIF_WLOCK(d->bd_bif);
122922Sandre	BPFD_LOCK(d);
122922Sandre	if (cmd == BIOCSETWF) {
55679Sshin		old = d->bd_wfilter;
55679Sshin		d->bd_wfilter = fcode;
55679Sshin	} else {
171167Sgnn		old = d->bd_rfilter;
55679Sshin		d->bd_rfilter = fcode;
55679Sshin#ifdef BPF_JITTER
157431Srwatson		ofunc = d->bd_bfilter;
55679Sshin		d->bd_bfilter = jfunc;
55679Sshin#endif
55679Sshin		if (cmd == BIOCSETF)
55679Sshin			reset_d(d);
55679Sshin
55679Sshin		if (fcode != NULL) {
55679Sshin			/*
111145Sjlemon			 * Do not require upgrade by first BIOCSETF
55679Sshin			 * (used to set snaplen) by pcap_open_live().
55679Sshin			 */
78642Ssilby			if (d->bd_writer != 0 && --d->bd_writer == 0)
139222Srwatson				need_upgrade = 1;
111119Simp			CTR4(KTR_NET, "%s: filter function set by pid %d, "
55679Sshin			    "bd_writer counter %d, need_upgrade %d",
139222Srwatson			    __func__, d->bd_pid, d->bd_writer, need_upgrade);
55679Sshin		}
55679Sshin	}
55679Sshin	BPFD_UNLOCK(d);
55679Sshin	if (d->bd_bif != NULL)
55679Sshin		BPFIF_WUNLOCK(d->bd_bif);
55679Sshin	if (old != NULL)
55679Sshin		free(old, M_BPF);
111144Sjlemon#ifdef BPF_JITTER
188306Sbz	if (ofunc != NULL)
55679Sshin		bpf_destroy_jit_filter(ofunc);
55679Sshin#endif
133874Srwatson
133874Srwatson	/* Move d to active readers list. */
133874Srwatson	if (need_upgrade)
133874Srwatson		bpf_upgraded(d);
133874Srwatson
188306Sbz	BPF_UNLOCK();
133874Srwatson	return (0);
55679Sshin}
55679Sshin
139222Srwatson/*
55679Sshin * Detach a file from its current interface (if attached at all) and attach
171167Sgnn * to the interface indicated by the name stored in ifr.
55679Sshin * Return an errno or 0.
125680Sbms */
125680Sbmsstatic int
125783Sbmsbpf_setif(struct bpf_d *d, struct ifreq *ifr)
125783Sbms{
125783Sbms	struct bpf_if *bp;
125783Sbms	struct ifnet *theywant;
125783Sbms
125783Sbms	BPF_LOCK_ASSERT();
125783Sbms
125819Sbms	theywant = ifunit(ifr->ifr_name);
125783Sbms	if (theywant == NULL || theywant->if_bpf == NULL)
125783Sbms		return (ENXIO);
125783Sbms
125783Sbms	bp = theywant->if_bpf;
183001Sbz
125680Sbms	/* Check if interface is not being detached from BPF */
125741Sbms	BPFIF_RLOCK(bp);
125741Sbms	if (bp->flags & BPFIF_FLAG_DYING) {
183001Sbz		BPFIF_RUNLOCK(bp);
125741Sbms		return (ENXIO);
125741Sbms	}
125741Sbms	BPFIF_RUNLOCK(bp);
125741Sbms
125741Sbms	/*
125680Sbms	 * Behavior here depends on the buffering model.  If we're using
125680Sbms	 * kernel memory buffers, then we can allocate them here.  If we're
125680Sbms	 * using zero-copy, then the user process must have registered
125680Sbms	 * buffers by the time we get here.  If not, return an error.
125680Sbms	 */
125680Sbms	switch (d->bd_bufmode) {
125680Sbms	case BPF_BUFMODE_BUFFER:
125680Sbms	case BPF_BUFMODE_ZBUF:
125680Sbms		if (d->bd_sbuf == NULL)
125680Sbms			return (EINVAL);
125680Sbms		break;
125680Sbms
125680Sbms	default:
183001Sbz		panic("bpf_setif: bufmode %d", d->bd_bufmode);
125741Sbms	}
125680Sbms	if (bp != d->bd_bif)
125680Sbms		bpf_attachd(d, bp);
221250Sbz	BPFD_LOCK(d);
125680Sbms	reset_d(d);
221250Sbz	BPFD_UNLOCK(d);
125680Sbms	return (0);
125680Sbms}
125680Sbms
221250Sbz/*
125680Sbms * Support for select() and poll() system calls
221250Sbz *
125680Sbms * Return true iff the specific operation will not block indefinitely.
125680Sbms * Otherwise, return false but make a note that a selwakeup() must be done.
183001Sbz */
183001Sbzstatic int
183001Sbzbpfpoll(struct cdev *dev, int events, struct thread *td)
183001Sbz{
183001Sbz	struct bpf_d *d;
183001Sbz	int revents;
183001Sbz
125680Sbms	if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
125680Sbms		return (events &
125741Sbms		    (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));
125741Sbms
125741Sbms	/*
125741Sbms	 * Refresh PID associated with this descriptor.
183001Sbz	 */
125680Sbms	revents = events & (POLLOUT | POLLWRNORM);
183001Sbz	BPFD_LOCK(d);
183001Sbz	BPF_PID_REFRESH(d, td);
183001Sbz	if (events & (POLLIN | POLLRDNORM)) {
183001Sbz		if (bpf_ready(d))
221250Sbz			revents |= events & (POLLIN | POLLRDNORM);
183001Sbz		else {
183001Sbz			selrecord(td, &d->bd_sel);
183001Sbz			/* Start the read timeout if necessary. */
183001Sbz			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
183001Sbz				callout_reset(&d->bd_callout, d->bd_rtout,
183001Sbz				    bpf_timed_out, d);
221250Sbz				d->bd_state = BPF_WAITING;
183001Sbz			}
183001Sbz		}
183001Sbz	}
183001Sbz	BPFD_UNLOCK(d);
183001Sbz	return (revents);
183001Sbz}
183001Sbz
183001Sbz/*
183001Sbz * Support for kevent() system call.  Register EVFILT_READ filters and
183001Sbz * reject all others.
183001Sbz */
183001Sbzint
183001Sbzbpfkqfilter(struct cdev *dev, struct knote *kn)
183001Sbz{
125741Sbms	struct bpf_d *d;
125741Sbms
125680Sbms	if (devfs_get_cdevpriv((void **)&d) != 0 ||
125680Sbms	    kn->kn_filter != EVFILT_READ)
183001Sbz		return (1);
183001Sbz
183001Sbz	/*
183001Sbz	 * Refresh PID associated with this descriptor.
183001Sbz	 */
183001Sbz	BPFD_LOCK(d);
183001Sbz	BPF_PID_REFRESH_CUR(d);
125680Sbms	kn->kn_fop = &bpfread_filtops;
125680Sbms	kn->kn_hook = d;
125741Sbms	knlist_add(&d->bd_sel.si_note, kn, 1);
125680Sbms	BPFD_UNLOCK(d);
125680Sbms
183001Sbz	return (0);
125680Sbms}
125680Sbms
125680Sbmsstatic void
125680Sbmsfilt_bpfdetach(struct knote *kn)
125680Sbms{
125680Sbms	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
125680Sbms
183001Sbz	knlist_remove(&d->bd_sel.si_note, kn, 0);
221250Sbz}
183001Sbz
183001Sbzstatic int
183001Sbzfilt_bpfread(struct knote *kn, long hint)
183001Sbz{
183001Sbz	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
183001Sbz	int ready;
183001Sbz
183001Sbz	BPFD_LOCK_ASSERT(d);
183001Sbz	ready = bpf_ready(d);
125741Sbms	if (ready) {
183001Sbz		kn->kn_data = d->bd_slen;
183001Sbz		while (d->bd_hbuf_in_use)
183001Sbz			mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
221250Sbz			    PRINET, "bd_hbuf", 0);
183001Sbz		if (d->bd_hbuf)
125680Sbms			kn->kn_data += d->bd_hlen;
183001Sbz	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
183001Sbz		callout_reset(&d->bd_callout, d->bd_rtout,
183001Sbz		    bpf_timed_out, d);
183001Sbz		d->bd_state = BPF_WAITING;
183001Sbz	}
183001Sbz
183001Sbz	return (ready);
183001Sbz}
183001Sbz
183001Sbz#define	BPF_TSTAMP_NONE		0
183001Sbz#define	BPF_TSTAMP_FAST		1
183001Sbz#define	BPF_TSTAMP_NORMAL	2
183001Sbz#define	BPF_TSTAMP_EXTERN	3
183001Sbz
183001Sbzstatic int
183001Sbzbpf_ts_quality(int tstype)
183001Sbz{
183001Sbz
183001Sbz	if (tstype == BPF_T_NONE)
183001Sbz		return (BPF_TSTAMP_NONE);
183001Sbz	if ((tstype & BPF_T_FAST) != 0)
183001Sbz		return (BPF_TSTAMP_FAST);
183001Sbz
183001Sbz	return (BPF_TSTAMP_NORMAL);
183001Sbz}
183001Sbz
183001Sbzstatic int
183001Sbzbpf_gettime(struct bintime *bt, int tstype, struct mbuf *m)
183001Sbz{
183001Sbz	struct m_tag *tag;
183001Sbz	int quality;
183001Sbz
183001Sbz	quality = bpf_ts_quality(tstype);
183001Sbz	if (quality == BPF_TSTAMP_NONE)
183001Sbz		return (quality);
125680Sbms
125680Sbms	if (m != NULL) {
125680Sbms		tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL);
125680Sbms		if (tag != NULL) {
125680Sbms			*bt = *(struct bintime *)(tag + 1);
125680Sbms			return (BPF_TSTAMP_EXTERN);
125680Sbms		}
125741Sbms	}
125680Sbms	if (quality == BPF_TSTAMP_NORMAL)
125680Sbms		binuptime(bt);
125680Sbms	else
125680Sbms		getbinuptime(bt);
125680Sbms
125783Sbms	return (quality);
125741Sbms}
125680Sbms
125680Sbms/*
125680Sbms * Incoming linkage from device drivers.  Process the packet pkt, of length
174022Sbz * pktlen, which is stored in a contiguous buffer.  The packet is parsed
125680Sbms * by each process' filter, and if accepted, stashed into the corresponding
125741Sbms * buffer.
125680Sbms */
125680Sbmsvoid
125680Sbmsbpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
125680Sbms{
221023Sattilio	struct bintime bt;
221023Sattilio	struct bpf_d *d;
221023Sattilio#ifdef BPF_JITTER
221023Sattilio	bpf_jit_filter *bf;
221023Sattilio#endif
221023Sattilio	u_int slen;
221023Sattilio	int gottime;
221023Sattilio
221023Sattilio	gottime = BPF_TSTAMP_NONE;
221023Sattilio
221023Sattilio	BPFIF_RLOCK(bp);
221023Sattilio
221023Sattilio	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
221023Sattilio		/*
221023Sattilio		 * We are not using any locks for d here because:
221023Sattilio		 * 1) any filter change is protected by interface
221023Sattilio		 * write lock
221023Sattilio		 * 2) destroying/detaching d is protected by interface
221023Sattilio		 * write lock, too
221023Sattilio		 */
221023Sattilio
221023Sattilio		/* XXX: Do not protect counter for the sake of performance. */
221023Sattilio		++d->bd_rcount;
221023Sattilio		/*
221023Sattilio		 * NB: We dont call BPF_CHECK_DIRECTION() here since there is no
221023Sattilio		 * way for the caller to indiciate to us whether this packet
221023Sattilio		 * is inbound or outbound.  In the bpf_mtap() routines, we use
221023Sattilio		 * the interface pointers on the mbuf to figure it out.
221023Sattilio		 */
221023Sattilio#ifdef BPF_JITTER
221023Sattilio		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
221023Sattilio		if (bf != NULL)
221023Sattilio			slen = (*(bf->func))(pkt, pktlen, pktlen);
221023Sattilio		else
221023Sattilio#endif
221023Sattilio		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
221023Sattilio		if (slen != 0) {
221023Sattilio			/*
221023Sattilio			 * Filter matches. Let's to acquire write lock.
221023Sattilio			 */
221023Sattilio			BPFD_LOCK(d);
221023Sattilio
221023Sattilio			d->bd_fcount++;
221023Sattilio			if (gottime < bpf_ts_quality(d->bd_tstamp))
221023Sattilio				gottime = bpf_gettime(&bt, d->bd_tstamp, NULL);
221023Sattilio#ifdef MAC
221023Sattilio			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
221023Sattilio#endif
221023Sattilio				catchpacket(d, pkt, pktlen, slen,
221023Sattilio				    bpf_append_bytes, &bt);
221023Sattilio			BPFD_UNLOCK(d);
221023Sattilio		}
221023Sattilio	}
221023Sattilio	BPFIF_RUNLOCK(bp);
221023Sattilio}
221023Sattilio
221023Sattilio#define	BPF_CHECK_DIRECTION(d, r, i)				\
221023Sattilio	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
221023Sattilio	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
221023Sattilio
125680Sbms/*
141886Smaxim * Incoming linkage from device drivers, when packet is in an mbuf chain.
141886Smaxim * Locking model is explained in bpf_tap().
141886Smaxim */
141886Smaximvoid
141886Smaximbpf_mtap(struct bpf_if *bp, struct mbuf *m)
141886Smaxim{
141886Smaxim	struct bintime bt;
141886Smaxim	struct bpf_d *d;
150804Smaxim#ifdef BPF_JITTER
141886Smaxim	bpf_jit_filter *bf;
141886Smaxim#endif
141886Smaxim	u_int pktlen, slen;
141886Smaxim	int gottime;
141886Smaxim
141886Smaxim	/* Skip outgoing duplicate packets. */
141886Smaxim	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
141886Smaxim		m->m_flags &= ~M_PROMISC;
141886Smaxim		return;
141886Smaxim	}
141886Smaxim
141886Smaxim	pktlen = m_length(m, NULL);
141886Smaxim	gottime = BPF_TSTAMP_NONE;
141886Smaxim
141886Smaxim	BPFIF_RLOCK(bp);
141886Smaxim
141886Smaxim	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
141886Smaxim		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
141886Smaxim			continue;
141886Smaxim		++d->bd_rcount;
141886Smaxim#ifdef BPF_JITTER
141886Smaxim		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
141886Smaxim		/* XXX We cannot handle multiple mbufs. */
141886Smaxim		if (bf != NULL && m->m_next == NULL)
141886Smaxim			slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen);
141886Smaxim		else
141886Smaxim#endif
141886Smaxim		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
141886Smaxim		if (slen != 0) {
141886Smaxim			BPFD_LOCK(d);
141886Smaxim
141886Smaxim			d->bd_fcount++;
141886Smaxim			if (gottime < bpf_ts_quality(d->bd_tstamp))
141886Smaxim				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
141886Smaxim#ifdef MAC
141886Smaxim			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
141886Smaxim#endif
141886Smaxim				catchpacket(d, (u_char *)m, pktlen, slen,
141886Smaxim				    bpf_append_mbuf, &bt);
141886Smaxim			BPFD_UNLOCK(d);
181803Sbz		}
141886Smaxim	}
148385Sume	BPFIF_RUNLOCK(bp);
181803Sbz}
148616Sume
148385Sume/*
141886Smaxim * Incoming linkage from device drivers, when packet is in
141886Smaxim * an mbuf chain and to be prepended by a contiguous header.
221250Sbz */
141886Smaximvoid
141886Smaximbpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
141886Smaxim{
141886Smaxim	struct bintime bt;
141886Smaxim	struct mbuf mb;
141886Smaxim	struct bpf_d *d;
141886Smaxim	u_int pktlen, slen;
221250Sbz	int gottime;
141886Smaxim
141886Smaxim	/* Skip outgoing duplicate packets. */
141886Smaxim	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
181803Sbz		m->m_flags &= ~M_PROMISC;
141886Smaxim		return;
141886Smaxim	}
141886Smaxim
222488Srwatson	pktlen = m_length(m, NULL);
222488Srwatson	/*
222488Srwatson	 * Craft on-stack mbuf suitable for passing to bpf_filter.
141886Smaxim	 * Note that we cut corners here; we only setup what's
141886Smaxim	 * absolutely needed--this mbuf should never go anywhere else.
221250Sbz	 */
141886Smaxim	mb.m_next = m;
222488Srwatson	mb.m_data = data;
222488Srwatson	mb.m_len = dlen;
141886Smaxim	pktlen += dlen;
221250Sbz
141886Smaxim	gottime = BPF_TSTAMP_NONE;
141886Smaxim
189848Srwatson	BPFIF_RLOCK(bp);
157474Srwatson
157474Srwatson	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
157474Srwatson		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
157474Srwatson			continue;
157474Srwatson		++d->bd_rcount;
157474Srwatson		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
157427Srwatson		if (slen != 0) {
157474Srwatson			BPFD_LOCK(d);
157474Srwatson
173884Srwatson			d->bd_fcount++;
178285Srwatson			if (gottime < bpf_ts_quality(d->bd_tstamp))
189848Srwatson				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
157427Srwatson#ifdef MAC
157427Srwatson			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
173884Srwatson#endif
173884Srwatson				catchpacket(d, (u_char *)&mb, pktlen, slen,
178285Srwatson				    bpf_append_mbuf, &bt);
173884Srwatson			BPFD_UNLOCK(d);
178285Srwatson		}
141886Smaxim	}
141886Smaxim	BPFIF_RUNLOCK(bp);
181803Sbz}
141886Smaxim
141886Smaxim#undef	BPF_CHECK_DIRECTION
141886Smaxim
250744Szec#undef	BPF_TSTAMP_NONE
141886Smaxim#undef	BPF_TSTAMP_FAST
141886Smaxim#undef	BPF_TSTAMP_NORMAL
169683Sandre#undef	BPF_TSTAMP_EXTERN
169683Sandre
169683Sandrestatic int
169683Sandrebpf_hdrlen(struct bpf_d *d)
169683Sandre{
169683Sandre	int hdrlen;
169683Sandre
169683Sandre	hdrlen = d->bd_bif->bif_hdrlen;
169683Sandre#ifndef BURN_BRIDGES
169683Sandre	if (d->bd_tstamp == BPF_T_NONE ||
169683Sandre	    BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME)
169683Sandre#ifdef COMPAT_FREEBSD32
169683Sandre		if (d->bd_compat32)
211462Sandre			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32);
211462Sandre		else
211462Sandre#endif
211462Sandre			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr);
211462Sandre	else
211462Sandre#endif
211462Sandre		hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr);
211462Sandre#ifdef COMPAT_FREEBSD32
211462Sandre	if (d->bd_compat32)
211462Sandre		hdrlen = BPF_WORDALIGN32(hdrlen);
211462Sandre	else
211462Sandre#endif
169683Sandre		hdrlen = BPF_WORDALIGN(hdrlen);
171229Speter
169683Sandre	return (hdrlen - d->bd_bif->bif_hdrlen);
211462Sandre}
211462Sandre
211462Sandrestatic void
211462Sandrebpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype)
211462Sandre{
211462Sandre	struct bintime bt2;
211462Sandre	struct timeval tsm;
211462Sandre	struct timespec tsn;
211462Sandre
211462Sandre	if ((tstype & BPF_T_MONOTONIC) == 0) {
211462Sandre		bt2 = *bt;
211462Sandre		bintime_add(&bt2, &boottimebin);
169683Sandre		bt = &bt2;
169683Sandre	}
169683Sandre	switch (BPF_T_FORMAT(tstype)) {
169683Sandre	case BPF_T_MICROTIME:
171230Speter		bintime2timeval(bt, &tsm);
169683Sandre		ts->bt_sec = tsm.tv_sec;
171230Speter		ts->bt_frac = tsm.tv_usec;
169683Sandre		break;
169683Sandre	case BPF_T_NANOTIME:
169683Sandre		bintime2timespec(bt, &tsn);
169683Sandre		ts->bt_sec = tsn.tv_sec;
169913Sandre		ts->bt_frac = tsn.tv_nsec;
169913Sandre		break;
169683Sandre	case BPF_T_BINTIME:
169913Sandre		ts->bt_sec = bt->sec;
169913Sandre		ts->bt_frac = bt->frac;
169683Sandre		break;
169913Sandre	}
169683Sandre}
169913Sandre
169683Sandre/*
169683Sandre * Move the packet data from interface memory (pkt) into the
170019Srwatson * store buffer.  "cpfn" is the routine called to do the actual data
169683Sandre * transfer.  bcopy is passed in to copy contiguous chunks, while
169683Sandre * bpf_append_mbuf is passed in to copy mbuf chains.  In the latter case,
169683Sandre * pkt is really an mbuf.
169683Sandre */
169683Sandrestatic void
169683Sandrecatchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
186222Sbz    void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
169683Sandre    struct bintime *bt)
169683Sandre{
169683Sandre	struct bpf_xhdr hdr;
169683Sandre#ifndef BURN_BRIDGES
169683Sandre	struct bpf_hdr hdr_old;
169683Sandre#ifdef COMPAT_FREEBSD32
169683Sandre	struct bpf_hdr32 hdr32_old;
169683Sandre#endif
169683Sandre#endif
169683Sandre	int caplen, curlen, hdrlen, totlen;
169683Sandre	int do_wakeup = 0;
169683Sandre	int do_timestamp;
169683Sandre	int tstype;
169683Sandre
169683Sandre	BPFD_LOCK_ASSERT(d);
169683Sandre
169683Sandre	/*
169683Sandre	 * Detect whether user space has released a buffer back to us, and if
169683Sandre	 * so, move it from being a hold buffer to a free buffer.  This may
169683Sandre	 * not be the best place to do it (for example, we might only want to
169683Sandre	 * run this check if we need the space), but for now it's a reliable
169683Sandre	 * spot to do it.
169683Sandre	 */
169683Sandre	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
169683Sandre		while (d->bd_hbuf_in_use)
221250Sbz			mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
169683Sandre			    PRINET, "bd_hbuf", 0);
169683Sandre		d->bd_fbuf = d->bd_hbuf;
169683Sandre		d->bd_hbuf = NULL;
169683Sandre		d->bd_hlen = 0;
169683Sandre		bpf_buf_reclaimed(d);
169683Sandre	}
169683Sandre
169683Sandre	/*
221250Sbz	 * Figure out how many bytes to move.  If the packet is
169683Sandre	 * greater or equal to the snapshot length, transfer that
169683Sandre	 * much.  Otherwise, transfer the whole packet (unless
169683Sandre	 * we hit the buffer size limit).
169683Sandre	 */
169683Sandre	hdrlen = bpf_hdrlen(d);
169683Sandre	totlen = hdrlen + min(snaplen, pktlen);
169683Sandre	if (totlen > d->bd_bufsize)
169913Sandre		totlen = d->bd_bufsize;
169683Sandre
169683Sandre	/*
169683Sandre	 * Round up the end of the previous packet to the next longword.
	 *
	 * Drop the packet if there's no room and no hope of room
	 * If the packet would overflow the storage buffer or the storage
	 * buffer is considered immutable by the buffer model, try to rotate
	 * the buffer and wakeup pending processes.
	 */
#ifdef COMPAT_FREEBSD32
	if (d->bd_compat32)
		curlen = BPF_WORDALIGN32(d->bd_slen);
	else
#endif
		curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
		if (d->bd_fbuf == NULL) {
			/*
			 * There's no room in the store buffer, and no
			 * prospect of room, so drop the packet.  Notify the
			 * buffer model.
			 */
			bpf_buffull(d);
			++d->bd_dcount;
			return;
		}
		while (d->bd_hbuf_in_use)
			mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
			    PRINET, "bd_hbuf", 0);
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has already
		 * expired during a select call.  A packet arrived, so the
		 * reader should be woken up.
		 */
		do_wakeup = 1;
	caplen = totlen - hdrlen;
	tstype = d->bd_tstamp;
	do_timestamp = tstype != BPF_T_NONE;
#ifndef BURN_BRIDGES
	if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) {
		struct bpf_ts ts;
		if (do_timestamp)
			bpf_bintime2ts(bt, &ts, tstype);
#ifdef COMPAT_FREEBSD32
		if (d->bd_compat32) {
			bzero(&hdr32_old, sizeof(hdr32_old));
			if (do_timestamp) {
				hdr32_old.bh_tstamp.tv_sec = ts.bt_sec;
				hdr32_old.bh_tstamp.tv_usec = ts.bt_frac;
			}
			hdr32_old.bh_datalen = pktlen;
			hdr32_old.bh_hdrlen = hdrlen;
			hdr32_old.bh_caplen = caplen;
			bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old,
			    sizeof(hdr32_old));
			goto copy;
		}
#endif
		bzero(&hdr_old, sizeof(hdr_old));
		if (do_timestamp) {
			hdr_old.bh_tstamp.tv_sec = ts.bt_sec;
			hdr_old.bh_tstamp.tv_usec = ts.bt_frac;
		}
		hdr_old.bh_datalen = pktlen;
		hdr_old.bh_hdrlen = hdrlen;
		hdr_old.bh_caplen = caplen;
		bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old,
		    sizeof(hdr_old));
		goto copy;
	}
#endif

	/*
	 * Append the bpf header.  Note we append the actual header size, but
	 * move forward the length of the header plus padding.
	 */
	bzero(&hdr, sizeof(hdr));
	if (do_timestamp)
		bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype);
	hdr.bh_datalen = pktlen;
	hdr.bh_hdrlen = hdrlen;
	hdr.bh_caplen = caplen;
	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
#ifndef BURN_BRIDGES
copy:
#endif
	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen);
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{

	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it yet hasn't been marked
	 * free.
	 */
	bpf_free(d);
	if (d->bd_rfilter != NULL) {
		free((caddr_t)d->bd_rfilter, M_BPF);
#ifdef BPF_JITTER
		if (d->bd_bfilter != NULL)
			bpf_destroy_jit_filter(d->bd_bfilter);
#endif
	}
	if (d->bd_wfilter != NULL)
		free((caddr_t)d->bd_wfilter, M_BPF);
	mtx_destroy(&d->bd_lock);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supporrted).
 */
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;

	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
	if (bp == NULL)
		panic("bpfattach");

	LIST_INIT(&bp->bif_dlist);
	LIST_INIT(&bp->bif_wlist);
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	rw_init(&bp->bif_lock, "bpf interface lock");
	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
	*driverp = bp;

	BPF_LOCK();
	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
	BPF_UNLOCK();

	bp->bif_hdrlen = hdrlen;

	if (bootverbose)
		if_printf(ifp, "bpf attached\n");
}

/*
 * Detach bpf from an interface. This involves detaching each descriptor
 * associated with the interface. Notify each descriptor as it's detached
 * so that any sleepers wake up and get ENXIO.
 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if	*bp, *bp_temp;
	struct bpf_d	*d;
	int ndetached;

	ndetached = 0;

	BPF_LOCK();
	/* Find all bpf_if struct's which reference ifp and detach them. */
	LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
		if (ifp != bp->bif_ifp)
			continue;

		LIST_REMOVE(bp, bif_next);
		/* Add to to-be-freed list */
		LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next);

		ndetached++;
		/*
		 * Delay freeing bp till interface is detached
		 * and all routes through this interface are removed.
		 * Mark bp as detached to restrict new consumers.
		 */
		BPFIF_WLOCK(bp);
		bp->flags |= BPFIF_FLAG_DYING;
		BPFIF_WUNLOCK(bp);

		CTR4(KTR_NET, "%s: sheduling free for encap %d (%p) for if %p",
		    __func__, bp->bif_dlt, bp, ifp);

		/* Free common descriptors */
		while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
			bpf_detachd_locked(d);
			BPFD_LOCK(d);
			bpf_wakeup(d);
			BPFD_UNLOCK(d);
		}

		/* Free writer-only descriptors */
		while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
			bpf_detachd_locked(d);
			BPFD_LOCK(d);
			bpf_wakeup(d);
			BPFD_UNLOCK(d);
		}
	}
	BPF_UNLOCK();

#ifdef INVARIANTS
	if (ndetached == 0)
		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
#endif
}

/*
 * Interface departure handler.
 * Note departure event does not guarantee interface is going down.
 * Interface renaming is currently done via departure/arrival event set.
 *
 * Departure handled is called after all routes pointing to
 * given interface are removed and interface is in down state
 * restricting any packets to be sent/received. We assume it is now safe
 * to free data allocated by BPF.
 */
static void
bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct bpf_if *bp, *bp_temp;
	int nmatched = 0;

	BPF_LOCK();
	/*
	 * Find matching entries in free list.
	 * Nothing should be found if bpfdetach() was not called.
	 */
	LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) {
		if (ifp != bp->bif_ifp)
			continue;

		CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
		    __func__, bp, ifp);

		LIST_REMOVE(bp, bif_next);

		rw_destroy(&bp->bif_lock);
		free(bp, M_BPF);

		nmatched++;
	}
	BPF_UNLOCK();

	/*
	 * Note that we cannot zero other pointers to
	 * custom DLTs possibly used by given interface.
	 */
	if (nmatched != 0)
		ifp->if_bpf = NULL;
}

/*
 * Get a list of available data link type of the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	BPF_LOCK_ASSERT();

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return (ENOMEM);
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	BPF_LOCK_ASSERT();

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;

	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}

	if (bp != NULL) {
		opromisc = d->bd_promisc;
		bpf_attachd(d, bp);
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		if (opromisc) {
			error = ifpromisc(bp->bif_ifp, 1);
			if (error)
				if_printf(bp->bif_ifp,
					"bpf_setdlt: ifpromisc failed (%d)\n",
					error);
			else
				d->bd_promisc = 1;
		}
	}
	return (bp == NULL ? EINVAL : 0);
}

static void
bpf_drvinit(void *unused)
{
	struct cdev *dev;

	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
	LIST_INIT(&bpf_iflist);
	LIST_INIT(&bpf_freelist);

	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
	/* For compatibility */
	make_dev_alias(dev, "bpf0");

	/* Register interface departure handler */
	bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
		    ifnet_departure_event, bpf_ifdetach, NULL,
		    EVENTHANDLER_PRI_ANY);
}

/*
 * Zero out the various packet counters associated with all of the bpf
 * descriptors.  At some point, we will probably want to get a bit more
 * granular and allow the user to specify descriptors to be zeroed.
 */
static void
bpf_zero_counters(void)
{
	struct bpf_if *bp;
	struct bpf_d *bd;

	BPF_LOCK();
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_RLOCK(bp);
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			BPFD_LOCK(bd);
			bd->bd_rcount = 0;
			bd->bd_dcount = 0;
			bd->bd_fcount = 0;
			bd->bd_wcount = 0;
			bd->bd_wfcount = 0;
			bd->bd_zcopy = 0;
			BPFD_UNLOCK(bd);
		}
		BPFIF_RUNLOCK(bp);
	}
	BPF_UNLOCK();
}

/*
 * Fill filter statistics
 */
static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	bzero(d, sizeof(*d));
	BPFD_LOCK_ASSERT(bd);
	d->bd_structsize = sizeof(*d);
	/* XXX: reading should be protected by global lock */
	d->bd_immediate = bd->bd_immediate;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_hdrcmplt;
	d->bd_direction = bd->bd_direction;
	d->bd_feedback = bd->bd_feedback;
	d->bd_async = bd->bd_async;
	d->bd_rcount = bd->bd_rcount;
	d->bd_dcount = bd->bd_dcount;
	d->bd_fcount = bd->bd_fcount;
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname,
	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	d->bd_locked = bd->bd_locked;
	d->bd_wcount = bd->bd_wcount;
	d->bd_wdcount = bd->bd_wdcount;
	d->bd_wfcount = bd->bd_wfcount;
	d->bd_zcopy = bd->bd_zcopy;
	d->bd_bufmode = bd->bd_bufmode;
}

/*
 * Handle `netstat -B' stats request
 */
static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct xbpf_d *xbdbuf, *xbd, zerostats;
	int index, error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct. It is possible for non
	 * privileged users to open bpf devices. It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = priv_check(req->td, PRIV_NET_BPF);
	if (error)
		return (error);
	/*
	 * Check to see if the user is requesting that the counters be
	 * zeroed out.  Explicitly check that the supplied data is zeroed,
	 * as we aren't allowing the user to set the counters currently.
	 */
	if (req->newptr != NULL) {
		if (req->newlen != sizeof(zerostats))
			return (EINVAL);
		bzero(&zerostats, sizeof(zerostats));
		xbd = req->newptr;
		if (bcmp(xbd, &zerostats, sizeof(*xbd)) != 0)
			return (EINVAL);
		bpf_zero_counters();
		return (0);
	}
	if (req->oldptr == NULL)
		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
	if (bpf_bpfd_cnt == 0)
		return (SYSCTL_OUT(req, 0, 0));
	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
	BPF_LOCK();
	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
		BPF_UNLOCK();
		free(xbdbuf, M_BPF);
		return (ENOMEM);
	}
	index = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_RLOCK(bp);
		/* Send writers-only first */
		LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
		BPFIF_RUNLOCK(bp);
	}
	BPF_UNLOCK();
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}

SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL);

#else /* !DEV_BPF && !NETGRAPH_BPF */
/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */
static struct bpf_if bp_null;

void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}

void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}

void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}

void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{

	*driverp = &bp_null;
}

void
bpfdetach(struct ifnet *ifp)
{
}

u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{
	return -1;	/* "no filter" behaviour */
}

int
bpf_validate(const struct bpf_insn *f, int len)
{
	return 0;		/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */