bpf.c revision 143064
1305549Sjhb/*-
2305549Sjhb * Copyright (c) 1990, 1991, 1993
3305549Sjhb *	The Regents of the University of California.  All rights reserved.
4305549Sjhb *
5305549Sjhb * This code is derived from the Stanford/CMU enet packet filter,
6305549Sjhb * (net/enet.c) distributed as part of 4.3BSD, and code contributed
7305549Sjhb * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
8305549Sjhb * Berkeley Laboratory.
9305549Sjhb *
10305549Sjhb * Redistribution and use in source and binary forms, with or without
11305549Sjhb * modification, are permitted provided that the following conditions
12305549Sjhb * are met:
13305549Sjhb * 1. Redistributions of source code must retain the above copyright
14305549Sjhb *    notice, this list of conditions and the following disclaimer.
15305549Sjhb * 2. Redistributions in binary form must reproduce the above copyright
16305549Sjhb *    notice, this list of conditions and the following disclaimer in the
17305549Sjhb *    documentation and/or other materials provided with the distribution.
18305549Sjhb * 4. Neither the name of the University nor the names of its contributors
19305549Sjhb *    may be used to endorse or promote products derived from this software
20305549Sjhb *    without specific prior written permission.
21305549Sjhb *
22305549Sjhb * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23305549Sjhb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24305549Sjhb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25305549Sjhb * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26305549Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27305549Sjhb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28305549Sjhb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29305549Sjhb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30305549Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31305549Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32305549Sjhb * SUCH DAMAGE.
33305549Sjhb *
34305549Sjhb *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
35305549Sjhb *
36305549Sjhb * $FreeBSD: head/sys/net/bpf.c 143064 2005-03-02 21:59:39Z jmg $
37305549Sjhb */
38305549Sjhb
39305549Sjhb#include "opt_bpf.h"
40305549Sjhb#include "opt_mac.h"
41305549Sjhb#include "opt_netgraph.h"
42305549Sjhb
43305549Sjhb#include <sys/types.h>
44305549Sjhb#include <sys/param.h>
45305549Sjhb#include <sys/systm.h>
46305549Sjhb#include <sys/conf.h>
47305549Sjhb#include <sys/fcntl.h>
48305549Sjhb#include <sys/mac.h>
49305549Sjhb#include <sys/malloc.h>
50305549Sjhb#include <sys/mbuf.h>
51305549Sjhb#include <sys/time.h>
52305549Sjhb#include <sys/proc.h>
53305549Sjhb#include <sys/signalvar.h>
54305549Sjhb#include <sys/filio.h>
55305549Sjhb#include <sys/sockio.h>
56305549Sjhb#include <sys/ttycom.h>
57305549Sjhb#include <sys/uio.h>
58305549Sjhb
59305549Sjhb#include <sys/event.h>
60305549Sjhb#include <sys/file.h>
61305549Sjhb#include <sys/poll.h>
62305549Sjhb#include <sys/proc.h>
63305549Sjhb
64305549Sjhb#include <sys/socket.h>
65330307Snp
66330307Snp#include <net/if.h>
67305549Sjhb#include <net/bpf.h>
68305549Sjhb#include <net/bpfdesc.h>
69305549Sjhb
70305549Sjhb#include <netinet/in.h>
71305549Sjhb#include <netinet/if_ether.h>
72305549Sjhb#include <sys/kernel.h>
73305549Sjhb#include <sys/sysctl.h>
74305549Sjhb
75305549Sjhbstatic MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
76305549Sjhb
77305549Sjhb#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
78305549Sjhb
79305549Sjhb#define PRINET  26			/* interruptible */
80305549Sjhb
81305549Sjhb/*
82305549Sjhb * The default read buffer size is patchable.
83305549Sjhb */
84305549Sjhbstatic int bpf_bufsize = 4096;
85305549SjhbSYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW,
86305549Sjhb	&bpf_bufsize, 0, "");
87305549Sjhbstatic int bpf_maxbufsize = BPF_MAXBUFSIZE;
88305549SjhbSYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW,
89305549Sjhb	&bpf_maxbufsize, 0, "");
90305549Sjhb
91305549Sjhb/*
92305549Sjhb * bpf_iflist is a list of BPF interface structures, each corresponding to a
93305549Sjhb * specific DLT.  The same network interface might have several BPF interface
94305549Sjhb * structures registered by different layers in the stack (i.e., 802.11
95305549Sjhb * frames, ethernet frames, etc).
96305549Sjhb */
97305549Sjhbstatic LIST_HEAD(, bpf_if)	bpf_iflist;
98305549Sjhbstatic struct mtx	bpf_mtx;		/* bpf global lock */
99305549Sjhb
100355253Snpstatic int	bpf_allocbufs(struct bpf_d *);
101355253Snpstatic void	bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
102355253Snpstatic void	bpf_detachd(struct bpf_d *d);
103355253Snpstatic void	bpf_freed(struct bpf_d *);
104309560Sjhbstatic void	bpf_mcopy(const void *, void *, size_t);
105318843Snpstatic int	bpf_movein(struct uio *, int,
106309560Sjhb		    struct mbuf **, struct sockaddr *, int *);
107309560Sjhbstatic int	bpf_setif(struct bpf_d *, struct ifreq *);
108318843Snpstatic void	bpf_timed_out(void *);
109318843Snpstatic __inline void
110318843Snp		bpf_wakeup(struct bpf_d *);
111318843Snpstatic void	catchpacket(struct bpf_d *, u_char *, u_int,
112309560Sjhb		    u_int, void (*)(const void *, void *, size_t));
113309560Sjhbstatic void	reset_d(struct bpf_d *);
114318843Snpstatic int	 bpf_setf(struct bpf_d *, struct bpf_program *);
115309560Sjhbstatic int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
116318843Snpstatic int	bpf_setdlt(struct bpf_d *, u_int);
117318843Snpstatic void	filt_bpfdetach(struct knote *);
118318843Snpstatic int	filt_bpfread(struct knote *, long);
119318843Snp
120318843Snpstatic	d_open_t	bpfopen;
121318843Snpstatic	d_close_t	bpfclose;
122318843Snpstatic	d_read_t	bpfread;
123318843Snpstatic	d_write_t	bpfwrite;
124355253Snpstatic	d_ioctl_t	bpfioctl;
125355253Snpstatic	d_poll_t	bpfpoll;
126355253Snpstatic	d_kqfilter_t	bpfkqfilter;
127355253Snp
128355253Snpstatic struct cdevsw bpf_cdevsw = {
129355253Snp	.d_version =	D_VERSION,
130305549Sjhb	.d_flags =	D_NEEDGIANT,
131305549Sjhb	.d_open =	bpfopen,
132305549Sjhb	.d_close =	bpfclose,
133305549Sjhb	.d_read =	bpfread,
134305549Sjhb	.d_write =	bpfwrite,
135305549Sjhb	.d_ioctl =	bpfioctl,
136305549Sjhb	.d_poll =	bpfpoll,
137305549Sjhb	.d_name =	"bpf",
138305549Sjhb	.d_kqfilter =	bpfkqfilter,
139305549Sjhb};
140305549Sjhb
141305549Sjhbstatic struct filterops bpfread_filtops =
142305549Sjhb	{ 1, NULL, filt_bpfdetach, filt_bpfread };
143306694Sjhb
144305549Sjhbstatic int
145305549Sjhbbpf_movein(uio, linktype, mp, sockp, datlen)
146305549Sjhb	struct uio *uio;
147305549Sjhb	int linktype, *datlen;
148305549Sjhb	struct mbuf **mp;
149305549Sjhb	struct sockaddr *sockp;
150305549Sjhb{
151305549Sjhb	struct mbuf *m;
152305549Sjhb	int error;
153305549Sjhb	int len;
154305549Sjhb	int hlen;
155305549Sjhb
156305549Sjhb	/*
157305549Sjhb	 * Build a sockaddr based on the data link layer type.
158305549Sjhb	 * We do this at this level because the ethernet header
159306694Sjhb	 * is copied directly into the data field of the sockaddr.
160305549Sjhb	 * In the case of SLIP, there is no header and the packet
161305549Sjhb	 * is forwarded as is.
162305549Sjhb	 * Also, we are careful to leave room at the front of the mbuf
163305549Sjhb	 * for the link level header.
164305549Sjhb	 */
165305549Sjhb	switch (linktype) {
166305549Sjhb
167305549Sjhb	case DLT_SLIP:
168305549Sjhb		sockp->sa_family = AF_INET;
169305549Sjhb		hlen = 0;
170305549Sjhb		break;
171305549Sjhb
172309560Sjhb	case DLT_EN10MB:
173309560Sjhb		sockp->sa_family = AF_UNSPEC;
174309560Sjhb		/* XXX Would MAXLINKHDR be better? */
175309560Sjhb		hlen = ETHER_HDR_LEN;
176309560Sjhb		break;
177309560Sjhb
178309560Sjhb	case DLT_FDDI:
179309560Sjhb		sockp->sa_family = AF_IMPLINK;
180309560Sjhb		hlen = 0;
181309560Sjhb		break;
182309560Sjhb
183309560Sjhb	case DLT_RAW:
184309560Sjhb	case DLT_NULL:
185309560Sjhb		sockp->sa_family = AF_UNSPEC;
186309560Sjhb		hlen = 0;
187309560Sjhb		break;
188305549Sjhb
189305549Sjhb	case DLT_ATM_RFC1483:
190305549Sjhb		/*
191305549Sjhb		 * en atm driver requires 4-byte atm pseudo header.
192305549Sjhb		 * though it isn't standard, vpi:vci needs to be
193305549Sjhb		 * specified anyway.
194305549Sjhb		 */
195305549Sjhb		sockp->sa_family = AF_UNSPEC;
196305549Sjhb		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
197305549Sjhb		break;
198305549Sjhb
199305549Sjhb	case DLT_PPP:
200305549Sjhb		sockp->sa_family = AF_UNSPEC;
201305549Sjhb		hlen = 4;	/* This should match PPP_HDRLEN */
202305549Sjhb		break;
203305549Sjhb
204305549Sjhb	default:
205305549Sjhb		return (EIO);
206305549Sjhb	}
207305549Sjhb
208305549Sjhb	len = uio->uio_resid;
209305549Sjhb	*datlen = len - hlen;
210305549Sjhb	if ((unsigned)len > MCLBYTES)
211305549Sjhb		return (EIO);
212305549Sjhb
213305549Sjhb	if (len > MHLEN) {
214305549Sjhb		m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
215305549Sjhb	} else {
216305549Sjhb		MGETHDR(m, M_TRYWAIT, MT_DATA);
217305549Sjhb	}
218305549Sjhb	if (m == NULL)
219305549Sjhb		return (ENOBUFS);
220305549Sjhb	m->m_pkthdr.len = m->m_len = len;
221305549Sjhb	m->m_pkthdr.rcvif = NULL;
222305549Sjhb	*mp = m;
223305549Sjhb
224305549Sjhb	/*
225305549Sjhb	 * Make room for link header.
226305549Sjhb	 */
227305549Sjhb	if (hlen != 0) {
228305549Sjhb		m->m_pkthdr.len -= hlen;
229305549Sjhb		m->m_len -= hlen;
230305549Sjhb#if BSD >= 199103
231305549Sjhb		m->m_data += hlen; /* XXX */
232305549Sjhb#else
233305549Sjhb		m->m_off += hlen;
234305549Sjhb#endif
235305549Sjhb		error = uiomove(sockp->sa_data, hlen, uio);
236305549Sjhb		if (error)
237305549Sjhb			goto bad;
238305549Sjhb	}
239305549Sjhb	error = uiomove(mtod(m, void *), len - hlen, uio);
240305549Sjhb	if (!error)
241305549Sjhb		return (0);
242305549Sjhbbad:
243305549Sjhb	m_freem(m);
244305549Sjhb	return (error);
245305549Sjhb}
246305549Sjhb
247305549Sjhb/*
248305549Sjhb * Attach file to the bpf interface, i.e. make d listen on bp.
249305549Sjhb */
250305549Sjhbstatic void
251305549Sjhbbpf_attachd(d, bp)
252305549Sjhb	struct bpf_d *d;
253305549Sjhb	struct bpf_if *bp;
254305549Sjhb{
255305549Sjhb	/*
256305549Sjhb	 * Point d at bp, and add d to the interface's list of listeners.
257305549Sjhb	 * Finally, point the driver's bpf cookie at the interface so
258305549Sjhb	 * it will divert packets to bpf.
259305549Sjhb	 */
260305549Sjhb	BPFIF_LOCK(bp);
261305549Sjhb	d->bd_bif = bp;
262305549Sjhb	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
263305549Sjhb
264305549Sjhb	*bp->bif_driverp = bp;
265305549Sjhb	BPFIF_UNLOCK(bp);
266305549Sjhb}
267305549Sjhb
268305549Sjhb/*
269305549Sjhb * Detach a file from its interface.
270305549Sjhb */
271305549Sjhbstatic void
272305549Sjhbbpf_detachd(d)
273305549Sjhb	struct bpf_d *d;
274305549Sjhb{
275305549Sjhb	int error;
276305549Sjhb	struct bpf_if *bp;
277305549Sjhb	struct ifnet *ifp;
278305549Sjhb
279305549Sjhb	bp = d->bd_bif;
280305549Sjhb	BPFIF_LOCK(bp);
281305549Sjhb	BPFD_LOCK(d);
282305549Sjhb	ifp = d->bd_bif->bif_ifp;
283305549Sjhb
284305549Sjhb	/*
285305549Sjhb	 * Remove d from the interface's descriptor list.
286305549Sjhb	 */
287305549Sjhb	LIST_REMOVE(d, bd_next);
288305549Sjhb
289305549Sjhb	/*
290305549Sjhb	 * Let the driver know that there are no more listeners.
291305549Sjhb	 */
292305549Sjhb	if (LIST_EMPTY(&bp->bif_dlist))
293305549Sjhb		*bp->bif_driverp = NULL;
294305549Sjhb
295305549Sjhb	d->bd_bif = NULL;
296305549Sjhb	BPFD_UNLOCK(d);
297305549Sjhb	BPFIF_UNLOCK(bp);
298346964Snp
299346964Snp	/*
300346964Snp	 * Check if this descriptor had requested promiscuous mode.
301346964Snp	 * If so, turn it off.
302346964Snp	 */
303346964Snp	if (d->bd_promisc) {
304305549Sjhb		d->bd_promisc = 0;
305305549Sjhb		error = ifpromisc(ifp, 0);
306305549Sjhb		if (error != 0 && error != ENXIO) {
307305549Sjhb			/*
308305549Sjhb			 * ENXIO can happen if a pccard is unplugged
309305549Sjhb			 * Something is really wrong if we were able to put
310305549Sjhb			 * the driver into promiscuous mode, but can't
311330307Snp			 * take it out.
312305549Sjhb			 */
313305549Sjhb			if_printf(bp->bif_ifp,
314330307Snp				"bpf_detach: ifpromisc failed (%d)\n", error);
315305549Sjhb		}
316305549Sjhb	}
317305549Sjhb}
318305549Sjhb
319305549Sjhb/*
320305549Sjhb * Open ethernet device.  Returns ENXIO for illegal minor device number,
321305549Sjhb * EBUSY if file is open by another process.
322330307Snp */
323305549Sjhb/* ARGSUSED */
324305549Sjhbstatic	int
325305549Sjhbbpfopen(dev, flags, fmt, td)
326305549Sjhb	struct cdev *dev;
327305549Sjhb	int flags;
328305549Sjhb	int fmt;
329305549Sjhb	struct thread *td;
330305549Sjhb{
331305549Sjhb	struct bpf_d *d;
332305549Sjhb
333305549Sjhb	mtx_lock(&bpf_mtx);
334305549Sjhb	d = dev->si_drv1;
335305549Sjhb	/*
336305549Sjhb	 * Each minor can be opened by only one process.  If the requested
337305549Sjhb	 * minor is in use, return EBUSY.
338305549Sjhb	 */
339305549Sjhb	if (d != NULL) {
340305549Sjhb		mtx_unlock(&bpf_mtx);
341305549Sjhb		return (EBUSY);
342305549Sjhb	}
343305549Sjhb	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
344305549Sjhb	mtx_unlock(&bpf_mtx);
345305549Sjhb
346305549Sjhb	if ((dev->si_flags & SI_NAMED) == 0)
347305549Sjhb		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
348305549Sjhb		    "bpf%d", dev2unit(dev));
349305549Sjhb	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
350305549Sjhb	dev->si_drv1 = d;
351305549Sjhb	d->bd_bufsize = bpf_bufsize;
352305549Sjhb	d->bd_sig = SIGIO;
353305549Sjhb	d->bd_seesent = 1;
354305549Sjhb#ifdef MAC
355305549Sjhb	mac_init_bpfdesc(d);
356305549Sjhb	mac_create_bpfdesc(td->td_ucred, d);
357305549Sjhb#endif
358305549Sjhb	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
359305549Sjhb	callout_init(&d->bd_callout, NET_CALLOUT_MPSAFE);
360330307Snp	knlist_init(&d->bd_sel.si_note, &d->bd_mtx);
361305549Sjhb
362305549Sjhb	return (0);
363330307Snp}
364305549Sjhb
365305549Sjhb/*
366305549Sjhb * Close the descriptor by detaching it from its interface,
367305549Sjhb * deallocating its buffers, and marking it free.
368305549Sjhb */
369305549Sjhb/* ARGSUSED */
370305549Sjhbstatic	int
371305549Sjhbbpfclose(dev, flags, fmt, td)
372305549Sjhb	struct cdev *dev;
373330307Snp	int flags;
374305549Sjhb	int fmt;
375305549Sjhb	struct thread *td;
376305549Sjhb{
377305549Sjhb	struct bpf_d *d = dev->si_drv1;
378330307Snp
379305549Sjhb	BPFD_LOCK(d);
380305549Sjhb	if (d->bd_state == BPF_WAITING)
381330307Snp		callout_stop(&d->bd_callout);
382305549Sjhb	d->bd_state = BPF_IDLE;
383330307Snp	BPFD_UNLOCK(d);
384305549Sjhb	funsetown(&d->bd_sigio);
385305549Sjhb	mtx_lock(&bpf_mtx);
386305549Sjhb	if (d->bd_bif)
387305549Sjhb		bpf_detachd(d);
388305549Sjhb	mtx_unlock(&bpf_mtx);
389305549Sjhb#ifdef MAC
390305549Sjhb	mac_destroy_bpfdesc(d);
391330307Snp#endif /* MAC */
392305549Sjhb	knlist_destroy(&d->bd_sel.si_note);
393305549Sjhb	bpf_freed(d);
394330307Snp	dev->si_drv1 = NULL;
395305549Sjhb	free(d, M_BPF);
396305549Sjhb
397305549Sjhb	return (0);
398330307Snp}
399305549Sjhb
400305549Sjhb
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands
 * to a single statement and stays correct under an unbraced if/else.
 * Invoke it as "ROTATE_BUFFERS(d);" -- the caller supplies the semicolon.
 * The argument is evaluated several times, so it must not have side
 * effects.
 */
#define ROTATE_BUFFERS(d) do { \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL; \
} while (0)
412305549Sjhb/*
413330307Snp *  bpfread - read next chunk of packets from buffers
414305549Sjhb */
415305549Sjhbstatic	int
416305549Sjhbbpfread(dev, uio, ioflag)
417305549Sjhb	struct cdev *dev;
418330307Snp	struct uio *uio;
419305549Sjhb	int ioflag;
420305549Sjhb{
421305549Sjhb	struct bpf_d *d = dev->si_drv1;
422305549Sjhb	int timed_out;
423305549Sjhb	int error;
424305549Sjhb
425305549Sjhb	/*
426305549Sjhb	 * Restrict application to use a buffer the same size as
427305549Sjhb	 * as kernel buffers.
428305549Sjhb	 */
429330307Snp	if (uio->uio_resid != d->bd_bufsize)
430330307Snp		return (EINVAL);
431305549Sjhb
432305549Sjhb	BPFD_LOCK(d);
433305549Sjhb	if (d->bd_state == BPF_WAITING)
434305549Sjhb		callout_stop(&d->bd_callout);
435305549Sjhb	timed_out = (d->bd_state == BPF_TIMED_OUT);
436305549Sjhb	d->bd_state = BPF_IDLE;
437305549Sjhb	/*
438305549Sjhb	 * If the hold buffer is empty, then do a timed sleep, which
439305549Sjhb	 * ends when the timeout expires or when enough packets
440305549Sjhb	 * have arrived to fill the store buffer.
441305549Sjhb	 */
442305549Sjhb	while (d->bd_hbuf == NULL) {
443305549Sjhb		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
444305549Sjhb			/*
445305549Sjhb			 * A packet(s) either arrived since the previous
446305549Sjhb			 * read or arrived while we were asleep.
447305549Sjhb			 * Rotate the buffers and return what's here.
448305549Sjhb			 */
449305549Sjhb			ROTATE_BUFFERS(d);
450305549Sjhb			break;
451305549Sjhb		}
452305549Sjhb
453305549Sjhb		/*
454305549Sjhb		 * No data is available, check to see if the bpf device
455305549Sjhb		 * is still pointed at a real interface.  If not, return
456305549Sjhb		 * ENXIO so that the userland process knows to rebind
457305549Sjhb		 * it before using it again.
458305549Sjhb		 */
459305549Sjhb		if (d->bd_bif == NULL) {
460305549Sjhb			BPFD_UNLOCK(d);
461305549Sjhb			return (ENXIO);
462305549Sjhb		}
463305549Sjhb
464305549Sjhb		if (ioflag & O_NONBLOCK) {
465305549Sjhb			BPFD_UNLOCK(d);
466305549Sjhb			return (EWOULDBLOCK);
467305549Sjhb		}
468305549Sjhb		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
469305549Sjhb		     "bpf", d->bd_rtout);
470305549Sjhb		if (error == EINTR || error == ERESTART) {
471305549Sjhb			BPFD_UNLOCK(d);
472305549Sjhb			return (error);
473305549Sjhb		}
474305549Sjhb		if (error == EWOULDBLOCK) {
475305549Sjhb			/*
476330307Snp			 * On a timeout, return what's in the buffer,
477305549Sjhb			 * which may be nothing.  If there is something
478305549Sjhb			 * in the store buffer, we can rotate the buffers.
479305549Sjhb			 */
480305549Sjhb			if (d->bd_hbuf)
481305549Sjhb				/*
482305549Sjhb				 * We filled up the buffer in between
483305549Sjhb				 * getting the timeout and arriving
484305549Sjhb				 * here, so we don't need to rotate.
485305549Sjhb				 */
486305549Sjhb				break;
487305549Sjhb
488346964Snp			if (d->bd_slen == 0) {
489305549Sjhb				BPFD_UNLOCK(d);
490305549Sjhb				return (0);
491305549Sjhb			}
492305549Sjhb			ROTATE_BUFFERS(d);
493305549Sjhb			break;
494305549Sjhb		}
495305549Sjhb	}
496305549Sjhb	/*
497305549Sjhb	 * At this point, we know we have something in the hold slot.
498305549Sjhb	 */
499305549Sjhb	BPFD_UNLOCK(d);
500305549Sjhb
501305549Sjhb	/*
502305549Sjhb	 * Move data from hold buffer into user space.
503305549Sjhb	 * We know the entire buffer is transferred since
504305549Sjhb	 * we checked above that the read buffer is bpf_bufsize bytes.
505305549Sjhb	 */
506305549Sjhb	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
507305549Sjhb
508305549Sjhb	BPFD_LOCK(d);
509305549Sjhb	d->bd_fbuf = d->bd_hbuf;
510305549Sjhb	d->bd_hbuf = NULL;
511309560Sjhb	d->bd_hlen = 0;
512309560Sjhb	BPFD_UNLOCK(d);
513309560Sjhb
514309560Sjhb	return (error);
515309560Sjhb}
516309560Sjhb
517305549Sjhb
518305549Sjhb/*
519305549Sjhb * If there are processes sleeping on this descriptor, wake them up.
520305549Sjhb */
521305549Sjhbstatic __inline void
522305549Sjhbbpf_wakeup(d)
523305549Sjhb	struct bpf_d *d;
524305549Sjhb{
525305549Sjhb	if (d->bd_state == BPF_WAITING) {
526305549Sjhb		callout_stop(&d->bd_callout);
527305549Sjhb		d->bd_state = BPF_IDLE;
528305549Sjhb	}
529305549Sjhb	wakeup(d);
530305549Sjhb	if (d->bd_async && d->bd_sig && d->bd_sigio)
531305549Sjhb		pgsigio(&d->bd_sigio, d->bd_sig, 0);
532305549Sjhb
533305549Sjhb	selwakeuppri(&d->bd_sel, PRINET);
534305549Sjhb	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
535305549Sjhb}
536305549Sjhb
537305549Sjhbstatic void
538305549Sjhbbpf_timed_out(arg)
539305549Sjhb	void *arg;
540305549Sjhb{
541305549Sjhb	struct bpf_d *d = (struct bpf_d *)arg;
542305549Sjhb
543305549Sjhb	BPFD_LOCK(d);
544305549Sjhb	if (d->bd_state == BPF_WAITING) {
545305549Sjhb		d->bd_state = BPF_TIMED_OUT;
546305549Sjhb		if (d->bd_slen != 0)
547305549Sjhb			bpf_wakeup(d);
548305549Sjhb	}
549305549Sjhb	BPFD_UNLOCK(d);
550305549Sjhb}
551305549Sjhb
552305549Sjhbstatic	int
553305549Sjhbbpfwrite(dev, uio, ioflag)
554305549Sjhb	struct cdev *dev;
555305549Sjhb	struct uio *uio;
556305549Sjhb	int ioflag;
557305549Sjhb{
558305549Sjhb	struct bpf_d *d = dev->si_drv1;
559305549Sjhb	struct ifnet *ifp;
560305549Sjhb	struct mbuf *m;
561305549Sjhb	int error;
562305549Sjhb	struct sockaddr dst;
563305549Sjhb	int datlen;
564305549Sjhb
565305549Sjhb	if (d->bd_bif == NULL)
566305549Sjhb		return (ENXIO);
567305549Sjhb
568305549Sjhb	ifp = d->bd_bif->bif_ifp;
569305549Sjhb
570305549Sjhb	if ((ifp->if_flags & IFF_UP) == 0)
571305549Sjhb		return (ENETDOWN);
572305549Sjhb
573305549Sjhb	if (uio->uio_resid == 0)
574305549Sjhb		return (0);
575305549Sjhb
576305549Sjhb	bzero(&dst, sizeof(dst));
577305549Sjhb	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen);
578305549Sjhb	if (error)
579305549Sjhb		return (error);
580305549Sjhb
581305549Sjhb	if (datlen > ifp->if_mtu) {
582305549Sjhb		m_freem(m);
583305549Sjhb		return (EMSGSIZE);
584305549Sjhb	}
585305549Sjhb
586305549Sjhb	if (d->bd_hdrcmplt)
587305549Sjhb		dst.sa_family = pseudo_AF_HDRCMPLT;
588305549Sjhb
589305549Sjhb#ifdef MAC
590305549Sjhb	BPFD_LOCK(d);
591305549Sjhb	mac_create_mbuf_from_bpfdesc(d, m);
592305549Sjhb	BPFD_UNLOCK(d);
593305549Sjhb#endif
594305549Sjhb	NET_LOCK_GIANT();
595305549Sjhb	error = (*ifp->if_output)(ifp, m, &dst, NULL);
596305549Sjhb	NET_UNLOCK_GIANT();
597305549Sjhb	/*
598305549Sjhb	 * The driver frees the mbuf.
599305549Sjhb	 */
600305549Sjhb	return (error);
601305549Sjhb}
602305549Sjhb
603305549Sjhb/*
604305549Sjhb * Reset a descriptor by flushing its packet buffer and clearing the
605305549Sjhb * receive and drop counts.
606305549Sjhb */
607305549Sjhbstatic void
608305549Sjhbreset_d(d)
609305549Sjhb	struct bpf_d *d;
610305549Sjhb{
611330307Snp
612305549Sjhb	mtx_assert(&d->bd_mtx, MA_OWNED);
613305549Sjhb	if (d->bd_hbuf) {
614305549Sjhb		/* Free the hold buffer. */
615305549Sjhb		d->bd_fbuf = d->bd_hbuf;
616305549Sjhb		d->bd_hbuf = NULL;
617305549Sjhb	}
618305549Sjhb	d->bd_slen = 0;
619305549Sjhb	d->bd_hlen = 0;
620305549Sjhb	d->bd_rcount = 0;
621305549Sjhb	d->bd_dcount = 0;
622305549Sjhb}
623305549Sjhb
624305549Sjhb/*
625305549Sjhb *  FIONREAD		Check for read packet available.
626305549Sjhb *  SIOCGIFADDR		Get interface address - convenient hook to driver.
627305549Sjhb *  BIOCGBLEN		Get buffer len [for read()].
628305549Sjhb *  BIOCSETF		Set ethernet read filter.
629305549Sjhb *  BIOCFLUSH		Flush read packet buffer.
630305549Sjhb *  BIOCPROMISC		Put interface into promiscuous mode.
631305549Sjhb *  BIOCGDLT		Get link layer type.
632305549Sjhb *  BIOCGETIF		Get interface name.
633305549Sjhb *  BIOCSETIF		Set interface.
634305549Sjhb *  BIOCSRTIMEOUT	Set read timeout.
635305549Sjhb *  BIOCGRTIMEOUT	Get read timeout.
636305549Sjhb *  BIOCGSTATS		Get packet stats.
637305549Sjhb *  BIOCIMMEDIATE	Set immediate mode.
638305549Sjhb *  BIOCVERSION		Get filter language version.
639305549Sjhb *  BIOCGHDRCMPLT	Get "header already complete" flag
640305549Sjhb *  BIOCSHDRCMPLT	Set "header already complete" flag
641305549Sjhb *  BIOCGSEESENT	Get "see packets sent" flag
642305549Sjhb *  BIOCSSEESENT	Set "see packets sent" flag
643305549Sjhb */
644305549Sjhb/* ARGSUSED */
645305549Sjhbstatic	int
646305549Sjhbbpfioctl(dev, cmd, addr, flags, td)
647346883Snp	struct cdev *dev;
648346883Snp	u_long cmd;
649346883Snp	caddr_t addr;
650346883Snp	int flags;
651309560Sjhb	struct thread *td;
652305549Sjhb{
653305549Sjhb	struct bpf_d *d = dev->si_drv1;
654305549Sjhb	int error = 0;
655305549Sjhb
656305549Sjhb	BPFD_LOCK(d);
657305549Sjhb	if (d->bd_state == BPF_WAITING)
658305549Sjhb		callout_stop(&d->bd_callout);
659305549Sjhb	d->bd_state = BPF_IDLE;
660305549Sjhb	BPFD_UNLOCK(d);
661306694Sjhb
662305549Sjhb	switch (cmd) {
663305549Sjhb
664305549Sjhb	default:
665330307Snp		error = EINVAL;
666305549Sjhb		break;
667305549Sjhb
668305549Sjhb	/*
669305549Sjhb	 * Check for read packet available.
670305549Sjhb	 */
671305549Sjhb	case FIONREAD:
672305549Sjhb		{
673330307Snp			int n;
674330307Snp
675305549Sjhb			BPFD_LOCK(d);
676346876Snp			n = d->bd_slen;
677305549Sjhb			if (d->bd_hbuf)
678305549Sjhb				n += d->bd_hlen;
679305549Sjhb			BPFD_UNLOCK(d);
680305549Sjhb
681305549Sjhb			*(int *)addr = n;
682305549Sjhb			break;
683305549Sjhb		}
684305549Sjhb
685305549Sjhb	case SIOCGIFADDR:
686305549Sjhb		{
687305549Sjhb			struct ifnet *ifp;
688305549Sjhb
689305549Sjhb			if (d->bd_bif == NULL)
690305549Sjhb				error = EINVAL;
691305549Sjhb			else {
692305549Sjhb				ifp = d->bd_bif->bif_ifp;
693305549Sjhb				error = (*ifp->if_ioctl)(ifp, cmd, addr);
694305549Sjhb			}
695305549Sjhb			break;
696305549Sjhb		}
697305549Sjhb
698305549Sjhb	/*
699305549Sjhb	 * Get buffer len [for read()].
700305549Sjhb	 */
701305549Sjhb	case BIOCGBLEN:
702305549Sjhb		*(u_int *)addr = d->bd_bufsize;
703305549Sjhb		break;
704305549Sjhb
705305549Sjhb	/*
706305549Sjhb	 * Set buffer length.
707305549Sjhb	 */
708305549Sjhb	case BIOCSBLEN:
709305549Sjhb		if (d->bd_bif != NULL)
710330307Snp			error = EINVAL;
711330307Snp		else {
712330307Snp			u_int size = *(u_int *)addr;
713330307Snp
714330307Snp			if (size > bpf_maxbufsize)
715305549Sjhb				*(u_int *)addr = size = bpf_maxbufsize;
716305549Sjhb			else if (size < BPF_MINBUFSIZE)
717305549Sjhb				*(u_int *)addr = size = BPF_MINBUFSIZE;
718305549Sjhb			d->bd_bufsize = size;
719305549Sjhb		}
720305549Sjhb		break;
721305549Sjhb
722305549Sjhb	/*
723305549Sjhb	 * Set link layer read filter.
724305549Sjhb	 */
725305549Sjhb	case BIOCSETF:
726305549Sjhb		error = bpf_setf(d, (struct bpf_program *)addr);
727305549Sjhb		break;
728305549Sjhb
729305549Sjhb	/*
730305549Sjhb	 * Flush read packet buffer.
731305549Sjhb	 */
732305549Sjhb	case BIOCFLUSH:
733305549Sjhb		BPFD_LOCK(d);
734305549Sjhb		reset_d(d);
735305549Sjhb		BPFD_UNLOCK(d);
736305549Sjhb		break;
737305549Sjhb
738305549Sjhb	/*
739305549Sjhb	 * Put interface into promiscuous mode.
740305549Sjhb	 */
741305549Sjhb	case BIOCPROMISC:
742305549Sjhb		if (d->bd_bif == NULL) {
743305549Sjhb			/*
744305549Sjhb			 * No interface attached yet.
745305549Sjhb			 */
746305549Sjhb			error = EINVAL;
747305549Sjhb			break;
748305549Sjhb		}
749305549Sjhb		if (d->bd_promisc == 0) {
750305549Sjhb			mtx_lock(&Giant);
751305549Sjhb			error = ifpromisc(d->bd_bif->bif_ifp, 1);
752305549Sjhb			mtx_unlock(&Giant);
753305549Sjhb			if (error == 0)
754305549Sjhb				d->bd_promisc = 1;
755305549Sjhb		}
756305549Sjhb		break;
757305549Sjhb
758305549Sjhb	/*
759305549Sjhb	 * Get current data link type.
760305549Sjhb	 */
761305549Sjhb	case BIOCGDLT:
762305549Sjhb		if (d->bd_bif == NULL)
763305549Sjhb			error = EINVAL;
764305549Sjhb		else
765305549Sjhb			*(u_int *)addr = d->bd_bif->bif_dlt;
766305549Sjhb		break;
767305549Sjhb
768305549Sjhb	/*
769305549Sjhb	 * Get a list of supported data link types.
770305549Sjhb	 */
771305549Sjhb	case BIOCGDLTLIST:
772305549Sjhb		if (d->bd_bif == NULL)
773305549Sjhb			error = EINVAL;
774305549Sjhb		else
775305549Sjhb			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
776305549Sjhb		break;
777305549Sjhb
778305549Sjhb	/*
779305549Sjhb	 * Set data link type.
780305549Sjhb	 */
781305549Sjhb	case BIOCSDLT:
782305549Sjhb		if (d->bd_bif == NULL)
783305549Sjhb			error = EINVAL;
784305549Sjhb		else
785305549Sjhb			error = bpf_setdlt(d, *(u_int *)addr);
786305549Sjhb		break;
787305549Sjhb
788305549Sjhb	/*
789305549Sjhb	 * Get interface name.
790305549Sjhb	 */
791305549Sjhb	case BIOCGETIF:
792305549Sjhb		if (d->bd_bif == NULL)
793305549Sjhb			error = EINVAL;
794305549Sjhb		else {
795305549Sjhb			struct ifnet *const ifp = d->bd_bif->bif_ifp;
796305549Sjhb			struct ifreq *const ifr = (struct ifreq *)addr;
797305549Sjhb
798305549Sjhb			strlcpy(ifr->ifr_name, ifp->if_xname,
799305549Sjhb			    sizeof(ifr->ifr_name));
800305549Sjhb		}
801305549Sjhb		break;
802305549Sjhb
803305549Sjhb	/*
804305549Sjhb	 * Set interface.
805305549Sjhb	 */
806305549Sjhb	case BIOCSETIF:
807305549Sjhb		error = bpf_setif(d, (struct ifreq *)addr);
808305549Sjhb		break;
809305549Sjhb
810305549Sjhb	/*
811305549Sjhb	 * Set read timeout.
812305549Sjhb	 */
813305549Sjhb	case BIOCSRTIMEOUT:
814305549Sjhb		{
815305549Sjhb			struct timeval *tv = (struct timeval *)addr;
816305549Sjhb
817305549Sjhb			/*
818305549Sjhb			 * Subtract 1 tick from tvtohz() since this isn't
819305549Sjhb			 * a one-shot timer.
820305549Sjhb			 */
821305549Sjhb			if ((error = itimerfix(tv)) == 0)
822305549Sjhb				d->bd_rtout = tvtohz(tv) - 1;
823305549Sjhb			break;
824305549Sjhb		}
825305549Sjhb
826305549Sjhb	/*
827305549Sjhb	 * Get read timeout.
828305549Sjhb	 */
829305549Sjhb	case BIOCGRTIMEOUT:
830305549Sjhb		{
831305549Sjhb			struct timeval *tv = (struct timeval *)addr;
832305549Sjhb
833305549Sjhb			tv->tv_sec = d->bd_rtout / hz;
834305549Sjhb			tv->tv_usec = (d->bd_rtout % hz) * tick;
835305549Sjhb			break;
836305549Sjhb		}
837305549Sjhb
838305549Sjhb	/*
839305549Sjhb	 * Get packet stats.
840305549Sjhb	 */
841305549Sjhb	case BIOCGSTATS:
842305549Sjhb		{
843305549Sjhb			struct bpf_stat *bs = (struct bpf_stat *)addr;
844305549Sjhb
845305549Sjhb			bs->bs_recv = d->bd_rcount;
846305549Sjhb			bs->bs_drop = d->bd_dcount;
847305549Sjhb			break;
848305549Sjhb		}
849305549Sjhb
850305549Sjhb	/*
851305549Sjhb	 * Set immediate mode.
852305549Sjhb	 */
853305549Sjhb	case BIOCIMMEDIATE:
854305549Sjhb		d->bd_immediate = *(u_int *)addr;
855305549Sjhb		break;
856305549Sjhb
857305549Sjhb	case BIOCVERSION:
858305549Sjhb		{
859305549Sjhb			struct bpf_version *bv = (struct bpf_version *)addr;
860305549Sjhb
861305549Sjhb			bv->bv_major = BPF_MAJOR_VERSION;
862305549Sjhb			bv->bv_minor = BPF_MINOR_VERSION;
863305549Sjhb			break;
864305549Sjhb		}
865305549Sjhb
866305549Sjhb	/*
867305549Sjhb	 * Get "header already complete" flag
868305549Sjhb	 */
869305549Sjhb	case BIOCGHDRCMPLT:
870305549Sjhb		*(u_int *)addr = d->bd_hdrcmplt;
871305549Sjhb		break;
872305549Sjhb
873305549Sjhb	/*
874305549Sjhb	 * Set "header already complete" flag
875305549Sjhb	 */
876305549Sjhb	case BIOCSHDRCMPLT:
877305549Sjhb		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
878305549Sjhb		break;
879305549Sjhb
880305549Sjhb	/*
881305549Sjhb	 * Get "see sent packets" flag
882305549Sjhb	 */
883305549Sjhb	case BIOCGSEESENT:
884305549Sjhb		*(u_int *)addr = d->bd_seesent;
885305549Sjhb		break;
886305549Sjhb
887305549Sjhb	/*
888305549Sjhb	 * Set "see sent packets" flag
889305549Sjhb	 */
890305549Sjhb	case BIOCSSEESENT:
891305549Sjhb		d->bd_seesent = *(u_int *)addr;
892305549Sjhb		break;
893305549Sjhb
894305549Sjhb	case FIONBIO:		/* Non-blocking I/O */
895305549Sjhb		break;
896305549Sjhb
897305549Sjhb	case FIOASYNC:		/* Send signal on receive packets */
898305549Sjhb		d->bd_async = *(int *)addr;
899305549Sjhb		break;
900305549Sjhb
901305549Sjhb	case FIOSETOWN:
902305549Sjhb		error = fsetown(*(int *)addr, &d->bd_sigio);
903305549Sjhb		break;
904305549Sjhb
905305549Sjhb	case FIOGETOWN:
906305549Sjhb		*(int *)addr = fgetown(&d->bd_sigio);
907305549Sjhb		break;
908305549Sjhb
909305549Sjhb	/* This is deprecated, FIOSETOWN should be used instead. */
910305549Sjhb	case TIOCSPGRP:
911305549Sjhb		error = fsetown(-(*(int *)addr), &d->bd_sigio);
912305549Sjhb		break;
913305549Sjhb
914305549Sjhb	/* This is deprecated, FIOGETOWN should be used instead. */
915305549Sjhb	case TIOCGPGRP:
916305549Sjhb		*(int *)addr = -fgetown(&d->bd_sigio);
917305549Sjhb		break;
918305549Sjhb
919305549Sjhb	case BIOCSRSIG:		/* Set receive signal */
920309560Sjhb		{
921309560Sjhb			u_int sig;
922309560Sjhb
923309560Sjhb			sig = *(u_int *)addr;
924309560Sjhb
925309560Sjhb			if (sig >= NSIG)
926309560Sjhb				error = EINVAL;
927309560Sjhb			else
928309560Sjhb				d->bd_sig = sig;
929309560Sjhb			break;
930309560Sjhb		}
931309560Sjhb	case BIOCGRSIG:
932309560Sjhb		*(u_int *)addr = d->bd_sig;
933309560Sjhb		break;
934305549Sjhb	}
935305549Sjhb	return (error);
936305549Sjhb}
937305549Sjhb
938305549Sjhb/*
939305549Sjhb * Set d's packet filter program to fp.  If this file already has a filter,
940305549Sjhb * free it and replace it.  Returns EINVAL for bogus requests.
941305549Sjhb */
942305549Sjhbstatic int
943305549Sjhbbpf_setf(d, fp)
944305549Sjhb	struct bpf_d *d;
945305549Sjhb	struct bpf_program *fp;
946309560Sjhb{
947309560Sjhb	struct bpf_insn *fcode, *old;
948309560Sjhb	u_int flen, size;
949309560Sjhb
950309560Sjhb	if (fp->bf_insns == NULL) {
951305549Sjhb		if (fp->bf_len != 0)
952309560Sjhb			return (EINVAL);
953309560Sjhb		BPFD_LOCK(d);
954309560Sjhb		old = d->bd_filter;
955305549Sjhb		d->bd_filter = NULL;
956305549Sjhb		reset_d(d);
957305549Sjhb		BPFD_UNLOCK(d);
958305549Sjhb		if (old != NULL)
959305549Sjhb			free((caddr_t)old, M_BPF);
960305549Sjhb		return (0);
961305549Sjhb	}
962305549Sjhb	flen = fp->bf_len;
963309560Sjhb	if (flen > BPF_MAXINSNS)
964309560Sjhb		return (EINVAL);
965309560Sjhb
966309560Sjhb	size = flen * sizeof(*fp->bf_insns);
967305549Sjhb	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
968305549Sjhb	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
969305549Sjhb	    bpf_validate(fcode, (int)flen)) {
970305549Sjhb		BPFD_LOCK(d);
971305549Sjhb		old = d->bd_filter;
972309560Sjhb		d->bd_filter = fcode;
973309560Sjhb		reset_d(d);
974309560Sjhb		BPFD_UNLOCK(d);
975		if (old != NULL)
976			free((caddr_t)old, M_BPF);
977
978		return (0);
979	}
980	free((caddr_t)fcode, M_BPF);
981	return (EINVAL);
982}
983
984/*
985 * Detach a file from its current interface (if attached at all) and attach
986 * to the interface indicated by the name stored in ifr.
987 * Return an errno or 0.
988 */
989static int
990bpf_setif(d, ifr)
991	struct bpf_d *d;
992	struct ifreq *ifr;
993{
994	struct bpf_if *bp;
995	int error;
996	struct ifnet *theywant;
997
998	theywant = ifunit(ifr->ifr_name);
999	if (theywant == NULL)
1000		return ENXIO;
1001
1002	/*
1003	 * Look through attached interfaces for the named one.
1004	 */
1005	mtx_lock(&bpf_mtx);
1006	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1007		struct ifnet *ifp = bp->bif_ifp;
1008
1009		if (ifp == NULL || ifp != theywant)
1010			continue;
1011		/* skip additional entry */
1012		if (bp->bif_driverp != (struct bpf_if **)&ifp->if_bpf)
1013			continue;
1014
1015		mtx_unlock(&bpf_mtx);
1016		/*
1017		 * We found the requested interface.
1018		 * Allocate the packet buffers if we need to.
1019		 * If we're already attached to requested interface,
1020		 * just flush the buffer.
1021		 */
1022		if (d->bd_sbuf == NULL) {
1023			error = bpf_allocbufs(d);
1024			if (error != 0)
1025				return (error);
1026		}
1027		if (bp != d->bd_bif) {
1028			if (d->bd_bif)
1029				/*
1030				 * Detach if attached to something else.
1031				 */
1032				bpf_detachd(d);
1033
1034			bpf_attachd(d, bp);
1035		}
1036		BPFD_LOCK(d);
1037		reset_d(d);
1038		BPFD_UNLOCK(d);
1039		return (0);
1040	}
1041	mtx_unlock(&bpf_mtx);
1042	/* Not found. */
1043	return (ENXIO);
1044}
1045
1046/*
1047 * Support for select() and poll() system calls
1048 *
1049 * Return true iff the specific operation will not block indefinitely.
1050 * Otherwise, return false but make a note that a selwakeup() must be done.
1051 */
1052static int
1053bpfpoll(dev, events, td)
1054	struct cdev *dev;
1055	int events;
1056	struct thread *td;
1057{
1058	struct bpf_d *d;
1059	int revents;
1060
1061	d = dev->si_drv1;
1062	if (d->bd_bif == NULL)
1063		return (ENXIO);
1064
1065	revents = events & (POLLOUT | POLLWRNORM);
1066	BPFD_LOCK(d);
1067	if (events & (POLLIN | POLLRDNORM)) {
1068		if (bpf_ready(d))
1069			revents |= events & (POLLIN | POLLRDNORM);
1070		else {
1071			selrecord(td, &d->bd_sel);
1072			/* Start the read timeout if necessary. */
1073			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1074				callout_reset(&d->bd_callout, d->bd_rtout,
1075				    bpf_timed_out, d);
1076				d->bd_state = BPF_WAITING;
1077			}
1078		}
1079	}
1080	BPFD_UNLOCK(d);
1081	return (revents);
1082}
1083
1084/*
1085 * Support for kevent() system call.  Register EVFILT_READ filters and
1086 * reject all others.
1087 */
1088int
1089bpfkqfilter(dev, kn)
1090	struct cdev *dev;
1091	struct knote *kn;
1092{
1093	struct bpf_d *d = (struct bpf_d *)dev->si_drv1;
1094
1095	if (kn->kn_filter != EVFILT_READ)
1096		return (1);
1097
1098	kn->kn_fop = &bpfread_filtops;
1099	kn->kn_hook = d;
1100	knlist_add(&d->bd_sel.si_note, kn, 0);
1101
1102	return (0);
1103}
1104
1105static void
1106filt_bpfdetach(kn)
1107	struct knote *kn;
1108{
1109	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1110
1111	knlist_remove(&d->bd_sel.si_note, kn, 0);
1112}
1113
1114static int
1115filt_bpfread(kn, hint)
1116	struct knote *kn;
1117	long hint;
1118{
1119	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1120	int ready;
1121
1122	BPFD_LOCK_ASSERT(d);
1123	ready = bpf_ready(d);
1124	if (ready) {
1125		kn->kn_data = d->bd_slen;
1126		if (d->bd_hbuf)
1127			kn->kn_data += d->bd_hlen;
1128	}
1129	else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1130		callout_reset(&d->bd_callout, d->bd_rtout,
1131		    bpf_timed_out, d);
1132		d->bd_state = BPF_WAITING;
1133	}
1134
1135	return (ready);
1136}
1137
1138/*
1139 * Incoming linkage from device drivers.  Process the packet pkt, of length
1140 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1141 * by each process' filter, and if accepted, stashed into the corresponding
1142 * buffer.
1143 */
1144void
1145bpf_tap(bp, pkt, pktlen)
1146	struct bpf_if *bp;
1147	u_char *pkt;
1148	u_int pktlen;
1149{
1150	struct bpf_d *d;
1151	u_int slen;
1152
1153	/*
1154	 * Lockless read to avoid cost of locking the interface if there are
1155	 * no descriptors attached.
1156	 */
1157	if (LIST_EMPTY(&bp->bif_dlist))
1158		return;
1159
1160	BPFIF_LOCK(bp);
1161	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1162		BPFD_LOCK(d);
1163		++d->bd_rcount;
1164		slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen);
1165		if (slen != 0) {
1166#ifdef MAC
1167			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
1168#endif
1169				catchpacket(d, pkt, pktlen, slen, bcopy);
1170		}
1171		BPFD_UNLOCK(d);
1172	}
1173	BPFIF_UNLOCK(bp);
1174}
1175
1176/*
1177 * Copy data from an mbuf chain into a buffer.  This code is derived
1178 * from m_copydata in sys/uipc_mbuf.c.
1179 */
1180static void
1181bpf_mcopy(src_arg, dst_arg, len)
1182	const void *src_arg;
1183	void *dst_arg;
1184	size_t len;
1185{
1186	const struct mbuf *m;
1187	u_int count;
1188	u_char *dst;
1189
1190	m = src_arg;
1191	dst = dst_arg;
1192	while (len > 0) {
1193		if (m == NULL)
1194			panic("bpf_mcopy");
1195		count = min(m->m_len, len);
1196		bcopy(mtod(m, void *), dst, count);
1197		m = m->m_next;
1198		dst += count;
1199		len -= count;
1200	}
1201}
1202
1203/*
1204 * Incoming linkage from device drivers, when packet is in an mbuf chain.
1205 */
1206void
1207bpf_mtap(bp, m)
1208	struct bpf_if *bp;
1209	struct mbuf *m;
1210{
1211	struct bpf_d *d;
1212	u_int pktlen, slen;
1213
1214	/*
1215	 * Lockless read to avoid cost of locking the interface if there are
1216	 * no descriptors attached.
1217	 */
1218	if (LIST_EMPTY(&bp->bif_dlist))
1219		return;
1220
1221	pktlen = m_length(m, NULL);
1222	if (pktlen == m->m_len) {
1223		bpf_tap(bp, mtod(m, u_char *), pktlen);
1224		return;
1225	}
1226
1227	BPFIF_LOCK(bp);
1228	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1229		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
1230			continue;
1231		BPFD_LOCK(d);
1232		++d->bd_rcount;
1233		slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
1234		if (slen != 0)
1235#ifdef MAC
1236			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
1237#endif
1238				catchpacket(d, (u_char *)m, pktlen, slen,
1239				    bpf_mcopy);
1240		BPFD_UNLOCK(d);
1241	}
1242	BPFIF_UNLOCK(bp);
1243}
1244
1245/*
1246 * Incoming linkage from device drivers, when packet is in
1247 * an mbuf chain and to be prepended by a contiguous header.
1248 */
1249void
1250bpf_mtap2(bp, data, dlen, m)
1251	struct bpf_if *bp;
1252	void *data;
1253	u_int dlen;
1254	struct mbuf *m;
1255{
1256	struct mbuf mb;
1257	struct bpf_d *d;
1258	u_int pktlen, slen;
1259
1260	/*
1261	 * Lockless read to avoid cost of locking the interface if there are
1262	 * no descriptors attached.
1263	 */
1264	if (LIST_EMPTY(&bp->bif_dlist))
1265		return;
1266
1267	pktlen = m_length(m, NULL);
1268	/*
1269	 * Craft on-stack mbuf suitable for passing to bpf_filter.
1270	 * Note that we cut corners here; we only setup what's
1271	 * absolutely needed--this mbuf should never go anywhere else.
1272	 */
1273	mb.m_next = m;
1274	mb.m_data = data;
1275	mb.m_len = dlen;
1276	pktlen += dlen;
1277
1278	BPFIF_LOCK(bp);
1279	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1280		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
1281			continue;
1282		BPFD_LOCK(d);
1283		++d->bd_rcount;
1284		slen = bpf_filter(d->bd_filter, (u_char *)&mb, pktlen, 0);
1285		if (slen != 0)
1286#ifdef MAC
1287			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
1288#endif
1289				catchpacket(d, (u_char *)&mb, pktlen, slen,
1290				    bpf_mcopy);
1291		BPFD_UNLOCK(d);
1292	}
1293	BPFIF_UNLOCK(bp);
1294}
1295
1296/*
1297 * Move the packet data from interface memory (pkt) into the
1298 * store buffer.  "cpfn" is the routine called to do the actual data
1299 * transfer.  bcopy is passed in to copy contiguous chunks, while
1300 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
1301 * pkt is really an mbuf.
1302 */
1303static void
1304catchpacket(d, pkt, pktlen, snaplen, cpfn)
1305	struct bpf_d *d;
1306	u_char *pkt;
1307	u_int pktlen, snaplen;
1308	void (*cpfn)(const void *, void *, size_t);
1309{
1310	struct bpf_hdr *hp;
1311	int totlen, curlen;
1312	int hdrlen = d->bd_bif->bif_hdrlen;
1313	int do_wakeup = 0;
1314
1315	/*
1316	 * Figure out how many bytes to move.  If the packet is
1317	 * greater or equal to the snapshot length, transfer that
1318	 * much.  Otherwise, transfer the whole packet (unless
1319	 * we hit the buffer size limit).
1320	 */
1321	totlen = hdrlen + min(snaplen, pktlen);
1322	if (totlen > d->bd_bufsize)
1323		totlen = d->bd_bufsize;
1324
1325	/*
1326	 * Round up the end of the previous packet to the next longword.
1327	 */
1328	curlen = BPF_WORDALIGN(d->bd_slen);
1329	if (curlen + totlen > d->bd_bufsize) {
1330		/*
1331		 * This packet will overflow the storage buffer.
1332		 * Rotate the buffers if we can, then wakeup any
1333		 * pending reads.
1334		 */
1335		if (d->bd_fbuf == NULL) {
1336			/*
1337			 * We haven't completed the previous read yet,
1338			 * so drop the packet.
1339			 */
1340			++d->bd_dcount;
1341			return;
1342		}
1343		ROTATE_BUFFERS(d);
1344		do_wakeup = 1;
1345		curlen = 0;
1346	}
1347	else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
1348		/*
1349		 * Immediate mode is set, or the read timeout has
1350		 * already expired during a select call.  A packet
1351		 * arrived, so the reader should be woken up.
1352		 */
1353		do_wakeup = 1;
1354
1355	/*
1356	 * Append the bpf header.
1357	 */
1358	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
1359	microtime(&hp->bh_tstamp);
1360	hp->bh_datalen = pktlen;
1361	hp->bh_hdrlen = hdrlen;
1362	/*
1363	 * Copy the packet data into the store buffer and update its length.
1364	 */
1365	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
1366	d->bd_slen = curlen + totlen;
1367
1368	if (do_wakeup)
1369		bpf_wakeup(d);
1370}
1371
1372/*
1373 * Initialize all nonzero fields of a descriptor.
1374 */
1375static int
1376bpf_allocbufs(d)
1377	struct bpf_d *d;
1378{
1379	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1380	if (d->bd_fbuf == NULL)
1381		return (ENOBUFS);
1382
1383	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1384	if (d->bd_sbuf == NULL) {
1385		free(d->bd_fbuf, M_BPF);
1386		return (ENOBUFS);
1387	}
1388	d->bd_slen = 0;
1389	d->bd_hlen = 0;
1390	return (0);
1391}
1392
1393/*
1394 * Free buffers currently in use by a descriptor.
1395 * Called on close.
1396 */
1397static void
1398bpf_freed(d)
1399	struct bpf_d *d;
1400{
1401	/*
1402	 * We don't need to lock out interrupts since this descriptor has
1403	 * been detached from its interface and it yet hasn't been marked
1404	 * free.
1405	 */
1406	if (d->bd_sbuf != NULL) {
1407		free(d->bd_sbuf, M_BPF);
1408		if (d->bd_hbuf != NULL)
1409			free(d->bd_hbuf, M_BPF);
1410		if (d->bd_fbuf != NULL)
1411			free(d->bd_fbuf, M_BPF);
1412	}
1413	if (d->bd_filter)
1414		free((caddr_t)d->bd_filter, M_BPF);
1415	mtx_destroy(&d->bd_mtx);
1416}
1417
1418/*
1419 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
1420 * fixed size of the link header (variable length headers not yet supported).
1421 */
1422void
1423bpfattach(ifp, dlt, hdrlen)
1424	struct ifnet *ifp;
1425	u_int dlt, hdrlen;
1426{
1427
1428	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
1429}
1430
1431/*
1432 * Attach an interface to bpf.  ifp is a pointer to the structure
1433 * defining the interface to be attached, dlt is the link layer type,
1434 * and hdrlen is the fixed size of the link header (variable length
1435 * headers are not yet supporrted).
1436 */
1437void
1438bpfattach2(ifp, dlt, hdrlen, driverp)
1439	struct ifnet *ifp;
1440	u_int dlt, hdrlen;
1441	struct bpf_if **driverp;
1442{
1443	struct bpf_if *bp;
1444	bp = (struct bpf_if *)malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
1445	if (bp == NULL)
1446		panic("bpfattach");
1447
1448	LIST_INIT(&bp->bif_dlist);
1449	bp->bif_driverp = driverp;
1450	bp->bif_ifp = ifp;
1451	bp->bif_dlt = dlt;
1452	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
1453
1454	mtx_lock(&bpf_mtx);
1455	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
1456	mtx_unlock(&bpf_mtx);
1457
1458	*bp->bif_driverp = NULL;
1459
1460	/*
1461	 * Compute the length of the bpf header.  This is not necessarily
1462	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1463	 * that the network layer header begins on a longword boundary (for
1464	 * performance reasons and to alleviate alignment restrictions).
1465	 */
1466	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1467
1468	if (bootverbose)
1469		if_printf(ifp, "bpf attached\n");
1470}
1471
1472/*
1473 * Detach bpf from an interface.  This involves detaching each descriptor
1474 * associated with the interface, and leaving bd_bif NULL.  Notify each
1475 * descriptor as it's detached so that any sleepers wake up and get
1476 * ENXIO.
1477 */
1478void
1479bpfdetach(ifp)
1480	struct ifnet *ifp;
1481{
1482	struct bpf_if	*bp;
1483	struct bpf_d	*d;
1484
1485	/* Locate BPF interface information */
1486	mtx_lock(&bpf_mtx);
1487	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1488		if (ifp == bp->bif_ifp)
1489			break;
1490	}
1491
1492	/* Interface wasn't attached */
1493	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
1494		mtx_unlock(&bpf_mtx);
1495		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
1496		return;
1497	}
1498
1499	LIST_REMOVE(bp, bif_next);
1500	mtx_unlock(&bpf_mtx);
1501
1502	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
1503		bpf_detachd(d);
1504		BPFD_LOCK(d);
1505		bpf_wakeup(d);
1506		BPFD_UNLOCK(d);
1507	}
1508
1509	mtx_destroy(&bp->bif_mtx);
1510	free(bp, M_BPF);
1511}
1512
1513/*
1514 * Get a list of available data link type of the interface.
1515 */
1516static int
1517bpf_getdltlist(d, bfl)
1518	struct bpf_d *d;
1519	struct bpf_dltlist *bfl;
1520{
1521	int n, error;
1522	struct ifnet *ifp;
1523	struct bpf_if *bp;
1524
1525	ifp = d->bd_bif->bif_ifp;
1526	n = 0;
1527	error = 0;
1528	mtx_lock(&bpf_mtx);
1529	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1530		if (bp->bif_ifp != ifp)
1531			continue;
1532		if (bfl->bfl_list != NULL) {
1533			if (n >= bfl->bfl_len) {
1534				mtx_unlock(&bpf_mtx);
1535				return (ENOMEM);
1536			}
1537			error = copyout(&bp->bif_dlt,
1538			    bfl->bfl_list + n, sizeof(u_int));
1539		}
1540		n++;
1541	}
1542	mtx_unlock(&bpf_mtx);
1543	bfl->bfl_len = n;
1544	return (error);
1545}
1546
1547/*
1548 * Set the data link type of a BPF instance.
1549 */
1550static int
1551bpf_setdlt(d, dlt)
1552	struct bpf_d *d;
1553	u_int dlt;
1554{
1555	int error, opromisc;
1556	struct ifnet *ifp;
1557	struct bpf_if *bp;
1558
1559	if (d->bd_bif->bif_dlt == dlt)
1560		return (0);
1561	ifp = d->bd_bif->bif_ifp;
1562	mtx_lock(&bpf_mtx);
1563	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1564		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
1565			break;
1566	}
1567	mtx_unlock(&bpf_mtx);
1568	if (bp != NULL) {
1569		opromisc = d->bd_promisc;
1570		bpf_detachd(d);
1571		bpf_attachd(d, bp);
1572		BPFD_LOCK(d);
1573		reset_d(d);
1574		BPFD_UNLOCK(d);
1575		if (opromisc) {
1576			error = ifpromisc(bp->bif_ifp, 1);
1577			if (error)
1578				if_printf(bp->bif_ifp,
1579					"bpf_setdlt: ifpromisc failed (%d)\n",
1580					error);
1581			else
1582				d->bd_promisc = 1;
1583		}
1584	}
1585	return (bp == NULL ? EINVAL : 0);
1586}
1587
1588static void bpf_drvinit(void *unused);
1589
1590static void bpf_clone(void *arg, char *name, int namelen, struct cdev **dev);
1591
1592static void
1593bpf_clone(arg, name, namelen, dev)
1594	void *arg;
1595	char *name;
1596	int namelen;
1597	struct cdev **dev;
1598{
1599	int u;
1600
1601	if (*dev != NULL)
1602		return;
1603	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
1604		return;
1605	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
1606	    "bpf%d", u);
1607	(*dev)->si_flags |= SI_CHEAPCLONE;
1608	return;
1609}
1610
1611static void
1612bpf_drvinit(unused)
1613	void *unused;
1614{
1615
1616	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
1617	LIST_INIT(&bpf_iflist);
1618	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
1619}
1620
1621SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL)
1622
1623#else /* !DEV_BPF && !NETGRAPH_BPF */
1624/*
1625 * NOP stubs to allow bpf-using drivers to load and function.
1626 *
1627 * A 'better' implementation would allow the core bpf functionality
1628 * to be loaded at runtime.
1629 */
1630
1631void
1632bpf_tap(bp, pkt, pktlen)
1633	struct bpf_if *bp;
1634	u_char *pkt;
1635	u_int pktlen;
1636{
1637}
1638
/*
 * Stub for kernels built without bpf: the mbuf chain is ignored.
 */
void
bpf_mtap(bp, m)
	struct bpf_if *bp;
	struct mbuf *m;
{

	/* Nothing to do. */
}
1645
1646void
1647bpf_mtap2(bp, d, l, m)
1648	struct bpf_if *bp;
1649	void *d;
1650	u_int l;
1651	struct mbuf *m;
1652{
1653}
1654
1655void
1656bpfattach(ifp, dlt, hdrlen)
1657	struct ifnet *ifp;
1658	u_int dlt, hdrlen;
1659{
1660}
1661
1662void
1663bpfattach2(ifp, dlt, hdrlen, driverp)
1664	struct ifnet *ifp;
1665	u_int dlt, hdrlen;
1666	struct bpf_if **driverp;
1667{
1668}
1669
/*
 * Stub for kernels built without bpf: detaching is a no-op.
 */
void
bpfdetach(ifp)
	struct ifnet *ifp;
{

	/* Nothing to do. */
}
1675
/*
 * Stub for kernels built without bpf.  No program is ever run; the
 * result is always -1 converted to u_int, i.e. the "no filter"
 * behaviour.
 */
u_int
bpf_filter(pc, p, wirelen, buflen)
	const struct bpf_insn *pc;
	u_char *p;
	u_int wirelen;
	u_int buflen;
{

	return ((u_int)-1);	/* "no filter" behaviour */
}
1685
/*
 * Stub for kernels built without bpf: every program is reported as
 * invalid (0 is returned).
 */
int
bpf_validate(f, len)
	const struct bpf_insn *f;
	int len;
{

	return (0);		/* false */
}
1693
1694#endif /* !DEV_BPF && !NETGRAPH_BPF */
1695