bpf.c revision 130335
1209139Srpaulo/*
2209139Srpaulo * Copyright (c) 1990, 1991, 1993
3209139Srpaulo *	The Regents of the University of California.  All rights reserved.
4209139Srpaulo *
5252190Srpaulo * This code is derived from the Stanford/CMU enet packet filter,
6252190Srpaulo * (net/enet.c) distributed as part of 4.3BSD, and code contributed
7209139Srpaulo * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
8209139Srpaulo * Berkeley Laboratory.
9209139Srpaulo *
10209139Srpaulo * Redistribution and use in source and binary forms, with or without
11209139Srpaulo * modification, are permitted provided that the following conditions
12209139Srpaulo * are met:
13209139Srpaulo * 1. Redistributions of source code must retain the above copyright
14209139Srpaulo *    notice, this list of conditions and the following disclaimer.
15214501Srpaulo * 2. Redistributions in binary form must reproduce the above copyright
16209139Srpaulo *    notice, this list of conditions and the following disclaimer in the
17209139Srpaulo *    documentation and/or other materials provided with the distribution.
18209139Srpaulo * 4. Neither the name of the University nor the names of its contributors
19209139Srpaulo *    may be used to endorse or promote products derived from this software
20209139Srpaulo *    without specific prior written permission.
21209139Srpaulo *
22209139Srpaulo * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23209139Srpaulo * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24209139Srpaulo * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25209139Srpaulo * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26209139Srpaulo * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27209139Srpaulo * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28209139Srpaulo * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29209139Srpaulo * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30209139Srpaulo * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31209139Srpaulo * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32209139Srpaulo * SUCH DAMAGE.
33209139Srpaulo *
34209139Srpaulo *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
35209139Srpaulo *
36209139Srpaulo * $FreeBSD: head/sys/net/bpf.c 130335 2004-06-11 03:47:21Z rwatson $
37209139Srpaulo */
38209139Srpaulo
39209139Srpaulo#include "opt_bpf.h"
40209139Srpaulo#include "opt_mac.h"
41209139Srpaulo#include "opt_netgraph.h"
42209139Srpaulo
43209139Srpaulo#include <sys/types.h>
44209139Srpaulo#include <sys/param.h>
45209139Srpaulo#include <sys/systm.h>
46209139Srpaulo#include <sys/conf.h>
47209139Srpaulo#include <sys/mac.h>
48209139Srpaulo#include <sys/malloc.h>
49209139Srpaulo#include <sys/mbuf.h>
50209139Srpaulo#include <sys/time.h>
51209139Srpaulo#include <sys/proc.h>
52209139Srpaulo#include <sys/signalvar.h>
53209139Srpaulo#include <sys/filio.h>
54209139Srpaulo#include <sys/sockio.h>
55209139Srpaulo#include <sys/ttycom.h>
56209139Srpaulo#include <sys/filedesc.h>
57209139Srpaulo
58209139Srpaulo#include <sys/event.h>
59209139Srpaulo#include <sys/file.h>
60209139Srpaulo#include <sys/poll.h>
61209139Srpaulo#include <sys/proc.h>
62209139Srpaulo
63209139Srpaulo#include <sys/socket.h>
64209139Srpaulo#include <sys/vnode.h>
65209139Srpaulo
66209139Srpaulo#include <net/if.h>
67209139Srpaulo#include <net/bpf.h>
68209139Srpaulo#include <net/bpfdesc.h>
69209139Srpaulo
70209139Srpaulo#include <netinet/in.h>
71209139Srpaulo#include <netinet/if_ether.h>
72209139Srpaulo#include <sys/kernel.h>
73209139Srpaulo#include <sys/sysctl.h>
74209139Srpaulo
75209139Srpaulostatic MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
76209139Srpaulo
77209139Srpaulo#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
78209139Srpaulo
79209139Srpaulo#define PRINET  26			/* interruptible */
80209139Srpaulo
81209139Srpaulo/*
82209139Srpaulo * The default read buffer size is patchable.
83209139Srpaulo */
84209139Srpaulostatic int bpf_bufsize = 4096;
85209139SrpauloSYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW,
86209139Srpaulo	&bpf_bufsize, 0, "");
87209139Srpaulostatic int bpf_maxbufsize = BPF_MAXBUFSIZE;
88209139SrpauloSYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW,
89209139Srpaulo	&bpf_maxbufsize, 0, "");
90209139Srpaulo
91209139Srpaulo/*
92209139Srpaulo *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
93209139Srpaulo */
94209139Srpaulostatic struct bpf_if	*bpf_iflist;
95209139Srpaulostatic struct mtx	bpf_mtx;		/* bpf global lock */
96209139Srpaulo
97209139Srpaulostatic int	bpf_allocbufs(struct bpf_d *);
98209139Srpaulostatic void	bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
99214501Srpaulostatic void	bpf_detachd(struct bpf_d *d);
100214501Srpaulostatic void	bpf_freed(struct bpf_d *);
101209139Srpaulostatic void	bpf_mcopy(const void *, void *, size_t);
102209139Srpaulostatic int	bpf_movein(struct uio *, int,
103214501Srpaulo		    struct mbuf **, struct sockaddr *, int *);
104214501Srpaulostatic int	bpf_setif(struct bpf_d *, struct ifreq *);
105209139Srpaulostatic void	bpf_timed_out(void *);
106209139Srpaulostatic __inline void
107209139Srpaulo		bpf_wakeup(struct bpf_d *);
108209139Srpaulostatic void	catchpacket(struct bpf_d *, u_char *, u_int,
109209139Srpaulo		    u_int, void (*)(const void *, void *, size_t));
110209139Srpaulostatic void	reset_d(struct bpf_d *);
111209139Srpaulostatic int	 bpf_setf(struct bpf_d *, struct bpf_program *);
112209139Srpaulostatic int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
113209139Srpaulostatic int	bpf_setdlt(struct bpf_d *, u_int);
114209139Srpaulostatic void	filt_bpfdetach(struct knote *);
115209139Srpaulostatic int	filt_bpfread(struct knote *, long);
116209139Srpaulo
117209139Srpaulostatic	d_open_t	bpfopen;
118209139Srpaulostatic	d_close_t	bpfclose;
119209139Srpaulostatic	d_read_t	bpfread;
120209139Srpaulostatic	d_write_t	bpfwrite;
121209139Srpaulostatic	d_ioctl_t	bpfioctl;
122209139Srpaulostatic	d_poll_t	bpfpoll;
123209139Srpaulostatic	d_kqfilter_t	bpfkqfilter;
124209139Srpaulo
125209139Srpaulostatic struct cdevsw bpf_cdevsw = {
126209139Srpaulo	.d_version =	D_VERSION,
127209139Srpaulo	.d_flags =	D_NEEDGIANT,
128209139Srpaulo	.d_open =	bpfopen,
129209139Srpaulo	.d_close =	bpfclose,
130209139Srpaulo	.d_read =	bpfread,
131209139Srpaulo	.d_write =	bpfwrite,
132209139Srpaulo	.d_ioctl =	bpfioctl,
133209139Srpaulo	.d_poll =	bpfpoll,
134209139Srpaulo	.d_name =	"bpf",
135209139Srpaulo	.d_kqfilter =	bpfkqfilter,
136209139Srpaulo};
137209139Srpaulo
138209139Srpaulostatic struct filterops bpfread_filtops =
139209139Srpaulo	{ 1, NULL, filt_bpfdetach, filt_bpfread };
140209139Srpaulo
141209139Srpaulostatic int
142209139Srpaulobpf_movein(uio, linktype, mp, sockp, datlen)
143209139Srpaulo	struct uio *uio;
144209139Srpaulo	int linktype, *datlen;
145209139Srpaulo	struct mbuf **mp;
146209139Srpaulo	struct sockaddr *sockp;
147209139Srpaulo{
148209139Srpaulo	struct mbuf *m;
149209139Srpaulo	int error;
150209139Srpaulo	int len;
151209139Srpaulo	int hlen;
152209139Srpaulo
153209139Srpaulo	/*
154209139Srpaulo	 * Build a sockaddr based on the data link layer type.
155252190Srpaulo	 * We do this at this level because the ethernet header
156209139Srpaulo	 * is copied directly into the data field of the sockaddr.
157209139Srpaulo	 * In the case of SLIP, there is no header and the packet
158209139Srpaulo	 * is forwarded as is.
159209139Srpaulo	 * Also, we are careful to leave room at the front of the mbuf
160209139Srpaulo	 * for the link level header.
161209139Srpaulo	 */
162209139Srpaulo	switch (linktype) {
163209139Srpaulo
164209139Srpaulo	case DLT_SLIP:
165209139Srpaulo		sockp->sa_family = AF_INET;
166209139Srpaulo		hlen = 0;
167209139Srpaulo		break;
168209139Srpaulo
169209139Srpaulo	case DLT_EN10MB:
170209139Srpaulo		sockp->sa_family = AF_UNSPEC;
171209139Srpaulo		/* XXX Would MAXLINKHDR be better? */
172209139Srpaulo		hlen = ETHER_HDR_LEN;
173209139Srpaulo		break;
174209139Srpaulo
175209139Srpaulo	case DLT_FDDI:
176209139Srpaulo		sockp->sa_family = AF_IMPLINK;
177209139Srpaulo		hlen = 0;
178209139Srpaulo		break;
179209139Srpaulo
180209139Srpaulo	case DLT_RAW:
181209139Srpaulo	case DLT_NULL:
182209139Srpaulo		sockp->sa_family = AF_UNSPEC;
183209139Srpaulo		hlen = 0;
184209139Srpaulo		break;
185209139Srpaulo
186209139Srpaulo	case DLT_ATM_RFC1483:
187214501Srpaulo		/*
188214501Srpaulo		 * en atm driver requires 4-byte atm pseudo header.
189214501Srpaulo		 * though it isn't standard, vpi:vci needs to be
190214501Srpaulo		 * specified anyway.
191214501Srpaulo		 */
192209139Srpaulo		sockp->sa_family = AF_UNSPEC;
193209139Srpaulo		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
194209139Srpaulo		break;
195209139Srpaulo
196209139Srpaulo	case DLT_PPP:
197209139Srpaulo		sockp->sa_family = AF_UNSPEC;
198209139Srpaulo		hlen = 4;	/* This should match PPP_HDRLEN */
199209139Srpaulo		break;
200209139Srpaulo
201209139Srpaulo	default:
202209139Srpaulo		return (EIO);
203209139Srpaulo	}
204209139Srpaulo
205209139Srpaulo	len = uio->uio_resid;
206209139Srpaulo	*datlen = len - hlen;
207209139Srpaulo	if ((unsigned)len > MCLBYTES)
208209139Srpaulo		return (EIO);
209209139Srpaulo
210209139Srpaulo	if (len > MHLEN) {
211209139Srpaulo		m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
212209139Srpaulo	} else {
213209139Srpaulo		MGETHDR(m, M_TRYWAIT, MT_DATA);
214209139Srpaulo	}
215209139Srpaulo	if (m == NULL)
216209139Srpaulo		return (ENOBUFS);
217209139Srpaulo	m->m_pkthdr.len = m->m_len = len;
218209139Srpaulo	m->m_pkthdr.rcvif = NULL;
219209139Srpaulo	*mp = m;
220209139Srpaulo
221209139Srpaulo	/*
222209139Srpaulo	 * Make room for link header.
223209139Srpaulo	 */
224209139Srpaulo	if (hlen != 0) {
225209139Srpaulo		m->m_pkthdr.len -= hlen;
226209139Srpaulo		m->m_len -= hlen;
227209139Srpaulo#if BSD >= 199103
228209139Srpaulo		m->m_data += hlen; /* XXX */
229209139Srpaulo#else
230209139Srpaulo		m->m_off += hlen;
231209139Srpaulo#endif
232209139Srpaulo		error = uiomove(sockp->sa_data, hlen, uio);
233209139Srpaulo		if (error)
234209139Srpaulo			goto bad;
235209139Srpaulo	}
236209139Srpaulo	error = uiomove(mtod(m, void *), len - hlen, uio);
237209139Srpaulo	if (!error)
238209139Srpaulo		return (0);
239209139Srpaulobad:
240209139Srpaulo	m_freem(m);
241209139Srpaulo	return (error);
242209139Srpaulo}
243209139Srpaulo
/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(d, bp)
	struct bpf_d *d;
	struct bpf_if *bp;
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	/* Push d onto the head of the singly-linked listener list. */
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;

	/* A non-null cookie is what makes the driver tap packets. */
	*bp->bif_driverp = bp;
	BPFIF_UNLOCK(bp);
}
265209139Srpaulo
/*
 * Detach a file from its interface.
 *
 * Drops any promiscuous mode this descriptor requested, unlinks the
 * descriptor from the interface's listener list and, when that list
 * becomes empty, clears the driver's bpf cookie so the driver stops
 * diverting packets to bpf.
 */
static void
bpf_detachd(d)
	struct bpf_d *d;
{
	int error;
	struct bpf_d **p;
	struct bpf_if *bp;

	/* XXX locking */
	/*
	 * NOTE(review): bd_bif is cleared before BPFIF_LOCK is taken
	 * below — confirm no tap path can race with this window.
	 */
	bp = d->bd_bif;
	d->bd_bif = 0;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		error = ifpromisc(bp->bif_ifp, 0);
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
				"bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
	/* Remove d from the interface's descriptor list. */
	BPFIF_LOCK(bp);
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0)
			panic("bpf_detachd: descriptor not in list");
	}
	*p = (*p)->bd_next;
	if (bp->bif_dlist == 0)
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = 0;
	BPFIF_UNLOCK(bp);
}
314209139Srpaulo
/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
static	int
bpfopen(dev, flags, fmt, td)
	dev_t dev;
	int flags;
	int fmt;
	struct thread *td;
{
	struct bpf_d *d;

	mtx_lock(&bpf_mtx);
	d = dev->si_drv1;
	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 */
	if (d) {
		mtx_unlock(&bpf_mtx);
		return (EBUSY);
	}
	/*
	 * Reserve the minor before dropping the global lock; the real
	 * descriptor is allocated below, where we may sleep (M_WAITOK).
	 */
	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
	mtx_unlock(&bpf_mtx);

	/* Clone-on-open: create the device node if it was never named. */
	if ((dev->si_flags & SI_NAMED) == 0)
		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
		    "bpf%d", dev2unit(dev));
	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	dev->si_drv1 = d;
	/*
	 * Defaults for a fresh descriptor: sysctl-tunable buffer size,
	 * SIGIO as the async-notification signal, and "see locally sent
	 * packets" enabled.
	 */
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_seesent = 1;
#ifdef MAC
	mac_init_bpfdesc(d);
	mac_create_bpfdesc(td->td_ucred, d);
#endif
	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init(&d->bd_callout, CALLOUT_MPSAFE);

	return (0);
}
359209139Srpaulo
/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static	int
bpfclose(dev, flags, fmt, td)
	dev_t dev;
	int flags;
	int fmt;
	struct thread *td;
{
	struct bpf_d *d = dev->si_drv1;

	/* Cancel any pending read timeout before tearing down. */
	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	/* Drop SIGIO ownership so no signal is delivered after close. */
	funsetown(&d->bd_sigio);
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
#ifdef MAC
	mac_destroy_bpfdesc(d);
#endif /* MAC */
	/*
	 * bpf_freed() releases the buffers, the filter program and the
	 * per-descriptor mutex; clearing si_drv1 marks the minor free
	 * for a subsequent open.
	 */
	bpf_freed(d);
	dev->si_drv1 = 0;
	free(d, M_BPF);

	return (0);
}
393209139Srpaulo
394209139Srpaulo
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * Wrapped in do { } while (0) so the multi-statement expansion always
 * behaves as a single statement (safe after an unbraced if/else).
 * Caller must hold the descriptor lock.
 */
#define ROTATE_BUFFERS(d) do { \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = 0; \
} while (0)
/*
 *  bpfread - read next chunk of packets from buffers
 */
static	int
bpfread(dev, uio, ioflag)
	dev_t dev;
	struct uio *uio;
	int ioflag;
{
	struct bpf_d *d = dev->si_drv1;
	int timed_out;
	int error;

	/*
	 * Restrict application to use a buffer the same size as
	 * as kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	BPFD_LOCK(d);
	/* Any pending read timeout is consumed by this read. */
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == 0) {
		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (ioflag & IO_NDELAY) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		/* msleep() drops bd_mtx while asleep and retakes it. */
		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
		     "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 *
	 * NOTE(review): bd_hbuf is dereferenced here without bd_mtx held
	 * (uiomove may fault/sleep); this relies on the hold slot not
	 * being recycled while a read is in flight — confirm against
	 * catchpacket().
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	/* Recycle the consumed hold buffer into the free slot. */
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = 0;
	d->bd_hlen = 0;
	BPFD_UNLOCK(d);

	return (error);
}
510209139Srpaulo
511209139Srpaulo
512209139Srpaulo/*
513209139Srpaulo * If there are processes sleeping on this descriptor, wake them up.
514209139Srpaulo */
515209139Srpaulostatic __inline void
516209139Srpaulobpf_wakeup(d)
517209139Srpaulo	struct bpf_d *d;
518209139Srpaulo{
519209139Srpaulo	if (d->bd_state == BPF_WAITING) {
520209139Srpaulo		callout_stop(&d->bd_callout);
521209139Srpaulo		d->bd_state = BPF_IDLE;
522209139Srpaulo	}
523209139Srpaulo	wakeup(d);
524209139Srpaulo	if (d->bd_async && d->bd_sig && d->bd_sigio)
525209139Srpaulo		pgsigio(&d->bd_sigio, d->bd_sig, 0);
526209139Srpaulo
527209139Srpaulo	selwakeuppri(&d->bd_sel, PRINET);
528209139Srpaulo	KNOTE(&d->bd_sel.si_note, 0);
529209139Srpaulo}
530209139Srpaulo
531209139Srpaulostatic void
532209139Srpaulobpf_timed_out(arg)
533209139Srpaulo	void *arg;
534209139Srpaulo{
535209139Srpaulo	struct bpf_d *d = (struct bpf_d *)arg;
536209139Srpaulo
537209139Srpaulo	BPFD_LOCK(d);
538209139Srpaulo	if (d->bd_state == BPF_WAITING) {
539209139Srpaulo		d->bd_state = BPF_TIMED_OUT;
540209139Srpaulo		if (d->bd_slen != 0)
541209139Srpaulo			bpf_wakeup(d);
542209139Srpaulo	}
543209139Srpaulo	BPFD_UNLOCK(d);
544209139Srpaulo}
545209139Srpaulo
546209139Srpaulostatic	int
547209139Srpaulobpfwrite(dev, uio, ioflag)
548209139Srpaulo	dev_t dev;
549209139Srpaulo	struct uio *uio;
550209139Srpaulo	int ioflag;
551209139Srpaulo{
552209139Srpaulo	struct bpf_d *d = dev->si_drv1;
553209139Srpaulo	struct ifnet *ifp;
554209139Srpaulo	struct mbuf *m;
555209139Srpaulo	int error;
556209139Srpaulo	struct sockaddr dst;
557209139Srpaulo	int datlen;
558209139Srpaulo
559209139Srpaulo	if (d->bd_bif == 0)
560209139Srpaulo		return (ENXIO);
561209139Srpaulo
562209139Srpaulo	ifp = d->bd_bif->bif_ifp;
563209139Srpaulo
564209139Srpaulo	if (uio->uio_resid == 0)
565209139Srpaulo		return (0);
566209139Srpaulo
567209139Srpaulo	bzero(&dst, sizeof(dst));
568209139Srpaulo	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen);
569209139Srpaulo	if (error)
570209139Srpaulo		return (error);
571209139Srpaulo
572209139Srpaulo	if (datlen > ifp->if_mtu)
573209139Srpaulo		return (EMSGSIZE);
574209139Srpaulo
575209139Srpaulo	if (d->bd_hdrcmplt)
576209139Srpaulo		dst.sa_family = pseudo_AF_HDRCMPLT;
577209139Srpaulo
578209139Srpaulo#ifdef MAC
579209139Srpaulo	BPFD_LOCK(d);
580209139Srpaulo	mac_create_mbuf_from_bpfdesc(d, m);
581209139Srpaulo	BPFD_UNLOCK(d);
582209139Srpaulo#endif
583209139Srpaulo	NET_LOCK_GIANT();
584209139Srpaulo	error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0);
585209139Srpaulo	NET_UNLOCK_GIANT();
586209139Srpaulo	/*
587209139Srpaulo	 * The driver frees the mbuf.
588209139Srpaulo	 */
589209139Srpaulo	return (error);
590209139Srpaulo}
591209139Srpaulo
592209139Srpaulo/*
593209139Srpaulo * Reset a descriptor by flushing its packet buffer and clearing the
594209139Srpaulo * receive and drop counts.
595209139Srpaulo */
596209139Srpaulostatic void
597209139Srpauloreset_d(d)
598209139Srpaulo	struct bpf_d *d;
599209139Srpaulo{
600209139Srpaulo
601209139Srpaulo	mtx_assert(&d->bd_mtx, MA_OWNED);
602209139Srpaulo	if (d->bd_hbuf) {
603209139Srpaulo		/* Free the hold buffer. */
604209139Srpaulo		d->bd_fbuf = d->bd_hbuf;
605209139Srpaulo		d->bd_hbuf = 0;
606209139Srpaulo	}
607209139Srpaulo	d->bd_slen = 0;
608209139Srpaulo	d->bd_hlen = 0;
609209139Srpaulo	d->bd_rcount = 0;
610209139Srpaulo	d->bd_dcount = 0;
611209139Srpaulo}
612209139Srpaulo
613209139Srpaulo/*
614209139Srpaulo *  FIONREAD		Check for read packet available.
615209139Srpaulo *  SIOCGIFADDR		Get interface address - convenient hook to driver.
616209139Srpaulo *  BIOCGBLEN		Get buffer len [for read()].
617209139Srpaulo *  BIOCSETF		Set ethernet read filter.
618209139Srpaulo *  BIOCFLUSH		Flush read packet buffer.
619209139Srpaulo *  BIOCPROMISC		Put interface into promiscuous mode.
620209139Srpaulo *  BIOCGDLT		Get link layer type.
621209139Srpaulo *  BIOCGETIF		Get interface name.
622209139Srpaulo *  BIOCSETIF		Set interface.
623209139Srpaulo *  BIOCSRTIMEOUT	Set read timeout.
624209139Srpaulo *  BIOCGRTIMEOUT	Get read timeout.
625209139Srpaulo *  BIOCGSTATS		Get packet stats.
626209139Srpaulo *  BIOCIMMEDIATE	Set immediate mode.
627209139Srpaulo *  BIOCVERSION		Get filter language version.
628209139Srpaulo *  BIOCGHDRCMPLT	Get "header already complete" flag
629209139Srpaulo *  BIOCSHDRCMPLT	Set "header already complete" flag
630209139Srpaulo *  BIOCGSEESENT	Get "see packets sent" flag
631209139Srpaulo *  BIOCSSEESENT	Set "see packets sent" flag
632209139Srpaulo */
/*
 * Device ioctl handler; see the command summary in the comment block
 * above.  bpf_cdevsw sets D_NEEDGIANT, so these handlers run under
 * Giant, which serializes them against open/close; per-descriptor
 * fields are additionally protected by bd_mtx where taken below.
 */
/* ARGSUSED */
static	int
bpfioctl(dev, cmd, addr, flags, td)
	dev_t dev;
	u_long cmd;
	caddr_t addr;
	int flags;
	struct thread *td;
{
	struct bpf_d *d = dev->si_drv1;
	int error = 0;

	/* Any ioctl cancels a pending read timeout. */
	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			if (d->bd_bif == 0)
				error = EINVAL;
			else {
				/* Forward to the attached interface. */
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		/* Only allowed before an interface is attached. */
		if (d->bd_bif != 0)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			/* Clamp to [BPF_MINBUFSIZE, bpf_maxbufsize] and
			 * report the clamped value back to the caller. */
			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == 0) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		/* Idempotent: only the first request touches the driver. */
		if (d->bd_promisc == 0) {
			mtx_lock(&Giant);
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			mtx_unlock(&Giant);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get current data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == 0)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 *
			 * NOTE(review): if tvtohz(tv) == 1 this stores 0,
			 * which msleep() treats as "no timeout" — confirm
			 * this is the intended behavior for 1-tick values.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/* Convert ticks back to seconds/microseconds. */
			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			/* NOTE(review): counters read without bd_mtx;
			 * presumably a stale snapshot is acceptable here. */
			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:
		*(u_int *)addr = d->bd_seesent;
		break;

	/*
	 * Set "see sent packets" flag
	 */
	case BIOCSSEESENT:
		d->bd_seesent = *(u_int *)addr;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		/* Handled per-read via IO_NDELAY; nothing stored here. */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(&d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}
	return (error);
}
926
927/*
928 * Set d's packet filter program to fp.  If this file already has a filter,
929 * free it and replace it.  Returns EINVAL for bogus requests.
930 */
931static int
932bpf_setf(d, fp)
933	struct bpf_d *d;
934	struct bpf_program *fp;
935{
936	struct bpf_insn *fcode, *old;
937	u_int flen, size;
938
939	old = d->bd_filter;
940	if (fp->bf_insns == 0) {
941		if (fp->bf_len != 0)
942			return (EINVAL);
943		BPFD_LOCK(d);
944		d->bd_filter = 0;
945		reset_d(d);
946		BPFD_UNLOCK(d);
947		if (old != 0)
948			free((caddr_t)old, M_BPF);
949		return (0);
950	}
951	flen = fp->bf_len;
952	if (flen > BPF_MAXINSNS)
953		return (EINVAL);
954
955	size = flen * sizeof(*fp->bf_insns);
956	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
957	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
958	    bpf_validate(fcode, (int)flen)) {
959		BPFD_LOCK(d);
960		d->bd_filter = fcode;
961		reset_d(d);
962		BPFD_UNLOCK(d);
963		if (old != 0)
964			free((caddr_t)old, M_BPF);
965
966		return (0);
967	}
968	free((caddr_t)fcode, M_BPF);
969	return (EINVAL);
970}
971
972/*
973 * Detach a file from its current interface (if attached at all) and attach
974 * to the interface indicated by the name stored in ifr.
975 * Return an errno or 0.
976 */
977static int
978bpf_setif(d, ifr)
979	struct bpf_d *d;
980	struct ifreq *ifr;
981{
982	struct bpf_if *bp;
983	int error;
984	struct ifnet *theywant;
985
986	theywant = ifunit(ifr->ifr_name);
987	if (theywant == 0)
988		return ENXIO;
989
990	/*
991	 * Look through attached interfaces for the named one.
992	 */
993	mtx_lock(&bpf_mtx);
994	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
995		struct ifnet *ifp = bp->bif_ifp;
996
997		if (ifp == 0 || ifp != theywant)
998			continue;
999		/* skip additional entry */
1000		if (bp->bif_driverp != (struct bpf_if **)&ifp->if_bpf)
1001			continue;
1002
1003		mtx_unlock(&bpf_mtx);
1004		/*
1005		 * We found the requested interface.
1006		 * If it's not up, return an error.
1007		 * Allocate the packet buffers if we need to.
1008		 * If we're already attached to requested interface,
1009		 * just flush the buffer.
1010		 */
1011		if ((ifp->if_flags & IFF_UP) == 0)
1012			return (ENETDOWN);
1013
1014		if (d->bd_sbuf == 0) {
1015			error = bpf_allocbufs(d);
1016			if (error != 0)
1017				return (error);
1018		}
1019		if (bp != d->bd_bif) {
1020			if (d->bd_bif)
1021				/*
1022				 * Detach if attached to something else.
1023				 */
1024				bpf_detachd(d);
1025
1026			bpf_attachd(d, bp);
1027		}
1028		BPFD_LOCK(d);
1029		reset_d(d);
1030		BPFD_UNLOCK(d);
1031		return (0);
1032	}
1033	mtx_unlock(&bpf_mtx);
1034	/* Not found. */
1035	return (ENXIO);
1036}
1037
1038/*
1039 * Support for select() and poll() system calls
1040 *
1041 * Return true iff the specific operation will not block indefinitely.
1042 * Otherwise, return false but make a note that a selwakeup() must be done.
1043 */
1044static int
1045bpfpoll(dev, events, td)
1046	dev_t dev;
1047	int events;
1048	struct thread *td;
1049{
1050	struct bpf_d *d;
1051	int revents;
1052
1053	d = dev->si_drv1;
1054	if (d->bd_bif == NULL)
1055		return (ENXIO);
1056
1057	revents = events & (POLLOUT | POLLWRNORM);
1058	BPFD_LOCK(d);
1059	if (events & (POLLIN | POLLRDNORM)) {
1060		if (bpf_ready(d))
1061			revents |= events & (POLLIN | POLLRDNORM);
1062		else {
1063			selrecord(td, &d->bd_sel);
1064			/* Start the read timeout if necessary. */
1065			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1066				callout_reset(&d->bd_callout, d->bd_rtout,
1067				    bpf_timed_out, d);
1068				d->bd_state = BPF_WAITING;
1069			}
1070		}
1071	}
1072	BPFD_UNLOCK(d);
1073	return (revents);
1074}
1075
1076/*
1077 * Support for kevent() system call.  Register EVFILT_READ filters and
1078 * reject all others.
1079 */
1080int
1081bpfkqfilter(dev, kn)
1082	dev_t dev;
1083	struct knote *kn;
1084{
1085	struct bpf_d *d = (struct bpf_d *)dev->si_drv1;
1086
1087	if (kn->kn_filter != EVFILT_READ)
1088		return (1);
1089
1090	kn->kn_fop = &bpfread_filtops;
1091	kn->kn_hook = d;
1092	BPFD_LOCK(d);
1093	SLIST_INSERT_HEAD(&d->bd_sel.si_note, kn, kn_selnext);
1094	BPFD_UNLOCK(d);
1095
1096	return (0);
1097}
1098
1099static void
1100filt_bpfdetach(kn)
1101	struct knote *kn;
1102{
1103	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1104
1105	BPFD_LOCK(d);
1106	SLIST_REMOVE(&d->bd_sel.si_note, kn, knote, kn_selnext);
1107	BPFD_UNLOCK(d);
1108}
1109
1110static int
1111filt_bpfread(kn, hint)
1112	struct knote *kn;
1113	long hint;
1114{
1115	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1116	int ready;
1117
1118	BPFD_LOCK(d);
1119	ready = bpf_ready(d);
1120	if (ready) {
1121		kn->kn_data = d->bd_slen;
1122		if (d->bd_hbuf)
1123			kn->kn_data += d->bd_hlen;
1124	}
1125	else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1126		callout_reset(&d->bd_callout, d->bd_rtout,
1127		    bpf_timed_out, d);
1128		d->bd_state = BPF_WAITING;
1129	}
1130	BPFD_UNLOCK(d);
1131
1132	return (ready);
1133}
1134
1135/*
1136 * Incoming linkage from device drivers.  Process the packet pkt, of length
1137 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1138 * by each process' filter, and if accepted, stashed into the corresponding
1139 * buffer.
1140 */
1141void
1142bpf_tap(bp, pkt, pktlen)
1143	struct bpf_if *bp;
1144	u_char *pkt;
1145	u_int pktlen;
1146{
1147	struct bpf_d *d;
1148	u_int slen;
1149
1150	BPFIF_LOCK(bp);
1151	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1152		BPFD_LOCK(d);
1153		++d->bd_rcount;
1154		slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen);
1155		if (slen != 0) {
1156#ifdef MAC
1157			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
1158#endif
1159				catchpacket(d, pkt, pktlen, slen, bcopy);
1160		}
1161		BPFD_UNLOCK(d);
1162	}
1163	BPFIF_UNLOCK(bp);
1164}
1165
1166/*
1167 * Copy data from an mbuf chain into a buffer.  This code is derived
1168 * from m_copydata in sys/uipc_mbuf.c.
1169 */
1170static void
1171bpf_mcopy(src_arg, dst_arg, len)
1172	const void *src_arg;
1173	void *dst_arg;
1174	size_t len;
1175{
1176	const struct mbuf *m;
1177	u_int count;
1178	u_char *dst;
1179
1180	m = src_arg;
1181	dst = dst_arg;
1182	while (len > 0) {
1183		if (m == 0)
1184			panic("bpf_mcopy");
1185		count = min(m->m_len, len);
1186		bcopy(mtod(m, void *), dst, count);
1187		m = m->m_next;
1188		dst += count;
1189		len -= count;
1190	}
1191}
1192
1193/*
1194 * Incoming linkage from device drivers, when packet is in an mbuf chain.
1195 */
1196void
1197bpf_mtap(bp, m)
1198	struct bpf_if *bp;
1199	struct mbuf *m;
1200{
1201	struct bpf_d *d;
1202	u_int pktlen, slen;
1203
1204	pktlen = m_length(m, NULL);
1205	if (pktlen == m->m_len) {
1206		bpf_tap(bp, mtod(m, u_char *), pktlen);
1207		return;
1208	}
1209
1210	BPFIF_LOCK(bp);
1211	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1212		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
1213			continue;
1214		BPFD_LOCK(d);
1215		++d->bd_rcount;
1216		slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
1217		if (slen != 0)
1218#ifdef MAC
1219			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
1220#endif
1221				catchpacket(d, (u_char *)m, pktlen, slen,
1222				    bpf_mcopy);
1223		BPFD_UNLOCK(d);
1224	}
1225	BPFIF_UNLOCK(bp);
1226}
1227
1228/*
1229 * Incoming linkage from device drivers, when packet is in
1230 * an mbuf chain and to be prepended by a contiguous header.
1231 */
1232void
1233bpf_mtap2(bp, data, dlen, m)
1234	struct bpf_if *bp;
1235	void *data;
1236	u_int dlen;
1237	struct mbuf *m;
1238{
1239	struct mbuf mb;
1240	struct bpf_d *d;
1241	u_int pktlen, slen;
1242
1243	pktlen = m_length(m, NULL);
1244	/*
1245	 * Craft on-stack mbuf suitable for passing to bpf_filter.
1246	 * Note that we cut corners here; we only setup what's
1247	 * absolutely needed--this mbuf should never go anywhere else.
1248	 */
1249	mb.m_next = m;
1250	mb.m_data = data;
1251	mb.m_len = dlen;
1252	pktlen += dlen;
1253
1254	BPFIF_LOCK(bp);
1255	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1256		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
1257			continue;
1258		BPFD_LOCK(d);
1259		++d->bd_rcount;
1260		slen = bpf_filter(d->bd_filter, (u_char *)&mb, pktlen, 0);
1261		if (slen != 0)
1262#ifdef MAC
1263			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
1264#endif
1265				catchpacket(d, (u_char *)&mb, pktlen, slen,
1266				    bpf_mcopy);
1267		BPFD_UNLOCK(d);
1268	}
1269	BPFIF_UNLOCK(bp);
1270}
1271
1272/*
1273 * Move the packet data from interface memory (pkt) into the
1274 * store buffer.  "cpfn" is the routine called to do the actual data
1275 * transfer.  bcopy is passed in to copy contiguous chunks, while
1276 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
1277 * pkt is really an mbuf.
1278 */
1279static void
1280catchpacket(d, pkt, pktlen, snaplen, cpfn)
1281	struct bpf_d *d;
1282	u_char *pkt;
1283	u_int pktlen, snaplen;
1284	void (*cpfn)(const void *, void *, size_t);
1285{
1286	struct bpf_hdr *hp;
1287	int totlen, curlen;
1288	int hdrlen = d->bd_bif->bif_hdrlen;
1289
1290	/*
1291	 * Figure out how many bytes to move.  If the packet is
1292	 * greater or equal to the snapshot length, transfer that
1293	 * much.  Otherwise, transfer the whole packet (unless
1294	 * we hit the buffer size limit).
1295	 */
1296	totlen = hdrlen + min(snaplen, pktlen);
1297	if (totlen > d->bd_bufsize)
1298		totlen = d->bd_bufsize;
1299
1300	/*
1301	 * Round up the end of the previous packet to the next longword.
1302	 */
1303	curlen = BPF_WORDALIGN(d->bd_slen);
1304	if (curlen + totlen > d->bd_bufsize) {
1305		/*
1306		 * This packet will overflow the storage buffer.
1307		 * Rotate the buffers if we can, then wakeup any
1308		 * pending reads.
1309		 */
1310		if (d->bd_fbuf == 0) {
1311			/*
1312			 * We haven't completed the previous read yet,
1313			 * so drop the packet.
1314			 */
1315			++d->bd_dcount;
1316			return;
1317		}
1318		ROTATE_BUFFERS(d);
1319		bpf_wakeup(d);
1320		curlen = 0;
1321	}
1322	else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
1323		/*
1324		 * Immediate mode is set, or the read timeout has
1325		 * already expired during a select call.  A packet
1326		 * arrived, so the reader should be woken up.
1327		 */
1328		bpf_wakeup(d);
1329
1330	/*
1331	 * Append the bpf header.
1332	 */
1333	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
1334	microtime(&hp->bh_tstamp);
1335	hp->bh_datalen = pktlen;
1336	hp->bh_hdrlen = hdrlen;
1337	/*
1338	 * Copy the packet data into the store buffer and update its length.
1339	 */
1340	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
1341	d->bd_slen = curlen + totlen;
1342}
1343
1344/*
1345 * Initialize all nonzero fields of a descriptor.
1346 */
1347static int
1348bpf_allocbufs(d)
1349	struct bpf_d *d;
1350{
1351	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1352	if (d->bd_fbuf == 0)
1353		return (ENOBUFS);
1354
1355	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1356	if (d->bd_sbuf == 0) {
1357		free(d->bd_fbuf, M_BPF);
1358		return (ENOBUFS);
1359	}
1360	d->bd_slen = 0;
1361	d->bd_hlen = 0;
1362	return (0);
1363}
1364
1365/*
1366 * Free buffers currently in use by a descriptor.
1367 * Called on close.
1368 */
1369static void
1370bpf_freed(d)
1371	struct bpf_d *d;
1372{
1373	/*
1374	 * We don't need to lock out interrupts since this descriptor has
1375	 * been detached from its interface and it yet hasn't been marked
1376	 * free.
1377	 */
1378	if (d->bd_sbuf != 0) {
1379		free(d->bd_sbuf, M_BPF);
1380		if (d->bd_hbuf != 0)
1381			free(d->bd_hbuf, M_BPF);
1382		if (d->bd_fbuf != 0)
1383			free(d->bd_fbuf, M_BPF);
1384	}
1385	if (d->bd_filter)
1386		free((caddr_t)d->bd_filter, M_BPF);
1387	mtx_destroy(&d->bd_mtx);
1388}
1389
1390/*
1391 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
1392 * fixed size of the link header (variable length headers not yet supported).
1393 */
1394void
1395bpfattach(ifp, dlt, hdrlen)
1396	struct ifnet *ifp;
1397	u_int dlt, hdrlen;
1398{
1399
1400	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
1401}
1402
1403/*
1404 * Attach an interface to bpf.  ifp is a pointer to the structure
1405 * defining the interface to be attached, dlt is the link layer type,
1406 * and hdrlen is the fixed size of the link header (variable length
1407 * headers are not yet supporrted).
1408 */
1409void
1410bpfattach2(ifp, dlt, hdrlen, driverp)
1411	struct ifnet *ifp;
1412	u_int dlt, hdrlen;
1413	struct bpf_if **driverp;
1414{
1415	struct bpf_if *bp;
1416	bp = (struct bpf_if *)malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
1417	if (bp == 0)
1418		panic("bpfattach");
1419
1420	bp->bif_dlist = 0;
1421	bp->bif_driverp = driverp;
1422	bp->bif_ifp = ifp;
1423	bp->bif_dlt = dlt;
1424	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
1425
1426	mtx_lock(&bpf_mtx);
1427	bp->bif_next = bpf_iflist;
1428	bpf_iflist = bp;
1429	mtx_unlock(&bpf_mtx);
1430
1431	*bp->bif_driverp = 0;
1432
1433	/*
1434	 * Compute the length of the bpf header.  This is not necessarily
1435	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1436	 * that the network layer header begins on a longword boundary (for
1437	 * performance reasons and to alleviate alignment restrictions).
1438	 */
1439	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1440
1441	if (bootverbose)
1442		if_printf(ifp, "bpf attached\n");
1443}
1444
1445/*
1446 * Detach bpf from an interface.  This involves detaching each descriptor
1447 * associated with the interface, and leaving bd_bif NULL.  Notify each
1448 * descriptor as it's detached so that any sleepers wake up and get
1449 * ENXIO.
1450 */
1451void
1452bpfdetach(ifp)
1453	struct ifnet *ifp;
1454{
1455	struct bpf_if	*bp, *bp_prev;
1456	struct bpf_d	*d;
1457
1458	/* Locate BPF interface information */
1459	bp_prev = NULL;
1460
1461	mtx_lock(&bpf_mtx);
1462	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1463		if (ifp == bp->bif_ifp)
1464			break;
1465		bp_prev = bp;
1466	}
1467
1468	/* Interface wasn't attached */
1469	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
1470		mtx_unlock(&bpf_mtx);
1471		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
1472		return;
1473	}
1474
1475	if (bp_prev) {
1476		bp_prev->bif_next = bp->bif_next;
1477	} else {
1478		bpf_iflist = bp->bif_next;
1479	}
1480	mtx_unlock(&bpf_mtx);
1481
1482	while ((d = bp->bif_dlist) != NULL) {
1483		bpf_detachd(d);
1484		BPFD_LOCK(d);
1485		bpf_wakeup(d);
1486		BPFD_UNLOCK(d);
1487	}
1488
1489	mtx_destroy(&bp->bif_mtx);
1490	free(bp, M_BPF);
1491}
1492
1493/*
1494 * Get a list of available data link type of the interface.
1495 */
1496static int
1497bpf_getdltlist(d, bfl)
1498	struct bpf_d *d;
1499	struct bpf_dltlist *bfl;
1500{
1501	int n, error;
1502	struct ifnet *ifp;
1503	struct bpf_if *bp;
1504
1505	ifp = d->bd_bif->bif_ifp;
1506	n = 0;
1507	error = 0;
1508	mtx_lock(&bpf_mtx);
1509	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1510		if (bp->bif_ifp != ifp)
1511			continue;
1512		if (bfl->bfl_list != NULL) {
1513			if (n >= bfl->bfl_len) {
1514				mtx_unlock(&bpf_mtx);
1515				return (ENOMEM);
1516			}
1517			error = copyout(&bp->bif_dlt,
1518			    bfl->bfl_list + n, sizeof(u_int));
1519		}
1520		n++;
1521	}
1522	mtx_unlock(&bpf_mtx);
1523	bfl->bfl_len = n;
1524	return (error);
1525}
1526
1527/*
1528 * Set the data link type of a BPF instance.
1529 */
1530static int
1531bpf_setdlt(d, dlt)
1532	struct bpf_d *d;
1533	u_int dlt;
1534{
1535	int error, opromisc;
1536	struct ifnet *ifp;
1537	struct bpf_if *bp;
1538
1539	if (d->bd_bif->bif_dlt == dlt)
1540		return (0);
1541	ifp = d->bd_bif->bif_ifp;
1542	mtx_lock(&bpf_mtx);
1543	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1544		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
1545			break;
1546	}
1547	mtx_unlock(&bpf_mtx);
1548	if (bp != NULL) {
1549		BPFD_LOCK(d);
1550		opromisc = d->bd_promisc;
1551		bpf_detachd(d);
1552		bpf_attachd(d, bp);
1553		reset_d(d);
1554		BPFD_UNLOCK(d);
1555		if (opromisc) {
1556			error = ifpromisc(bp->bif_ifp, 1);
1557			if (error)
1558				if_printf(bp->bif_ifp,
1559					"bpf_setdlt: ifpromisc failed (%d)\n",
1560					error);
1561			else
1562				d->bd_promisc = 1;
1563		}
1564	}
1565	return (bp == NULL ? EINVAL : 0);
1566}
1567
1568static void bpf_drvinit(void *unused);
1569
1570static void bpf_clone(void *arg, char *name, int namelen, dev_t *dev);
1571
1572static void
1573bpf_clone(arg, name, namelen, dev)
1574	void *arg;
1575	char *name;
1576	int namelen;
1577	dev_t *dev;
1578{
1579	int u;
1580
1581	if (*dev != NODEV)
1582		return;
1583	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
1584		return;
1585	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
1586	    "bpf%d", u);
1587	(*dev)->si_flags |= SI_CHEAPCLONE;
1588	return;
1589}
1590
/*
 * Driver initialization: set up the global bpf lock and register the
 * /dev/bpfN clone handler.  Run via SYSINIT at SI_SUB_DRIVERS.
 */
static void
bpf_drvinit(unused)
	void *unused;
{

	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
}
1599
1600SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL)
1601
1602#else /* !DEV_BPF && !NETGRAPH_BPF */
1603/*
1604 * NOP stubs to allow bpf-using drivers to load and function.
1605 *
1606 * A 'better' implementation would allow the core bpf functionality
1607 * to be loaded at runtime.
1608 */
1609
/* NOP stub: discard the packet when bpf is not compiled in. */
void
bpf_tap(bp, pkt, pktlen)
	struct bpf_if *bp;
	u_char *pkt;
	u_int pktlen;
{
}
1617
/* NOP stub: discard the mbuf-chain packet when bpf is not compiled in. */
void
bpf_mtap(bp, m)
	struct bpf_if *bp;
	struct mbuf *m;
{
}
1624
/* NOP stub: discard the header-plus-chain packet when bpf is absent. */
void
bpf_mtap2(bp, d, l, m)
	struct bpf_if *bp;
	void *d;
	u_int l;
	struct mbuf *m;
{
}
1633
/* NOP stub: attaching an interface does nothing when bpf is absent. */
void
bpfattach(ifp, dlt, hdrlen)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
{
}
1640
/* NOP stub: attaching with an explicit driver hook does nothing. */
void
bpfattach2(ifp, dlt, hdrlen, driverp)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
	struct bpf_if **driverp;
{
}
1648
/* NOP stub: detaching an interface does nothing when bpf is absent. */
void
bpfdetach(ifp)
	struct ifnet *ifp;
{
}
1654
/*
 * Stub filter: always return (u_int)-1, i.e. the maximum snapshot
 * length, which matches the "no filter" behaviour of accepting the
 * entire packet.
 */
u_int
bpf_filter(pc, p, wirelen, buflen)
	const struct bpf_insn *pc;
	u_char *p;
	u_int wirelen;
	u_int buflen;
{
	return -1;	/* "no filter" behaviour */
}
1664
/* Stub validator: always reject (return false) when bpf is absent. */
int
bpf_validate(f, len)
	const struct bpf_insn *f;
	int len;
{
	return 0;		/* false */
}
1672
1673#endif /* !DEV_BPF && !NETGRAPH_BPF */
1674