/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
 *
 * $FreeBSD: head/sys/net/bpf.c 108364 2002-12-28 21:40:20Z phk $
 */

#include "opt_bpf.h"
#include "opt_mac.h"
#include "opt_netgraph.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/filedesc.h>

#include <sys/poll.h>

#include <sys/socket.h>
#include <sys/vnode.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define PRINET  26			/* interruptible */

/*
 * The default read buffer size is patchable.
 */
static int bpf_bufsize = 4096;
SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW,
	&bpf_bufsize, 0, "");
static int bpf_maxbufsize = BPF_MAXBUFSIZE;
SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW,
	&bpf_maxbufsize, 0, "");

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet.
 */
static struct bpf_if	*bpf_iflist;
static struct mtx	bpf_mtx;		/* bpf global lock */

static int	bpf_allocbufs(struct bpf_d *);
static void	bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
static void	bpf_detachd(struct bpf_d *d);
static void	bpf_freed(struct bpf_d *);
static void	bpf_mcopy(const void *, void *, size_t);
static int	bpf_movein(struct uio *, int,
		    struct mbuf **, struct sockaddr *, int *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int,
		    u_int, void (*)(const void *, void *, size_t));
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *);

static	d_open_t	bpfopen;
static	d_close_t	bpfclose;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;

#define CDEV_MAJOR 23
static struct cdevsw bpf_cdevsw = {
	/* open */	bpfopen,
	/* close */	bpfclose,
	/* read */	bpfread,
	/* write */	bpfwrite,
	/* ioctl */	bpfioctl,
	/* poll */	bpfpoll,
	/* mmap */	nommap,
	/* strategy */	nostrategy,
	/* name */	"bpf",
	/* maj */	CDEV_MAJOR,
	/* dump */	nodump,
	/* psize */	nopsize,
	/* flags */	0,
};

static int
bpf_movein(uio, linktype, mp, sockp, datlen)
	register struct uio *uio;
	int linktype, *datlen;
	register struct mbuf **mp;
	register struct sockaddr *sockp;
{
	struct mbuf *m;
	int error;
	int len;
	int hlen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = sizeof(struct ether_header);
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * The "en" ATM driver requires a 4-byte ATM pseudo header.
		 * Though it isn't standard, the VPI:VCI needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;
	*datlen = len - hlen;
	if ((unsigned)len > MCLBYTES)
		return (EIO);

	if (len > MHLEN) {
		m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
	} else {
		MGETHDR(m, M_TRYWAIT, MT_DATA);
	}
	if (m == NULL)
		return (ENOBUFS);
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	/*
	 * Make room for link header.
	 */
	if (hlen != 0) {
		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
#if BSD >= 199103
		m->m_data += hlen; /* XXX */
#else
		m->m_off += hlen;
#endif
		error = uiomove((caddr_t)sockp->sa_data, hlen, uio);
		if (error)
			goto bad;
	}
	error = uiomove(mtod(m, caddr_t), len - hlen, uio);
	if (!error)
		return (0);
bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(d, bp)
	struct bpf_d *d;
	struct bpf_if *bp;
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;

	*bp->bif_driverp = bp;
	BPFIF_UNLOCK(bp);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(d)
	struct bpf_d *d;
{
	int error;
	struct bpf_d **p;
	struct bpf_if *bp;

	bp = d->bd_bif;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		error = ifpromisc(bp->bif_ifp, 0);
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged.
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp, "ifpromisc failed %d\n", error);
		}
	}
	/* Remove d from the interface's descriptor list. */
	BPFIF_LOCK(bp);
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0)
			panic("bpf_detachd: descriptor not in list");
	}
	*p = (*p)->bd_next;
	if (bp->bif_dlist == 0)
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*d->bd_bif->bif_driverp = 0;
	BPFIF_UNLOCK(bp);
	d->bd_bif = 0;
}

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
static	int
bpfopen(dev, flags, fmt, td)
	dev_t dev;
	int flags;
	int fmt;
	struct thread *td;
{
	struct bpf_d *d;

	mtx_lock(&bpf_mtx);
	d = dev->si_drv1;
	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 */
	if (d) {
		mtx_unlock(&bpf_mtx);
		return (EBUSY);
	}
	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
	mtx_unlock(&bpf_mtx);

	if ((dev->si_flags & SI_NAMED) == 0)
		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
		    "bpf%d", dev2unit(dev));
	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	dev->si_drv1 = d;
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_seesent = 1;
#ifdef MAC
	mac_init_bpfdesc(d);
	mac_create_bpfdesc(td->td_ucred, d);
#endif
	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init(&d->bd_callout, 1);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static	int
bpfclose(dev, flags, fmt, td)
	dev_t dev;
	int flags;
	int fmt;
	struct thread *td;
{
	struct bpf_d *d = dev->si_drv1;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
#ifdef MAC
	mac_destroy_bpfdesc(d);
#endif /* MAC */
	bpf_freed(d);
	dev->si_drv1 = 0;
	free(d, M_BPF);

	return (0);
}


/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = 0;
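
/*
 * Commentary added for clarity: each descriptor cycles three bd_bufsize-byte
 * buffers.  catchpacket() appends records to the store buffer
 * (bd_sbuf/bd_slen); a full buffer is parked in the hold slot
 * (bd_hbuf/bd_hlen) until bpfread() drains it, after which it becomes the
 * free buffer (bd_fbuf), ready for the next rotation.
 */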
/*
 * bpfread - read the next chunk of packets from the buffers
 */
static	int
bpfread(dev, uio, ioflag)
	dev_t dev;
	register struct uio *uio;
	int ioflag;
{
	struct bpf_d *d = dev->si_drv1;
	int timed_out;
	int error;

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == 0) {
		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * One or more packets arrived, either since the
			 * previous read or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (ioflag & IO_NDELAY) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep((caddr_t)d, &d->bd_mtx, PRINET|PCATCH,
		     "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = 0;
	d->bd_hlen = 0;
	BPFD_UNLOCK(d);

	return (error);
}


/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(d)
	register struct bpf_d *d;
{
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup((caddr_t)d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeup(&d->bd_sel);
}

static void
bpf_timed_out(arg)
	void *arg;
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	BPFD_UNLOCK(d);
}

static	int
bpfwrite(dev, uio, ioflag)
	dev_t dev;
	struct uio *uio;
	int ioflag;
{
	struct bpf_d *d = dev->si_drv1;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	static struct sockaddr dst;
	int datlen;

	if (d->bd_bif == 0)
		return (ENXIO);

	ifp = d->bd_bif->bif_ifp;

	if (uio->uio_resid == 0)
		return (0);

	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen);
	if (error)
		return (error);

	if (datlen > ifp->if_mtu) {
		m_freem(m);	/* don't leak the mbuf on the error path */
		return (EMSGSIZE);
	}

	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

	mtx_lock(&Giant);
#ifdef MAC
	mac_create_mbuf_from_bpfdesc(d, m);
#endif
	error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0);
	mtx_unlock(&Giant);
	/*
	 * The driver frees the mbuf.
	 */
	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
static void
reset_d(d)
	struct bpf_d *d;
{

	mtx_assert(&d->bd_mtx, MA_OWNED);
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = 0;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag.
 *  BIOCSHDRCMPLT	Set "header already complete" flag.
 *  BIOCGSEESENT	Get "see packets sent" flag.
 *  BIOCSSEESENT	Set "see packets sent" flag.
 */
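/*
 * Illustrative userland sequence (added commentary, not part of the
 * original source; the interface name "fxp0" is a placeholder):
 *
 *	int fd = open("/dev/bpf0", O_RDONLY);
 *	struct ifreq ifr;
 *	u_int blen;
 *	strncpy(ifr.ifr_name, "fxp0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);	// bind to an interface
 *	ioctl(fd, BIOCGBLEN, &blen);	// read() must use exactly this size
 *	char *buf = malloc(blen);
 *	read(fd, buf, blen);		// yields bpf_hdr-framed packets
 *
 * bpfread() above rejects any read whose size differs from the kernel
 * buffer size, which is why BIOCGBLEN comes first.
 */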
/* ARGSUSED */
static	int
bpfioctl(dev, cmd, addr, flags, td)
	dev_t dev;
	u_long cmd;
	caddr_t addr;
	int flags;
	struct thread *td;
{
	struct bpf_d *d = dev->si_drv1;
	int error = 0;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			if (d->bd_bif == 0)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != 0)
			error = EINVAL;
		else {
			register u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == 0) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			mtx_lock(&Giant);
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			mtx_unlock(&Giant);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == 0)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			snprintf(ifr->ifr_name, sizeof(ifr->ifr_name),
			    "%s%d", ifp->if_name, ifp->if_unit);
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag.
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	/*
	 * Set "header already complete" flag.
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag.
	 */
	case BIOCGSEESENT:
		*(u_int *)addr = d->bd_seesent;
		break;

	/*
	 * Set "see sent packets" flag.
	 */
	case BIOCSSEESENT:
		d->bd_seesent = *(u_int *)addr;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(&d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
static int
bpf_setf(d, fp)
	struct bpf_d *d;
	struct bpf_program *fp;
{
	struct bpf_insn *fcode, *old;
	u_int flen, size;

	old = d->bd_filter;
	if (fp->bf_insns == 0) {
		if (fp->bf_len != 0)
			return (EINVAL);
		BPFD_LOCK(d);
		d->bd_filter = 0;
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != 0)
			free((caddr_t)old, M_BPF);
		return (0);
	}
	flen = fp->bf_len;
	if (flen > BPF_MAXINSNS)
		return (EINVAL);

	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		BPFD_LOCK(d);
		d->bd_filter = fcode;
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != 0)
			free((caddr_t)old, M_BPF);

		return (0);
	}
	free((caddr_t)fcode, M_BPF);
	return (EINVAL);
}
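
/*
 * For illustration (commentary added here, not in the original source):
 * the simplest program bpf_setf() will accept is a single return
 * instruction, e.g. one that accepts every packet in full:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
 *	};
 *	struct bpf_program prog = { 1, insns };
 *
 * handed in from userland via BIOCSETF.  When the filter runs, a return
 * value of 0 drops the packet; a nonzero value is used as the snap length
 * in catchpacket().
 */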

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(d, ifr)
	struct bpf_d *d;
	struct ifreq *ifr;
{
	struct bpf_if *bp;
	int error;
	struct ifnet *theywant;

	theywant = ifunit(ifr->ifr_name);
	if (theywant == 0)
		return (ENXIO);

	/*
	 * Look through attached interfaces for the named one.
	 */
	mtx_lock(&bpf_mtx);
	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == 0 || ifp != theywant)
			continue;
		/* skip additional entry */
		if (bp->bif_driverp != (struct bpf_if **)&ifp->if_bpf)
			continue;

		mtx_unlock(&bpf_mtx);
		/*
		 * We found the requested interface.
		 * If it's not up, return an error.
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to the requested interface,
		 * just flush the buffer.
		 */
		if ((ifp->if_flags & IFF_UP) == 0)
			return (ENETDOWN);

		if (d->bd_sbuf == 0) {
			error = bpf_allocbufs(d);
			if (error != 0)
				return (error);
		}
		if (bp != d->bd_bif) {
			if (d->bd_bif)
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);

			bpf_attachd(d, bp);
		}
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		return (0);
	}
	mtx_unlock(&bpf_mtx);
	/* Not found. */
	return (ENXIO);
}

/*
 * Support for select() and poll() system calls.
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
static int
bpfpoll(dev, events, td)
	register dev_t dev;
	int events;
	struct thread *td;
{
	struct bpf_d *d;
	int revents;

	d = dev->si_drv1;
	if (d->bd_bif == NULL)
		return (ENXIO);

	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	if (events & (POLLIN | POLLRDNORM)) {
		/*
		 * An imitation of the FIONREAD ioctl code.
		 * XXX not quite.  An exact imitation:
		 *	if (d->bd_slen != 0 ||
		 *	    (d->bd_hbuf != NULL && d->bd_hlen != 0))
		 */
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
		    d->bd_slen != 0))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(bp, pkt, pktlen)
	struct bpf_if *bp;
	register u_char *pkt;
	register u_int pktlen;
{
	register struct bpf_d *d;
	register u_int slen;

	BPFIF_LOCK(bp);
	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen);
		if (slen != 0) {
#ifdef MAC
			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen, bcopy);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void
bpf_mcopy(src_arg, dst_arg, len)
	const void *src_arg;
	void *dst_arg;
	register size_t len;
{
	register const struct mbuf *m;
	register u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == 0)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, void *), dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
void
bpf_mtap(bp, m)
	struct bpf_if *bp;
	struct mbuf *m;
{
	struct bpf_d *d;
	u_int pktlen, slen;

	pktlen = m_length(m, NULL);
	if (pktlen == m->m_len) {
		bpf_tap(bp, mtod(m, u_char *), pktlen);
		return;
	}

	BPFIF_LOCK(bp);
	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
#ifdef MAC
			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_mcopy);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer, waking any pending reader when a buffer fills or
 * immediate mode applies.  "cpfn" is the routine called to do the
 * actual data transfer.  bcopy is passed in to copy contiguous chunks,
 * while bpf_mcopy is passed in to copy mbuf chains.  In the latter
 * case, pkt is really an mbuf.
 */
static void
catchpacket(d, pkt, pktlen, snaplen, cpfn)
	register struct bpf_d *d;
	register u_char *pkt;
	register u_int pktlen, snaplen;
	register void (*cpfn)(const void *, void *, size_t);
{
	register struct bpf_hdr *hp;
	register int totlen, curlen;
	register int hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == 0) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		bpf_wakeup(d);
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		bpf_wakeup(d);

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	microtime(&hp->bh_tstamp);
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;
}
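
/*
 * Added note: the store buffer thus holds a sequence of records, each a
 * struct bpf_hdr followed by bh_caplen bytes of packet data, every record
 * padded to a BPF_WORDALIGN boundary.  A reader walks a buffer returned
 * by read(2) by advancing BPF_WORDALIGN(bh_hdrlen + bh_caplen) bytes per
 * record.
 */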

/*
 * Allocate the free and store buffers for a descriptor.
 */
static int
bpf_allocbufs(d)
	register struct bpf_d *d;
{
	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
	if (d->bd_fbuf == 0)
		return (ENOBUFS);

	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
	if (d->bd_sbuf == 0) {
		free(d->bd_fbuf, M_BPF);
		return (ENOBUFS);
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	return (0);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(d)
	register struct bpf_d *d;
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
	 * free.
	 */
	if (d->bd_sbuf != 0) {
		free(d->bd_sbuf, M_BPF);
		if (d->bd_hbuf != 0)
			free(d->bd_hbuf, M_BPF);
		if (d->bd_fbuf != 0)
			free(d->bd_fbuf, M_BPF);
	}
	if (d->bd_filter)
		free((caddr_t)d->bd_filter, M_BPF);
	mtx_destroy(&d->bd_mtx);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 */
void
bpfattach(ifp, dlt, hdrlen)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
 */
void
bpfattach2(ifp, dlt, hdrlen, driverp)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
	struct bpf_if **driverp;
{
	struct bpf_if *bp;

	bp = (struct bpf_if *)malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
	if (bp == 0)
		panic("bpfattach");

	bp->bif_dlist = 0;
	bp->bif_driverp = driverp;
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);

	mtx_lock(&bpf_mtx);
	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;
	mtx_unlock(&bpf_mtx);

	*bp->bif_driverp = 0;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
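	/*
	 * Worked example (added commentary, assuming a platform where
	 * SIZEOF_BPF_HDR is 18): for Ethernet, hdrlen is 14, so bif_hdrlen
	 * becomes BPF_WORDALIGN(14 + 18) - 14 = 32 - 14 = 18, and a captured
	 * packet's network layer header lands at offset 18 + 14 = 32, a
	 * longword boundary.
	 */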

	if (bootverbose)
		if_printf(ifp, "bpf attached\n");
}

/*
 * Detach bpf from an interface.  This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL.  Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(ifp)
	struct ifnet *ifp;
{
	struct bpf_if	*bp, *bp_prev;
	struct bpf_d	*d;

	mtx_lock(&bpf_mtx);

	/* Locate BPF interface information */
	bp_prev = NULL;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (ifp == bp->bif_ifp)
			break;
		bp_prev = bp;
	}

	/* Interface wasn't attached; bp is NULL if the loop found nothing. */
	if (bp == NULL) {
		mtx_unlock(&bpf_mtx);
		printf("bpfdetach: %s%d was not attached\n", ifp->if_name,
		    ifp->if_unit);
		return;
	}

	if (bp_prev) {
		bp_prev->bif_next = bp->bif_next;
	} else {
		bpf_iflist = bp->bif_next;
	}

	while ((d = bp->bif_dlist) != NULL) {
		bpf_detachd(d);
		BPFD_LOCK(d);
		bpf_wakeup(d);
		BPFD_UNLOCK(d);
	}

	mtx_destroy(&bp->bif_mtx);
	free(bp, M_BPF);

	mtx_unlock(&bpf_mtx);
}

static void bpf_drvinit(void *unused);

static void bpf_clone(void *arg, char *name, int namelen, dev_t *dev);

static void
bpf_clone(arg, name, namelen, dev)
	void *arg;
	char *name;
	int namelen;
	dev_t *dev;
{
	int u;

	if (*dev != NODEV)
		return;
	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
		return;
	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", u);
	(*dev)->si_flags |= SI_CHEAPCLONE;
}

static void
bpf_drvinit(unused)
	void *unused;
{

	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
}

SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, bpf_drvinit, NULL)

#else /* !DEV_BPF && !NETGRAPH_BPF */
/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */

void
bpf_tap(bp, pkt, pktlen)
	struct bpf_if *bp;
	register u_char *pkt;
	register u_int pktlen;
{
}

void
bpf_mtap(bp, m)
	struct bpf_if *bp;
	struct mbuf *m;
{
}

void
bpfattach(ifp, dlt, hdrlen)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
{
}

void
bpfdetach(ifp)
	struct ifnet *ifp;
{
}

u_int
bpf_filter(pc, p, wirelen, buflen)
	register const struct bpf_insn *pc;
	register u_char *p;
	u_int wirelen;
	register u_int buflen;
{
	return -1;	/* "no filter" behaviour */
}

int
bpf_validate(f, len)
	const struct bpf_insn *f;
	int len;
{
	return 0;		/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */