bpf.c revision 103555
1/*
2 * Copyright (c) 1990, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from the Stanford/CMU enet packet filter,
6 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
7 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
8 * Berkeley Laboratory.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
39 *
40 * $FreeBSD: head/sys/net/bpf.c 103555 2002-09-18 19:48:59Z phk $
41 */
42
43#include "opt_bpf.h"
44#include "opt_mac.h"
45#include "opt_netgraph.h"
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/conf.h>
50#include <sys/mac.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/time.h>
54#include <sys/proc.h>
55#include <sys/signalvar.h>
56#include <sys/filio.h>
57#include <sys/sockio.h>
58#include <sys/ttycom.h>
59#include <sys/filedesc.h>
60
61#include <sys/poll.h>
62
63#include <sys/socket.h>
64#include <sys/vnode.h>
65
66#include <net/if.h>
67#include <net/bpf.h>
68#include <net/bpfdesc.h>
69
70#include <netinet/in.h>
71#include <netinet/if_ether.h>
72#include <sys/kernel.h>
73#include <sys/sysctl.h>
74
75static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
76
77#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
78
79#define PRINET  26			/* interruptible */
80
81/*
82 * The default read buffer size is patchable.
83 */
84static int bpf_bufsize = 4096;
85SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW,
86	&bpf_bufsize, 0, "");
87static int bpf_maxbufsize = BPF_MAXBUFSIZE;
88SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW,
89	&bpf_maxbufsize, 0, "");
90
91/*
92 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
93 */
94static struct bpf_if	*bpf_iflist;
95static struct mtx	bpf_mtx;		/* bpf global lock */
96
97static int	bpf_allocbufs(struct bpf_d *);
98static void	bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
99static void	bpf_detachd(struct bpf_d *d);
100static void	bpf_freed(struct bpf_d *);
101static void	bpf_mcopy(const void *, void *, size_t);
102static int	bpf_movein(struct uio *, int,
103		    struct mbuf **, struct sockaddr *, int *);
104static int	bpf_setif(struct bpf_d *, struct ifreq *);
105static void	bpf_timed_out(void *);
106static __inline void
107		bpf_wakeup(struct bpf_d *);
108static void	catchpacket(struct bpf_d *, u_char *, u_int,
109		    u_int, void (*)(const void *, void *, size_t));
110static void	reset_d(struct bpf_d *);
111static int	 bpf_setf(struct bpf_d *, struct bpf_program *);
112
113static	d_open_t	bpfopen;
114static	d_close_t	bpfclose;
115static	d_read_t	bpfread;
116static	d_write_t	bpfwrite;
117static	d_ioctl_t	bpfioctl;
118static	d_poll_t	bpfpoll;
119
120#define CDEV_MAJOR 23
121static struct cdevsw bpf_cdevsw = {
122	/* open */	bpfopen,
123	/* close */	bpfclose,
124	/* read */	bpfread,
125	/* write */	bpfwrite,
126	/* ioctl */	bpfioctl,
127	/* poll */	bpfpoll,
128	/* mmap */	nommap,
129	/* strategy */	nostrategy,
130	/* name */	"bpf",
131	/* maj */	CDEV_MAJOR,
132	/* dump */	nodump,
133	/* psize */	nopsize,
134	/* flags */	0,
135};
136
137
138static int
139bpf_movein(uio, linktype, mp, sockp, datlen)
140	register struct uio *uio;
141	int linktype, *datlen;
142	register struct mbuf **mp;
143	register struct sockaddr *sockp;
144{
145	struct mbuf *m;
146	int error;
147	int len;
148	int hlen;
149
150	/*
151	 * Build a sockaddr based on the data link layer type.
152	 * We do this at this level because the ethernet header
153	 * is copied directly into the data field of the sockaddr.
154	 * In the case of SLIP, there is no header and the packet
155	 * is forwarded as is.
156	 * Also, we are careful to leave room at the front of the mbuf
157	 * for the link level header.
158	 */
159	switch (linktype) {
160
161	case DLT_SLIP:
162		sockp->sa_family = AF_INET;
163		hlen = 0;
164		break;
165
166	case DLT_EN10MB:
167		sockp->sa_family = AF_UNSPEC;
168		/* XXX Would MAXLINKHDR be better? */
169		hlen = sizeof(struct ether_header);
170		break;
171
172	case DLT_FDDI:
173		sockp->sa_family = AF_IMPLINK;
174		hlen = 0;
175		break;
176
177	case DLT_RAW:
178	case DLT_NULL:
179		sockp->sa_family = AF_UNSPEC;
180		hlen = 0;
181		break;
182
183	case DLT_ATM_RFC1483:
184		/*
185		 * en atm driver requires 4-byte atm pseudo header.
186		 * though it isn't standard, vpi:vci needs to be
187		 * specified anyway.
188		 */
189		sockp->sa_family = AF_UNSPEC;
190		hlen = 12; 	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
191		break;
192
193	case DLT_PPP:
194		sockp->sa_family = AF_UNSPEC;
195		hlen = 4;	/* This should match PPP_HDRLEN */
196		break;
197
198	default:
199		return (EIO);
200	}
201
202	len = uio->uio_resid;
203	*datlen = len - hlen;
204	if ((unsigned)len > MCLBYTES)
205		return (EIO);
206
207	MGETHDR(m, M_TRYWAIT, MT_DATA);
208	if (m == 0)
209		return (ENOBUFS);
210	if (len > MHLEN) {
211		MCLGET(m, M_TRYWAIT);
212		if ((m->m_flags & M_EXT) == 0) {
213			error = ENOBUFS;
214			goto bad;
215		}
216	}
217	m->m_pkthdr.len = m->m_len = len;
218	m->m_pkthdr.rcvif = NULL;
219	*mp = m;
220	/*
221	 * Make room for link header.
222	 */
223	if (hlen != 0) {
224		m->m_pkthdr.len -= hlen;
225		m->m_len -= hlen;
226#if BSD >= 199103
227		m->m_data += hlen; /* XXX */
228#else
229		m->m_off += hlen;
230#endif
231		error = uiomove((caddr_t)sockp->sa_data, hlen, uio);
232		if (error)
233			goto bad;
234	}
235	error = uiomove(mtod(m, caddr_t), len - hlen, uio);
236	if (!error)
237		return (0);
238 bad:
239	m_freem(m);
240	return (error);
241}
242
243/*
244 * Attach file to the bpf interface, i.e. make d listen on bp.
245 */
246static void
247bpf_attachd(d, bp)
248	struct bpf_d *d;
249	struct bpf_if *bp;
250{
251	/*
252	 * Point d at bp, and add d to the interface's list of listeners.
253	 * Finally, point the driver's bpf cookie at the interface so
254	 * it will divert packets to bpf.
255	 */
256	BPFIF_LOCK(bp);
257	d->bd_bif = bp;
258	d->bd_next = bp->bif_dlist;
259	bp->bif_dlist = d;
260
261	bp->bif_ifp->if_bpf = bp;
262	BPFIF_UNLOCK(bp);
263}
264
265/*
266 * Detach a file from its interface.
267 */
268static void
269bpf_detachd(d)
270	struct bpf_d *d;
271{
272	int error;
273	struct bpf_d **p;
274	struct bpf_if *bp;
275
276	bp = d->bd_bif;
277	/*
278	 * Check if this descriptor had requested promiscuous mode.
279	 * If so, turn it off.
280	 */
281	if (d->bd_promisc) {
282		d->bd_promisc = 0;
283		error = ifpromisc(bp->bif_ifp, 0);
284		if (error != 0 && error != ENXIO) {
285			/*
286			 * ENXIO can happen if a pccard is unplugged
287			 * Something is really wrong if we were able to put
288			 * the driver into promiscuous mode, but can't
289			 * take it out.
290			 */
291			printf("%s%d: ifpromisc failed %d\n",
292			    bp->bif_ifp->if_name, bp->bif_ifp->if_unit, error);
293		}
294	}
295	/* Remove d from the interface's descriptor list. */
296	BPFIF_LOCK(bp);
297	p = &bp->bif_dlist;
298	while (*p != d) {
299		p = &(*p)->bd_next;
300		if (*p == 0)
301			panic("bpf_detachd: descriptor not in list");
302	}
303	*p = (*p)->bd_next;
304	if (bp->bif_dlist == 0)
305		/*
306		 * Let the driver know that there are no more listeners.
307		 */
308		d->bd_bif->bif_ifp->if_bpf = 0;
309	BPFIF_UNLOCK(bp);
310	d->bd_bif = 0;
311}
312
313/*
314 * Open ethernet device.  Returns ENXIO for illegal minor device number,
315 * EBUSY if file is open by another process.
316 */
317/* ARGSUSED */
318static	int
319bpfopen(dev, flags, fmt, td)
320	dev_t dev;
321	int flags;
322	int fmt;
323	struct thread *td;
324{
325	struct bpf_d *d;
326
327	mtx_lock(&bpf_mtx);
328	d = dev->si_drv1;
329	/*
330	 * Each minor can be opened by only one process.  If the requested
331	 * minor is in use, return EBUSY.
332	 */
333	if (d) {
334		mtx_unlock(&bpf_mtx);
335		return (EBUSY);
336	}
337	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
338	mtx_unlock(&bpf_mtx);
339
340	if ((dev->si_flags & SI_NAMED) == 0)
341		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
342		    "bpf%d", dev2unit(dev));
343	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
344	dev->si_drv1 = d;
345	d->bd_bufsize = bpf_bufsize;
346	d->bd_sig = SIGIO;
347	d->bd_seesent = 1;
348#ifdef MAC
349	mac_init_bpfdesc(d);
350	mac_create_bpfdesc(td->td_ucred, d);
351#endif
352	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
353	callout_init(&d->bd_callout, 1);
354
355	return (0);
356}
357
358/*
359 * Close the descriptor by detaching it from its interface,
360 * deallocating its buffers, and marking it free.
361 */
362/* ARGSUSED */
363static	int
364bpfclose(dev, flags, fmt, td)
365	dev_t dev;
366	int flags;
367	int fmt;
368	struct thread *td;
369{
370	struct bpf_d *d = dev->si_drv1;
371
372	BPFD_LOCK(d);
373	if (d->bd_state == BPF_WAITING)
374		callout_stop(&d->bd_callout);
375	d->bd_state = BPF_IDLE;
376	BPFD_UNLOCK(d);
377	funsetown(&d->bd_sigio);
378	mtx_lock(&bpf_mtx);
379	if (d->bd_bif)
380		bpf_detachd(d);
381	mtx_unlock(&bpf_mtx);
382#ifdef MAC
383	mac_destroy_bpfdesc(d);
384#endif /* MAC */
385	bpf_freed(d);
386	dev->si_drv1 = 0;
387	free(d, M_BPF);
388
389	return (0);
390}
391
392
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer and leave the free slot empty.
 *
 * The expansion is a single statement, so the macro may be used as the
 * unbraced body of an if/else; invoke it with a trailing semicolon.
 * The argument is evaluated more than once and must have no side effects.
 */
#define ROTATE_BUFFERS(d) do { \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = 0; \
} while (0)
404/*
405 *  bpfread - read next chunk of packets from buffers
406 */
407static	int
408bpfread(dev, uio, ioflag)
409	dev_t dev;
410	register struct uio *uio;
411	int ioflag;
412{
413	struct bpf_d *d = dev->si_drv1;
414	int timed_out;
415	int error;
416
417	/*
418	 * Restrict application to use a buffer the same size as
419	 * as kernel buffers.
420	 */
421	if (uio->uio_resid != d->bd_bufsize)
422		return (EINVAL);
423
424	BPFD_LOCK(d);
425	if (d->bd_state == BPF_WAITING)
426		callout_stop(&d->bd_callout);
427	timed_out = (d->bd_state == BPF_TIMED_OUT);
428	d->bd_state = BPF_IDLE;
429	/*
430	 * If the hold buffer is empty, then do a timed sleep, which
431	 * ends when the timeout expires or when enough packets
432	 * have arrived to fill the store buffer.
433	 */
434	while (d->bd_hbuf == 0) {
435		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
436			/*
437			 * A packet(s) either arrived since the previous
438			 * read or arrived while we were asleep.
439			 * Rotate the buffers and return what's here.
440			 */
441			ROTATE_BUFFERS(d);
442			break;
443		}
444
445		/*
446		 * No data is available, check to see if the bpf device
447		 * is still pointed at a real interface.  If not, return
448		 * ENXIO so that the userland process knows to rebind
449		 * it before using it again.
450		 */
451		if (d->bd_bif == NULL) {
452			BPFD_UNLOCK(d);
453			return (ENXIO);
454		}
455
456		if (ioflag & IO_NDELAY) {
457			BPFD_UNLOCK(d);
458			return (EWOULDBLOCK);
459		}
460		error = msleep((caddr_t)d, &d->bd_mtx, PRINET|PCATCH,
461		     "bpf", d->bd_rtout);
462		if (error == EINTR || error == ERESTART) {
463			BPFD_UNLOCK(d);
464			return (error);
465		}
466		if (error == EWOULDBLOCK) {
467			/*
468			 * On a timeout, return what's in the buffer,
469			 * which may be nothing.  If there is something
470			 * in the store buffer, we can rotate the buffers.
471			 */
472			if (d->bd_hbuf)
473				/*
474				 * We filled up the buffer in between
475				 * getting the timeout and arriving
476				 * here, so we don't need to rotate.
477				 */
478				break;
479
480			if (d->bd_slen == 0) {
481				BPFD_UNLOCK(d);
482				return (0);
483			}
484			ROTATE_BUFFERS(d);
485			break;
486		}
487	}
488	/*
489	 * At this point, we know we have something in the hold slot.
490	 */
491	BPFD_UNLOCK(d);
492
493	/*
494	 * Move data from hold buffer into user space.
495	 * We know the entire buffer is transferred since
496	 * we checked above that the read buffer is bpf_bufsize bytes.
497	 */
498	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
499
500	BPFD_LOCK(d);
501	d->bd_fbuf = d->bd_hbuf;
502	d->bd_hbuf = 0;
503	d->bd_hlen = 0;
504	BPFD_UNLOCK(d);
505
506	return (error);
507}
508
509
510/*
511 * If there are processes sleeping on this descriptor, wake them up.
512 */
513static __inline void
514bpf_wakeup(d)
515	register struct bpf_d *d;
516{
517	if (d->bd_state == BPF_WAITING) {
518		callout_stop(&d->bd_callout);
519		d->bd_state = BPF_IDLE;
520	}
521	wakeup((caddr_t)d);
522	if (d->bd_async && d->bd_sig && d->bd_sigio)
523		pgsigio(&d->bd_sigio, d->bd_sig, 0);
524
525	selwakeup(&d->bd_sel);
526}
527
528static void
529bpf_timed_out(arg)
530	void *arg;
531{
532	struct bpf_d *d = (struct bpf_d *)arg;
533
534	BPFD_LOCK(d);
535	if (d->bd_state == BPF_WAITING) {
536		d->bd_state = BPF_TIMED_OUT;
537		if (d->bd_slen != 0)
538			bpf_wakeup(d);
539	}
540	BPFD_UNLOCK(d);
541}
542
543static	int
544bpfwrite(dev, uio, ioflag)
545	dev_t dev;
546	struct uio *uio;
547	int ioflag;
548{
549	struct bpf_d *d = dev->si_drv1;
550	struct ifnet *ifp;
551	struct mbuf *m;
552	int error;
553	static struct sockaddr dst;
554	int datlen;
555
556	if (d->bd_bif == 0)
557		return (ENXIO);
558
559	ifp = d->bd_bif->bif_ifp;
560
561	if (uio->uio_resid == 0)
562		return (0);
563
564	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen);
565	if (error)
566		return (error);
567
568	if (datlen > ifp->if_mtu)
569		return (EMSGSIZE);
570
571	if (d->bd_hdrcmplt)
572		dst.sa_family = pseudo_AF_HDRCMPLT;
573
574	mtx_lock(&Giant);
575#ifdef MAC
576	mac_create_mbuf_from_bpfdesc(d, m);
577#endif
578	error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0);
579	mtx_unlock(&Giant);
580	/*
581	 * The driver frees the mbuf.
582	 */
583	return (error);
584}
585
586/*
587 * Reset a descriptor by flushing its packet buffer and clearing the
588 * receive and drop counts.
589 */
590static void
591reset_d(d)
592	struct bpf_d *d;
593{
594
595	mtx_assert(&d->bd_mtx, MA_OWNED);
596	if (d->bd_hbuf) {
597		/* Free the hold buffer. */
598		d->bd_fbuf = d->bd_hbuf;
599		d->bd_hbuf = 0;
600	}
601	d->bd_slen = 0;
602	d->bd_hlen = 0;
603	d->bd_rcount = 0;
604	d->bd_dcount = 0;
605}
606
607/*
608 *  FIONREAD		Check for read packet available.
609 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
610 *  BIOCGBLEN		Get buffer len [for read()].
611 *  BIOCSETF		Set ethernet read filter.
612 *  BIOCFLUSH		Flush read packet buffer.
613 *  BIOCPROMISC		Put interface into promiscuous mode.
614 *  BIOCGDLT		Get link layer type.
615 *  BIOCGETIF		Get interface name.
616 *  BIOCSETIF		Set interface.
617 *  BIOCSRTIMEOUT	Set read timeout.
618 *  BIOCGRTIMEOUT	Get read timeout.
619 *  BIOCGSTATS		Get packet stats.
620 *  BIOCIMMEDIATE	Set immediate mode.
621 *  BIOCVERSION		Get filter language version.
622 *  BIOCGHDRCMPLT	Get "header already complete" flag
623 *  BIOCSHDRCMPLT	Set "header already complete" flag
624 *  BIOCGSEESENT	Get "see packets sent" flag
625 *  BIOCSSEESENT	Set "see packets sent" flag
626 */
627/* ARGSUSED */
628static	int
629bpfioctl(dev, cmd, addr, flags, td)
630	dev_t dev;
631	u_long cmd;
632	caddr_t addr;
633	int flags;
634	struct thread *td;
635{
636	struct bpf_d *d = dev->si_drv1;
637	int error = 0;
638
639	BPFD_LOCK(d);
640	if (d->bd_state == BPF_WAITING)
641		callout_stop(&d->bd_callout);
642	d->bd_state = BPF_IDLE;
643	BPFD_UNLOCK(d);
644
645	switch (cmd) {
646
647	default:
648		error = EINVAL;
649		break;
650
651	/*
652	 * Check for read packet available.
653	 */
654	case FIONREAD:
655		{
656			int n;
657
658			BPFD_LOCK(d);
659			n = d->bd_slen;
660			if (d->bd_hbuf)
661				n += d->bd_hlen;
662			BPFD_UNLOCK(d);
663
664			*(int *)addr = n;
665			break;
666		}
667
668	case SIOCGIFADDR:
669		{
670			struct ifnet *ifp;
671
672			if (d->bd_bif == 0)
673				error = EINVAL;
674			else {
675				ifp = d->bd_bif->bif_ifp;
676				error = (*ifp->if_ioctl)(ifp, cmd, addr);
677			}
678			break;
679		}
680
681	/*
682	 * Get buffer len [for read()].
683	 */
684	case BIOCGBLEN:
685		*(u_int *)addr = d->bd_bufsize;
686		break;
687
688	/*
689	 * Set buffer length.
690	 */
691	case BIOCSBLEN:
692		if (d->bd_bif != 0)
693			error = EINVAL;
694		else {
695			register u_int size = *(u_int *)addr;
696
697			if (size > bpf_maxbufsize)
698				*(u_int *)addr = size = bpf_maxbufsize;
699			else if (size < BPF_MINBUFSIZE)
700				*(u_int *)addr = size = BPF_MINBUFSIZE;
701			d->bd_bufsize = size;
702		}
703		break;
704
705	/*
706	 * Set link layer read filter.
707	 */
708	case BIOCSETF:
709		error = bpf_setf(d, (struct bpf_program *)addr);
710		break;
711
712	/*
713	 * Flush read packet buffer.
714	 */
715	case BIOCFLUSH:
716		BPFD_LOCK(d);
717		reset_d(d);
718		BPFD_UNLOCK(d);
719		break;
720
721	/*
722	 * Put interface into promiscuous mode.
723	 */
724	case BIOCPROMISC:
725		if (d->bd_bif == 0) {
726			/*
727			 * No interface attached yet.
728			 */
729			error = EINVAL;
730			break;
731		}
732		if (d->bd_promisc == 0) {
733			mtx_lock(&Giant);
734			error = ifpromisc(d->bd_bif->bif_ifp, 1);
735			mtx_unlock(&Giant);
736			if (error == 0)
737				d->bd_promisc = 1;
738		}
739		break;
740
741	/*
742	 * Get device parameters.
743	 */
744	case BIOCGDLT:
745		if (d->bd_bif == 0)
746			error = EINVAL;
747		else
748			*(u_int *)addr = d->bd_bif->bif_dlt;
749		break;
750
751	/*
752	 * Get interface name.
753	 */
754	case BIOCGETIF:
755		if (d->bd_bif == 0)
756			error = EINVAL;
757		else {
758			struct ifnet *const ifp = d->bd_bif->bif_ifp;
759			struct ifreq *const ifr = (struct ifreq *)addr;
760
761			snprintf(ifr->ifr_name, sizeof(ifr->ifr_name),
762			    "%s%d", ifp->if_name, ifp->if_unit);
763		}
764		break;
765
766	/*
767	 * Set interface.
768	 */
769	case BIOCSETIF:
770		error = bpf_setif(d, (struct ifreq *)addr);
771		break;
772
773	/*
774	 * Set read timeout.
775	 */
776	case BIOCSRTIMEOUT:
777		{
778			struct timeval *tv = (struct timeval *)addr;
779
780			/*
781			 * Subtract 1 tick from tvtohz() since this isn't
782			 * a one-shot timer.
783			 */
784			if ((error = itimerfix(tv)) == 0)
785				d->bd_rtout = tvtohz(tv) - 1;
786			break;
787		}
788
789	/*
790	 * Get read timeout.
791	 */
792	case BIOCGRTIMEOUT:
793		{
794			struct timeval *tv = (struct timeval *)addr;
795
796			tv->tv_sec = d->bd_rtout / hz;
797			tv->tv_usec = (d->bd_rtout % hz) * tick;
798			break;
799		}
800
801	/*
802	 * Get packet stats.
803	 */
804	case BIOCGSTATS:
805		{
806			struct bpf_stat *bs = (struct bpf_stat *)addr;
807
808			bs->bs_recv = d->bd_rcount;
809			bs->bs_drop = d->bd_dcount;
810			break;
811		}
812
813	/*
814	 * Set immediate mode.
815	 */
816	case BIOCIMMEDIATE:
817		d->bd_immediate = *(u_int *)addr;
818		break;
819
820	case BIOCVERSION:
821		{
822			struct bpf_version *bv = (struct bpf_version *)addr;
823
824			bv->bv_major = BPF_MAJOR_VERSION;
825			bv->bv_minor = BPF_MINOR_VERSION;
826			break;
827		}
828
829	/*
830	 * Get "header already complete" flag
831	 */
832	case BIOCGHDRCMPLT:
833		*(u_int *)addr = d->bd_hdrcmplt;
834		break;
835
836	/*
837	 * Set "header already complete" flag
838	 */
839	case BIOCSHDRCMPLT:
840		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
841		break;
842
843	/*
844	 * Get "see sent packets" flag
845	 */
846	case BIOCGSEESENT:
847		*(u_int *)addr = d->bd_seesent;
848		break;
849
850	/*
851	 * Set "see sent packets" flag
852	 */
853	case BIOCSSEESENT:
854		d->bd_seesent = *(u_int *)addr;
855		break;
856
857	case FIONBIO:		/* Non-blocking I/O */
858		break;
859
860	case FIOASYNC:		/* Send signal on receive packets */
861		d->bd_async = *(int *)addr;
862		break;
863
864	case FIOSETOWN:
865		error = fsetown(*(int *)addr, &d->bd_sigio);
866		break;
867
868	case FIOGETOWN:
869		*(int *)addr = fgetown(d->bd_sigio);
870		break;
871
872	/* This is deprecated, FIOSETOWN should be used instead. */
873	case TIOCSPGRP:
874		error = fsetown(-(*(int *)addr), &d->bd_sigio);
875		break;
876
877	/* This is deprecated, FIOGETOWN should be used instead. */
878	case TIOCGPGRP:
879		*(int *)addr = -fgetown(d->bd_sigio);
880		break;
881
882	case BIOCSRSIG:		/* Set receive signal */
883		{
884		 	u_int sig;
885
886			sig = *(u_int *)addr;
887
888			if (sig >= NSIG)
889				error = EINVAL;
890			else
891				d->bd_sig = sig;
892			break;
893		}
894	case BIOCGRSIG:
895		*(u_int *)addr = d->bd_sig;
896		break;
897	}
898	return (error);
899}
900
901/*
902 * Set d's packet filter program to fp.  If this file already has a filter,
903 * free it and replace it.  Returns EINVAL for bogus requests.
904 */
905static int
906bpf_setf(d, fp)
907	struct bpf_d *d;
908	struct bpf_program *fp;
909{
910	struct bpf_insn *fcode, *old;
911	u_int flen, size;
912
913	old = d->bd_filter;
914	if (fp->bf_insns == 0) {
915		if (fp->bf_len != 0)
916			return (EINVAL);
917		BPFD_LOCK(d);
918		d->bd_filter = 0;
919		reset_d(d);
920		BPFD_UNLOCK(d);
921		if (old != 0)
922			free((caddr_t)old, M_BPF);
923		return (0);
924	}
925	flen = fp->bf_len;
926	if (flen > BPF_MAXINSNS)
927		return (EINVAL);
928
929	size = flen * sizeof(*fp->bf_insns);
930	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
931	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
932	    bpf_validate(fcode, (int)flen)) {
933		BPFD_LOCK(d);
934		d->bd_filter = fcode;
935		reset_d(d);
936		BPFD_UNLOCK(d);
937		if (old != 0)
938			free((caddr_t)old, M_BPF);
939
940		return (0);
941	}
942	free((caddr_t)fcode, M_BPF);
943	return (EINVAL);
944}
945
946/*
947 * Detach a file from its current interface (if attached at all) and attach
948 * to the interface indicated by the name stored in ifr.
949 * Return an errno or 0.
950 */
951static int
952bpf_setif(d, ifr)
953	struct bpf_d *d;
954	struct ifreq *ifr;
955{
956	struct bpf_if *bp;
957	int error;
958	struct ifnet *theywant;
959
960	theywant = ifunit(ifr->ifr_name);
961	if (theywant == 0)
962		return ENXIO;
963
964	/*
965	 * Look through attached interfaces for the named one.
966	 */
967	mtx_lock(&bpf_mtx);
968	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
969		struct ifnet *ifp = bp->bif_ifp;
970
971		if (ifp == 0 || ifp != theywant)
972			continue;
973
974		mtx_unlock(&bpf_mtx);
975		/*
976		 * We found the requested interface.
977		 * If it's not up, return an error.
978		 * Allocate the packet buffers if we need to.
979		 * If we're already attached to requested interface,
980		 * just flush the buffer.
981		 */
982		if ((ifp->if_flags & IFF_UP) == 0)
983			return (ENETDOWN);
984
985		if (d->bd_sbuf == 0) {
986			error = bpf_allocbufs(d);
987			if (error != 0)
988				return (error);
989		}
990		if (bp != d->bd_bif) {
991			if (d->bd_bif)
992				/*
993				 * Detach if attached to something else.
994				 */
995				bpf_detachd(d);
996
997			bpf_attachd(d, bp);
998		}
999		BPFD_LOCK(d);
1000		reset_d(d);
1001		BPFD_UNLOCK(d);
1002		return (0);
1003	}
1004	mtx_unlock(&bpf_mtx);
1005	/* Not found. */
1006	return (ENXIO);
1007}
1008
1009/*
1010 * Support for select() and poll() system calls
1011 *
1012 * Return true iff the specific operation will not block indefinitely.
1013 * Otherwise, return false but make a note that a selwakeup() must be done.
1014 */
1015int
1016bpfpoll(dev, events, td)
1017	register dev_t dev;
1018	int events;
1019	struct thread *td;
1020{
1021	struct bpf_d *d;
1022	int revents;
1023
1024	d = dev->si_drv1;
1025	if (d->bd_bif == NULL)
1026		return (ENXIO);
1027
1028	revents = events & (POLLOUT | POLLWRNORM);
1029	BPFD_LOCK(d);
1030	if (events & (POLLIN | POLLRDNORM)) {
1031		/*
1032		 * An imitation of the FIONREAD ioctl code.
1033		 * XXX not quite.  An exact imitation:
1034		 *	if (d->b_slen != 0 ||
1035		 *	    (d->bd_hbuf != NULL && d->bd_hlen != 0)
1036		 */
1037		if (d->bd_hlen != 0 ||
1038		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
1039		    d->bd_slen != 0))
1040			revents |= events & (POLLIN | POLLRDNORM);
1041		else {
1042			selrecord(td, &d->bd_sel);
1043			/* Start the read timeout if necessary. */
1044			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1045				callout_reset(&d->bd_callout, d->bd_rtout,
1046				    bpf_timed_out, d);
1047				d->bd_state = BPF_WAITING;
1048			}
1049		}
1050	}
1051	BPFD_UNLOCK(d);
1052	return (revents);
1053}
1054
1055/*
1056 * Incoming linkage from device drivers.  Process the packet pkt, of length
1057 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1058 * by each process' filter, and if accepted, stashed into the corresponding
1059 * buffer.
1060 */
1061void
1062bpf_tap(ifp, pkt, pktlen)
1063	struct ifnet *ifp;
1064	register u_char *pkt;
1065	register u_int pktlen;
1066{
1067	struct bpf_if *bp;
1068	register struct bpf_d *d;
1069	register u_int slen;
1070
1071	bp = ifp->if_bpf;
1072	BPFIF_LOCK(bp);
1073	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1074		BPFD_LOCK(d);
1075		++d->bd_rcount;
1076		slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen);
1077		if (slen != 0) {
1078#ifdef MAC
1079			if (mac_check_bpfdesc_receive(d, ifp) == 0)
1080#endif
1081				catchpacket(d, pkt, pktlen, slen, bcopy);
1082		}
1083		BPFD_UNLOCK(d);
1084	}
1085	BPFIF_UNLOCK(bp);
1086}
1087
1088/*
1089 * Copy data from an mbuf chain into a buffer.  This code is derived
1090 * from m_copydata in sys/uipc_mbuf.c.
1091 */
1092static void
1093bpf_mcopy(src_arg, dst_arg, len)
1094	const void *src_arg;
1095	void *dst_arg;
1096	register size_t len;
1097{
1098	register const struct mbuf *m;
1099	register u_int count;
1100	u_char *dst;
1101
1102	m = src_arg;
1103	dst = dst_arg;
1104	while (len > 0) {
1105		if (m == 0)
1106			panic("bpf_mcopy");
1107		count = min(m->m_len, len);
1108		bcopy(mtod(m, void *), dst, count);
1109		m = m->m_next;
1110		dst += count;
1111		len -= count;
1112	}
1113}
1114
1115/*
1116 * Incoming linkage from device drivers, when packet is in an mbuf chain.
1117 */
1118void
1119bpf_mtap(ifp, m)
1120	struct ifnet *ifp;
1121	struct mbuf *m;
1122{
1123	struct bpf_if *bp = ifp->if_bpf;
1124	struct bpf_d *d;
1125	u_int pktlen, slen;
1126
1127	pktlen = m_length(m, NULL);
1128	if (pktlen == m->m_len)
1129		return(bpf_tap(ifp, mtod(m, u_char *), pktlen));
1130
1131	BPFIF_LOCK(bp);
1132	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1133		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
1134			continue;
1135		BPFD_LOCK(d);
1136		++d->bd_rcount;
1137		slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
1138		if (slen != 0)
1139			catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcopy);
1140		BPFD_UNLOCK(d);
1141	}
1142	BPFIF_UNLOCK(bp);
1143}
1144
1145/*
1146 * Move the packet data from interface memory (pkt) into the
1147 * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
1148 * otherwise 0.  "copy" is the routine called to do the actual data
1149 * transfer.  bcopy is passed in to copy contiguous chunks, while
1150 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
1151 * pkt is really an mbuf.
1152 */
1153static void
1154catchpacket(d, pkt, pktlen, snaplen, cpfn)
1155	register struct bpf_d *d;
1156	register u_char *pkt;
1157	register u_int pktlen, snaplen;
1158	register void (*cpfn)(const void *, void *, size_t);
1159{
1160	register struct bpf_hdr *hp;
1161	register int totlen, curlen;
1162	register int hdrlen = d->bd_bif->bif_hdrlen;
1163	/*
1164	 * Figure out how many bytes to move.  If the packet is
1165	 * greater or equal to the snapshot length, transfer that
1166	 * much.  Otherwise, transfer the whole packet (unless
1167	 * we hit the buffer size limit).
1168	 */
1169	totlen = hdrlen + min(snaplen, pktlen);
1170	if (totlen > d->bd_bufsize)
1171		totlen = d->bd_bufsize;
1172
1173	/*
1174	 * Round up the end of the previous packet to the next longword.
1175	 */
1176	curlen = BPF_WORDALIGN(d->bd_slen);
1177	if (curlen + totlen > d->bd_bufsize) {
1178		/*
1179		 * This packet will overflow the storage buffer.
1180		 * Rotate the buffers if we can, then wakeup any
1181		 * pending reads.
1182		 */
1183		if (d->bd_fbuf == 0) {
1184			/*
1185			 * We haven't completed the previous read yet,
1186			 * so drop the packet.
1187			 */
1188			++d->bd_dcount;
1189			return;
1190		}
1191		ROTATE_BUFFERS(d);
1192		bpf_wakeup(d);
1193		curlen = 0;
1194	}
1195	else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
1196		/*
1197		 * Immediate mode is set, or the read timeout has
1198		 * already expired during a select call.  A packet
1199		 * arrived, so the reader should be woken up.
1200		 */
1201		bpf_wakeup(d);
1202
1203	/*
1204	 * Append the bpf header.
1205	 */
1206	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
1207	microtime(&hp->bh_tstamp);
1208	hp->bh_datalen = pktlen;
1209	hp->bh_hdrlen = hdrlen;
1210	/*
1211	 * Copy the packet data into the store buffer and update its length.
1212	 */
1213	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
1214	d->bd_slen = curlen + totlen;
1215}
1216
1217/*
1218 * Initialize all nonzero fields of a descriptor.
1219 */
1220static int
1221bpf_allocbufs(d)
1222	register struct bpf_d *d;
1223{
1224	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1225	if (d->bd_fbuf == 0)
1226		return (ENOBUFS);
1227
1228	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1229	if (d->bd_sbuf == 0) {
1230		free(d->bd_fbuf, M_BPF);
1231		return (ENOBUFS);
1232	}
1233	d->bd_slen = 0;
1234	d->bd_hlen = 0;
1235	return (0);
1236}
1237
1238/*
1239 * Free buffers currently in use by a descriptor.
1240 * Called on close.
1241 */
1242static void
1243bpf_freed(d)
1244	register struct bpf_d *d;
1245{
1246	/*
1247	 * We don't need to lock out interrupts since this descriptor has
1248	 * been detached from its interface and it yet hasn't been marked
1249	 * free.
1250	 */
1251	if (d->bd_sbuf != 0) {
1252		free(d->bd_sbuf, M_BPF);
1253		if (d->bd_hbuf != 0)
1254			free(d->bd_hbuf, M_BPF);
1255		if (d->bd_fbuf != 0)
1256			free(d->bd_fbuf, M_BPF);
1257	}
1258	if (d->bd_filter)
1259		free((caddr_t)d->bd_filter, M_BPF);
1260	mtx_destroy(&d->bd_mtx);
1261}
1262
1263/*
1264 * Attach an interface to bpf.  ifp is a pointer to the structure
1265 * defining the interface to be attached, dlt is the link layer type,
1266 * and hdrlen is the fixed size of the link header (variable length
1267 * headers are not yet supporrted).
1268 */
1269void
1270bpfattach(ifp, dlt, hdrlen)
1271	struct ifnet *ifp;
1272	u_int dlt, hdrlen;
1273{
1274	struct bpf_if *bp;
1275	bp = (struct bpf_if *)malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
1276	if (bp == 0)
1277		panic("bpfattach");
1278
1279	bp->bif_ifp = ifp;
1280	bp->bif_dlt = dlt;
1281	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
1282
1283	mtx_lock(&bpf_mtx);
1284	bp->bif_next = bpf_iflist;
1285	bpf_iflist = bp;
1286	mtx_unlock(&bpf_mtx);
1287
1288	bp->bif_ifp->if_bpf = 0;
1289
1290	/*
1291	 * Compute the length of the bpf header.  This is not necessarily
1292	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1293	 * that the network layer header begins on a longword boundary (for
1294	 * performance reasons and to alleviate alignment restrictions).
1295	 */
1296	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1297
1298	if (bootverbose)
1299		printf("bpf: %s%d attached\n", ifp->if_name, ifp->if_unit);
1300}
1301
1302/*
1303 * Detach bpf from an interface.  This involves detaching each descriptor
1304 * associated with the interface, and leaving bd_bif NULL.  Notify each
1305 * descriptor as it's detached so that any sleepers wake up and get
1306 * ENXIO.
1307 */
1308void
1309bpfdetach(ifp)
1310	struct ifnet *ifp;
1311{
1312	struct bpf_if	*bp, *bp_prev;
1313	struct bpf_d	*d;
1314
1315	mtx_lock(&bpf_mtx);
1316
1317	/* Locate BPF interface information */
1318	bp_prev = NULL;
1319	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1320		if (ifp == bp->bif_ifp)
1321			break;
1322		bp_prev = bp;
1323	}
1324
1325	/* Interface wasn't attached */
1326	if (bp->bif_ifp == NULL) {
1327		mtx_unlock(&bpf_mtx);
1328		printf("bpfdetach: %s%d was not attached\n", ifp->if_name,
1329		    ifp->if_unit);
1330		return;
1331	}
1332
1333	if (bp_prev) {
1334		bp_prev->bif_next = bp->bif_next;
1335	} else {
1336		bpf_iflist = bp->bif_next;
1337	}
1338
1339	while ((d = bp->bif_dlist) != NULL) {
1340		bpf_detachd(d);
1341		BPFD_LOCK(d);
1342		bpf_wakeup(d);
1343		BPFD_UNLOCK(d);
1344	}
1345
1346	mtx_destroy(&bp->bif_mtx);
1347	free(bp, M_BPF);
1348
1349	mtx_unlock(&bpf_mtx);
1350}
1351
1352static void bpf_drvinit(void *unused);
1353
1354static void bpf_clone(void *arg, char *name, int namelen, dev_t *dev);
1355
1356static void
1357bpf_clone(arg, name, namelen, dev)
1358	void *arg;
1359	char *name;
1360	int namelen;
1361	dev_t *dev;
1362{
1363	int u;
1364
1365	if (*dev != NODEV)
1366		return;
1367	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
1368		return;
1369	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
1370	    "bpf%d", u);
1371	(*dev)->si_flags |= SI_CHEAPCLONE;
1372	return;
1373}
1374
1375static void
1376bpf_drvinit(unused)
1377	void *unused;
1378{
1379
1380	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
1381	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
1382	cdevsw_add(&bpf_cdevsw);
1383}
1384
1385SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL)
1386
1387#else /* !DEV_BPF && !NETGRAPH_BPF */
1388/*
1389 * NOP stubs to allow bpf-using drivers to load and function.
1390 *
1391 * A 'better' implementation would allow the core bpf functionality
1392 * to be loaded at runtime.
1393 */
1394
1395void
1396bpf_tap(ifp, pkt, pktlen)
1397	struct ifnet *ifp;
1398	register u_char *pkt;
1399	register u_int pktlen;
1400{
1401}
1402
/*
 * Stub for kernels built without DEV_BPF and NETGRAPH_BPF: the mbuf
 * handed in is ignored.
 */
void
bpf_mtap(struct ifnet *ifp, struct mbuf *m)
{
	/* Intentionally empty. */
}
1409
1410void
1411bpfattach(ifp, dlt, hdrlen)
1412	struct ifnet *ifp;
1413	u_int dlt, hdrlen;
1414{
1415}
1416
/*
 * Stub for kernels built without DEV_BPF and NETGRAPH_BPF: detaching an
 * interface does nothing.
 */
void
bpfdetach(struct ifnet *ifp)
{
	/* Intentionally empty. */
}
1422
1423u_int
1424bpf_filter(pc, p, wirelen, buflen)
1425	register const struct bpf_insn *pc;
1426	register u_char *p;
1427	u_int wirelen;
1428	register u_int buflen;
1429{
1430	return -1;	/* "no filter" behaviour */
1431}
1432
/*
 * Stub for kernels built without DEV_BPF and NETGRAPH_BPF.  No program
 * is inspected; the result is always 0 (false).
 */
int
bpf_validate(const struct bpf_insn *f, int len)
{
	return (0);		/* false */
}
1440
1441#endif /* !DEV_BPF && !NETGRAPH_BPF */
1442