/* bpf.c, revision 72784 */
1/*
2 * Copyright (c) 1990, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from the Stanford/CMU enet packet filter,
6 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
7 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
8 * Berkeley Laboratory.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *      @(#)bpf.c	8.2 (Berkeley) 3/28/94
39 *
40 * $FreeBSD: head/sys/net/bpf.c 72784 2001-02-21 05:34:34Z rwatson $
41 */
42
43#include "opt_bpf.h"
44#include "opt_netgraph.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/conf.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/time.h>
52#include <sys/proc.h>
53#include <sys/signalvar.h>
54#include <sys/filio.h>
55#include <sys/sockio.h>
56#include <sys/ttycom.h>
57#include <sys/filedesc.h>
58
59#include <sys/poll.h>
60
61#include <sys/socket.h>
62#include <sys/vnode.h>
63
64#include <net/if.h>
65#include <net/bpf.h>
66#include <net/bpfdesc.h>
67
68#include <netinet/in.h>
69#include <netinet/if_ether.h>
70#include <sys/kernel.h>
71#include <sys/sysctl.h>
72
73static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
74
75#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
76
77#define PRINET  26			/* interruptible */
78
79/*
80 * The default read buffer size is patchable.
81 */
82static int bpf_bufsize = 4096;
83SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW,
84	&bpf_bufsize, 0, "");
85static int bpf_maxbufsize = BPF_MAXBUFSIZE;
86SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW,
87	&bpf_maxbufsize, 0, "");
88
89/*
90 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
91 */
92static struct bpf_if	*bpf_iflist;
93static struct mtx	bpf_mtx;		/* bpf global lock */
94
95static int	bpf_allocbufs __P((struct bpf_d *));
96static void	bpf_attachd __P((struct bpf_d *d, struct bpf_if *bp));
97static void	bpf_detachd __P((struct bpf_d *d));
98static void	bpf_freed __P((struct bpf_d *));
99static void	bpf_mcopy __P((const void *, void *, size_t));
100static int	bpf_movein __P((struct uio *, int,
101		    struct mbuf **, struct sockaddr *, int *));
102static int	bpf_setif __P((struct bpf_d *, struct ifreq *));
103static __inline void
104		bpf_wakeup __P((struct bpf_d *));
105static void	catchpacket __P((struct bpf_d *, u_char *, u_int,
106		    u_int, void (*)(const void *, void *, size_t)));
107static void	reset_d __P((struct bpf_d *));
108static int	 bpf_setf __P((struct bpf_d *, struct bpf_program *));
109
110static	d_open_t	bpfopen;
111static	d_close_t	bpfclose;
112static	d_read_t	bpfread;
113static	d_write_t	bpfwrite;
114static	d_ioctl_t	bpfioctl;
115static	d_poll_t	bpfpoll;
116
117#define CDEV_MAJOR 23
118static struct cdevsw bpf_cdevsw = {
119	/* open */	bpfopen,
120	/* close */	bpfclose,
121	/* read */	bpfread,
122	/* write */	bpfwrite,
123	/* ioctl */	bpfioctl,
124	/* poll */	bpfpoll,
125	/* mmap */	nommap,
126	/* strategy */	nostrategy,
127	/* name */	"bpf",
128	/* maj */	CDEV_MAJOR,
129	/* dump */	nodump,
130	/* psize */	nopsize,
131	/* flags */	0,
132	/* bmaj */	-1
133};
134
135
136static int
137bpf_movein(uio, linktype, mp, sockp, datlen)
138	register struct uio *uio;
139	int linktype, *datlen;
140	register struct mbuf **mp;
141	register struct sockaddr *sockp;
142{
143	struct mbuf *m;
144	int error;
145	int len;
146	int hlen;
147
148	/*
149	 * Build a sockaddr based on the data link layer type.
150	 * We do this at this level because the ethernet header
151	 * is copied directly into the data field of the sockaddr.
152	 * In the case of SLIP, there is no header and the packet
153	 * is forwarded as is.
154	 * Also, we are careful to leave room at the front of the mbuf
155	 * for the link level header.
156	 */
157	switch (linktype) {
158
159	case DLT_SLIP:
160		sockp->sa_family = AF_INET;
161		hlen = 0;
162		break;
163
164	case DLT_EN10MB:
165		sockp->sa_family = AF_UNSPEC;
166		/* XXX Would MAXLINKHDR be better? */
167		hlen = sizeof(struct ether_header);
168		break;
169
170	case DLT_FDDI:
171		sockp->sa_family = AF_IMPLINK;
172		hlen = 0;
173		break;
174
175	case DLT_RAW:
176	case DLT_NULL:
177		sockp->sa_family = AF_UNSPEC;
178		hlen = 0;
179		break;
180
181	case DLT_ATM_RFC1483:
182		/*
183		 * en atm driver requires 4-byte atm pseudo header.
184		 * though it isn't standard, vpi:vci needs to be
185		 * specified anyway.
186		 */
187		sockp->sa_family = AF_UNSPEC;
188		hlen = 12; 	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
189		break;
190
191	case DLT_PPP:
192		sockp->sa_family = AF_UNSPEC;
193		hlen = 4;	/* This should match PPP_HDRLEN */
194		break;
195
196	default:
197		return (EIO);
198	}
199
200	len = uio->uio_resid;
201	*datlen = len - hlen;
202	if ((unsigned)len > MCLBYTES)
203		return (EIO);
204
205	MGETHDR(m, M_TRYWAIT, MT_DATA);
206	if (m == 0)
207		return (ENOBUFS);
208	if (len > MHLEN) {
209		MCLGET(m, M_TRYWAIT);
210		if ((m->m_flags & M_EXT) == 0) {
211			error = ENOBUFS;
212			goto bad;
213		}
214	}
215	m->m_pkthdr.len = m->m_len = len;
216	m->m_pkthdr.rcvif = NULL;
217	*mp = m;
218	/*
219	 * Make room for link header.
220	 */
221	if (hlen != 0) {
222		m->m_pkthdr.len -= hlen;
223		m->m_len -= hlen;
224#if BSD >= 199103
225		m->m_data += hlen; /* XXX */
226#else
227		m->m_off += hlen;
228#endif
229		error = uiomove((caddr_t)sockp->sa_data, hlen, uio);
230		if (error)
231			goto bad;
232	}
233	error = uiomove(mtod(m, caddr_t), len - hlen, uio);
234	if (!error)
235		return (0);
236 bad:
237	m_freem(m);
238	return (error);
239}
240
241/*
242 * Attach file to the bpf interface, i.e. make d listen on bp.
243 */
static void
bpf_attachd(d, bp)
	struct bpf_d *d;
	struct bpf_if *bp;
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	/* Push d onto the head of bp's singly-linked listener list. */
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;

	/* A non-NULL if_bpf is the driver's cue to hand us packets. */
	bp->bif_ifp->if_bpf = bp;
	BPFIF_UNLOCK(bp);
}
262
263/*
264 * Detach a file from its interface.
265 */
static void
bpf_detachd(d)
	struct bpf_d *d;
{
	int error;
	struct bpf_d **p;
	struct bpf_if *bp;

	bp = d->bd_bif;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		error = ifpromisc(bp->bif_ifp, 0);
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			printf("%s%d: ifpromisc failed %d\n",
			    bp->bif_ifp->if_name, bp->bif_ifp->if_unit, error);
		}
	}
	/* Remove d from the interface's descriptor list. */
	BPFIF_LOCK(bp);
	/*
	 * Walk the links themselves (pointer-to-pointer) so d can be
	 * unlinked without tracking a separate predecessor.
	 */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0)
			panic("bpf_detachd: descriptor not in list");
	}
	*p = (*p)->bd_next;
	if (bp->bif_dlist == 0)
		/*
		 * Let the driver know that there are no more listeners.
		 */
		d->bd_bif->bif_ifp->if_bpf = 0;
	BPFIF_UNLOCK(bp);
	d->bd_bif = 0;
}
310
311/*
312 * Open ethernet device.  Returns ENXIO for illegal minor device number,
313 * EBUSY if file is open by another process.
314 */
315/* ARGSUSED */
static	int
bpfopen(dev, flags, fmt, p)
	dev_t dev;
	int flags;
	int fmt;
	struct proc *p;
{
	struct bpf_d *d;

	mtx_lock(&bpf_mtx);
	d = dev->si_drv1;
	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 */
	if (d) {
		mtx_unlock(&bpf_mtx);
		return (EBUSY);
	}
	/*
	 * Reserve the minor with a sentinel before dropping bpf_mtx, so a
	 * concurrent open fails with EBUSY while we allocate below.
	 */
	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
	mtx_unlock(&bpf_mtx);

	/* Materialize the device node if we were reached via a bare minor. */
	if ((dev->si_flags & SI_NAMED) == 0)
		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
		    "bpf%d", dev2unit(dev));
	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	dev->si_drv1 = d;
	/* Defaults: patchable buffer size, SIGIO for async, see-sent on. */
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_seesent = 1;
	mtx_init(&d->bd_mtx, devtoname(dev), MTX_DEF);

	return (0);
}
350
351/*
352 * Close the descriptor by detaching it from its interface,
353 * deallocating its buffers, and marking it free.
354 */
355/* ARGSUSED */
static	int
bpfclose(dev, flags, fmt, p)
	dev_t dev;
	int flags;
	int fmt;
	struct proc *p;
{
	struct bpf_d *d = dev->si_drv1;

	/* Drop async (SIGIO) ownership before tearing anything down. */
	funsetown(d->bd_sigio);
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
	/* Release buffers, filter, and the descriptor mutex. */
	bpf_freed(d);
	/* Clearing si_drv1 lets this minor be opened again. */
	dev->si_drv1 = 0;
	FREE(d, M_BPF);

	return (0);
}
376
377
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 * Wrapped in do { } while (0) so the expansion is a single statement
 * and is safe inside unbraced if/else bodies.
 */
#define ROTATE_BUFFERS(d) do { \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = 0; \
} while (0)
389/*
390 *  bpfread - read next chunk of packets from buffers
391 */
static	int
bpfread(dev, uio, ioflag)
	dev_t dev;
	register struct uio *uio;
	int ioflag;
{
	struct bpf_d *d = dev->si_drv1;
	int error;

	/*
	 * Restrict application to use a buffer the same size as
	 * as kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	BPFD_LOCK(d);
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == 0) {
		if (d->bd_immediate && d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (ioflag & IO_NDELAY) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		/* bd_mtx is dropped while asleep and reacquired on wakeup. */
		error = msleep((caddr_t)d, &d->bd_mtx, PRINET|PCATCH,
		     "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	/* Recycle the drained hold buffer into the free slot. */
	BPFD_LOCK(d);
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = 0;
	d->bd_hlen = 0;
	BPFD_UNLOCK(d);

	return (error);
}
488
489
490/*
491 * If there are processes sleeping on this descriptor, wake them up.
492 */
static __inline void
bpf_wakeup(d)
	register struct bpf_d *d;
{
	/* Rouse any thread sleeping in bpfread()'s msleep on d. */
	wakeup((caddr_t)d);
	/* Post SIGIO (or the configured signal) if async mode is on. */
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(d->bd_sigio, d->bd_sig, 0);

	/* Notify select()/poll() waiters. */
	selwakeup(&d->bd_sel);
	/* XXX */
	d->bd_sel.si_pid = 0;
}
505
506static	int
507bpfwrite(dev, uio, ioflag)
508	dev_t dev;
509	struct uio *uio;
510	int ioflag;
511{
512	struct bpf_d *d = dev->si_drv1;
513	struct ifnet *ifp;
514	struct mbuf *m;
515	int error;
516	static struct sockaddr dst;
517	int datlen;
518
519	if (d->bd_bif == 0)
520		return (ENXIO);
521
522	ifp = d->bd_bif->bif_ifp;
523
524	if (uio->uio_resid == 0)
525		return (0);
526
527	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen);
528	if (error)
529		return (error);
530
531	if (datlen > ifp->if_mtu)
532		return (EMSGSIZE);
533
534	if (d->bd_hdrcmplt)
535		dst.sa_family = pseudo_AF_HDRCMPLT;
536
537	mtx_lock(&Giant);
538	error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0);
539	mtx_unlock(&Giant);
540	/*
541	 * The driver frees the mbuf.
542	 */
543	return (error);
544}
545
546/*
547 * Reset a descriptor by flushing its packet buffer and clearing the
548 * receive and drop counts.
549 */
static void
reset_d(d)
	struct bpf_d *d;
{

	mtx_assert(&d->bd_mtx, MA_OWNED);
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = 0;
	}
	/* Discard buffered data and restart the statistics counters. */
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}
566
567/*
568 *  FIONREAD		Check for read packet available.
569 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
570 *  BIOCGBLEN		Get buffer len [for read()].
571 *  BIOCSETF		Set ethernet read filter.
572 *  BIOCFLUSH		Flush read packet buffer.
573 *  BIOCPROMISC		Put interface into promiscuous mode.
574 *  BIOCGDLT		Get link layer type.
575 *  BIOCGETIF		Get interface name.
576 *  BIOCSETIF		Set interface.
577 *  BIOCSRTIMEOUT	Set read timeout.
578 *  BIOCGRTIMEOUT	Get read timeout.
579 *  BIOCGSTATS		Get packet stats.
580 *  BIOCIMMEDIATE	Set immediate mode.
581 *  BIOCVERSION		Get filter language version.
582 *  BIOCGHDRCMPLT	Get "header already complete" flag
583 *  BIOCSHDRCMPLT	Set "header already complete" flag
584 *  BIOCGSEESENT	Get "see packets sent" flag
585 *  BIOCSSEESENT	Set "see packets sent" flag
586 */
587/* ARGSUSED */
static	int
bpfioctl(dev, cmd, addr, flags, p)
	dev_t dev;
	u_long cmd;
	caddr_t addr;
	int flags;
	struct proc *p;
{
	struct bpf_d *d = dev->si_drv1;
	int error = 0;

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			/* Report store-buffer bytes plus any held buffer. */
			BPFD_LOCK(d);
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			/* Pass straight through to the bound interface. */
			if (d->bd_bif == 0)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.  Only allowed before an interface is bound;
	 * the requested size is clamped to [BPF_MINBUFSIZE, bpf_maxbufsize]
	 * and the clamped value is passed back to the caller.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != 0)
			error = EINVAL;
		else {
			register u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == 0) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		/* Only the first request per descriptor hits the driver. */
		if (d->bd_promisc == 0) {
			mtx_lock(&Giant);
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			mtx_unlock(&Giant);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == 0)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			snprintf(ifr->ifr_name, sizeof(ifr->ifr_name),
			    "%s%d", ifp->if_name, ifp->if_unit);
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/* Convert the stored tick count back to a timeval. */
			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:
		*(u_int *)addr = d->bd_seesent;
		break;

	/*
	 * Set "see sent packets" flag
	 */
	case BIOCSSEESENT:
		d->bd_seesent = *(u_int *)addr;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
		 	u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}
	return (error);
}
854
855/*
856 * Set d's packet filter program to fp.  If this file already has a filter,
857 * free it and replace it.  Returns EINVAL for bogus requests.
858 */
static int
bpf_setf(d, fp)
	struct bpf_d *d;
	struct bpf_program *fp;
{
	struct bpf_insn *fcode, *old;
	u_int flen, size;

	/*
	 * NOTE(review): the old filter is sampled before bd_mtx is taken;
	 * this assumes ioctls on a single descriptor are serialized —
	 * confirm against the driver framework.
	 */
	old = d->bd_filter;
	if (fp->bf_insns == 0) {
		/* NULL program with nonzero length is malformed. */
		if (fp->bf_len != 0)
			return (EINVAL);
		/* Clear the filter (accept everything) and flush buffers. */
		BPFD_LOCK(d);
		d->bd_filter = 0;
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != 0)
			free((caddr_t)old, M_BPF);
		return (0);
	}
	flen = fp->bf_len;
	if (flen > BPF_MAXINSNS)
		return (EINVAL);

	/* Copy the program from userland, then vet it before installing. */
	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		BPFD_LOCK(d);
		d->bd_filter = fcode;
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != 0)
			free((caddr_t)old, M_BPF);

		return (0);
	}
	free((caddr_t)fcode, M_BPF);
	return (EINVAL);
}
899
900/*
901 * Detach a file from its current interface (if attached at all) and attach
902 * to the interface indicated by the name stored in ifr.
903 * Return an errno or 0.
904 */
static int
bpf_setif(d, ifr)
	struct bpf_d *d;
	struct ifreq *ifr;
{
	struct bpf_if *bp;
	int error;
	struct ifnet *theywant;

	theywant = ifunit(ifr->ifr_name);
	if (theywant == 0)
		return ENXIO;

	/*
	 * Look through attached interfaces for the named one.
	 */
	mtx_lock(&bpf_mtx);
	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == 0 || ifp != theywant)
			continue;

		/* Found it; the list is no longer needed, drop the lock. */
		mtx_unlock(&bpf_mtx);
		/*
		 * We found the requested interface.
		 * If it's not up, return an error.
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to requested interface,
		 * just flush the buffer.
		 */
		if ((ifp->if_flags & IFF_UP) == 0)
			return (ENETDOWN);

		/* bd_sbuf == 0 means this descriptor never got buffers. */
		if (d->bd_sbuf == 0) {
			error = bpf_allocbufs(d);
			if (error != 0)
				return (error);
		}
		if (bp != d->bd_bif) {
			if (d->bd_bif)
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);

			bpf_attachd(d, bp);
		}
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		return (0);
	}
	mtx_unlock(&bpf_mtx);
	/* Not found. */
	return (ENXIO);
}
962
963/*
964 * Support for select() and poll() system calls
965 *
966 * Return true iff the specific operation will not block indefinitely.
967 * Otherwise, return false but make a note that a selwakeup() must be done.
968 */
int
bpfpoll(dev, events, p)
	register dev_t dev;
	int events;
	struct proc *p;
{
	struct bpf_d *d;
	int revents = 0;

	/*
	 * An imitation of the FIONREAD ioctl code.
	 */
	d = dev->si_drv1;

	if (d->bd_bif == NULL)
		return (ENXIO);

	BPFD_LOCK(d);
	if (events & (POLLIN | POLLRDNORM)) {
		/*
		 * Readable if a hold buffer is pending, or — in immediate
		 * mode — if anything has landed in the store buffer.
		 */
		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
			revents |= events & (POLLIN | POLLRDNORM);
		else
			selrecord(p, &d->bd_sel);
	}
	BPFD_UNLOCK(d);
	return (revents);
}
996
997/*
998 * Incoming linkage from device drivers.  Process the packet pkt, of length
999 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1000 * by each process' filter, and if accepted, stashed into the corresponding
1001 * buffer.
1002 */
void
bpf_tap(ifp, pkt, pktlen)
	struct ifnet *ifp;
	register u_char *pkt;
	register u_int pktlen;
{
	struct bpf_if *bp;
	register struct bpf_d *d;
	register u_int slen;

	bp = ifp->if_bpf;
	BPFIF_LOCK(bp);
	/* Run the packet past every listener on this interface. */
	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
		BPFD_LOCK(d);
		++d->bd_rcount;
		/* slen is the snapshot length the filter wants (0 = reject). */
		slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen);
		if (slen != 0)
			catchpacket(d, pkt, pktlen, slen, bcopy);
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}
1025
1026/*
1027 * Copy data from an mbuf chain into a buffer.  This code is derived
1028 * from m_copydata in sys/uipc_mbuf.c.
1029 */
1030static void
1031bpf_mcopy(src_arg, dst_arg, len)
1032	const void *src_arg;
1033	void *dst_arg;
1034	register size_t len;
1035{
1036	register const struct mbuf *m;
1037	register u_int count;
1038	u_char *dst;
1039
1040	m = src_arg;
1041	dst = dst_arg;
1042	while (len > 0) {
1043		if (m == 0)
1044			panic("bpf_mcopy");
1045		count = min(m->m_len, len);
1046		bcopy(mtod(m, void *), dst, count);
1047		m = m->m_next;
1048		dst += count;
1049		len -= count;
1050	}
1051}
1052
1053/*
1054 * Incoming linkage from device drivers, when packet is in an mbuf chain.
1055 */
void
bpf_mtap(ifp, m)
	struct ifnet *ifp;
	struct mbuf *m;
{
	struct bpf_if *bp = ifp->if_bpf;
	struct bpf_d *d;
	u_int pktlen, slen;
	struct mbuf *m0;

	/* Total the chain up front; the filter needs the wire length. */
	pktlen = 0;
	for (m0 = m; m0 != 0; m0 = m0->m_next)
		pktlen += m0->m_len;

	BPFIF_LOCK(bp);
	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
		/* Skip locally-sent packets unless the listener wants them. */
		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		/* buflen 0 tells bpf_filter the "packet" is an mbuf chain. */
		slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
		if (slen != 0)
			catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcopy);
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}
1083
1084/*
1085 * Move the packet data from interface memory (pkt) into the
1086 * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
1087 * otherwise 0.  "copy" is the routine called to do the actual data
1088 * transfer.  bcopy is passed in to copy contiguous chunks, while
1089 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
1090 * pkt is really an mbuf.
1091 */
static void
catchpacket(d, pkt, pktlen, snaplen, cpfn)
	register struct bpf_d *d;
	register u_char *pkt;
	register u_int pktlen, snaplen;
	register void (*cpfn) __P((const void *, void *, size_t));
{
	register struct bpf_hdr *hp;
	register int totlen, curlen;
	register int hdrlen = d->bd_bif->bif_hdrlen;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == 0) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		bpf_wakeup(d);
		curlen = 0;
	}
	else if (d->bd_immediate)
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		bpf_wakeup(d);

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	microtime(&hp->bh_tstamp);
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 * cpfn is bcopy for flat buffers or bpf_mcopy when pkt is really
	 * an mbuf chain.
	 */
	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;
}
1154
1155/*
1156 * Initialize all nonzero fields of a descriptor.
1157 */
1158static int
1159bpf_allocbufs(d)
1160	register struct bpf_d *d;
1161{
1162	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1163	if (d->bd_fbuf == 0)
1164		return (ENOBUFS);
1165
1166	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1167	if (d->bd_sbuf == 0) {
1168		free(d->bd_fbuf, M_BPF);
1169		return (ENOBUFS);
1170	}
1171	d->bd_slen = 0;
1172	d->bd_hlen = 0;
1173	return (0);
1174}
1175
1176/*
1177 * Free buffers currently in use by a descriptor.
1178 * Called on close.
1179 */
static void
bpf_freed(d)
	register struct bpf_d *d;
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it yet hasn't been marked
	 * free.
	 */
	/* bd_sbuf != 0 implies bpf_allocbufs() ran; free whatever exists. */
	if (d->bd_sbuf != 0) {
		free(d->bd_sbuf, M_BPF);
		if (d->bd_hbuf != 0)
			free(d->bd_hbuf, M_BPF);
		if (d->bd_fbuf != 0)
			free(d->bd_fbuf, M_BPF);
	}
	if (d->bd_filter)
		free((caddr_t)d->bd_filter, M_BPF);
	mtx_destroy(&d->bd_mtx);
}
1200
1201/*
1202 * Attach an interface to bpf.  driverp is a pointer to a (struct bpf_if *)
1203 * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
1204 * size of the link header (variable length headers not yet supported).
1205 */
void
bpfattach(ifp, dlt, hdrlen)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
{
	struct bpf_if *bp;
	/* M_NOWAIT: may be called from contexts that cannot sleep. */
	bp = (struct bpf_if *)malloc(sizeof(*bp), M_BPF, M_NOWAIT);
	if (bp == 0)
		panic("bpfattach");

	bp->bif_dlist = 0;
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	mtx_init(&bp->bif_mtx, "bpf interface lock", MTX_DEF);

	/* Link the new record onto the global interface list. */
	mtx_lock(&bpf_mtx);
	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;
	mtx_unlock(&bpf_mtx);

	/* No listeners yet, so the driver shouldn't divert packets. */
	bp->bif_ifp->if_bpf = 0;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	if (bootverbose)
		printf("bpf: %s%d attached\n", ifp->if_name, ifp->if_unit);
}
1239
1240/*
1241 * Detach bpf from an interface.  This involves detaching each descriptor
1242 * associated with the interface, and leaving bd_bif NULL.  Notify each
1243 * descriptor as it's detached so that any sleepers wake up and get
1244 * ENXIO.
1245 */
1246void
1247bpfdetach(ifp)
1248	struct ifnet *ifp;
1249{
1250	struct bpf_if	*bp, *bp_prev;
1251	struct bpf_d	*d;
1252
1253	mtx_lock(&bpf_mtx);
1254
1255	/* Locate BPF interface information */
1256	bp_prev = NULL;
1257	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1258		if (ifp == bp->bif_ifp)
1259			break;
1260		bp_prev = bp;
1261	}
1262
1263	/* Interface wasn't attached */
1264	if (bp->bif_ifp == NULL) {
1265		mtx_unlock(&bpf_mtx);
1266		printf("bpfdetach: %s%d was not attached\n", ifp->if_name,
1267		    ifp->if_unit);
1268		return;
1269	}
1270
1271	if (bp_prev) {
1272		bp_prev->bif_next = bp->bif_next;
1273	} else {
1274		bpf_iflist = bp->bif_next;
1275	}
1276
1277	while ((d = bp->bif_dlist) != NULL) {
1278		bpf_detachd(d);
1279		BPFD_LOCK(d);
1280		bpf_wakeup(d);
1281		BPFD_UNLOCK(d);
1282	}
1283
1284	mtx_destroy(&bp->bif_mtx);
1285	free(bp, M_BPF);
1286
1287	mtx_unlock(&bpf_mtx);
1288}
1289
1290static void bpf_drvinit __P((void *unused));
1291
1292static void bpf_clone __P((void *arg, char *name, int namelen, dev_t *dev));
1293
/*
 * dev_clone event handler: create /dev/bpfN on first open of a
 * not-yet-existing bpf unit.  Does nothing if the name doesn't match
 * "bpf<unit>" or the device already exists (*dev != NODEV).
 */
static void
bpf_clone(arg, name, namelen, dev)
	void *arg;
	char *name;
	int namelen;
	dev_t *dev;
{
	int u;

	if (*dev != NODEV)
		return;
	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
		return;
	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", u);
	(*dev)->si_flags |= SI_CHEAPCLONE;
	return;
}
1312
/*
 * Driver initialization (run once via SYSINIT): set up the global bpf
 * lock, register the clone handler, and add the character device.
 */
static void
bpf_drvinit(unused)
	void *unused;
{

	mtx_init(&bpf_mtx, "bpf global lock", MTX_DEF);
	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
	cdevsw_add(&bpf_cdevsw);
}
1322
1323SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL)
1324
1325#else /* !DEV_BPF && !NETGRAPH_BPF */
1326/*
1327 * NOP stubs to allow bpf-using drivers to load and function.
1328 *
1329 * A 'better' implementation would allow the core bpf functionality
1330 * to be loaded at runtime.
1331 */
1332
/* NOP stub: silently discards the packet when bpf is not compiled in. */
void
bpf_tap(ifp, pkt, pktlen)
	struct ifnet *ifp;
	register u_char *pkt;
	register u_int pktlen;
{
}
1340
/* NOP stub: silently discards the mbuf-chain packet. */
void
bpf_mtap(ifp, m)
	struct ifnet *ifp;
	struct mbuf *m;
{
}
1347
/* NOP stub: drivers may call bpfattach() even without bpf support. */
void
bpfattach(ifp, dlt, hdrlen)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
{
}
1354
/* NOP stub: nothing to detach when bpf is not compiled in. */
void
bpfdetach(ifp)
	struct ifnet *ifp;
{
}
1360
/* Stub: accept the whole packet (-1 snaplen means "no filter"). */
u_int
bpf_filter(pc, p, wirelen, buflen)
	register const struct bpf_insn *pc;
	register u_char *p;
	u_int wirelen;
	register u_int buflen;
{
	return -1;	/* "no filter" behaviour */
}
1370
/* Stub: reject every program, so BIOCSETF fails without bpf support. */
int
bpf_validate(f, len)
	const struct bpf_insn *f;
	int len;
{
	return 0;		/* false */
}
1378
1379#endif /* !DEV_BPF && !NETGRAPH_BPF */
1380