bpf.c revision 104090
1298707Sjhb/*
2298707Sjhb * Copyright (c) 1990, 1991, 1993
3298707Sjhb *	The Regents of the University of California.  All rights reserved.
4298707Sjhb *
5298707Sjhb * This code is derived from the Stanford/CMU enet packet filter,
6298707Sjhb * (net/enet.c) distributed as part of 4.3BSD, and code contributed
7298707Sjhb * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
8298707Sjhb * Berkeley Laboratory.
9298707Sjhb *
10298707Sjhb * Redistribution and use in source and binary forms, with or without
11298707Sjhb * modification, are permitted provided that the following conditions
12298707Sjhb * are met:
13298707Sjhb * 1. Redistributions of source code must retain the above copyright
14298707Sjhb *    notice, this list of conditions and the following disclaimer.
15298707Sjhb * 2. Redistributions in binary form must reproduce the above copyright
16298707Sjhb *    notice, this list of conditions and the following disclaimer in the
17298707Sjhb *    documentation and/or other materials provided with the distribution.
18298707Sjhb * 3. All advertising materials mentioning features or use of this software
19298707Sjhb *    must display the following acknowledgement:
20298707Sjhb *	This product includes software developed by the University of
21298707Sjhb *	California, Berkeley and its contributors.
22298707Sjhb * 4. Neither the name of the University nor the names of its contributors
23298707Sjhb *    may be used to endorse or promote products derived from this software
24298707Sjhb *    without specific prior written permission.
25298707Sjhb *
26298707Sjhb * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27298707Sjhb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28298707Sjhb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29298707Sjhb * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30298707Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31298707Sjhb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32298707Sjhb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33298707Sjhb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34298707Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35298707Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36298707Sjhb * SUCH DAMAGE.
37298707Sjhb *
38298707Sjhb *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
39298707Sjhb *
40298707Sjhb * $FreeBSD: head/sys/net/bpf.c 104090 2002-09-28 14:03:27Z phk $
41298707Sjhb */
42298707Sjhb
43298707Sjhb#include "opt_bpf.h"
44298707Sjhb#include "opt_mac.h"
45298707Sjhb#include "opt_netgraph.h"
46298707Sjhb
47298707Sjhb#include <sys/param.h>
48298707Sjhb#include <sys/systm.h>
49298707Sjhb#include <sys/conf.h>
50298707Sjhb#include <sys/mac.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/time.h>
54#include <sys/proc.h>
55#include <sys/signalvar.h>
56#include <sys/filio.h>
57#include <sys/sockio.h>
58#include <sys/ttycom.h>
59#include <sys/filedesc.h>
60
61#include <sys/poll.h>
62
63#include <sys/socket.h>
64#include <sys/vnode.h>
65
66#include <net/if.h>
67#include <net/bpf.h>
68#include <net/bpfdesc.h>
69
70#include <netinet/in.h>
71#include <netinet/if_ether.h>
72#include <sys/kernel.h>
73#include <sys/sysctl.h>
74
75static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
76
77#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
78
79#define PRINET  26			/* interruptible */
80
81/*
82 * The default read buffer size is patchable.
83 */
84static int bpf_bufsize = 4096;
85SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW,
86	&bpf_bufsize, 0, "");
87static int bpf_maxbufsize = BPF_MAXBUFSIZE;
88SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW,
89	&bpf_maxbufsize, 0, "");
90
91/*
92 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
93 */
94static struct bpf_if	*bpf_iflist;
95static struct mtx	bpf_mtx;		/* bpf global lock */
96
97static int	bpf_allocbufs(struct bpf_d *);
98static void	bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
99static void	bpf_detachd(struct bpf_d *d);
100static void	bpf_freed(struct bpf_d *);
101static void	bpf_mcopy(const void *, void *, size_t);
102static int	bpf_movein(struct uio *, int,
103		    struct mbuf **, struct sockaddr *, int *);
104static int	bpf_setif(struct bpf_d *, struct ifreq *);
105static void	bpf_timed_out(void *);
106static __inline void
107		bpf_wakeup(struct bpf_d *);
108static void	catchpacket(struct bpf_d *, u_char *, u_int,
109		    u_int, void (*)(const void *, void *, size_t));
110static void	reset_d(struct bpf_d *);
111static int	 bpf_setf(struct bpf_d *, struct bpf_program *);
112
113static	d_open_t	bpfopen;
114static	d_close_t	bpfclose;
115static	d_read_t	bpfread;
116static	d_write_t	bpfwrite;
117static	d_ioctl_t	bpfioctl;
118static	d_poll_t	bpfpoll;
119
120#define CDEV_MAJOR 23
121static struct cdevsw bpf_cdevsw = {
122	/* open */	bpfopen,
123	/* close */	bpfclose,
124	/* read */	bpfread,
125	/* write */	bpfwrite,
126	/* ioctl */	bpfioctl,
127	/* poll */	bpfpoll,
128	/* mmap */	nommap,
129	/* strategy */	nostrategy,
130	/* name */	"bpf",
131	/* maj */	CDEV_MAJOR,
132	/* dump */	nodump,
133	/* psize */	nopsize,
134	/* flags */	0,
135};
136
137
138static int
139bpf_movein(uio, linktype, mp, sockp, datlen)
140	register struct uio *uio;
141	int linktype, *datlen;
142	register struct mbuf **mp;
143	register struct sockaddr *sockp;
144{
145	struct mbuf *m;
146	int error;
147	int len;
148	int hlen;
149
150	/*
151	 * Build a sockaddr based on the data link layer type.
152	 * We do this at this level because the ethernet header
153	 * is copied directly into the data field of the sockaddr.
154	 * In the case of SLIP, there is no header and the packet
155	 * is forwarded as is.
156	 * Also, we are careful to leave room at the front of the mbuf
157	 * for the link level header.
158	 */
159	switch (linktype) {
160
161	case DLT_SLIP:
162		sockp->sa_family = AF_INET;
163		hlen = 0;
164		break;
165
166	case DLT_EN10MB:
167		sockp->sa_family = AF_UNSPEC;
168		/* XXX Would MAXLINKHDR be better? */
169		hlen = sizeof(struct ether_header);
170		break;
171
172	case DLT_FDDI:
173		sockp->sa_family = AF_IMPLINK;
174		hlen = 0;
175		break;
176
177	case DLT_RAW:
178	case DLT_NULL:
179		sockp->sa_family = AF_UNSPEC;
180		hlen = 0;
181		break;
182
183	case DLT_ATM_RFC1483:
184		/*
185		 * en atm driver requires 4-byte atm pseudo header.
186		 * though it isn't standard, vpi:vci needs to be
187		 * specified anyway.
188		 */
189		sockp->sa_family = AF_UNSPEC;
190		hlen = 12; 	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
191		break;
192
193	case DLT_PPP:
194		sockp->sa_family = AF_UNSPEC;
195		hlen = 4;	/* This should match PPP_HDRLEN */
196		break;
197
198	default:
199		return (EIO);
200	}
201
202	len = uio->uio_resid;
203	*datlen = len - hlen;
204	if ((unsigned)len > MCLBYTES)
205		return (EIO);
206
207	MGETHDR(m, M_TRYWAIT, MT_DATA);
208	if (m == 0)
209		return (ENOBUFS);
210	if (len > MHLEN) {
211		MCLGET(m, M_TRYWAIT);
212		if ((m->m_flags & M_EXT) == 0) {
213			error = ENOBUFS;
214			goto bad;
215		}
216	}
217	m->m_pkthdr.len = m->m_len = len;
218	m->m_pkthdr.rcvif = NULL;
219	*mp = m;
220	/*
221	 * Make room for link header.
222	 */
223	if (hlen != 0) {
224		m->m_pkthdr.len -= hlen;
225		m->m_len -= hlen;
226#if BSD >= 199103
227		m->m_data += hlen; /* XXX */
228#else
229		m->m_off += hlen;
230#endif
231		error = uiomove((caddr_t)sockp->sa_data, hlen, uio);
232		if (error)
233			goto bad;
234	}
235	error = uiomove(mtod(m, caddr_t), len - hlen, uio);
236	if (!error)
237		return (0);
238 bad:
239	m_freem(m);
240	return (error);
241}
242
243/*
244 * Attach file to the bpf interface, i.e. make d listen on bp.
245 */
246static void
247bpf_attachd(d, bp)
248	struct bpf_d *d;
249	struct bpf_if *bp;
250{
251	/*
252	 * Point d at bp, and add d to the interface's list of listeners.
253	 * Finally, point the driver's bpf cookie at the interface so
254	 * it will divert packets to bpf.
255	 */
256	BPFIF_LOCK(bp);
257	d->bd_bif = bp;
258	d->bd_next = bp->bif_dlist;
259	bp->bif_dlist = d;
260
261	bp->bif_ifp->if_bpf = bp;
262	BPFIF_UNLOCK(bp);
263}
264
265/*
266 * Detach a file from its interface.
267 */
268static void
269bpf_detachd(d)
270	struct bpf_d *d;
271{
272	int error;
273	struct bpf_d **p;
274	struct bpf_if *bp;
275
276	bp = d->bd_bif;
277	/*
278	 * Check if this descriptor had requested promiscuous mode.
279	 * If so, turn it off.
280	 */
281	if (d->bd_promisc) {
282		d->bd_promisc = 0;
283		error = ifpromisc(bp->bif_ifp, 0);
284		if (error != 0 && error != ENXIO) {
285			/*
286			 * ENXIO can happen if a pccard is unplugged
287			 * Something is really wrong if we were able to put
288			 * the driver into promiscuous mode, but can't
289			 * take it out.
290			 */
291			printf("%s%d: ifpromisc failed %d\n",
292			    bp->bif_ifp->if_name, bp->bif_ifp->if_unit, error);
293		}
294	}
295	/* Remove d from the interface's descriptor list. */
296	BPFIF_LOCK(bp);
297	p = &bp->bif_dlist;
298	while (*p != d) {
299		p = &(*p)->bd_next;
300		if (*p == 0)
301			panic("bpf_detachd: descriptor not in list");
302	}
303	*p = (*p)->bd_next;
304	if (bp->bif_dlist == 0)
305		/*
306		 * Let the driver know that there are no more listeners.
307		 */
308		d->bd_bif->bif_ifp->if_bpf = 0;
309	BPFIF_UNLOCK(bp);
310	d->bd_bif = 0;
311}
312
313/*
314 * Open ethernet device.  Returns ENXIO for illegal minor device number,
315 * EBUSY if file is open by another process.
316 */
317/* ARGSUSED */
318static	int
319bpfopen(dev, flags, fmt, td)
320	dev_t dev;
321	int flags;
322	int fmt;
323	struct thread *td;
324{
325	struct bpf_d *d;
326
327	mtx_lock(&bpf_mtx);
328	d = dev->si_drv1;
329	/*
330	 * Each minor can be opened by only one process.  If the requested
331	 * minor is in use, return EBUSY.
332	 */
333	if (d) {
334		mtx_unlock(&bpf_mtx);
335		return (EBUSY);
336	}
337	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
338	mtx_unlock(&bpf_mtx);
339
340	if ((dev->si_flags & SI_NAMED) == 0)
341		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
342		    "bpf%d", dev2unit(dev));
343	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
344	dev->si_drv1 = d;
345	d->bd_bufsize = bpf_bufsize;
346	d->bd_sig = SIGIO;
347	d->bd_seesent = 1;
348#ifdef MAC
349	mac_init_bpfdesc(d);
350	mac_create_bpfdesc(td->td_ucred, d);
351#endif
352	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
353	callout_init(&d->bd_callout, 1);
354
355	return (0);
356}
357
358/*
359 * Close the descriptor by detaching it from its interface,
360 * deallocating its buffers, and marking it free.
361 */
362/* ARGSUSED */
363static	int
364bpfclose(dev, flags, fmt, td)
365	dev_t dev;
366	int flags;
367	int fmt;
368	struct thread *td;
369{
370	struct bpf_d *d = dev->si_drv1;
371
372	BPFD_LOCK(d);
373	if (d->bd_state == BPF_WAITING)
374		callout_stop(&d->bd_callout);
375	d->bd_state = BPF_IDLE;
376	BPFD_UNLOCK(d);
377	funsetown(&d->bd_sigio);
378	mtx_lock(&bpf_mtx);
379	if (d->bd_bif)
380		bpf_detachd(d);
381	mtx_unlock(&bpf_mtx);
382#ifdef MAC
383	mac_destroy_bpfdesc(d);
384#endif /* MAC */
385	bpf_freed(d);
386	dev->si_drv1 = 0;
387	free(d, M_BPF);
388
389	return (0);
390}
391
392
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely as the body of an unbraced
 * if/else.  Invoke it with a trailing semicolon: ROTATE_BUFFERS(d);
 */
#define ROTATE_BUFFERS(d) do { \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = 0; \
} while (0)

404/*
405 *  bpfread - read next chunk of packets from buffers
406 */
407static	int
408bpfread(dev, uio, ioflag)
409	dev_t dev;
410	register struct uio *uio;
411	int ioflag;
412{
413	struct bpf_d *d = dev->si_drv1;
414	int timed_out;
415	int error;
416
417	/*
418	 * Restrict application to use a buffer the same size as
419	 * as kernel buffers.
420	 */
421	if (uio->uio_resid != d->bd_bufsize)
422		return (EINVAL);
423
424	BPFD_LOCK(d);
425	if (d->bd_state == BPF_WAITING)
426		callout_stop(&d->bd_callout);
427	timed_out = (d->bd_state == BPF_TIMED_OUT);
428	d->bd_state = BPF_IDLE;
429	/*
430	 * If the hold buffer is empty, then do a timed sleep, which
431	 * ends when the timeout expires or when enough packets
432	 * have arrived to fill the store buffer.
433	 */
434	while (d->bd_hbuf == 0) {
435		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
436			/*
437			 * A packet(s) either arrived since the previous
438			 * read or arrived while we were asleep.
439			 * Rotate the buffers and return what's here.
440			 */
441			ROTATE_BUFFERS(d);
442			break;
443		}
444
445		/*
446		 * No data is available, check to see if the bpf device
447		 * is still pointed at a real interface.  If not, return
448		 * ENXIO so that the userland process knows to rebind
449		 * it before using it again.
450		 */
451		if (d->bd_bif == NULL) {
452			BPFD_UNLOCK(d);
453			return (ENXIO);
454		}
455
456		if (ioflag & IO_NDELAY) {
457			BPFD_UNLOCK(d);
458			return (EWOULDBLOCK);
459		}
460		error = msleep((caddr_t)d, &d->bd_mtx, PRINET|PCATCH,
461		     "bpf", d->bd_rtout);
462		if (error == EINTR || error == ERESTART) {
463			BPFD_UNLOCK(d);
464			return (error);
465		}
466		if (error == EWOULDBLOCK) {
467			/*
468			 * On a timeout, return what's in the buffer,
469			 * which may be nothing.  If there is something
470			 * in the store buffer, we can rotate the buffers.
471			 */
472			if (d->bd_hbuf)
473				/*
474				 * We filled up the buffer in between
475				 * getting the timeout and arriving
476				 * here, so we don't need to rotate.
477				 */
478				break;
479
480			if (d->bd_slen == 0) {
481				BPFD_UNLOCK(d);
482				return (0);
483			}
484			ROTATE_BUFFERS(d);
485			break;
486		}
487	}
488	/*
489	 * At this point, we know we have something in the hold slot.
490	 */
491	BPFD_UNLOCK(d);
492
493	/*
494	 * Move data from hold buffer into user space.
495	 * We know the entire buffer is transferred since
496	 * we checked above that the read buffer is bpf_bufsize bytes.
497	 */
498	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
499
500	BPFD_LOCK(d);
501	d->bd_fbuf = d->bd_hbuf;
502	d->bd_hbuf = 0;
503	d->bd_hlen = 0;
504	BPFD_UNLOCK(d);
505
506	return (error);
507}
508
509
510/*
511 * If there are processes sleeping on this descriptor, wake them up.
512 */
513static __inline void
514bpf_wakeup(d)
515	register struct bpf_d *d;
516{
517	if (d->bd_state == BPF_WAITING) {
518		callout_stop(&d->bd_callout);
519		d->bd_state = BPF_IDLE;
520	}
521	wakeup((caddr_t)d);
522	if (d->bd_async && d->bd_sig && d->bd_sigio)
523		pgsigio(&d->bd_sigio, d->bd_sig, 0);
524
525	selwakeup(&d->bd_sel);
526}
527
528static void
529bpf_timed_out(arg)
530	void *arg;
531{
532	struct bpf_d *d = (struct bpf_d *)arg;
533
534	BPFD_LOCK(d);
535	if (d->bd_state == BPF_WAITING) {
536		d->bd_state = BPF_TIMED_OUT;
537		if (d->bd_slen != 0)
538			bpf_wakeup(d);
539	}
540	BPFD_UNLOCK(d);
541}
542
543static	int
544bpfwrite(dev, uio, ioflag)
545	dev_t dev;
546	struct uio *uio;
547	int ioflag;
548{
549	struct bpf_d *d = dev->si_drv1;
550	struct ifnet *ifp;
551	struct mbuf *m;
552	int error;
553	static struct sockaddr dst;
554	int datlen;
555
556	if (d->bd_bif == 0)
557		return (ENXIO);
558
559	ifp = d->bd_bif->bif_ifp;
560
561	if (uio->uio_resid == 0)
562		return (0);
563
564	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen);
565	if (error)
566		return (error);
567
568	if (datlen > ifp->if_mtu)
569		return (EMSGSIZE);
570
571	if (d->bd_hdrcmplt)
572		dst.sa_family = pseudo_AF_HDRCMPLT;
573
574	mtx_lock(&Giant);
575#ifdef MAC
576	mac_create_mbuf_from_bpfdesc(d, m);
577#endif
578	error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0);
579	mtx_unlock(&Giant);
580	/*
581	 * The driver frees the mbuf.
582	 */
583	return (error);
584}
585
586/*
587 * Reset a descriptor by flushing its packet buffer and clearing the
588 * receive and drop counts.
589 */
590static void
591reset_d(d)
592	struct bpf_d *d;
593{
594
595	mtx_assert(&d->bd_mtx, MA_OWNED);
596	if (d->bd_hbuf) {
597		/* Free the hold buffer. */
598		d->bd_fbuf = d->bd_hbuf;
599		d->bd_hbuf = 0;
600	}
601	d->bd_slen = 0;
602	d->bd_hlen = 0;
603	d->bd_rcount = 0;
604	d->bd_dcount = 0;
605}
606
607/*
608 *  FIONREAD		Check for read packet available.
609 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
610 *  BIOCGBLEN		Get buffer len [for read()].
611 *  BIOCSETF		Set ethernet read filter.
612 *  BIOCFLUSH		Flush read packet buffer.
613 *  BIOCPROMISC		Put interface into promiscuous mode.
614 *  BIOCGDLT		Get link layer type.
615 *  BIOCGETIF		Get interface name.
616 *  BIOCSETIF		Set interface.
617 *  BIOCSRTIMEOUT	Set read timeout.
618 *  BIOCGRTIMEOUT	Get read timeout.
619 *  BIOCGSTATS		Get packet stats.
620 *  BIOCIMMEDIATE	Set immediate mode.
621 *  BIOCVERSION		Get filter language version.
622 *  BIOCGHDRCMPLT	Get "header already complete" flag
623 *  BIOCSHDRCMPLT	Set "header already complete" flag
624 *  BIOCGSEESENT	Get "see packets sent" flag
625 *  BIOCSSEESENT	Set "see packets sent" flag
626 */
627/* ARGSUSED */
628static	int
629bpfioctl(dev, cmd, addr, flags, td)
630	dev_t dev;
631	u_long cmd;
632	caddr_t addr;
633	int flags;
634	struct thread *td;
635{
636	struct bpf_d *d = dev->si_drv1;
637	int error = 0;
638
639	BPFD_LOCK(d);
640	if (d->bd_state == BPF_WAITING)
641		callout_stop(&d->bd_callout);
642	d->bd_state = BPF_IDLE;
643	BPFD_UNLOCK(d);
644
645	switch (cmd) {
646
647	default:
648		error = EINVAL;
649		break;
650
651	/*
652	 * Check for read packet available.
653	 */
654	case FIONREAD:
655		{
656			int n;
657
658			BPFD_LOCK(d);
659			n = d->bd_slen;
660			if (d->bd_hbuf)
661				n += d->bd_hlen;
662			BPFD_UNLOCK(d);
663
664			*(int *)addr = n;
665			break;
666		}
667
668	case SIOCGIFADDR:
669		{
670			struct ifnet *ifp;
671
672			if (d->bd_bif == 0)
673				error = EINVAL;
674			else {
675				ifp = d->bd_bif->bif_ifp;
676				error = (*ifp->if_ioctl)(ifp, cmd, addr);
677			}
678			break;
679		}
680
681	/*
682	 * Get buffer len [for read()].
683	 */
684	case BIOCGBLEN:
685		*(u_int *)addr = d->bd_bufsize;
686		break;
687
688	/*
689	 * Set buffer length.
690	 */
691	case BIOCSBLEN:
692		if (d->bd_bif != 0)
693			error = EINVAL;
694		else {
695			register u_int size = *(u_int *)addr;
696
697			if (size > bpf_maxbufsize)
698				*(u_int *)addr = size = bpf_maxbufsize;
699			else if (size < BPF_MINBUFSIZE)
700				*(u_int *)addr = size = BPF_MINBUFSIZE;
701			d->bd_bufsize = size;
702		}
703		break;
704
705	/*
706	 * Set link layer read filter.
707	 */
708	case BIOCSETF:
709		error = bpf_setf(d, (struct bpf_program *)addr);
710		break;
711
712	/*
713	 * Flush read packet buffer.
714	 */
715	case BIOCFLUSH:
716		BPFD_LOCK(d);
717		reset_d(d);
718		BPFD_UNLOCK(d);
719		break;
720
721	/*
722	 * Put interface into promiscuous mode.
723	 */
724	case BIOCPROMISC:
725		if (d->bd_bif == 0) {
726			/*
727			 * No interface attached yet.
728			 */
729			error = EINVAL;
730			break;
731		}
732		if (d->bd_promisc == 0) {
733			mtx_lock(&Giant);
734			error = ifpromisc(d->bd_bif->bif_ifp, 1);
735			mtx_unlock(&Giant);
736			if (error == 0)
737				d->bd_promisc = 1;
738		}
739		break;
740
741	/*
742	 * Get device parameters.
743	 */
744	case BIOCGDLT:
745		if (d->bd_bif == 0)
746			error = EINVAL;
747		else
748			*(u_int *)addr = d->bd_bif->bif_dlt;
749		break;
750
751	/*
752	 * Get interface name.
753	 */
754	case BIOCGETIF:
755		if (d->bd_bif == 0)
756			error = EINVAL;
757		else {
758			struct ifnet *const ifp = d->bd_bif->bif_ifp;
759			struct ifreq *const ifr = (struct ifreq *)addr;
760
761			snprintf(ifr->ifr_name, sizeof(ifr->ifr_name),
762			    "%s%d", ifp->if_name, ifp->if_unit);
763		}
764		break;
765
766	/*
767	 * Set interface.
768	 */
769	case BIOCSETIF:
770		error = bpf_setif(d, (struct ifreq *)addr);
771		break;
772
773	/*
774	 * Set read timeout.
775	 */
776	case BIOCSRTIMEOUT:
777		{
778			struct timeval *tv = (struct timeval *)addr;
779
780			/*
781			 * Subtract 1 tick from tvtohz() since this isn't
782			 * a one-shot timer.
783			 */
784			if ((error = itimerfix(tv)) == 0)
785				d->bd_rtout = tvtohz(tv) - 1;
786			break;
787		}
788
789	/*
790	 * Get read timeout.
791	 */
792	case BIOCGRTIMEOUT:
793		{
794			struct timeval *tv = (struct timeval *)addr;
795
796			tv->tv_sec = d->bd_rtout / hz;
797			tv->tv_usec = (d->bd_rtout % hz) * tick;
798			break;
799		}
800
801	/*
802	 * Get packet stats.
803	 */
804	case BIOCGSTATS:
805		{
806			struct bpf_stat *bs = (struct bpf_stat *)addr;
807
808			bs->bs_recv = d->bd_rcount;
809			bs->bs_drop = d->bd_dcount;
810			break;
811		}
812
813	/*
814	 * Set immediate mode.
815	 */
816	case BIOCIMMEDIATE:
817		d->bd_immediate = *(u_int *)addr;
818		break;
819
820	case BIOCVERSION:
821		{
822			struct bpf_version *bv = (struct bpf_version *)addr;
823
824			bv->bv_major = BPF_MAJOR_VERSION;
825			bv->bv_minor = BPF_MINOR_VERSION;
826			break;
827		}
828
829	/*
830	 * Get "header already complete" flag
831	 */
832	case BIOCGHDRCMPLT:
833		*(u_int *)addr = d->bd_hdrcmplt;
834		break;
835
836	/*
837	 * Set "header already complete" flag
838	 */
839	case BIOCSHDRCMPLT:
840		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
841		break;
842
843	/*
844	 * Get "see sent packets" flag
845	 */
846	case BIOCGSEESENT:
847		*(u_int *)addr = d->bd_seesent;
848		break;
849
850	/*
851	 * Set "see sent packets" flag
852	 */
853	case BIOCSSEESENT:
854		d->bd_seesent = *(u_int *)addr;
855		break;
856
857	case FIONBIO:		/* Non-blocking I/O */
858		break;
859
860	case FIOASYNC:		/* Send signal on receive packets */
861		d->bd_async = *(int *)addr;
862		break;
863
864	case FIOSETOWN:
865		error = fsetown(*(int *)addr, &d->bd_sigio);
866		break;
867
868	case FIOGETOWN:
869		*(int *)addr = fgetown(d->bd_sigio);
870		break;
871
872	/* This is deprecated, FIOSETOWN should be used instead. */
873	case TIOCSPGRP:
874		error = fsetown(-(*(int *)addr), &d->bd_sigio);
875		break;
876
877	/* This is deprecated, FIOGETOWN should be used instead. */
878	case TIOCGPGRP:
879		*(int *)addr = -fgetown(d->bd_sigio);
880		break;
881
882	case BIOCSRSIG:		/* Set receive signal */
883		{
884		 	u_int sig;
885
886			sig = *(u_int *)addr;
887
888			if (sig >= NSIG)
889				error = EINVAL;
890			else
891				d->bd_sig = sig;
892			break;
893		}
894	case BIOCGRSIG:
895		*(u_int *)addr = d->bd_sig;
896		break;
897	}
898	return (error);
899}
900
901/*
902 * Set d's packet filter program to fp.  If this file already has a filter,
903 * free it and replace it.  Returns EINVAL for bogus requests.
904 */
905static int
906bpf_setf(d, fp)
907	struct bpf_d *d;
908	struct bpf_program *fp;
909{
910	struct bpf_insn *fcode, *old;
911	u_int flen, size;
912
913	old = d->bd_filter;
914	if (fp->bf_insns == 0) {
915		if (fp->bf_len != 0)
916			return (EINVAL);
917		BPFD_LOCK(d);
918		d->bd_filter = 0;
919		reset_d(d);
920		BPFD_UNLOCK(d);
921		if (old != 0)
922			free((caddr_t)old, M_BPF);
923		return (0);
924	}
925	flen = fp->bf_len;
926	if (flen > BPF_MAXINSNS)
927		return (EINVAL);
928
929	size = flen * sizeof(*fp->bf_insns);
930	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
931	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
932	    bpf_validate(fcode, (int)flen)) {
933		BPFD_LOCK(d);
934		d->bd_filter = fcode;
935		reset_d(d);
936		BPFD_UNLOCK(d);
937		if (old != 0)
938			free((caddr_t)old, M_BPF);
939
940		return (0);
941	}
942	free((caddr_t)fcode, M_BPF);
943	return (EINVAL);
944}
945
946/*
947 * Detach a file from its current interface (if attached at all) and attach
948 * to the interface indicated by the name stored in ifr.
949 * Return an errno or 0.
950 */
951static int
952bpf_setif(d, ifr)
953	struct bpf_d *d;
954	struct ifreq *ifr;
955{
956	struct bpf_if *bp;
957	int error;
958	struct ifnet *theywant;
959
960	theywant = ifunit(ifr->ifr_name);
961	if (theywant == 0)
962		return ENXIO;
963
964	/*
965	 * Look through attached interfaces for the named one.
966	 */
967	mtx_lock(&bpf_mtx);
968	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
969		struct ifnet *ifp = bp->bif_ifp;
970
971		if (ifp == 0 || ifp != theywant)
972			continue;
973
974		mtx_unlock(&bpf_mtx);
975		/*
976		 * We found the requested interface.
977		 * If it's not up, return an error.
978		 * Allocate the packet buffers if we need to.
979		 * If we're already attached to requested interface,
980		 * just flush the buffer.
981		 */
982		if ((ifp->if_flags & IFF_UP) == 0)
983			return (ENETDOWN);
984
985		if (d->bd_sbuf == 0) {
986			error = bpf_allocbufs(d);
987			if (error != 0)
988				return (error);
989		}
990		if (bp != d->bd_bif) {
991			if (d->bd_bif)
992				/*
993				 * Detach if attached to something else.
994				 */
995				bpf_detachd(d);
996
997			bpf_attachd(d, bp);
998		}
999		BPFD_LOCK(d);
1000		reset_d(d);
1001		BPFD_UNLOCK(d);
1002		return (0);
1003	}
1004	mtx_unlock(&bpf_mtx);
1005	/* Not found. */
1006	return (ENXIO);
1007}
1008
1009/*
1010 * Support for select() and poll() system calls
1011 *
1012 * Return true iff the specific operation will not block indefinitely.
1013 * Otherwise, return false but make a note that a selwakeup() must be done.
1014 */
1015int
1016bpfpoll(dev, events, td)
1017	register dev_t dev;
1018	int events;
1019	struct thread *td;
1020{
1021	struct bpf_d *d;
1022	int revents;
1023
1024	d = dev->si_drv1;
1025	if (d->bd_bif == NULL)
1026		return (ENXIO);
1027
1028	revents = events & (POLLOUT | POLLWRNORM);
1029	BPFD_LOCK(d);
1030	if (events & (POLLIN | POLLRDNORM)) {
1031		/*
1032		 * An imitation of the FIONREAD ioctl code.
1033		 * XXX not quite.  An exact imitation:
1034		 *	if (d->b_slen != 0 ||
1035		 *	    (d->bd_hbuf != NULL && d->bd_hlen != 0)
1036		 */
1037		if (d->bd_hlen != 0 ||
1038		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
1039		    d->bd_slen != 0))
1040			revents |= events & (POLLIN | POLLRDNORM);
1041		else {
1042			selrecord(td, &d->bd_sel);
1043			/* Start the read timeout if necessary. */
1044			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1045				callout_reset(&d->bd_callout, d->bd_rtout,
1046				    bpf_timed_out, d);
1047				d->bd_state = BPF_WAITING;
1048			}
1049		}
1050	}
1051	BPFD_UNLOCK(d);
1052	return (revents);
1053}
1054
1055/*
1056 * Incoming linkage from device drivers.  Process the packet pkt, of length
1057 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1058 * by each process' filter, and if accepted, stashed into the corresponding
1059 * buffer.
1060 */
1061void
1062bpf_tap(ifp, pkt, pktlen)
1063	struct ifnet *ifp;
1064	register u_char *pkt;
1065	register u_int pktlen;
1066{
1067	struct bpf_if *bp;
1068	register struct bpf_d *d;
1069	register u_int slen;
1070
1071	bp = ifp->if_bpf;
1072	BPFIF_LOCK(bp);
1073	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1074		BPFD_LOCK(d);
1075		++d->bd_rcount;
1076		slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen);
1077		if (slen != 0) {
1078#ifdef MAC
1079			if (mac_check_bpfdesc_receive(d, ifp) == 0)
1080#endif
1081				catchpacket(d, pkt, pktlen, slen, bcopy);
1082		}
1083		BPFD_UNLOCK(d);
1084	}
1085	BPFIF_UNLOCK(bp);
1086}
1087
1088/*
1089 * Copy data from an mbuf chain into a buffer.  This code is derived
1090 * from m_copydata in sys/uipc_mbuf.c.
1091 */
1092static void
1093bpf_mcopy(src_arg, dst_arg, len)
1094	const void *src_arg;
1095	void *dst_arg;
1096	register size_t len;
1097{
1098	register const struct mbuf *m;
1099	register u_int count;
1100	u_char *dst;
1101
1102	m = src_arg;
1103	dst = dst_arg;
1104	while (len > 0) {
1105		if (m == 0)
1106			panic("bpf_mcopy");
1107		count = min(m->m_len, len);
1108		bcopy(mtod(m, void *), dst, count);
1109		m = m->m_next;
1110		dst += count;
1111		len -= count;
1112	}
1113}
1114
1115/*
1116 * Incoming linkage from device drivers, when packet is in an mbuf chain.
1117 */
1118void
1119bpf_mtap(ifp, m)
1120	struct ifnet *ifp;
1121	struct mbuf *m;
1122{
1123	struct bpf_if *bp = ifp->if_bpf;
1124	struct bpf_d *d;
1125	u_int pktlen, slen;
1126
1127	pktlen = m_length(m, NULL);
1128	if (pktlen == m->m_len) {
1129		bpf_tap(ifp, mtod(m, u_char *), pktlen);
1130		return;
1131	}
1132
1133	BPFIF_LOCK(bp);
1134	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1135		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
1136			continue;
1137		BPFD_LOCK(d);
1138		++d->bd_rcount;
1139		slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
1140		if (slen != 0)
1141#ifdef MAC
1142			if (mac_check_bpfdesc_receive(d, ifp) == 0)
1143#endif
1144				catchpacket(d, (u_char *)m, pktlen, slen,
1145				    bpf_mcopy);
1146		BPFD_UNLOCK(d);
1147	}
1148	BPFIF_UNLOCK(bp);
1149}
1150
1151/*
1152 * Move the packet data from interface memory (pkt) into the
1153 * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
1154 * otherwise 0.  "copy" is the routine called to do the actual data
1155 * transfer.  bcopy is passed in to copy contiguous chunks, while
1156 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
1157 * pkt is really an mbuf.
1158 */
1159static void
1160catchpacket(d, pkt, pktlen, snaplen, cpfn)
1161	register struct bpf_d *d;
1162	register u_char *pkt;
1163	register u_int pktlen, snaplen;
1164	register void (*cpfn)(const void *, void *, size_t);
1165{
1166	register struct bpf_hdr *hp;
1167	register int totlen, curlen;
1168	register int hdrlen = d->bd_bif->bif_hdrlen;
1169	/*
1170	 * Figure out how many bytes to move.  If the packet is
1171	 * greater or equal to the snapshot length, transfer that
1172	 * much.  Otherwise, transfer the whole packet (unless
1173	 * we hit the buffer size limit).
1174	 */
1175	totlen = hdrlen + min(snaplen, pktlen);
1176	if (totlen > d->bd_bufsize)
1177		totlen = d->bd_bufsize;
1178
1179	/*
1180	 * Round up the end of the previous packet to the next longword.
1181	 */
1182	curlen = BPF_WORDALIGN(d->bd_slen);
1183	if (curlen + totlen > d->bd_bufsize) {
1184		/*
1185		 * This packet will overflow the storage buffer.
1186		 * Rotate the buffers if we can, then wakeup any
1187		 * pending reads.
1188		 */
1189		if (d->bd_fbuf == 0) {
1190			/*
1191			 * We haven't completed the previous read yet,
1192			 * so drop the packet.
1193			 */
1194			++d->bd_dcount;
1195			return;
1196		}
1197		ROTATE_BUFFERS(d);
1198		bpf_wakeup(d);
1199		curlen = 0;
1200	}
1201	else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
1202		/*
1203		 * Immediate mode is set, or the read timeout has
1204		 * already expired during a select call.  A packet
1205		 * arrived, so the reader should be woken up.
1206		 */
1207		bpf_wakeup(d);
1208
1209	/*
1210	 * Append the bpf header.
1211	 */
1212	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
1213	microtime(&hp->bh_tstamp);
1214	hp->bh_datalen = pktlen;
1215	hp->bh_hdrlen = hdrlen;
1216	/*
1217	 * Copy the packet data into the store buffer and update its length.
1218	 */
1219	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
1220	d->bd_slen = curlen + totlen;
1221}
1222
1223/*
1224 * Initialize all nonzero fields of a descriptor.
1225 */
1226static int
1227bpf_allocbufs(d)
1228	register struct bpf_d *d;
1229{
1230	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1231	if (d->bd_fbuf == 0)
1232		return (ENOBUFS);
1233
1234	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1235	if (d->bd_sbuf == 0) {
1236		free(d->bd_fbuf, M_BPF);
1237		return (ENOBUFS);
1238	}
1239	d->bd_slen = 0;
1240	d->bd_hlen = 0;
1241	return (0);
1242}
1243
1244/*
1245 * Free buffers currently in use by a descriptor.
1246 * Called on close.
1247 */
1248static void
1249bpf_freed(d)
1250	register struct bpf_d *d;
1251{
1252	/*
1253	 * We don't need to lock out interrupts since this descriptor has
1254	 * been detached from its interface and it yet hasn't been marked
1255	 * free.
1256	 */
1257	if (d->bd_sbuf != 0) {
1258		free(d->bd_sbuf, M_BPF);
1259		if (d->bd_hbuf != 0)
1260			free(d->bd_hbuf, M_BPF);
1261		if (d->bd_fbuf != 0)
1262			free(d->bd_fbuf, M_BPF);
1263	}
1264	if (d->bd_filter)
1265		free((caddr_t)d->bd_filter, M_BPF);
1266	mtx_destroy(&d->bd_mtx);
1267}
1268
1269/*
1270 * Attach an interface to bpf.  ifp is a pointer to the structure
1271 * defining the interface to be attached, dlt is the link layer type,
1272 * and hdrlen is the fixed size of the link header (variable length
1273 * headers are not yet supporrted).
1274 */
1275void
1276bpfattach(ifp, dlt, hdrlen)
1277	struct ifnet *ifp;
1278	u_int dlt, hdrlen;
1279{
1280	struct bpf_if *bp;
1281	bp = (struct bpf_if *)malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
1282	if (bp == 0)
1283		panic("bpfattach");
1284
1285	bp->bif_ifp = ifp;
1286	bp->bif_dlt = dlt;
1287	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
1288
1289	mtx_lock(&bpf_mtx);
1290	bp->bif_next = bpf_iflist;
1291	bpf_iflist = bp;
1292	mtx_unlock(&bpf_mtx);
1293
1294	bp->bif_ifp->if_bpf = 0;
1295
1296	/*
1297	 * Compute the length of the bpf header.  This is not necessarily
1298	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1299	 * that the network layer header begins on a longword boundary (for
1300	 * performance reasons and to alleviate alignment restrictions).
1301	 */
1302	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1303
1304	if (bootverbose)
1305		printf("bpf: %s%d attached\n", ifp->if_name, ifp->if_unit);
1306}
1307
1308/*
1309 * Detach bpf from an interface.  This involves detaching each descriptor
1310 * associated with the interface, and leaving bd_bif NULL.  Notify each
1311 * descriptor as it's detached so that any sleepers wake up and get
1312 * ENXIO.
1313 */
1314void
1315bpfdetach(ifp)
1316	struct ifnet *ifp;
1317{
1318	struct bpf_if	*bp, *bp_prev;
1319	struct bpf_d	*d;
1320
1321	mtx_lock(&bpf_mtx);
1322
1323	/* Locate BPF interface information */
1324	bp_prev = NULL;
1325	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1326		if (ifp == bp->bif_ifp)
1327			break;
1328		bp_prev = bp;
1329	}
1330
1331	/* Interface wasn't attached */
1332	if (bp->bif_ifp == NULL) {
1333		mtx_unlock(&bpf_mtx);
1334		printf("bpfdetach: %s%d was not attached\n", ifp->if_name,
1335		    ifp->if_unit);
1336		return;
1337	}
1338
1339	if (bp_prev) {
1340		bp_prev->bif_next = bp->bif_next;
1341	} else {
1342		bpf_iflist = bp->bif_next;
1343	}
1344
1345	while ((d = bp->bif_dlist) != NULL) {
1346		bpf_detachd(d);
1347		BPFD_LOCK(d);
1348		bpf_wakeup(d);
1349		BPFD_UNLOCK(d);
1350	}
1351
1352	mtx_destroy(&bp->bif_mtx);
1353	free(bp, M_BPF);
1354
1355	mtx_unlock(&bpf_mtx);
1356}
1357
/*
 * Forward declarations for the driver initialisation routine and the
 * dev_clone event handler, both defined below.
 */
static void bpf_drvinit(void *unused);

static void bpf_clone(void *arg, char *name, int namelen, dev_t *dev);
1361
1362static void
1363bpf_clone(arg, name, namelen, dev)
1364	void *arg;
1365	char *name;
1366	int namelen;
1367	dev_t *dev;
1368{
1369	int u;
1370
1371	if (*dev != NODEV)
1372		return;
1373	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
1374		return;
1375	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
1376	    "bpf%d", u);
1377	(*dev)->si_flags |= SI_CHEAPCLONE;
1378	return;
1379}
1380
/*
 * Driver initialisation routine, hooked up through the SYSINIT below.
 * Initialises the global bpf mutex, registers bpf_clone as a dev_clone
 * event handler and adds the bpf character device switch.
 *
 * The argument is not used.
 */
static void
bpf_drvinit(unused)
	void *unused;
{

	/* The global lock is set up first, before anything else is registered. */
	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
	cdevsw_add(&bpf_cdevsw);
}

/* Arrange for bpf_drvinit to be called with a NULL argument. */
SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL)
1392
1393#else /* !DEV_BPF && !NETGRAPH_BPF */
1394/*
1395 * NOP stubs to allow bpf-using drivers to load and function.
1396 *
1397 * A 'better' implementation would allow the core bpf functionality
1398 * to be loaded at runtime.
1399 */
1400
/*
 * No-op stand-in for bpf_tap() in builds without DEV_BPF or
 * NETGRAPH_BPF: the packet is ignored.
 */
void
bpf_tap(ifp, pkt, pktlen)
	struct ifnet *ifp;
	register u_char *pkt;
	register u_int pktlen;
{
}
1408
/*
 * No-op stand-in for bpf_mtap() in builds without DEV_BPF or
 * NETGRAPH_BPF: the mbuf is neither read nor modified.
 */
void
bpf_mtap(ifp, m)
	struct ifnet *ifp;
	struct mbuf *m;
{
}
1415
/*
 * No-op stand-in for bpfattach() in builds without DEV_BPF or
 * NETGRAPH_BPF: nothing is allocated or recorded for the interface.
 */
void
bpfattach(ifp, dlt, hdrlen)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
{
}
1422
/*
 * No-op stand-in for bpfdetach() in builds without DEV_BPF or
 * NETGRAPH_BPF: there is nothing to tear down.
 */
void
bpfdetach(ifp)
	struct ifnet *ifp;
{
}
1428
1429u_int
1430bpf_filter(pc, p, wirelen, buflen)
1431	register const struct bpf_insn *pc;
1432	register u_char *p;
1433	u_int wirelen;
1434	register u_int buflen;
1435{
1436	return -1;	/* "no filter" behaviour */
1437}
1438
/*
 * Stand-in for bpf_validate() in builds without DEV_BPF or
 * NETGRAPH_BPF.  The program is not inspected; the answer is always
 * 0 (false).
 */
int
bpf_validate(f, len)
	const struct bpf_insn *f;
	int len;
{
	return (0);
}
1446
1447#endif /* !DEV_BPF && !NETGRAPH_BPF */
1448