bpf.c revision 93752
/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
 *
 * $FreeBSD: head/sys/net/bpf.c 93752 2002-04-04 06:03:17Z luigi $
 */

#include "opt_bpf.h"
#include "opt_netgraph.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/filedesc.h>

#include <sys/poll.h>

#include <sys/socket.h>
#include <sys/vnode.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define PRINET  26			/* interruptible */
/*
 * The default read buffer size is tunable via the debug.bpf_bufsize
 * sysctl, as is its upper bound, debug.bpf_maxbufsize.
 */
static int bpf_bufsize = 4096;
SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW,
	&bpf_bufsize, 0, "");
static int bpf_maxbufsize = BPF_MAXBUFSIZE;
SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW,
	&bpf_maxbufsize, 0, "");

/*
 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
 */
static struct bpf_if	*bpf_iflist;
static struct mtx	bpf_mtx;		/* bpf global lock */

static int	bpf_allocbufs(struct bpf_d *);
static void	bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
static void	bpf_detachd(struct bpf_d *d);
static void	bpf_freed(struct bpf_d *);
static void	bpf_mcopy(const void *, void *, size_t);
static int	bpf_movein(struct uio *, int,
		    struct mbuf **, struct sockaddr *, int *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int,
		    u_int, void (*)(const void *, void *, size_t));
static void	reset_d(struct bpf_d *);
static int	 bpf_setf(struct bpf_d *, struct bpf_program *);

static	d_open_t	bpfopen;
static	d_close_t	bpfclose;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;

#define CDEV_MAJOR 23
static struct cdevsw bpf_cdevsw = {
	/* open */	bpfopen,
	/* close */	bpfclose,
	/* read */	bpfread,
	/* write */	bpfwrite,
	/* ioctl */	bpfioctl,
	/* poll */	bpfpoll,
	/* mmap */	nommap,
	/* strategy */	nostrategy,
	/* name */	"bpf",
	/* maj */	CDEV_MAJOR,
	/* dump */	nodump,
	/* psize */	nopsize,
	/* flags */	0,
};


static int
bpf_movein(uio, linktype, mp, sockp, datlen)
	register struct uio *uio;
	int linktype, *datlen;
	register struct mbuf **mp;
	register struct sockaddr *sockp;
{
	struct mbuf *m;
	int error;
	int len;
	int hlen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = sizeof(struct ether_header);
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12; 	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;
	*datlen = len - hlen;
	if ((unsigned)len > MCLBYTES)
		return (EIO);

	MGETHDR(m, M_TRYWAIT, MT_DATA);
	if (m == 0)
		return (ENOBUFS);
	if (len > MHLEN) {
		MCLGET(m, M_TRYWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;
	/*
	 * Make room for link header.
	 */
	if (hlen != 0) {
		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
#if BSD >= 199103
		m->m_data += hlen; /* XXX */
#else
		m->m_off += hlen;
#endif
		error = uiomove((caddr_t)sockp->sa_data, hlen, uio);
		if (error)
			goto bad;
	}
	error = uiomove(mtod(m, caddr_t), len - hlen, uio);
	if (!error)
		return (0);
 bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(d, bp)
	struct bpf_d *d;
	struct bpf_if *bp;
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;

	bp->bif_ifp->if_bpf = bp;
	BPFIF_UNLOCK(bp);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(d)
	struct bpf_d *d;
{
	int error;
	struct bpf_d **p;
	struct bpf_if *bp;

	bp = d->bd_bif;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		error = ifpromisc(bp->bif_ifp, 0);
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			printf("%s%d: ifpromisc failed %d\n",
			    bp->bif_ifp->if_name, bp->bif_ifp->if_unit, error);
		}
	}
	/* Remove d from the interface's descriptor list. */
	BPFIF_LOCK(bp);
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0)
			panic("bpf_detachd: descriptor not in list");
	}
	*p = (*p)->bd_next;
	if (bp->bif_dlist == 0)
		/*
		 * Let the driver know that there are no more listeners.
		 */
		d->bd_bif->bif_ifp->if_bpf = 0;
	BPFIF_UNLOCK(bp);
	d->bd_bif = 0;
}

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
static	int
bpfopen(dev, flags, fmt, td)
	dev_t dev;
	int flags;
	int fmt;
	struct thread *td;
{
	struct bpf_d *d;

	mtx_lock(&bpf_mtx);
	d = dev->si_drv1;
	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 */
	if (d) {
		mtx_unlock(&bpf_mtx);
		return (EBUSY);
	}
	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
	mtx_unlock(&bpf_mtx);

	if ((dev->si_flags & SI_NAMED) == 0)
		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
		    "bpf%d", dev2unit(dev));
	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	dev->si_drv1 = d;
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_seesent = 1;
	mtx_init(&d->bd_mtx, devtoname(dev), MTX_DEF);
	callout_init(&d->bd_callout, 1);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static	int
bpfclose(dev, flags, fmt, td)
	dev_t dev;
	int flags;
	int fmt;
	struct thread *td;
{
	struct bpf_d *d = dev->si_drv1;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(d->bd_sigio);
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
	bpf_freed(d);
	dev->si_drv1 = 0;
	free(d, M_BPF);

	return (0);
}


/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = 0;
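
/*
 * A descriptor cycles three equal-size buffers: the store buffer
 * (bd_sbuf) collects incoming packets, the hold buffer (bd_hbuf) is
 * drained by read(), and the free buffer (bd_fbuf) stands by to become
 * the next store buffer.  At most one of bd_hbuf and bd_fbuf is
 * non-NULL at any time, so a rotation is legal exactly when the hold
 * slot is empty and a free buffer is available.
 */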
/*
 *  bpfread - read next chunk of packets from buffers
 */
static	int
bpfread(dev, uio, ioflag)
	dev_t dev;
	register struct uio *uio;
	int ioflag;
{
	struct bpf_d *d = dev->si_drv1;
	int timed_out;
	int error;

	/*
	 * Restrict applications to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == 0) {
		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * One or more packets arrived either since the
			 * previous read or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (ioflag & IO_NDELAY) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep((caddr_t)d, &d->bd_mtx, PRINET|PCATCH,
		     "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = 0;
	d->bd_hlen = 0;
	BPFD_UNLOCK(d);

	return (error);
}
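
/*
 * A minimal userland sketch of the matching read() loop (not part of
 * this file; the handler callback is a placeholder).  Each read()
 * returns a whole hold buffer containing zero or more records; every
 * record is a struct bpf_hdr followed by bh_caplen bytes of captured
 * packet, and the next record begins at the BPF_WORDALIGN()ed offset
 * past the current one.
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <net/bpf.h>
 *	#include <err.h>
 *	#include <fcntl.h>
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *
 *	void
 *	read_loop(int fd, void (*handle)(const u_char *, u_int))
 *	{
 *		struct bpf_hdr *bh;
 *		u_int bufsize;
 *		char *buf, *p;
 *		ssize_t n;
 *
 *		(as enforced above, read() must be issued with exactly
 *		 the kernel buffer size, so fetch it with BIOCGBLEN)
 *		if (ioctl(fd, BIOCGBLEN, &bufsize) < 0)
 *			err(1, "BIOCGBLEN");
 *		if ((buf = malloc(bufsize)) == NULL)
 *			err(1, "malloc");
 *		while ((n = read(fd, buf, bufsize)) > 0) {
 *			for (p = buf; p < buf + n;
 *			    p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen)) {
 *				bh = (struct bpf_hdr *)p;
 *				(*handle)((u_char *)p + bh->bh_hdrlen,
 *				    bh->bh_caplen);
 *			}
 *		}
 *	}
 */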


/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(d)
	register struct bpf_d *d;
{
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup((caddr_t)d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(d->bd_sigio, d->bd_sig, 0);

	selwakeup(&d->bd_sel);
}

static void
bpf_timed_out(arg)
	void *arg;
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	BPFD_UNLOCK(d);
}

static	int
bpfwrite(dev, uio, ioflag)
	dev_t dev;
	struct uio *uio;
	int ioflag;
{
	struct bpf_d *d = dev->si_drv1;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	static struct sockaddr dst;
	int datlen;

	if (d->bd_bif == 0)
		return (ENXIO);

	ifp = d->bd_bif->bif_ifp;

	if (uio->uio_resid == 0)
		return (0);

	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen);
	if (error)
		return (error);

	if (datlen > ifp->if_mtu) {
		/* don't leak the mbuf allocated by bpf_movein() */
		m_freem(m);
		return (EMSGSIZE);
	}

	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

	mtx_lock(&Giant);
	error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0);
	mtx_unlock(&Giant);
	/*
	 * The driver frees the mbuf.
	 */
	return (error);
}
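
/*
 * A userland sketch of injecting a packet through the path above (not
 * part of this file; the device path and interface name are
 * placeholders).  For DLT_EN10MB the buffer handed to write() starts
 * with the full Ethernet header; setting BIOCSHDRCMPLT makes the stack
 * honor the source address in that header instead of filling it in.
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <net/if.h>
 *	#include <net/bpf.h>
 *	#include <err.h>
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	void
 *	inject(const char *ifname, const u_char *frame, size_t len)
 *	{
 *		struct ifreq ifr;
 *		u_int one = 1;
 *		int fd;
 *
 *		if ((fd = open("/dev/bpf0", O_RDWR)) < 0)
 *			err(1, "open");
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
 *		if (ioctl(fd, BIOCSETIF, &ifr) < 0)
 *			err(1, "BIOCSETIF");
 *		if (ioctl(fd, BIOCSHDRCMPLT, &one) < 0)
 *			err(1, "BIOCSHDRCMPLT");
 *		(the payload past the link header must fit in the MTU)
 *		if (write(fd, frame, len) != (ssize_t)len)
 *			err(1, "write");
 *		close(fd);
 *	}
 */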

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
static void
reset_d(d)
	struct bpf_d *d;
{

	mtx_assert(&d->bd_mtx, MA_OWNED);
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = 0;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 *  BIOCGSEESENT	Get "see packets sent" flag
 *  BIOCSSEESENT	Set "see packets sent" flag
 */
/* ARGSUSED */
static	int
bpfioctl(dev, cmd, addr, flags, td)
	dev_t dev;
	u_long cmd;
	caddr_t addr;
	int flags;
	struct thread *td;
{
	struct bpf_d *d = dev->si_drv1;
	int error = 0;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			if (d->bd_bif == 0)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != 0)
			error = EINVAL;
		else {
			register u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == 0) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			mtx_lock(&Giant);
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			mtx_unlock(&Giant);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == 0)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			snprintf(ifr->ifr_name, sizeof(ifr->ifr_name),
			    "%s%d", ifp->if_name, ifp->if_unit);
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:
		*(u_int *)addr = d->bd_seesent;
		break;

	/*
	 * Set "see sent packets" flag
	 */
	case BIOCSSEESENT:
		d->bd_seesent = *(u_int *)addr;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
		 	u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}
	return (error);
}
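
/*
 * A typical userland configuration sequence for the ioctls handled
 * above (a sketch; the device path and interface name are
 * placeholders): bind the descriptor to an interface, enable immediate
 * mode so read() returns as soon as a packet matches, and set a read
 * timeout so a read() cannot block forever.
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/time.h>
 *	#include <net/if.h>
 *	#include <net/bpf.h>
 *	#include <err.h>
 *	#include <fcntl.h>
 *	#include <string.h>
 *
 *	int
 *	bpf_setup(const char *ifname)
 *	{
 *		struct timeval tv = { 1, 0 };
 *		struct ifreq ifr;
 *		u_int one = 1;
 *		int fd;
 *
 *		if ((fd = open("/dev/bpf0", O_RDONLY)) < 0)
 *			err(1, "open");
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
 *		if (ioctl(fd, BIOCSETIF, &ifr) < 0)
 *			err(1, "BIOCSETIF");
 *		if (ioctl(fd, BIOCIMMEDIATE, &one) < 0)
 *			err(1, "BIOCIMMEDIATE");
 *		if (ioctl(fd, BIOCSRTIMEOUT, &tv) < 0)
 *			err(1, "BIOCSRTIMEOUT");
 *		return (fd);
 *	}
 */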

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
static int
bpf_setf(d, fp)
	struct bpf_d *d;
	struct bpf_program *fp;
{
	struct bpf_insn *fcode, *old;
	u_int flen, size;

	old = d->bd_filter;
	if (fp->bf_insns == 0) {
		if (fp->bf_len != 0)
			return (EINVAL);
		BPFD_LOCK(d);
		d->bd_filter = 0;
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != 0)
			free((caddr_t)old, M_BPF);
		return (0);
	}
	flen = fp->bf_len;
	if (flen > BPF_MAXINSNS)
		return (EINVAL);

	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		BPFD_LOCK(d);
		d->bd_filter = fcode;
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != 0)
			free((caddr_t)old, M_BPF);

		return (0);
	}
	free((caddr_t)fcode, M_BPF);
	return (EINVAL);
}
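
/*
 * A userland sketch of building and installing a filter through
 * BIOCSETF/bpf_setf(), in the style of the examples in bpf(4).  This
 * program accepts only IP-over-Ethernet packets, returning (u_int)-1
 * (capture the whole packet) on a match and 0 (drop) otherwise; fd is
 * assumed to be an open bpf descriptor.
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <net/bpf.h>
 *	#include <netinet/in.h>
 *	#include <netinet/if_ether.h>
 *	#include <err.h>
 *
 *	void
 *	set_ip_filter(int fd)
 *	{
 *		(load the Ethernet type field at offset 12, compare
 *		 against ETHERTYPE_IP, then accept or reject)
 *		static struct bpf_insn insns[] = {
 *			BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),
 *			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ETHERTYPE_IP, 0, 1),
 *			BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
 *			BPF_STMT(BPF_RET+BPF_K, 0),
 *		};
 *		static struct bpf_program prog = {
 *			sizeof(insns) / sizeof(insns[0]),
 *			insns,
 *		};
 *
 *		if (ioctl(fd, BIOCSETF, &prog) < 0)
 *			err(1, "BIOCSETF");
 *	}
 */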

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(d, ifr)
	struct bpf_d *d;
	struct ifreq *ifr;
{
	struct bpf_if *bp;
	int error;
	struct ifnet *theywant;

	theywant = ifunit(ifr->ifr_name);
	if (theywant == 0)
		return ENXIO;

	/*
	 * Look through attached interfaces for the named one.
	 */
	mtx_lock(&bpf_mtx);
	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == 0 || ifp != theywant)
			continue;

		mtx_unlock(&bpf_mtx);
		/*
		 * We found the requested interface.
		 * If it's not up, return an error.
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to requested interface,
		 * just flush the buffer.
		 */
		if ((ifp->if_flags & IFF_UP) == 0)
			return (ENETDOWN);

		if (d->bd_sbuf == 0) {
			error = bpf_allocbufs(d);
			if (error != 0)
				return (error);
		}
		if (bp != d->bd_bif) {
			if (d->bd_bif)
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);

			bpf_attachd(d, bp);
		}
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		return (0);
	}
	mtx_unlock(&bpf_mtx);
	/* Not found. */
	return (ENXIO);
}

/*
 * Support for select() and poll() system calls
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
int
bpfpoll(dev, events, td)
	register dev_t dev;
	int events;
	struct thread *td;
{
	struct bpf_d *d;
	int revents;

	d = dev->si_drv1;
	if (d->bd_bif == NULL)
		return (ENXIO);

	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	if (events & (POLLIN | POLLRDNORM)) {
		/*
		 * An imitation of the FIONREAD ioctl code.
		 * XXX not quite.  An exact imitation:
		 *	if (d->bd_slen != 0 ||
		 *	    (d->bd_hbuf != NULL && d->bd_hlen != 0))
		 */
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
		    d->bd_slen != 0))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}
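
/*
 * A userland sketch of multiplexing a bpf descriptor with select()
 * (poll() behaves the same way).  Per the logic above, the descriptor
 * becomes readable when a hold buffer is ready, or when immediate mode
 * is set or the read timeout has fired and the store buffer is
 * non-empty; fd is assumed to be a configured bpf descriptor.
 *
 *	#include <sys/types.h>
 *	#include <sys/select.h>
 *	#include <err.h>
 *
 *	void
 *	wait_readable(int fd)
 *	{
 *		fd_set rset;
 *
 *		FD_ZERO(&rset);
 *		FD_SET(fd, &rset);
 *		if (select(fd + 1, &rset, NULL, NULL, NULL) < 0)
 *			err(1, "select");
 *		(fd is now readable; drain it with the read loop
 *		 sketched after bpfread() above)
 *	}
 */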

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(ifp, pkt, pktlen)
	struct ifnet *ifp;
	register u_char *pkt;
	register u_int pktlen;
{
	struct bpf_if *bp;
	register struct bpf_d *d;
	register u_int slen;

	bp = ifp->if_bpf;
	BPFIF_LOCK(bp);
	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen);
		if (slen != 0)
			catchpacket(d, pkt, pktlen, slen, bcopy);
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void
bpf_mcopy(src_arg, dst_arg, len)
	const void *src_arg;
	void *dst_arg;
	register size_t len;
{
	register const struct mbuf *m;
	register u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == 0)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, void *), dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
void
bpf_mtap(ifp, m)
	struct ifnet *ifp;
	struct mbuf *m;
{
	struct bpf_if *bp = ifp->if_bpf;
	struct bpf_d *d;
	u_int pktlen, slen;
	struct mbuf *m0;

	pktlen = 0;
	for (m0 = m; m0 != 0; m0 = m0->m_next)
		pktlen += m0->m_len;

	BPFIF_LOCK(bp);
	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
		if (slen != 0)
			catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcopy);
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}
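
/*
 * Drivers call these taps from their input (and, when sent traffic
 * should be seen, output) paths, testing the if_bpf cookie first so
 * untapped interfaces pay only a pointer comparison.  A sketch of the
 * conventional call site in an Ethernet driver's receive path (the
 * softc variable is a placeholder):
 *
 *	struct ifnet *ifp = &sc->arpcom.ac_if;
 *
 *	if (ifp->if_bpf)
 *		bpf_mtap(ifp, m);
 */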

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer, waking up any sleeping listener once the buffer
 * fills.  "cpfn" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(d, pkt, pktlen, snaplen, cpfn)
	register struct bpf_d *d;
	register u_char *pkt;
	register u_int pktlen, snaplen;
	register void (*cpfn)(const void *, void *, size_t);
{
	register struct bpf_hdr *hp;
	register int totlen, curlen;
	register int hdrlen = d->bd_bif->bif_hdrlen;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == 0) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		bpf_wakeup(d);
		curlen = 0;
	}
	else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		bpf_wakeup(d);

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	microtime(&hp->bh_tstamp);
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;
}

/*
 * Allocate the packet buffers of a descriptor; the free and store
 * buffers are set up and the hold slot is left empty.
 */
static int
bpf_allocbufs(d)
	register struct bpf_d *d;
{
	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
	if (d->bd_fbuf == 0)
		return (ENOBUFS);

	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
	if (d->bd_sbuf == 0) {
		free(d->bd_fbuf, M_BPF);
		return (ENOBUFS);
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	return (0);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(d)
	register struct bpf_d *d;
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
	 * free.
	 */
	if (d->bd_sbuf != 0) {
		free(d->bd_sbuf, M_BPF);
		if (d->bd_hbuf != 0)
			free(d->bd_hbuf, M_BPF);
		if (d->bd_fbuf != 0)
			free(d->bd_fbuf, M_BPF);
	}
	if (d->bd_filter)
		free((caddr_t)d->bd_filter, M_BPF);
	mtx_destroy(&d->bd_mtx);
}

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
 */
void
bpfattach(ifp, dlt, hdrlen)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
{
	struct bpf_if *bp;
	bp = (struct bpf_if *)malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
	if (bp == 0)
		panic("bpfattach");

	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	mtx_init(&bp->bif_mtx, "bpf interface lock", MTX_DEF);

	mtx_lock(&bpf_mtx);
	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;
	mtx_unlock(&bpf_mtx);

	bp->bif_ifp->if_bpf = 0;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
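	/*
	 * For example, on Ethernet (hdrlen = 14) with the usual 18-byte
	 * struct bpf_hdr of an ILP32 platform, BPF_WORDALIGN(14 + 18) = 32,
	 * so bif_hdrlen becomes 18: each record then carries the link
	 * header at offset 18 and the network header at offset 32, a
	 * longword boundary.
	 */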

	if (bootverbose)
		printf("bpf: %s%d attached\n", ifp->if_name, ifp->if_unit);
}

/*
 * Detach bpf from an interface.  This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL.  Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(ifp)
	struct ifnet *ifp;
{
	struct bpf_if	*bp, *bp_prev;
	struct bpf_d	*d;

	mtx_lock(&bpf_mtx);

	/* Locate BPF interface information */
	bp_prev = NULL;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (ifp == bp->bif_ifp)
			break;
		bp_prev = bp;
	}

	/* Interface wasn't attached; bp is NULL when the loop found no match */
	if (bp == NULL) {
		mtx_unlock(&bpf_mtx);
		printf("bpfdetach: %s%d was not attached\n", ifp->if_name,
		    ifp->if_unit);
		return;
	}

	if (bp_prev) {
		bp_prev->bif_next = bp->bif_next;
	} else {
		bpf_iflist = bp->bif_next;
	}

	while ((d = bp->bif_dlist) != NULL) {
		bpf_detachd(d);
		BPFD_LOCK(d);
		bpf_wakeup(d);
		BPFD_UNLOCK(d);
	}

	mtx_destroy(&bp->bif_mtx);
	free(bp, M_BPF);

	mtx_unlock(&bpf_mtx);
}

static void bpf_drvinit(void *unused);

static void bpf_clone(void *arg, char *name, int namelen, dev_t *dev);

static void
bpf_clone(arg, name, namelen, dev)
	void *arg;
	char *name;
	int namelen;
	dev_t *dev;
{
	int u;

	if (*dev != NODEV)
		return;
	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
		return;
	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", u);
	(*dev)->si_flags |= SI_CHEAPCLONE;
	return;
}

static void
bpf_drvinit(unused)
	void *unused;
{

	mtx_init(&bpf_mtx, "bpf global lock", MTX_DEF);
	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
	cdevsw_add(&bpf_cdevsw);
}

SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL)

#else /* !DEV_BPF && !NETGRAPH_BPF */
/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */

void
bpf_tap(ifp, pkt, pktlen)
	struct ifnet *ifp;
	register u_char *pkt;
	register u_int pktlen;
{
}

void
bpf_mtap(ifp, m)
	struct ifnet *ifp;
	struct mbuf *m;
{
}

void
bpfattach(ifp, dlt, hdrlen)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
{
}

void
bpfdetach(ifp)
	struct ifnet *ifp;
{
}

u_int
bpf_filter(pc, p, wirelen, buflen)
	register const struct bpf_insn *pc;
	register u_char *p;
	u_int wirelen;
	register u_int buflen;
{
	return -1;	/* "no filter" behaviour */
}

int
bpf_validate(f, len)
	const struct bpf_insn *f;
	int len;
{
	return 0;		/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */