bpf.c revision 87955
1/*
2 * Copyright (c) 1990, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from the Stanford/CMU enet packet filter,
6 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
7 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
8 * Berkeley Laboratory.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
39 *
40 * $FreeBSD: head/sys/net/bpf.c 87955 2001-12-14 22:17:54Z jdp $
41 */
42
43#include "opt_bpf.h"
44#include "opt_netgraph.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/conf.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/time.h>
52#include <sys/proc.h>
53#include <sys/signalvar.h>
54#include <sys/filio.h>
55#include <sys/sockio.h>
56#include <sys/ttycom.h>
57#include <sys/filedesc.h>
58
59#include <sys/poll.h>
60
61#include <sys/socket.h>
62#include <sys/vnode.h>
63
64#include <net/if.h>
65#include <net/bpf.h>
66#include <net/bpfdesc.h>
67
68#include <netinet/in.h>
69#include <netinet/if_ether.h>
70#include <sys/kernel.h>
71#include <sys/sysctl.h>
72
73static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
74
75#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
76
77#define PRINET  26			/* interruptible */
78
79/*
80 * The default read buffer size is patchable.
81 */
82static int bpf_bufsize = 4096;
83SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW,
84	&bpf_bufsize, 0, "");
85static int bpf_maxbufsize = BPF_MAXBUFSIZE;
86SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW,
87	&bpf_maxbufsize, 0, "");
88
89/*
90 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
91 */
92static struct bpf_if	*bpf_iflist;
93static struct mtx	bpf_mtx;		/* bpf global lock */
94
95static int	bpf_allocbufs __P((struct bpf_d *));
96static void	bpf_attachd __P((struct bpf_d *d, struct bpf_if *bp));
97static void	bpf_detachd __P((struct bpf_d *d));
98static void	bpf_freed __P((struct bpf_d *));
99static void	bpf_mcopy __P((const void *, void *, size_t));
100static int	bpf_movein __P((struct uio *, int,
101		    struct mbuf **, struct sockaddr *, int *));
102static int	bpf_setif __P((struct bpf_d *, struct ifreq *));
103static void	bpf_timed_out __P((void *));
104static __inline void
105		bpf_wakeup __P((struct bpf_d *));
106static void	catchpacket __P((struct bpf_d *, u_char *, u_int,
107		    u_int, void (*)(const void *, void *, size_t)));
108static void	reset_d __P((struct bpf_d *));
109static int	 bpf_setf __P((struct bpf_d *, struct bpf_program *));
110
111static	d_open_t	bpfopen;
112static	d_close_t	bpfclose;
113static	d_read_t	bpfread;
114static	d_write_t	bpfwrite;
115static	d_ioctl_t	bpfioctl;
116static	d_poll_t	bpfpoll;
117
118#define CDEV_MAJOR 23
119static struct cdevsw bpf_cdevsw = {
120	/* open */	bpfopen,
121	/* close */	bpfclose,
122	/* read */	bpfread,
123	/* write */	bpfwrite,
124	/* ioctl */	bpfioctl,
125	/* poll */	bpfpoll,
126	/* mmap */	nommap,
127	/* strategy */	nostrategy,
128	/* name */	"bpf",
129	/* maj */	CDEV_MAJOR,
130	/* dump */	nodump,
131	/* psize */	nopsize,
132	/* flags */	0,
133};
134
135
136static int
137bpf_movein(uio, linktype, mp, sockp, datlen)
138	register struct uio *uio;
139	int linktype, *datlen;
140	register struct mbuf **mp;
141	register struct sockaddr *sockp;
142{
143	struct mbuf *m;
144	int error;
145	int len;
146	int hlen;
147
148	/*
149	 * Build a sockaddr based on the data link layer type.
150	 * We do this at this level because the ethernet header
151	 * is copied directly into the data field of the sockaddr.
152	 * In the case of SLIP, there is no header and the packet
153	 * is forwarded as is.
154	 * Also, we are careful to leave room at the front of the mbuf
155	 * for the link level header.
156	 */
157	switch (linktype) {
158
159	case DLT_SLIP:
160		sockp->sa_family = AF_INET;
161		hlen = 0;
162		break;
163
164	case DLT_EN10MB:
165		sockp->sa_family = AF_UNSPEC;
166		/* XXX Would MAXLINKHDR be better? */
167		hlen = sizeof(struct ether_header);
168		break;
169
170	case DLT_FDDI:
171		sockp->sa_family = AF_IMPLINK;
172		hlen = 0;
173		break;
174
175	case DLT_RAW:
176	case DLT_NULL:
177		sockp->sa_family = AF_UNSPEC;
178		hlen = 0;
179		break;
180
181	case DLT_ATM_RFC1483:
182		/*
183		 * en atm driver requires 4-byte atm pseudo header.
184		 * though it isn't standard, vpi:vci needs to be
185		 * specified anyway.
186		 */
187		sockp->sa_family = AF_UNSPEC;
188		hlen = 12; 	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
189		break;
190
191	case DLT_PPP:
192		sockp->sa_family = AF_UNSPEC;
193		hlen = 4;	/* This should match PPP_HDRLEN */
194		break;
195
196	default:
197		return (EIO);
198	}
199
200	len = uio->uio_resid;
201	*datlen = len - hlen;
202	if ((unsigned)len > MCLBYTES)
203		return (EIO);
204
205	MGETHDR(m, M_TRYWAIT, MT_DATA);
206	if (m == 0)
207		return (ENOBUFS);
208	if (len > MHLEN) {
209		MCLGET(m, M_TRYWAIT);
210		if ((m->m_flags & M_EXT) == 0) {
211			error = ENOBUFS;
212			goto bad;
213		}
214	}
215	m->m_pkthdr.len = m->m_len = len;
216	m->m_pkthdr.rcvif = NULL;
217	*mp = m;
218	/*
219	 * Make room for link header.
220	 */
221	if (hlen != 0) {
222		m->m_pkthdr.len -= hlen;
223		m->m_len -= hlen;
224#if BSD >= 199103
225		m->m_data += hlen; /* XXX */
226#else
227		m->m_off += hlen;
228#endif
229		error = uiomove((caddr_t)sockp->sa_data, hlen, uio);
230		if (error)
231			goto bad;
232	}
233	error = uiomove(mtod(m, caddr_t), len - hlen, uio);
234	if (!error)
235		return (0);
236 bad:
237	m_freem(m);
238	return (error);
239}
240
241/*
242 * Attach file to the bpf interface, i.e. make d listen on bp.
243 */
244static void
245bpf_attachd(d, bp)
246	struct bpf_d *d;
247	struct bpf_if *bp;
248{
249	/*
250	 * Point d at bp, and add d to the interface's list of listeners.
251	 * Finally, point the driver's bpf cookie at the interface so
252	 * it will divert packets to bpf.
253	 */
254	BPFIF_LOCK(bp);
255	d->bd_bif = bp;
256	d->bd_next = bp->bif_dlist;
257	bp->bif_dlist = d;
258
259	bp->bif_ifp->if_bpf = bp;
260	BPFIF_UNLOCK(bp);
261}
262
263/*
264 * Detach a file from its interface.
265 */
266static void
267bpf_detachd(d)
268	struct bpf_d *d;
269{
270	int error;
271	struct bpf_d **p;
272	struct bpf_if *bp;
273
274	bp = d->bd_bif;
275	/*
276	 * Check if this descriptor had requested promiscuous mode.
277	 * If so, turn it off.
278	 */
279	if (d->bd_promisc) {
280		d->bd_promisc = 0;
281		error = ifpromisc(bp->bif_ifp, 0);
282		if (error != 0 && error != ENXIO) {
283			/*
284			 * ENXIO can happen if a pccard is unplugged
285			 * Something is really wrong if we were able to put
286			 * the driver into promiscuous mode, but can't
287			 * take it out.
288			 */
289			printf("%s%d: ifpromisc failed %d\n",
290			    bp->bif_ifp->if_name, bp->bif_ifp->if_unit, error);
291		}
292	}
293	/* Remove d from the interface's descriptor list. */
294	BPFIF_LOCK(bp);
295	p = &bp->bif_dlist;
296	while (*p != d) {
297		p = &(*p)->bd_next;
298		if (*p == 0)
299			panic("bpf_detachd: descriptor not in list");
300	}
301	*p = (*p)->bd_next;
302	if (bp->bif_dlist == 0)
303		/*
304		 * Let the driver know that there are no more listeners.
305		 */
306		d->bd_bif->bif_ifp->if_bpf = 0;
307	BPFIF_UNLOCK(bp);
308	d->bd_bif = 0;
309}
310
311/*
312 * Open ethernet device.  Returns ENXIO for illegal minor device number,
313 * EBUSY if file is open by another process.
314 */
315/* ARGSUSED */
316static	int
317bpfopen(dev, flags, fmt, td)
318	dev_t dev;
319	int flags;
320	int fmt;
321	struct thread *td;
322{
323	struct bpf_d *d;
324
325	mtx_lock(&bpf_mtx);
326	d = dev->si_drv1;
327	/*
328	 * Each minor can be opened by only one process.  If the requested
329	 * minor is in use, return EBUSY.
330	 */
331	if (d) {
332		mtx_unlock(&bpf_mtx);
333		return (EBUSY);
334	}
335	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
336	mtx_unlock(&bpf_mtx);
337
338	if ((dev->si_flags & SI_NAMED) == 0)
339		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
340		    "bpf%d", dev2unit(dev));
341	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
342	dev->si_drv1 = d;
343	d->bd_bufsize = bpf_bufsize;
344	d->bd_sig = SIGIO;
345	d->bd_seesent = 1;
346	mtx_init(&d->bd_mtx, devtoname(dev), MTX_DEF);
347	callout_init(&d->bd_callout, 1);
348
349	return (0);
350}
351
352/*
353 * Close the descriptor by detaching it from its interface,
354 * deallocating its buffers, and marking it free.
355 */
356/* ARGSUSED */
357static	int
358bpfclose(dev, flags, fmt, td)
359	dev_t dev;
360	int flags;
361	int fmt;
362	struct thread *td;
363{
364	struct bpf_d *d = dev->si_drv1;
365
366	BPFD_LOCK(d);
367	if (d->bd_state == BPF_WAITING)
368		callout_stop(&d->bd_callout);
369	d->bd_state = BPF_IDLE;
370	BPFD_UNLOCK(d);
371	funsetown(d->bd_sigio);
372	mtx_lock(&bpf_mtx);
373	if (d->bd_bif)
374		bpf_detachd(d);
375	mtx_unlock(&bpf_mtx);
376	bpf_freed(d);
377	dev->si_drv1 = 0;
378	FREE(d, M_BPF);
379
380	return (0);
381}
382
383
384/*
385 * Rotate the packet buffers in descriptor d.  Move the store buffer
386 * into the hold slot, and the free buffer into the store slot.
387 * Zero the length of the new store buffer.
388 */
389#define ROTATE_BUFFERS(d) \
390	(d)->bd_hbuf = (d)->bd_sbuf; \
391	(d)->bd_hlen = (d)->bd_slen; \
392	(d)->bd_sbuf = (d)->bd_fbuf; \
393	(d)->bd_slen = 0; \
394	(d)->bd_fbuf = 0;
395/*
396 *  bpfread - read next chunk of packets from buffers
397 */
398static	int
399bpfread(dev, uio, ioflag)
400	dev_t dev;
401	register struct uio *uio;
402	int ioflag;
403{
404	struct bpf_d *d = dev->si_drv1;
405	int timed_out;
406	int error;
407
408	/*
409	 * Restrict application to use a buffer the same size as
410	 * as kernel buffers.
411	 */
412	if (uio->uio_resid != d->bd_bufsize)
413		return (EINVAL);
414
415	BPFD_LOCK(d);
416	if (d->bd_state == BPF_WAITING)
417		callout_stop(&d->bd_callout);
418	timed_out = (d->bd_state == BPF_TIMED_OUT);
419	d->bd_state = BPF_IDLE;
420	/*
421	 * If the hold buffer is empty, then do a timed sleep, which
422	 * ends when the timeout expires or when enough packets
423	 * have arrived to fill the store buffer.
424	 */
425	while (d->bd_hbuf == 0) {
426		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
427			/*
428			 * A packet(s) either arrived since the previous
429			 * read or arrived while we were asleep.
430			 * Rotate the buffers and return what's here.
431			 */
432			ROTATE_BUFFERS(d);
433			break;
434		}
435
436		/*
437		 * No data is available, check to see if the bpf device
438		 * is still pointed at a real interface.  If not, return
439		 * ENXIO so that the userland process knows to rebind
440		 * it before using it again.
441		 */
442		if (d->bd_bif == NULL) {
443			BPFD_UNLOCK(d);
444			return (ENXIO);
445		}
446
447		if (ioflag & IO_NDELAY) {
448			BPFD_UNLOCK(d);
449			return (EWOULDBLOCK);
450		}
451		error = msleep((caddr_t)d, &d->bd_mtx, PRINET|PCATCH,
452		     "bpf", d->bd_rtout);
453		if (error == EINTR || error == ERESTART) {
454			BPFD_UNLOCK(d);
455			return (error);
456		}
457		if (error == EWOULDBLOCK) {
458			/*
459			 * On a timeout, return what's in the buffer,
460			 * which may be nothing.  If there is something
461			 * in the store buffer, we can rotate the buffers.
462			 */
463			if (d->bd_hbuf)
464				/*
465				 * We filled up the buffer in between
466				 * getting the timeout and arriving
467				 * here, so we don't need to rotate.
468				 */
469				break;
470
471			if (d->bd_slen == 0) {
472				BPFD_UNLOCK(d);
473				return (0);
474			}
475			ROTATE_BUFFERS(d);
476			break;
477		}
478	}
479	/*
480	 * At this point, we know we have something in the hold slot.
481	 */
482	BPFD_UNLOCK(d);
483
484	/*
485	 * Move data from hold buffer into user space.
486	 * We know the entire buffer is transferred since
487	 * we checked above that the read buffer is bpf_bufsize bytes.
488	 */
489	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
490
491	BPFD_LOCK(d);
492	d->bd_fbuf = d->bd_hbuf;
493	d->bd_hbuf = 0;
494	d->bd_hlen = 0;
495	BPFD_UNLOCK(d);
496
497	return (error);
498}
499
500
501/*
502 * If there are processes sleeping on this descriptor, wake them up.
503 */
504static __inline void
505bpf_wakeup(d)
506	register struct bpf_d *d;
507{
508	if (d->bd_state == BPF_WAITING) {
509		callout_stop(&d->bd_callout);
510		d->bd_state = BPF_IDLE;
511	}
512	wakeup((caddr_t)d);
513	if (d->bd_async && d->bd_sig && d->bd_sigio)
514		pgsigio(d->bd_sigio, d->bd_sig, 0);
515
516	selwakeup(&d->bd_sel);
517	/* XXX */
518	d->bd_sel.si_pid = 0;
519}
520
521static void
522bpf_timed_out(arg)
523	void *arg;
524{
525	struct bpf_d *d = (struct bpf_d *)arg;
526
527	BPFD_LOCK(d);
528	if (d->bd_state == BPF_WAITING) {
529		d->bd_state = BPF_TIMED_OUT;
530		if (d->bd_slen != 0)
531			bpf_wakeup(d);
532	}
533	BPFD_UNLOCK(d);
534}
535
536static	int
537bpfwrite(dev, uio, ioflag)
538	dev_t dev;
539	struct uio *uio;
540	int ioflag;
541{
542	struct bpf_d *d = dev->si_drv1;
543	struct ifnet *ifp;
544	struct mbuf *m;
545	int error;
546	static struct sockaddr dst;
547	int datlen;
548
549	if (d->bd_bif == 0)
550		return (ENXIO);
551
552	ifp = d->bd_bif->bif_ifp;
553
554	if (uio->uio_resid == 0)
555		return (0);
556
557	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen);
558	if (error)
559		return (error);
560
561	if (datlen > ifp->if_mtu)
562		return (EMSGSIZE);
563
564	if (d->bd_hdrcmplt)
565		dst.sa_family = pseudo_AF_HDRCMPLT;
566
567	mtx_lock(&Giant);
568	error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0);
569	mtx_unlock(&Giant);
570	/*
571	 * The driver frees the mbuf.
572	 */
573	return (error);
574}
575
576/*
577 * Reset a descriptor by flushing its packet buffer and clearing the
578 * receive and drop counts.
579 */
580static void
581reset_d(d)
582	struct bpf_d *d;
583{
584
585	mtx_assert(&d->bd_mtx, MA_OWNED);
586	if (d->bd_hbuf) {
587		/* Free the hold buffer. */
588		d->bd_fbuf = d->bd_hbuf;
589		d->bd_hbuf = 0;
590	}
591	d->bd_slen = 0;
592	d->bd_hlen = 0;
593	d->bd_rcount = 0;
594	d->bd_dcount = 0;
595}
596
597/*
598 *  FIONREAD		Check for read packet available.
599 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
600 *  BIOCGBLEN		Get buffer len [for read()].
601 *  BIOCSETF		Set ethernet read filter.
602 *  BIOCFLUSH		Flush read packet buffer.
603 *  BIOCPROMISC		Put interface into promiscuous mode.
604 *  BIOCGDLT		Get link layer type.
605 *  BIOCGETIF		Get interface name.
606 *  BIOCSETIF		Set interface.
607 *  BIOCSRTIMEOUT	Set read timeout.
608 *  BIOCGRTIMEOUT	Get read timeout.
609 *  BIOCGSTATS		Get packet stats.
610 *  BIOCIMMEDIATE	Set immediate mode.
611 *  BIOCVERSION		Get filter language version.
612 *  BIOCGHDRCMPLT	Get "header already complete" flag
613 *  BIOCSHDRCMPLT	Set "header already complete" flag
614 *  BIOCGSEESENT	Get "see packets sent" flag
615 *  BIOCSSEESENT	Set "see packets sent" flag
616 */
617/* ARGSUSED */
618static	int
619bpfioctl(dev, cmd, addr, flags, td)
620	dev_t dev;
621	u_long cmd;
622	caddr_t addr;
623	int flags;
624	struct thread *td;
625{
626	struct bpf_d *d = dev->si_drv1;
627	int error = 0;
628
629	BPFD_LOCK(d);
630	if (d->bd_state == BPF_WAITING)
631		callout_stop(&d->bd_callout);
632	d->bd_state = BPF_IDLE;
633	BPFD_UNLOCK(d);
634
635	switch (cmd) {
636
637	default:
638		error = EINVAL;
639		break;
640
641	/*
642	 * Check for read packet available.
643	 */
644	case FIONREAD:
645		{
646			int n;
647
648			BPFD_LOCK(d);
649			n = d->bd_slen;
650			if (d->bd_hbuf)
651				n += d->bd_hlen;
652			BPFD_UNLOCK(d);
653
654			*(int *)addr = n;
655			break;
656		}
657
658	case SIOCGIFADDR:
659		{
660			struct ifnet *ifp;
661
662			if (d->bd_bif == 0)
663				error = EINVAL;
664			else {
665				ifp = d->bd_bif->bif_ifp;
666				error = (*ifp->if_ioctl)(ifp, cmd, addr);
667			}
668			break;
669		}
670
671	/*
672	 * Get buffer len [for read()].
673	 */
674	case BIOCGBLEN:
675		*(u_int *)addr = d->bd_bufsize;
676		break;
677
678	/*
679	 * Set buffer length.
680	 */
681	case BIOCSBLEN:
682		if (d->bd_bif != 0)
683			error = EINVAL;
684		else {
685			register u_int size = *(u_int *)addr;
686
687			if (size > bpf_maxbufsize)
688				*(u_int *)addr = size = bpf_maxbufsize;
689			else if (size < BPF_MINBUFSIZE)
690				*(u_int *)addr = size = BPF_MINBUFSIZE;
691			d->bd_bufsize = size;
692		}
693		break;
694
695	/*
696	 * Set link layer read filter.
697	 */
698	case BIOCSETF:
699		error = bpf_setf(d, (struct bpf_program *)addr);
700		break;
701
702	/*
703	 * Flush read packet buffer.
704	 */
705	case BIOCFLUSH:
706		BPFD_LOCK(d);
707		reset_d(d);
708		BPFD_UNLOCK(d);
709		break;
710
711	/*
712	 * Put interface into promiscuous mode.
713	 */
714	case BIOCPROMISC:
715		if (d->bd_bif == 0) {
716			/*
717			 * No interface attached yet.
718			 */
719			error = EINVAL;
720			break;
721		}
722		if (d->bd_promisc == 0) {
723			mtx_lock(&Giant);
724			error = ifpromisc(d->bd_bif->bif_ifp, 1);
725			mtx_unlock(&Giant);
726			if (error == 0)
727				d->bd_promisc = 1;
728		}
729		break;
730
731	/*
732	 * Get device parameters.
733	 */
734	case BIOCGDLT:
735		if (d->bd_bif == 0)
736			error = EINVAL;
737		else
738			*(u_int *)addr = d->bd_bif->bif_dlt;
739		break;
740
741	/*
742	 * Get interface name.
743	 */
744	case BIOCGETIF:
745		if (d->bd_bif == 0)
746			error = EINVAL;
747		else {
748			struct ifnet *const ifp = d->bd_bif->bif_ifp;
749			struct ifreq *const ifr = (struct ifreq *)addr;
750
751			snprintf(ifr->ifr_name, sizeof(ifr->ifr_name),
752			    "%s%d", ifp->if_name, ifp->if_unit);
753		}
754		break;
755
756	/*
757	 * Set interface.
758	 */
759	case BIOCSETIF:
760		error = bpf_setif(d, (struct ifreq *)addr);
761		break;
762
763	/*
764	 * Set read timeout.
765	 */
766	case BIOCSRTIMEOUT:
767		{
768			struct timeval *tv = (struct timeval *)addr;
769
770			/*
771			 * Subtract 1 tick from tvtohz() since this isn't
772			 * a one-shot timer.
773			 */
774			if ((error = itimerfix(tv)) == 0)
775				d->bd_rtout = tvtohz(tv) - 1;
776			break;
777		}
778
779	/*
780	 * Get read timeout.
781	 */
782	case BIOCGRTIMEOUT:
783		{
784			struct timeval *tv = (struct timeval *)addr;
785
786			tv->tv_sec = d->bd_rtout / hz;
787			tv->tv_usec = (d->bd_rtout % hz) * tick;
788			break;
789		}
790
791	/*
792	 * Get packet stats.
793	 */
794	case BIOCGSTATS:
795		{
796			struct bpf_stat *bs = (struct bpf_stat *)addr;
797
798			bs->bs_recv = d->bd_rcount;
799			bs->bs_drop = d->bd_dcount;
800			break;
801		}
802
803	/*
804	 * Set immediate mode.
805	 */
806	case BIOCIMMEDIATE:
807		d->bd_immediate = *(u_int *)addr;
808		break;
809
810	case BIOCVERSION:
811		{
812			struct bpf_version *bv = (struct bpf_version *)addr;
813
814			bv->bv_major = BPF_MAJOR_VERSION;
815			bv->bv_minor = BPF_MINOR_VERSION;
816			break;
817		}
818
819	/*
820	 * Get "header already complete" flag
821	 */
822	case BIOCGHDRCMPLT:
823		*(u_int *)addr = d->bd_hdrcmplt;
824		break;
825
826	/*
827	 * Set "header already complete" flag
828	 */
829	case BIOCSHDRCMPLT:
830		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
831		break;
832
833	/*
834	 * Get "see sent packets" flag
835	 */
836	case BIOCGSEESENT:
837		*(u_int *)addr = d->bd_seesent;
838		break;
839
840	/*
841	 * Set "see sent packets" flag
842	 */
843	case BIOCSSEESENT:
844		d->bd_seesent = *(u_int *)addr;
845		break;
846
847	case FIONBIO:		/* Non-blocking I/O */
848		break;
849
850	case FIOASYNC:		/* Send signal on receive packets */
851		d->bd_async = *(int *)addr;
852		break;
853
854	case FIOSETOWN:
855		error = fsetown(*(int *)addr, &d->bd_sigio);
856		break;
857
858	case FIOGETOWN:
859		*(int *)addr = fgetown(d->bd_sigio);
860		break;
861
862	/* This is deprecated, FIOSETOWN should be used instead. */
863	case TIOCSPGRP:
864		error = fsetown(-(*(int *)addr), &d->bd_sigio);
865		break;
866
867	/* This is deprecated, FIOGETOWN should be used instead. */
868	case TIOCGPGRP:
869		*(int *)addr = -fgetown(d->bd_sigio);
870		break;
871
872	case BIOCSRSIG:		/* Set receive signal */
873		{
874		 	u_int sig;
875
876			sig = *(u_int *)addr;
877
878			if (sig >= NSIG)
879				error = EINVAL;
880			else
881				d->bd_sig = sig;
882			break;
883		}
884	case BIOCGRSIG:
885		*(u_int *)addr = d->bd_sig;
886		break;
887	}
888	return (error);
889}
890
891/*
892 * Set d's packet filter program to fp.  If this file already has a filter,
893 * free it and replace it.  Returns EINVAL for bogus requests.
894 */
895static int
896bpf_setf(d, fp)
897	struct bpf_d *d;
898	struct bpf_program *fp;
899{
900	struct bpf_insn *fcode, *old;
901	u_int flen, size;
902
903	old = d->bd_filter;
904	if (fp->bf_insns == 0) {
905		if (fp->bf_len != 0)
906			return (EINVAL);
907		BPFD_LOCK(d);
908		d->bd_filter = 0;
909		reset_d(d);
910		BPFD_UNLOCK(d);
911		if (old != 0)
912			free((caddr_t)old, M_BPF);
913		return (0);
914	}
915	flen = fp->bf_len;
916	if (flen > BPF_MAXINSNS)
917		return (EINVAL);
918
919	size = flen * sizeof(*fp->bf_insns);
920	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
921	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
922	    bpf_validate(fcode, (int)flen)) {
923		BPFD_LOCK(d);
924		d->bd_filter = fcode;
925		reset_d(d);
926		BPFD_UNLOCK(d);
927		if (old != 0)
928			free((caddr_t)old, M_BPF);
929
930		return (0);
931	}
932	free((caddr_t)fcode, M_BPF);
933	return (EINVAL);
934}
935
936/*
937 * Detach a file from its current interface (if attached at all) and attach
938 * to the interface indicated by the name stored in ifr.
939 * Return an errno or 0.
940 */
941static int
942bpf_setif(d, ifr)
943	struct bpf_d *d;
944	struct ifreq *ifr;
945{
946	struct bpf_if *bp;
947	int error;
948	struct ifnet *theywant;
949
950	theywant = ifunit(ifr->ifr_name);
951	if (theywant == 0)
952		return ENXIO;
953
954	/*
955	 * Look through attached interfaces for the named one.
956	 */
957	mtx_lock(&bpf_mtx);
958	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
959		struct ifnet *ifp = bp->bif_ifp;
960
961		if (ifp == 0 || ifp != theywant)
962			continue;
963
964		mtx_unlock(&bpf_mtx);
965		/*
966		 * We found the requested interface.
967		 * If it's not up, return an error.
968		 * Allocate the packet buffers if we need to.
969		 * If we're already attached to requested interface,
970		 * just flush the buffer.
971		 */
972		if ((ifp->if_flags & IFF_UP) == 0)
973			return (ENETDOWN);
974
975		if (d->bd_sbuf == 0) {
976			error = bpf_allocbufs(d);
977			if (error != 0)
978				return (error);
979		}
980		if (bp != d->bd_bif) {
981			if (d->bd_bif)
982				/*
983				 * Detach if attached to something else.
984				 */
985				bpf_detachd(d);
986
987			bpf_attachd(d, bp);
988		}
989		BPFD_LOCK(d);
990		reset_d(d);
991		BPFD_UNLOCK(d);
992		return (0);
993	}
994	mtx_unlock(&bpf_mtx);
995	/* Not found. */
996	return (ENXIO);
997}
998
999/*
1000 * Support for select() and poll() system calls
1001 *
1002 * Return true iff the specific operation will not block indefinitely.
1003 * Otherwise, return false but make a note that a selwakeup() must be done.
1004 */
1005int
1006bpfpoll(dev, events, td)
1007	register dev_t dev;
1008	int events;
1009	struct thread *td;
1010{
1011	struct bpf_d *d;
1012	int revents;
1013
1014	d = dev->si_drv1;
1015	if (d->bd_bif == NULL)
1016		return (ENXIO);
1017
1018	revents = events & (POLLOUT | POLLWRNORM);
1019	BPFD_LOCK(d);
1020	if (events & (POLLIN | POLLRDNORM)) {
1021		/*
1022		 * An imitation of the FIONREAD ioctl code.
1023		 * XXX not quite.  An exact imitation:
1024		 *	if (d->b_slen != 0 ||
1025		 *	    (d->bd_hbuf != NULL && d->bd_hlen != 0)
1026		 */
1027		if (d->bd_hlen != 0 ||
1028		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
1029		    d->bd_slen != 0))
1030			revents |= events & (POLLIN | POLLRDNORM);
1031		else {
1032			selrecord(td, &d->bd_sel);
1033			/* Start the read timeout if necessary. */
1034			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1035				callout_reset(&d->bd_callout, d->bd_rtout,
1036				    bpf_timed_out, d);
1037				d->bd_state = BPF_WAITING;
1038			}
1039		}
1040	}
1041	BPFD_UNLOCK(d);
1042	return (revents);
1043}
1044
1045/*
1046 * Incoming linkage from device drivers.  Process the packet pkt, of length
1047 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1048 * by each process' filter, and if accepted, stashed into the corresponding
1049 * buffer.
1050 */
1051void
1052bpf_tap(ifp, pkt, pktlen)
1053	struct ifnet *ifp;
1054	register u_char *pkt;
1055	register u_int pktlen;
1056{
1057	struct bpf_if *bp;
1058	register struct bpf_d *d;
1059	register u_int slen;
1060
1061	bp = ifp->if_bpf;
1062	BPFIF_LOCK(bp);
1063	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1064		BPFD_LOCK(d);
1065		++d->bd_rcount;
1066		slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen);
1067		if (slen != 0)
1068			catchpacket(d, pkt, pktlen, slen, bcopy);
1069		BPFD_UNLOCK(d);
1070	}
1071	BPFIF_UNLOCK(bp);
1072}
1073
1074/*
1075 * Copy data from an mbuf chain into a buffer.  This code is derived
1076 * from m_copydata in sys/uipc_mbuf.c.
1077 */
1078static void
1079bpf_mcopy(src_arg, dst_arg, len)
1080	const void *src_arg;
1081	void *dst_arg;
1082	register size_t len;
1083{
1084	register const struct mbuf *m;
1085	register u_int count;
1086	u_char *dst;
1087
1088	m = src_arg;
1089	dst = dst_arg;
1090	while (len > 0) {
1091		if (m == 0)
1092			panic("bpf_mcopy");
1093		count = min(m->m_len, len);
1094		bcopy(mtod(m, void *), dst, count);
1095		m = m->m_next;
1096		dst += count;
1097		len -= count;
1098	}
1099}
1100
1101/*
1102 * Incoming linkage from device drivers, when packet is in an mbuf chain.
1103 */
1104void
1105bpf_mtap(ifp, m)
1106	struct ifnet *ifp;
1107	struct mbuf *m;
1108{
1109	struct bpf_if *bp = ifp->if_bpf;
1110	struct bpf_d *d;
1111	u_int pktlen, slen;
1112	struct mbuf *m0;
1113
1114	pktlen = 0;
1115	for (m0 = m; m0 != 0; m0 = m0->m_next)
1116		pktlen += m0->m_len;
1117
1118	BPFIF_LOCK(bp);
1119	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1120		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
1121			continue;
1122		BPFD_LOCK(d);
1123		++d->bd_rcount;
1124		slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
1125		if (slen != 0)
1126			catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcopy);
1127		BPFD_UNLOCK(d);
1128	}
1129	BPFIF_UNLOCK(bp);
1130}
1131
1132/*
1133 * Move the packet data from interface memory (pkt) into the
1134 * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
1135 * otherwise 0.  "copy" is the routine called to do the actual data
1136 * transfer.  bcopy is passed in to copy contiguous chunks, while
1137 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
1138 * pkt is really an mbuf.
1139 */
1140static void
1141catchpacket(d, pkt, pktlen, snaplen, cpfn)
1142	register struct bpf_d *d;
1143	register u_char *pkt;
1144	register u_int pktlen, snaplen;
1145	register void (*cpfn) __P((const void *, void *, size_t));
1146{
1147	register struct bpf_hdr *hp;
1148	register int totlen, curlen;
1149	register int hdrlen = d->bd_bif->bif_hdrlen;
1150	/*
1151	 * Figure out how many bytes to move.  If the packet is
1152	 * greater or equal to the snapshot length, transfer that
1153	 * much.  Otherwise, transfer the whole packet (unless
1154	 * we hit the buffer size limit).
1155	 */
1156	totlen = hdrlen + min(snaplen, pktlen);
1157	if (totlen > d->bd_bufsize)
1158		totlen = d->bd_bufsize;
1159
1160	/*
1161	 * Round up the end of the previous packet to the next longword.
1162	 */
1163	curlen = BPF_WORDALIGN(d->bd_slen);
1164	if (curlen + totlen > d->bd_bufsize) {
1165		/*
1166		 * This packet will overflow the storage buffer.
1167		 * Rotate the buffers if we can, then wakeup any
1168		 * pending reads.
1169		 */
1170		if (d->bd_fbuf == 0) {
1171			/*
1172			 * We haven't completed the previous read yet,
1173			 * so drop the packet.
1174			 */
1175			++d->bd_dcount;
1176			return;
1177		}
1178		ROTATE_BUFFERS(d);
1179		bpf_wakeup(d);
1180		curlen = 0;
1181	}
1182	else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
1183		/*
1184		 * Immediate mode is set, or the read timeout has
1185		 * already expired during a select call.  A packet
1186		 * arrived, so the reader should be woken up.
1187		 */
1188		bpf_wakeup(d);
1189
1190	/*
1191	 * Append the bpf header.
1192	 */
1193	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
1194	microtime(&hp->bh_tstamp);
1195	hp->bh_datalen = pktlen;
1196	hp->bh_hdrlen = hdrlen;
1197	/*
1198	 * Copy the packet data into the store buffer and update its length.
1199	 */
1200	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
1201	d->bd_slen = curlen + totlen;
1202}
1203
1204/*
1205 * Initialize all nonzero fields of a descriptor.
1206 */
1207static int
1208bpf_allocbufs(d)
1209	register struct bpf_d *d;
1210{
1211	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1212	if (d->bd_fbuf == 0)
1213		return (ENOBUFS);
1214
1215	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1216	if (d->bd_sbuf == 0) {
1217		free(d->bd_fbuf, M_BPF);
1218		return (ENOBUFS);
1219	}
1220	d->bd_slen = 0;
1221	d->bd_hlen = 0;
1222	return (0);
1223}
1224
1225/*
1226 * Free buffers currently in use by a descriptor.
1227 * Called on close.
1228 */
1229static void
1230bpf_freed(d)
1231	register struct bpf_d *d;
1232{
1233	/*
1234	 * We don't need to lock out interrupts since this descriptor has
1235	 * been detached from its interface and it yet hasn't been marked
1236	 * free.
1237	 */
1238	if (d->bd_sbuf != 0) {
1239		free(d->bd_sbuf, M_BPF);
1240		if (d->bd_hbuf != 0)
1241			free(d->bd_hbuf, M_BPF);
1242		if (d->bd_fbuf != 0)
1243			free(d->bd_fbuf, M_BPF);
1244	}
1245	if (d->bd_filter)
1246		free((caddr_t)d->bd_filter, M_BPF);
1247	mtx_destroy(&d->bd_mtx);
1248}
1249
1250/*
1251 * Attach an interface to bpf.  ifp is a pointer to the structure
1252 * defining the interface to be attached, dlt is the link layer type,
1253 * and hdrlen is the fixed size of the link header (variable length
1254 * headers are not yet supporrted).
1255 */
1256void
1257bpfattach(ifp, dlt, hdrlen)
1258	struct ifnet *ifp;
1259	u_int dlt, hdrlen;
1260{
1261	struct bpf_if *bp;
1262	bp = (struct bpf_if *)malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
1263	if (bp == 0)
1264		panic("bpfattach");
1265
1266	bp->bif_ifp = ifp;
1267	bp->bif_dlt = dlt;
1268	mtx_init(&bp->bif_mtx, "bpf interface lock", MTX_DEF);
1269
1270	mtx_lock(&bpf_mtx);
1271	bp->bif_next = bpf_iflist;
1272	bpf_iflist = bp;
1273	mtx_unlock(&bpf_mtx);
1274
1275	bp->bif_ifp->if_bpf = 0;
1276
1277	/*
1278	 * Compute the length of the bpf header.  This is not necessarily
1279	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1280	 * that the network layer header begins on a longword boundary (for
1281	 * performance reasons and to alleviate alignment restrictions).
1282	 */
1283	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1284
1285	if (bootverbose)
1286		printf("bpf: %s%d attached\n", ifp->if_name, ifp->if_unit);
1287}
1288
1289/*
1290 * Detach bpf from an interface.  This involves detaching each descriptor
1291 * associated with the interface, and leaving bd_bif NULL.  Notify each
1292 * descriptor as it's detached so that any sleepers wake up and get
1293 * ENXIO.
1294 */
1295void
1296bpfdetach(ifp)
1297	struct ifnet *ifp;
1298{
1299	struct bpf_if	*bp, *bp_prev;
1300	struct bpf_d	*d;
1301
1302	mtx_lock(&bpf_mtx);
1303
1304	/* Locate BPF interface information */
1305	bp_prev = NULL;
1306	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1307		if (ifp == bp->bif_ifp)
1308			break;
1309		bp_prev = bp;
1310	}
1311
1312	/* Interface wasn't attached */
1313	if (bp->bif_ifp == NULL) {
1314		mtx_unlock(&bpf_mtx);
1315		printf("bpfdetach: %s%d was not attached\n", ifp->if_name,
1316		    ifp->if_unit);
1317		return;
1318	}
1319
1320	if (bp_prev) {
1321		bp_prev->bif_next = bp->bif_next;
1322	} else {
1323		bpf_iflist = bp->bif_next;
1324	}
1325
1326	while ((d = bp->bif_dlist) != NULL) {
1327		bpf_detachd(d);
1328		BPFD_LOCK(d);
1329		bpf_wakeup(d);
1330		BPFD_UNLOCK(d);
1331	}
1332
1333	mtx_destroy(&bp->bif_mtx);
1334	free(bp, M_BPF);
1335
1336	mtx_unlock(&bpf_mtx);
1337}
1338
1339static void bpf_drvinit __P((void *unused));
1340
1341static void bpf_clone __P((void *arg, char *name, int namelen, dev_t *dev));
1342
/*
 * dev_clone event handler: create /dev/bpfN on demand when a process
 * opens a bpf unit that has no device node yet.
 */
static void
bpf_clone(arg, name, namelen, dev)
	void *arg;
	char *name;
	int namelen;
	dev_t *dev;
{
	int u;

	/* Another handler already satisfied this lookup. */
	if (*dev != NODEV)
		return;
	/* Only names of the exact form "bpf<unit>" are ours. */
	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
		return;
	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", u);
	(*dev)->si_flags |= SI_CHEAPCLONE;
	return;
}
1361
/*
 * SYSINIT hook: set up the bpf global lock, register the clone
 * handler, and install the character device switch entry.
 */
static void
bpf_drvinit(unused)
	void *unused;
{

	mtx_init(&bpf_mtx, "bpf global lock", MTX_DEF);
	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
	cdevsw_add(&bpf_cdevsw);
}
1371
1372SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL)
1373
1374#else /* !DEV_BPF && !NETGRAPH_BPF */
1375/*
1376 * NOP stubs to allow bpf-using drivers to load and function.
1377 *
1378 * A 'better' implementation would allow the core bpf functionality
1379 * to be loaded at runtime.
1380 */
1381
void
bpf_tap(ifp, pkt, pktlen)
	struct ifnet *ifp;
	register u_char *pkt;
	register u_int pktlen;
{
	/* NOP stub: BPF not compiled in; the tap is silently discarded. */
}
1389
void
bpf_mtap(ifp, m)
	struct ifnet *ifp;
	struct mbuf *m;
{
	/* NOP stub: BPF not compiled in; the mbuf tap is discarded. */
}
1396
void
bpfattach(ifp, dlt, hdrlen)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
{
	/* NOP stub: nothing to register when BPF is not compiled in. */
}
1403
void
bpfdetach(ifp)
	struct ifnet *ifp;
{
	/* NOP stub: nothing to unregister when BPF is not compiled in. */
}
1409
/*
 * Stub filter: accept everything by returning the maximum snapshot
 * length (-1 as u_int).
 */
u_int
bpf_filter(pc, p, wirelen, buflen)
	register const struct bpf_insn *pc;
	register u_char *p;
	u_int wirelen;
	register u_int buflen;
{
	return -1;	/* "no filter" behaviour */
}
1419
/*
 * Stub validator: reject every program, since no filter machinery is
 * present in this configuration.
 */
int
bpf_validate(f, len)
	const struct bpf_insn *f;
	int len;
{
	return 0;		/* false */
}
1427
1428#endif /* !DEV_BPF && !NETGRAPH_BPF */
1429