/*-
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
 *
 * $FreeBSD: head/sys/net/bpf.c 153151 2005-12-06 02:58:12Z jkim $
 */

#include "opt_bpf.h"
#include "opt_mac.h"
#include "opt_netgraph.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/uio.h>

#include <sys/event.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/proc.h>

#include <sys/socket.h>

#include <net/if.h>
#include <net/bpf.h>
#ifdef BPF_JITTER
#include <net/bpf_jitter.h>
#endif
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define PRINET  26			/* interruptible */

/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (e.g., 802.11
 * frames, Ethernet frames, etc.).
 */
static LIST_HEAD(, bpf_if)	bpf_iflist;
static struct mtx	bpf_mtx;		/* bpf global lock */
static int		bpf_bpfd_cnt;

static int	bpf_allocbufs(struct bpf_d *);
static void	bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
static void	bpf_detachd(struct bpf_d *d);
static void	bpf_freed(struct bpf_d *);
static void	bpf_mcopy(const void *, void *, size_t);
static int	bpf_movein(struct uio *, int, int,
		    struct mbuf **, struct sockaddr *, struct bpf_insn *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void	bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int,
		    u_int, void (*)(const void *, void *, size_t));
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static void	bpf_drvinit(void *);
static void	bpf_clone(void *, struct ucred *, char *, int, struct cdev **);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

/*
 * The default read buffer size is patchable.
 */
SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
static int bpf_bufsize = 4096;
SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW,
    &bpf_bufsize, 0, "");
static int bpf_maxbufsize = BPF_MAXBUFSIZE;
SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
    &bpf_maxbufsize, 0, "");
static int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");
#ifdef BPF_JITTER
SYSCTL_NODE(_net_bpf, OID_AUTO, jitter, CTLFLAG_RW, 0, "bpf jitter sysctl");
static int bpf_jitter_enable = 1;
SYSCTL_INT(_net_bpf_jitter, OID_AUTO, enable, CTLFLAG_RW,
    &bpf_jitter_enable, 0, "bpf JIT compiler");
#endif

static	d_open_t	bpfopen;
static	d_close_t	bpfclose;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_NEEDGIANT,
	.d_open =	bpfopen,
	.d_close =	bpfclose,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};

static struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfdetach, filt_bpfread };

static int
bpf_movein(uio, linktype, mtu, mp, sockp, wfilter)
	struct uio *uio;
	int linktype;
	int mtu;
	struct mbuf **mp;
	struct sockaddr *sockp;
	struct bpf_insn *wfilter;
{
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * null interface types require a 4 byte pseudo header which
		 * corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * The 'en' ATM driver requires a 4-byte ATM pseudo header.
		 * Though it isn't standard, the vpi:vci needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;

	if (len - hlen > mtu)
		return (EMSGSIZE);

	if ((unsigned)len > MCLBYTES)
		return (EIO);

	if (len > MHLEN) {
		m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
	} else {
		MGETHDR(m, M_TRYWAIT, MT_DATA);
	}
	if (m == NULL)
		return (ENOBUFS);
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		bcopy(m->m_data, sockp->sa_data, hlen);
		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
#if BSD >= 199103
		m->m_data += hlen; /* XXX */
#else
		m->m_off += hlen;
#endif
	}

	return (0);
bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(d, bp)
	struct bpf_d *d;
	struct bpf_if *bp;
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	bpf_bpfd_cnt++;
	*bp->bif_driverp = bp;
	BPFIF_UNLOCK(bp);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(d)
	struct bpf_d *d;
{
	int error;
	struct bpf_if *bp;
	struct ifnet *ifp;

	bp = d->bd_bif;
	BPFIF_LOCK(bp);
	BPFD_LOCK(d);
	ifp = d->bd_bif->bif_ifp;

	/*
	 * Remove d from the interface's descriptor list.
	 */
	LIST_REMOVE(d, bd_next);

	bpf_bpfd_cnt--;
	/*
	 * Let the driver know that there are no more listeners.
	 */
	if (LIST_EMPTY(&bp->bif_dlist))
		*bp->bif_driverp = NULL;

	d->bd_bif = NULL;
	BPFD_UNLOCK(d);
	BPFIF_UNLOCK(bp);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		error = ifpromisc(ifp, 0);
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged.
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
				"bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
}

/*
 * Open the bpf device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
static	int
bpfopen(dev, flags, fmt, td)
	struct cdev *dev;
	int flags;
	int fmt;
	struct thread *td;
{
	struct bpf_d *d;

	mtx_lock(&bpf_mtx);
	d = dev->si_drv1;
	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 */
	if (d != NULL) {
		mtx_unlock(&bpf_mtx);
		return (EBUSY);
	}
	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
	mtx_unlock(&bpf_mtx);

	if ((dev->si_flags & SI_NAMED) == 0)
		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
		    "bpf%d", dev2unit(dev));
	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	dev->si_drv1 = d;
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_seesent = 1;
	d->bd_pid = td->td_proc->p_pid;
#ifdef MAC
	mac_init_bpfdesc(d);
	mac_create_bpfdesc(td->td_ucred, d);
#endif
	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init(&d->bd_callout, NET_CALLOUT_MPSAFE);
	knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static	int
bpfclose(dev, flags, fmt, td)
	struct cdev *dev;
	int flags;
	int fmt;
	struct thread *td;
{
	struct bpf_d *d = dev->si_drv1;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
	selwakeuppri(&d->bd_sel, PRINET);
#ifdef MAC
	mac_destroy_bpfdesc(d);
#endif /* MAC */
	knlist_destroy(&d->bd_sel.si_note);
	bpf_freed(d);
	dev->si_drv1 = NULL;
	free(d, M_BPF);

	return (0);
}


/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;
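
/*
 * Illustrative note (not part of the original file): the three buffers
 * cycle through fixed roles.  Assuming a filled store buffer and a
 * consumed hold buffer, one rotation looks like:
 *
 *	before:	sbuf = A (slen = n)	hbuf = NULL		fbuf = B
 *	after:	sbuf = B (slen = 0)	hbuf = A (hlen = n)	fbuf = NULL
 *
 * The hold buffer stays frozen until bpfread() copies it to userland
 * and hands it back as the new free buffer.
 */
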
/*
 *  bpfread - read next chunk of packets from buffers
 */
static	int
bpfread(dev, uio, ioflag)
	struct cdev *dev;
	struct uio *uio;
	int ioflag;
{
	struct bpf_d *d = dev->si_drv1;
	int timed_out;
	int error;

	/*
	 * Restrict the application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (ioflag & O_NONBLOCK) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
		    "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	BPFD_UNLOCK(d);

	return (error);
}


/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(d)
	struct bpf_d *d;
{

	BPFD_LOCK_ASSERT(d);
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup(d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}

static void
bpf_timed_out(arg)
	void *arg;
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	BPFD_UNLOCK(d);
}

static	int
bpfwrite(dev, uio, ioflag)
	struct cdev *dev;
	struct uio *uio;
	int ioflag;
{
	struct bpf_d *d = dev->si_drv1;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr dst;

	if (d->bd_bif == NULL)
		return (ENXIO);

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0)
		return (ENETDOWN);

	if (uio->uio_resid == 0)
		return (0);

	bzero(&dst, sizeof(dst));
	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu,
	    &m, &dst, d->bd_wfilter);
	if (error)
		return (error);

	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

#ifdef MAC
	BPFD_LOCK(d);
	mac_create_mbuf_from_bpfdesc(d, m);
	BPFD_UNLOCK(d);
#endif
	NET_LOCK_GIANT();
	error = (*ifp->if_output)(ifp, m, &dst, NULL);
	NET_UNLOCK_GIANT();
	/*
	 * The driver frees the mbuf.
	 */
	return (error);
}
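
/*
 * Illustrative sketch (not part of the original file): how a userland
 * process reaches bpfwrite().  The interface name "em0" and the frame
 * variables are hypothetical; BIOCSHDRCMPLT makes the kernel keep the
 * link level source address supplied in the frame.
 *
 *	int fd = open("/dev/bpf0", O_RDWR);
 *	struct ifreq ifr;
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);	// bind to an interface
 *	u_int one = 1;
 *	ioctl(fd, BIOCSHDRCMPLT, &one);	// header already complete
 *	write(fd, frame, frame_len);	// frame must fit within the MTU
 */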

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
static void
reset_d(d)
	struct bpf_d *d;
{

	mtx_assert(&d->bd_mtx, MA_OWNED);
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_fcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set link layer read filter.
 *  BIOCSETWF		Set link layer write filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 *  BIOCGSEESENT	Get "see packets sent" flag
 *  BIOCSSEESENT	Set "see packets sent" flag
 *  BIOCLOCK		Set "locked" flag
 */
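/*
 * Illustrative sketch (not part of the original file): the usual
 * capture setup sequence issued against these ioctls.  The interface
 * name and buffer handling are hypothetical.
 *
 *	u_int blen, imm = 1;
 *	struct ifreq ifr;
 *	int fd = open("/dev/bpf0", O_RDONLY);
 *	ioctl(fd, BIOCGBLEN, &blen);		// size of the kernel buffers
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);		// attach to an interface
 *	ioctl(fd, BIOCIMMEDIATE, &imm);		// don't wait for a full buffer
 *	char *buf = malloc(blen);
 *	ssize_t n = read(fd, buf, blen);	// must ask for exactly blen
 */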
/* ARGSUSED */
static	int
bpfioctl(dev, cmd, addr, flags, td)
	struct cdev *dev;
	u_long cmd;
	caddr_t addr;
	int flags;
	struct thread *td;
{
	struct bpf_d *d = dev->si_drv1;
	int error = 0;

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	if (d->bd_locked == 1) {
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
			break;
		default:
			return (EPERM);
		}
	}
	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			if (d->bd_bif == NULL)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter (BIOCSETF) or write filter (BIOCSETWF).
	 */
	case BIOCSETF:
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			mtx_lock(&Giant);
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			mtx_unlock(&Giant);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get current data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCLOCK:
		d->bd_locked = 1;
		break;
	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:
		*(u_int *)addr = d->bd_seesent;
		break;

	/*
	 * Set "see sent packets" flag
	 */
	case BIOCSSEESENT:
		d->bd_seesent = *(u_int *)addr;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(&d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
static int
bpf_setf(d, fp, cmd)
	struct bpf_d *d;
	struct bpf_program *fp;
	u_long cmd;
{
	struct bpf_insn *fcode, *old;
	u_int wfilter, flen, size;
#ifdef BPF_JITTER
	bpf_jit_filter *ofunc;
#endif

	if (cmd == BIOCSETWF) {
		old = d->bd_wfilter;
		wfilter = 1;
#ifdef BPF_JITTER
		ofunc = NULL;
#endif
	} else {
		wfilter = 0;
		old = d->bd_rfilter;
#ifdef BPF_JITTER
		ofunc = d->bd_bfilter;
#endif
	}
	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = NULL;
		else {
			d->bd_rfilter = NULL;
#ifdef BPF_JITTER
			d->bd_bfilter = NULL;
#endif
		}
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif
		return (0);
	}
	flen = fp->bf_len;
	if (flen > bpf_maxinsns)
		return (EINVAL);

	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = fcode;
		else {
			d->bd_rfilter = fcode;
#ifdef BPF_JITTER
			d->bd_bfilter = bpf_jitter(fcode, flen);
#endif
		}
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif

		return (0);
	}
	free((caddr_t)fcode, M_BPF);
	return (EINVAL);
}
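
/*
 * Illustrative sketch (not part of the original file): installing a
 * read filter through BIOCSETF, which lands in bpf_setf() above.  This
 * hypothetical program accepts IPv4 packets on an Ethernet link and
 * rejects everything else.
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12),		   // ethertype
 *		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x0800, 0, 1), // IPv4?
 *		BPF_STMT(BPF_RET | BPF_K, (u_int)-1),		   // accept
 *		BPF_STMT(BPF_RET | BPF_K, 0),			   // reject
 *	};
 *	struct bpf_program prog = { 4, insns };
 *	ioctl(fd, BIOCSETF, &prog);
 */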

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(d, ifr)
	struct bpf_d *d;
	struct ifreq *ifr;
{
	struct bpf_if *bp;
	int error;
	struct ifnet *theywant;

	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL)
		return (ENXIO);

	/*
	 * Look through attached interfaces for the named one.
	 */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == NULL || ifp != theywant)
			continue;
		/* skip additional entry */
		if (bp->bif_driverp != &ifp->if_bpf)
			continue;

		mtx_unlock(&bpf_mtx);
		/*
		 * We found the requested interface.
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to requested interface,
		 * just flush the buffer.
		 */
		if (d->bd_sbuf == NULL) {
			error = bpf_allocbufs(d);
			if (error != 0)
				return (error);
		}
		if (bp != d->bd_bif) {
			if (d->bd_bif)
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);

			bpf_attachd(d, bp);
		}
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		return (0);
	}
	mtx_unlock(&bpf_mtx);
	/* Not found. */
	return (ENXIO);
}

/*
 * Support for select() and poll() system calls.
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
static int
bpfpoll(dev, events, td)
	struct cdev *dev;
	int events;
	struct thread *td;
{
	struct bpf_d *d;
	int revents;

	d = dev->si_drv1;
	if (d->bd_bif == NULL)
		return (ENXIO);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (events & (POLLIN | POLLRDNORM)) {
		if (bpf_ready(d))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}

/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
static int
bpfkqfilter(dev, kn)
	struct cdev *dev;
	struct knote *kn;
{
	struct bpf_d *d = (struct bpf_d *)dev->si_drv1;

	if (kn->kn_filter != EVFILT_READ)
		return (1);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	kn->kn_fop = &bpfread_filtops;
	kn->kn_hook = d;
	knlist_add(&d->bd_sel.si_note, kn, 0);
	BPFD_UNLOCK(d);

	return (0);
}

static void
filt_bpfdetach(kn)
	struct knote *kn;
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	BPFD_LOCK(d);
	knlist_remove(&d->bd_sel.si_note, kn, 0);
	BPFD_UNLOCK(d);
}

static int
filt_bpfread(kn, hint)
	struct knote *kn;
	long hint;
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int ready;

	BPFD_LOCK_ASSERT(d);
	ready = bpf_ready(d);
	if (ready) {
		kn->kn_data = d->bd_slen;
		if (d->bd_hbuf)
			kn->kn_data += d->bd_hlen;
	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		callout_reset(&d->bd_callout, d->bd_rtout,
		    bpf_timed_out, d);
		d->bd_state = BPF_WAITING;
	}

	return (ready);
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(bp, pkt, pktlen)
	struct bpf_if *bp;
	u_char *pkt;
	u_int pktlen;
{
	struct bpf_d *d;
	u_int slen;

	/*
	 * Lockless read to avoid cost of locking the interface if there are
	 * no descriptors attached.
	 */
	if (LIST_EMPTY(&bp->bif_dlist))
		return;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL)
			slen = (*(d->bd_bfilter->func))(pkt, pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			d->bd_fcount++;
#ifdef MAC
			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen, bcopy);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void
bpf_mcopy(src_arg, dst_arg, len)
	const void *src_arg;
	void *dst_arg;
	size_t len;
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, void *), dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
void
bpf_mtap(bp, m)
	struct bpf_if *bp;
	struct mbuf *m;
{
	struct bpf_d *d;
	u_int pktlen, slen;

	/*
	 * Lockless read to avoid cost of locking the interface if there are
	 * no descriptors attached.
	 */
	if (LIST_EMPTY(&bp->bif_dlist))
		return;

	pktlen = m_length(m, NULL);

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		/* XXX We cannot handle multiple mbufs. */
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL &&
		    m->m_next == NULL)
			slen = (*(d->bd_bfilter->func))(mtod(m, u_char *),
			    pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
#ifdef MAC
			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_mcopy);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Incoming linkage from device drivers, when packet is in
 * an mbuf chain and to be prepended by a contiguous header.
 */
void
bpf_mtap2(bp, data, dlen, m)
	struct bpf_if *bp;
	void *data;
	u_int dlen;
	struct mbuf *m;
{
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;

	/*
	 * Lockless read to avoid cost of locking the interface if there are
	 * no descriptors attached.
	 */
	if (LIST_EMPTY(&bp->bif_dlist))
		return;

	pktlen = m_length(m, NULL);
	/*
	 * Craft on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only setup what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
#ifdef MAC
			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_mcopy);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(d, pkt, pktlen, snaplen, cpfn)
	struct bpf_d *d;
	u_char *pkt;
	u_int pktlen, snaplen;
	void (*cpfn)(const void *, void *, size_t);
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen = d->bd_bif->bif_hdrlen;
	int do_wakeup = 0;

	BPFD_LOCK_ASSERT(d);
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	microtime(&hp->bh_tstamp);
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}
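
/*
 * Illustrative sketch (not part of the original file): records written
 * by catchpacket() are laid out back to back at BPF_WORDALIGN
 * boundaries, so a userland consumer walks a read() buffer like this
 * ("buf" and "n" are hypothetical):
 *
 *	char *p = buf;
 *	while (p < buf + n) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *		u_char *pkt = (u_char *)p + bh->bh_hdrlen;
 *		// bh->bh_caplen captured bytes of a bh->bh_datalen packet
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */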

/*
 * Allocate and initialize the packet buffers of a descriptor.
 */
static int
bpf_allocbufs(d)
	struct bpf_d *d;
{
	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
	if (d->bd_fbuf == NULL)
		return (ENOBUFS);

	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_BPF);
		return (ENOBUFS);
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	return (0);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(d)
	struct bpf_d *d;
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
	 * free.
	 */
	if (d->bd_sbuf != NULL) {
		free(d->bd_sbuf, M_BPF);
		if (d->bd_hbuf != NULL)
			free(d->bd_hbuf, M_BPF);
		if (d->bd_fbuf != NULL)
			free(d->bd_fbuf, M_BPF);
	}
	if (d->bd_rfilter) {
		free((caddr_t)d->bd_rfilter, M_BPF);
#ifdef BPF_JITTER
		bpf_destroy_jit_filter(d->bd_bfilter);
#endif
	}
	if (d->bd_wfilter)
		free((caddr_t)d->bd_wfilter, M_BPF);
	mtx_destroy(&d->bd_mtx);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 */
void
bpfattach(ifp, dlt, hdrlen)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
 */
void
bpfattach2(ifp, dlt, hdrlen, driverp)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
	struct bpf_if **driverp;
{
	struct bpf_if *bp;

	bp = (struct bpf_if *)malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
	if (bp == NULL)
		panic("bpfattach");

	LIST_INIT(&bp->bif_dlist);
	bp->bif_driverp = driverp;
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);

	mtx_lock(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
	mtx_unlock(&bpf_mtx);

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	if (bootverbose)
		if_printf(ifp, "bpf attached\n");
}
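
/*
 * Illustrative sketch (not part of the original file): how a typical
 * Ethernet driver uses these attachment points (driver names are
 * hypothetical):
 *
 *	foo_attach():	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
 *	foo_rxeof():	BPF_MTAP(ifp, m);	// wraps bpf_mtap()
 *	foo_detach():	bpfdetach(ifp);
 */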

/*
 * Detach bpf from an interface.  This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL.  Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(ifp)
	struct ifnet *ifp;
{
	struct bpf_if	*bp;
	struct bpf_d	*d;

	/* Locate BPF interface information */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (ifp == bp->bif_ifp)
			break;
	}

	/* Interface wasn't attached */
	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
		mtx_unlock(&bpf_mtx);
		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
		return;
	}

	LIST_REMOVE(bp, bif_next);
	mtx_unlock(&bpf_mtx);

	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
		bpf_detachd(d);
		BPFD_LOCK(d);
		bpf_wakeup(d);
		BPFD_UNLOCK(d);
	}

	mtx_destroy(&bp->bif_mtx);
	free(bp, M_BPF);
}

/*
 * Get a list of the available data link types of the interface.
 */
static int
bpf_getdltlist(d, bfl)
	struct bpf_d *d;
	struct bpf_dltlist *bfl;
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len) {
				mtx_unlock(&bpf_mtx);
				return (ENOMEM);
			}
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	mtx_unlock(&bpf_mtx);
	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(d, dlt)
	struct bpf_d *d;
	u_int dlt;
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	mtx_unlock(&bpf_mtx);
	if (bp != NULL) {
		opromisc = d->bd_promisc;
		bpf_detachd(d);
		bpf_attachd(d, bp);
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		if (opromisc) {
			error = ifpromisc(bp->bif_ifp, 1);
			if (error)
				if_printf(bp->bif_ifp,
					"bpf_setdlt: ifpromisc failed (%d)\n",
					error);
			else
				d->bd_promisc = 1;
		}
	}
	return (bp == NULL ? EINVAL : 0);
}

static void
bpf_clone(arg, cred, name, namelen, dev)
	void *arg;
	struct ucred *cred;
	char *name;
	int namelen;
	struct cdev **dev;
{
	int u;

	if (*dev != NULL)
		return;
	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
		return;
	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", u);
	dev_ref(*dev);
	(*dev)->si_flags |= SI_CHEAPCLONE;
	return;
}

static void
bpf_drvinit(unused)
	void *unused;
{

	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
	LIST_INIT(&bpf_iflist);
	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
}

static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	bzero(d, sizeof(*d));
	BPFD_LOCK_ASSERT(bd);
	d->bd_immediate = bd->bd_immediate;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_hdrcmplt;
	d->bd_seesent = bd->bd_seesent;
	d->bd_async = bd->bd_async;
	d->bd_rcount = bd->bd_rcount;
	d->bd_dcount = bd->bd_dcount;
	d->bd_fcount = bd->bd_fcount;
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname,
	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	d->bd_locked = bd->bd_locked;
}

static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct xbpf_d *xbdbuf, *xbd;
	int index, error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct.  It is possible for
	 * unprivileged users to open bpf devices.  It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = suser(req->td);
	if (error)
		return (error);
	if (req->oldptr == NULL)
		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
	if (bpf_bpfd_cnt == 0)
		return (SYSCTL_OUT(req, 0, 0));
	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
	mtx_lock(&bpf_mtx);
	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
		mtx_unlock(&bpf_mtx);
		free(xbdbuf, M_BPF);
		return (ENOMEM);
	}
	index = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
	}
	mtx_unlock(&bpf_mtx);
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}

SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL)

#else /* !DEV_BPF && !NETGRAPH_BPF */
/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */

void
bpf_tap(bp, pkt, pktlen)
	struct bpf_if *bp;
	u_char *pkt;
	u_int pktlen;
{
}

void
bpf_mtap(bp, m)
	struct bpf_if *bp;
	struct mbuf *m;
{
}

void
bpf_mtap2(bp, d, l, m)
	struct bpf_if *bp;
	void *d;
	u_int l;
	struct mbuf *m;
{
}

void
bpfattach(ifp, dlt, hdrlen)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
{
}

void
bpfattach2(ifp, dlt, hdrlen, driverp)
	struct ifnet *ifp;
	u_int dlt, hdrlen;
	struct bpf_if **driverp;
{
}

void
bpfdetach(ifp)
	struct ifnet *ifp;
{
}

u_int
bpf_filter(pc, p, wirelen, buflen)
	const struct bpf_insn *pc;
	u_char *p;
	u_int wirelen;
	u_int buflen;
{
	return ((u_int)-1);	/* "no filter" behaviour: accept the packet */
}

int
bpf_validate(f, len)
	const struct bpf_insn *f;
	int len;
{
	return (0);		/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */