/*-
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
 *
 * $FreeBSD: head/sys/net/bpf.c 160087 2006-07-03 20:02:06Z csjp $
 */

#include "opt_bpf.h"
#include "opt_mac.h"
#include "opt_netgraph.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/uio.h>

#include <sys/event.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/proc.h>

#include <sys/socket.h>

#include <net/if.h>
#include <net/bpf.h>
#ifdef BPF_JITTER
#include <net/bpf_jitter.h>
#endif
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

static MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define PRINET  26			/* interruptible */

/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (e.g., 802.11
 * frames, Ethernet frames, etc.).
 */
static LIST_HEAD(, bpf_if)	bpf_iflist;
static struct mtx	bpf_mtx;		/* bpf global lock */
static int		bpf_bpfd_cnt;

static int	bpf_allocbufs(struct bpf_d *);
static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static void	bpf_freed(struct bpf_d *);
static void	bpf_mcopy(const void *, void *, size_t);
static int	bpf_movein(struct uio *, int, int,
		    struct mbuf **, struct sockaddr *, struct bpf_insn *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int,
		    u_int, void (*)(const void *, void *, size_t));
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static void	bpf_drvinit(void *);
static void	bpf_clone(void *, struct ucred *, char *, int, struct cdev **);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

/*
 * The default read buffer size is patchable.
 */
SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
static int bpf_bufsize = 4096;
SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW,
    &bpf_bufsize, 0, "Default bpf buffer size");
static int bpf_maxbufsize = BPF_MAXBUFSIZE;
SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
    &bpf_maxbufsize, 0, "Maximum bpf buffer size");
static int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");
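
/*
 * Example (illustrative, not part of this file): the knobs above can be
 * inspected and tuned from userland with sysctl(3).  A minimal sketch of
 * a hypothetical standalone program reading net.bpf.bufsize:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int bufsize;
 *		size_t len = sizeof(bufsize);
 *
 *		if (sysctlbyname("net.bpf.bufsize", &bufsize, &len,
 *		    NULL, 0) == -1)
 *			return (1);
 *		printf("default bpf buffer size: %d\n", bufsize);
 *		return (0);
 *	}
 */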

static	d_open_t	bpfopen;
static	d_close_t	bpfclose;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_NEEDGIANT,
	.d_open =	bpfopen,
	.d_close =	bpfclose,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};

static struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfdetach, filt_bpfread };

static int
bpf_movein(struct uio *uio, int linktype, int mtu, struct mbuf **mp,
    struct sockaddr *sockp, struct bpf_insn *wfilter)
{
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * Null interface types require a 4-byte pseudo-header
		 * which corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * The "en" ATM driver requires a 4-byte ATM pseudo-header.
		 * Though it isn't standard, the VPI:VCI needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;

	if (len - hlen > mtu)
		return (EMSGSIZE);

	if ((unsigned)len > MCLBYTES)
		return (EIO);

	if (len > MHLEN) {
		m = m_getcl(M_TRYWAIT, MT_DATA, M_PKTHDR);
	} else {
		MGETHDR(m, M_TRYWAIT, MT_DATA);
	}
	if (m == NULL)
		return (ENOBUFS);
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		bcopy(m->m_data, sockp->sa_data, hlen);
		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
#if BSD >= 199103
		m->m_data += hlen; /* XXX */
#else
		m->m_off += hlen;
#endif
	}

	return (0);
bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	bpf_bpfd_cnt++;
	BPFIF_UNLOCK(bp);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	int error;
	struct bpf_if *bp;
	struct ifnet *ifp;

	bp = d->bd_bif;
	BPFIF_LOCK(bp);
	BPFD_LOCK(d);
	ifp = d->bd_bif->bif_ifp;

	/*
	 * Remove d from the interface's descriptor list.
	 */
	LIST_REMOVE(d, bd_next);

	bpf_bpfd_cnt--;
	d->bd_bif = NULL;
	BPFD_UNLOCK(d);
	BPFIF_UNLOCK(bp);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		error = ifpromisc(ifp, 0);
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged.
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
				"bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
}

/*
 * Open the bpf device.  Returns ENXIO for an illegal minor device number,
 * EBUSY if the file is already open by another process.
 */
/* ARGSUSED */
static	int
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d;

	mtx_lock(&bpf_mtx);
	d = dev->si_drv1;
	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 */
	if (d != NULL) {
		mtx_unlock(&bpf_mtx);
		return (EBUSY);
	}
	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
	mtx_unlock(&bpf_mtx);

	if ((dev->si_flags & SI_NAMED) == 0)
		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
		    "bpf%d", dev2unit(dev));
	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	dev->si_drv1 = d;
	d->bd_bufsize = bpf_bufsize;
	d->bd_sig = SIGIO;
	d->bd_seesent = 1;
	d->bd_pid = td->td_proc->p_pid;
#ifdef MAC
	mac_init_bpfdesc(d);
	mac_create_bpfdesc(td->td_ucred, d);
#endif
	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init(&d->bd_callout, NET_CALLOUT_MPSAFE);
	knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static	int
bpfclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d = dev->si_drv1;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
	selwakeuppri(&d->bd_sel, PRINET);
#ifdef MAC
	mac_destroy_bpfdesc(d);
#endif /* MAC */
	knlist_destroy(&d->bd_sel.si_note);
	bpf_freed(d);
	dev->si_drv1 = NULL;
	free(d, M_BPF);

	return (0);
}


/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;
/*
 *  bpfread - read next chunk of packets from buffers
 */
static	int
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d = dev->si_drv1;
	int timed_out;
	int error;

	/*
	 * Restrict the application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (ioflag & O_NONBLOCK) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
		     "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	BPFD_UNLOCK(d);

	return (error);
}
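
/*
 * Example (userland, illustrative): a consumer must read() with a buffer
 * exactly as large as the kernel buffer (see BIOCGBLEN in bpfioctl()
 * below) and then walk the concatenated records, advancing by
 * BPF_WORDALIGN(bh_hdrlen + bh_caplen).  A minimal sketch, where "fd" is
 * an already-configured bpf descriptor and handle_packet() is a
 * hypothetical consumer routine:
 *
 *	u_int blen;
 *	char *buf, *p;
 *	ssize_t n;
 *	struct bpf_hdr *bh;
 *
 *	if (ioctl(fd, BIOCGBLEN, &blen) == -1)
 *		err(1, "BIOCGBLEN");
 *	buf = malloc(blen);
 *	while ((n = read(fd, buf, blen)) > 0) {
 *		for (p = buf; p < buf + n;
 *		    p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen)) {
 *			bh = (struct bpf_hdr *)p;
 *			handle_packet(p + bh->bh_hdrlen, bh->bh_caplen);
 *		}
 *	}
 */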


/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup(d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	BPFD_UNLOCK(d);
}

static int
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d = dev->si_drv1;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr dst;

	if (d->bd_bif == NULL)
		return (ENXIO);

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0)
		return (ENETDOWN);

	if (uio->uio_resid == 0)
		return (0);

	bzero(&dst, sizeof(dst));
	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu,
	    &m, &dst, d->bd_wfilter);
	if (error)
		return (error);

	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

#ifdef MAC
	BPFD_LOCK(d);
	mac_create_mbuf_from_bpfdesc(d, m);
	BPFD_UNLOCK(d);
#endif
	NET_LOCK_GIANT();
	error = (*ifp->if_output)(ifp, m, &dst, NULL);
	NET_UNLOCK_GIANT();
	/*
	 * The driver frees the mbuf.
	 */
	return (error);
}
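
/*
 * Example (userland, illustrative): packets are injected by write(2)'ing
 * a complete link-level frame to the descriptor.  With BIOCSHDRCMPLT set,
 * the link-level source address is taken from the frame as written rather
 * than being filled in by the kernel.  A minimal sketch, where "frame" is
 * a caller-built Ethernet frame of "framelen" bytes:
 *
 *	u_int one = 1;
 *
 *	if (ioctl(fd, BIOCSHDRCMPLT, &one) == -1)
 *		err(1, "BIOCSHDRCMPLT");
 *	if (write(fd, frame, framelen) != framelen)
 *		err(1, "write");
 */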

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
static void
reset_d(struct bpf_d *d)
{

	mtx_assert(&d->bd_mtx, MA_OWNED);
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_fcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set link layer read filter.
 *  BIOCSETWF		Set link layer write filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGDLTLIST	Get list of supported link layer types.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag.
 *  BIOCSHDRCMPLT	Set "header already complete" flag.
 *  BIOCGSEESENT	Get "see packets sent" flag.
 *  BIOCSSEESENT	Set "see packets sent" flag.
 *  BIOCLOCK		Set "locked" flag.
 */
/* ARGSUSED */
static	int
bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	struct bpf_d *d = dev->si_drv1;
	int error = 0;

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	if (d->bd_locked == 1) {
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
			break;
		default:
			return (EPERM);
		}
	}
	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			if (d->bd_bif == NULL)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read or write filter.
	 */
	case BIOCSETF:
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			mtx_lock(&Giant);
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			mtx_unlock(&Giant);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get current data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag.
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCLOCK:
		d->bd_locked = 1;
		break;

	/*
	 * Set "header already complete" flag.
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag.
	 */
	case BIOCGSEESENT:
		*(u_int *)addr = d->bd_seesent;
		break;

	/*
	 * Set "see sent packets" flag.
	 */
	case BIOCSSEESENT:
		d->bd_seesent = *(u_int *)addr;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(&d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}
	return (error);
}
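
/*
 * Example (userland, illustrative): the usual setup sequence is open(2)
 * on a /dev/bpf node, BIOCSETIF to bind an interface, and optionally
 * BIOCIMMEDIATE and BIOCSRTIMEOUT to bound read latency.  A minimal
 * sketch, assuming the hypothetical interface name "em0":
 *
 *	struct ifreq ifr;
 *	struct timeval tv = { 1, 0 };
 *	u_int one = 1;
 *	int fd;
 *
 *	fd = open("/dev/bpf0", O_RDWR);
 *	memset(&ifr, 0, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCIMMEDIATE, &one);
 *	ioctl(fd, BIOCSRTIMEOUT, &tv);
 */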

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
	struct bpf_insn *fcode, *old;
	u_int wfilter, flen, size;
#ifdef BPF_JITTER
	bpf_jit_filter *ofunc;
#endif

	if (cmd == BIOCSETWF) {
		old = d->bd_wfilter;
		wfilter = 1;
#ifdef BPF_JITTER
		ofunc = NULL;
#endif
	} else {
		wfilter = 0;
		old = d->bd_rfilter;
#ifdef BPF_JITTER
		ofunc = d->bd_bfilter;
#endif
	}
	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = NULL;
		else {
			d->bd_rfilter = NULL;
#ifdef BPF_JITTER
			d->bd_bfilter = NULL;
#endif
		}
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif
		return (0);
	}
	flen = fp->bf_len;
	if (flen > bpf_maxinsns)
		return (EINVAL);

	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = fcode;
		else {
			d->bd_rfilter = fcode;
#ifdef BPF_JITTER
			d->bd_bfilter = bpf_jitter(fcode, flen);
#endif
		}
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif

		return (0);
	}
	free((caddr_t)fcode, M_BPF);
	return (EINVAL);
}
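
/*
 * Example (userland, illustrative): a classic filter program, as loaded
 * through BIOCSETF.  This one accepts only ARP frames on an Ethernet
 * DLT by testing the 16-bit type field at offset 12; BPF_STMT, BPF_JUMP
 * and ETHERTYPE_ARP come from <net/bpf.h> and <net/ethernet.h>:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),
 *		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ETHERTYPE_ARP, 0, 1),
 *		BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
 *		BPF_STMT(BPF_RET+BPF_K, 0),
 *	};
 *	struct bpf_program prog = {
 *		sizeof(insns) / sizeof(insns[0]), insns
 *	};
 *
 *	if (ioctl(fd, BIOCSETF, &prog) == -1)
 *		err(1, "BIOCSETF");
 */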

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	int error;
	struct ifnet *theywant;

	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL || theywant->if_bpf == NULL)
		return (ENXIO);

	bp = theywant->if_bpf;
	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to the requested interface,
	 * just flush the buffer.
	 */
	if (d->bd_sbuf == NULL) {
		error = bpf_allocbufs(d);
		if (error != 0)
			return (error);
	}
	if (bp != d->bd_bif) {
		if (d->bd_bif)
			/*
			 * Detach if attached to something else.
			 */
			bpf_detachd(d);

		bpf_attachd(d, bp);
	}
	BPFD_LOCK(d);
	reset_d(d);
	BPFD_UNLOCK(d);
	return (0);
}

/*
 * Support for select() and poll() system calls
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
static int
bpfpoll(struct cdev *dev, int events, struct thread *td)
{
	struct bpf_d *d;
	int revents;

	d = dev->si_drv1;
	if (d->bd_bif == NULL)
		return (ENXIO);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (events & (POLLIN | POLLRDNORM)) {
		if (bpf_ready(d))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}

/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
int
bpfkqfilter(struct cdev *dev, struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)dev->si_drv1;

	if (kn->kn_filter != EVFILT_READ)
		return (1);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	kn->kn_fop = &bpfread_filtops;
	kn->kn_hook = d;
	knlist_add(&d->bd_sel.si_note, kn, 1);
	BPFD_UNLOCK(d);

	return (0);
}
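
/*
 * Example (userland, illustrative): with the EVFILT_READ filter
 * registered above, a consumer can multiplex bpf descriptors through
 * kqueue(2).  A minimal sketch, where "fd" is an open bpf descriptor:
 *
 *	struct kevent ev;
 *	int kq = kqueue();
 *
 *	EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);
 *	kevent(kq, NULL, 0, &ev, 1, NULL);
 *
 * On return, ev.data holds the byte count computed by filt_bpfread()
 * below.
 */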

static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	knlist_remove(&d->bd_sel.si_note, kn, 0);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int ready;

	BPFD_LOCK_ASSERT(d);
	ready = bpf_ready(d);
	if (ready) {
		kn->kn_data = d->bd_slen;
		if (d->bd_hbuf)
			kn->kn_data += d->bd_hlen;
	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		callout_reset(&d->bd_callout, d->bd_rtout,
		    bpf_timed_out, d);
		d->bd_state = BPF_WAITING;
	}

	return (ready);
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct bpf_d *d;
	u_int slen;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL)
			slen = (*(d->bd_bfilter->func))(pkt, pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			d->bd_fcount++;
#ifdef MAC
			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen, bcopy);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, void *), dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	struct bpf_d *d;
	u_int pktlen, slen;

	pktlen = m_length(m, NULL);

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		/* XXX We cannot handle multiple mbufs. */
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL &&
		    m->m_next == NULL)
			slen = (*(d->bd_bfilter->func))(mtod(m, u_char *),
			    pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
#ifdef MAC
			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_mcopy);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}
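
/*
 * Example (driver side, illustrative): drivers normally do not call
 * bpf_mtap() directly but use the BPF_MTAP() macro from <net/bpf.h>,
 * which first checks that bpf is attached to the interface.  A sketch
 * from a hypothetical receive path:
 *
 *	BPF_MTAP(ifp, m);
 *	(*ifp->if_input)(ifp, m);
 */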

/*
 * Incoming linkage from device drivers, when packet is in
 * an mbuf chain and to be prepended by a contiguous header.
 */
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;

	pktlen = m_length(m, NULL);
	/*
	 * Craft an on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only set up what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
#ifdef MAC
			if (mac_check_bpfdesc_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_mcopy);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void (*cpfn)(const void *, void *, size_t))
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen = d->bd_bif->bif_hdrlen;
	int do_wakeup = 0;

	BPFD_LOCK_ASSERT(d);
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	microtime(&hp->bh_tstamp);
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Initialize all nonzero fields of a descriptor.
 */
static int
bpf_allocbufs(struct bpf_d *d)
{
	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
	if (d->bd_fbuf == NULL)
		return (ENOBUFS);

	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_BPF);
		return (ENOBUFS);
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	return (0);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and hasn't yet been marked
	 * free.
	 */
	if (d->bd_sbuf != NULL) {
		free(d->bd_sbuf, M_BPF);
		if (d->bd_hbuf != NULL)
			free(d->bd_hbuf, M_BPF);
		if (d->bd_fbuf != NULL)
			free(d->bd_fbuf, M_BPF);
	}
	if (d->bd_rfilter) {
		free((caddr_t)d->bd_rfilter, M_BPF);
#ifdef BPF_JITTER
		bpf_destroy_jit_filter(d->bd_bfilter);
#endif
	}
	if (d->bd_wfilter)
		free((caddr_t)d->bd_wfilter, M_BPF);
	mtx_destroy(&d->bd_mtx);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
 */
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;

	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
	if (bp == NULL)
		panic("bpfattach");

	LIST_INIT(&bp->bif_dlist);
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
	*driverp = bp;

	mtx_lock(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
	mtx_unlock(&bpf_mtx);

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
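
	/*
	 * Worked example (illustrative): for DLT_EN10MB, hdrlen is 14.
	 * On a platform where SIZEOF_BPF_HDR is 18, BPF_WORDALIGN(14 + 18)
	 * is 32, so bif_hdrlen becomes 32 - 14 = 18 and the network layer
	 * header starts 32 bytes into the buffer, longword aligned.
	 */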

	if (bootverbose)
		if_printf(ifp, "bpf attached\n");
}

/*
 * Detach bpf from an interface.  This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL.  Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if	*bp;
	struct bpf_d	*d;

	/* Locate BPF interface information */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (ifp == bp->bif_ifp)
			break;
	}

	/* Interface wasn't attached */
	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
		mtx_unlock(&bpf_mtx);
		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
		return;
	}

	LIST_REMOVE(bp, bif_next);
	mtx_unlock(&bpf_mtx);

	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
		bpf_detachd(d);
		BPFD_LOCK(d);
		bpf_wakeup(d);
		BPFD_UNLOCK(d);
	}

	mtx_destroy(&bp->bif_mtx);
	free(bp, M_BPF);
}

/*
 * Get a list of the available data link types for the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len) {
				mtx_unlock(&bpf_mtx);
				return (ENOMEM);
			}
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	mtx_unlock(&bpf_mtx);
	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	mtx_unlock(&bpf_mtx);
	if (bp != NULL) {
		opromisc = d->bd_promisc;
		bpf_detachd(d);
		bpf_attachd(d, bp);
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		if (opromisc) {
			error = ifpromisc(bp->bif_ifp, 1);
			if (error)
				if_printf(bp->bif_ifp,
					"bpf_setdlt: ifpromisc failed (%d)\n",
					error);
			else
				d->bd_promisc = 1;
		}
	}
	return (bp == NULL ? EINVAL : 0);
}

static void
bpf_clone(void *arg, struct ucred *cred, char *name, int namelen,
    struct cdev **dev)
{
	int u;

	if (*dev != NULL)
		return;
	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
		return;
	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", u);
	dev_ref(*dev);
	(*dev)->si_flags |= SI_CHEAPCLONE;
	return;
}

static void
bpf_drvinit(void *unused)
{

	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
	LIST_INIT(&bpf_iflist);
	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
}

static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	bzero(d, sizeof(*d));
	BPFD_LOCK_ASSERT(bd);
	d->bd_immediate = bd->bd_immediate;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_hdrcmplt;
	d->bd_seesent = bd->bd_seesent;
	d->bd_async = bd->bd_async;
	d->bd_rcount = bd->bd_rcount;
	d->bd_dcount = bd->bd_dcount;
	d->bd_fcount = bd->bd_fcount;
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname,
	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	d->bd_locked = bd->bd_locked;
}

static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct xbpf_d *xbdbuf, *xbd;
	int index, error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct.  It is possible for
	 * non-privileged users to open bpf devices.  It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = suser(req->td);
	if (error)
		return (error);
	if (req->oldptr == NULL)
		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
	if (bpf_bpfd_cnt == 0)
		return (SYSCTL_OUT(req, 0, 0));
	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
	mtx_lock(&bpf_mtx);
	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
		mtx_unlock(&bpf_mtx);
		free(xbdbuf, M_BPF);
		return (ENOMEM);
	}
	index = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_LOCK(bp);
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
		BPFIF_UNLOCK(bp);
	}
	mtx_unlock(&bpf_mtx);
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}

SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL)

#else /* !DEV_BPF && !NETGRAPH_BPF */
/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */
static struct bpf_if bp_null;

void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}

void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}

void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}

void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{

	*driverp = &bp_null;
}

void
bpfdetach(struct ifnet *ifp)
{
}

u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{
	return -1;	/* "no filter" behaviour */
}

int
bpf_validate(const struct bpf_insn *f, int len)
{
	return 0;		/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */