/*-
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/net/bpf.c 345795 2019-04-02 09:33:30Z ae $");

#include "opt_bpf.h"
#include "opt_compat.h"
#include "opt_ddb.h"
#include "opt_netgraph.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/uio.h>

#include <sys/event.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/proc.h>

#include <sys/socket.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
#include <net/bpf_jitter.h>
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>
#include <net/route.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <net80211/ieee80211_freebsd.h>

#include <security/mac/mac_framework.h>

MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

static struct bpf_if_ext dead_bpf_if = {
	.bif_dlist = LIST_HEAD_INITIALIZER()
};

struct bpf_if {
#define	bif_next	bif_ext.bif_next
#define	bif_dlist	bif_ext.bif_dlist
	struct bpf_if_ext bif_ext;	/* public members */
	u_int		bif_dlt;	/* link layer type */
	u_int		bif_hdrlen;	/* length of link header */
	struct ifnet	*bif_ifp;	/* corresponding interface */
	struct rwlock	bif_lock;	/* interface lock */
	LIST_HEAD(, bpf_d) bif_wlist;	/* writer-only list */
	int		bif_flags;	/* Interface flags */
	struct bpf_if	**bif_bpf;	/* Pointer to pointer to us */
};

CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define PRINET  26			/* interruptible */

#define	SIZEOF_BPF_HDR(type)	\
    (offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen))
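/*
 * Illustrative note (a reading of the macro, not new behavior):
 * SIZEOF_BPF_HDR(struct bpf_hdr) counts the bytes up to and including
 * bh_hdrlen, deliberately excluding any tail padding the compiler may
 * append, so header-length calculations such as bpf_hdrlen() below
 * start from the packed size rather than sizeof().
 */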

#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>
#define BPF_ALIGNMENT32 sizeof(int32_t)
#define	BPF_WORDALIGN32(x) roundup2(x, BPF_ALIGNMENT32)

#ifndef BURN_BRIDGES
/*
 * 32-bit version of structure prepended to each packet.  We use this header
 * instead of the standard one for 32-bit streams.  We mark a stream as
 * 32-bit the first time we see a 32-bit compat ioctl request.
 */
struct bpf_hdr32 {
	struct timeval32 bh_tstamp;	/* time stamp */
	uint32_t	bh_caplen;	/* length of captured portion */
	uint32_t	bh_datalen;	/* original length of packet */
	uint16_t	bh_hdrlen;	/* length of bpf header (this struct
					   plus alignment padding) */
};
#endif

struct bpf_program32 {
	u_int bf_len;
	uint32_t bf_insns;
};

struct bpf_dltlist32 {
	u_int	bfl_len;
	u_int	bfl_list;
};

#define	BIOCSETF32	_IOW('B', 103, struct bpf_program32)
#define	BIOCSRTIMEOUT32	_IOW('B', 109, struct timeval32)
#define	BIOCGRTIMEOUT32	_IOR('B', 110, struct timeval32)
#define	BIOCGDLTLIST32	_IOWR('B', 121, struct bpf_dltlist32)
#define	BIOCSETWF32	_IOW('B', 123, struct bpf_program32)
#define	BIOCSETFNR32	_IOW('B', 130, struct bpf_program32)
#endif

#define	BPF_LOCK()		sx_xlock(&bpf_sx)
#define	BPF_UNLOCK()		sx_xunlock(&bpf_sx)
#define	BPF_LOCK_ASSERT()	sx_assert(&bpf_sx, SA_XLOCKED)
/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (e.g., 802.11
 * frames, Ethernet frames, etc.).
 */
static LIST_HEAD(, bpf_if)	bpf_iflist, bpf_freelist;
static struct sx	bpf_sx;		/* bpf global lock */
static int		bpf_bpfd_cnt;

static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static void	bpf_detachd_locked(struct bpf_d *);
static void	bpf_freed(struct bpf_d *);
static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
		    struct sockaddr *, int *, struct bpf_d *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
		    struct bintime *);
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static void	bpf_drvinit(void *);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
static int bpf_zerocopy_enable = 0;
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
    &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");

static VNET_DEFINE(int, bpf_optimize_writers) = 0;
#define	V_bpf_optimize_writers VNET(bpf_optimize_writers)
SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(bpf_optimize_writers), 0,
    "Do not send packets until BPF program is set");

static	d_open_t	bpfopen;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	bpfopen,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};

static struct filterops bpfread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfread,
};

eventhandler_tag	bpf_ifdetach_cookie = NULL;

/*
 * LOCKING MODEL USED BY BPF:
 * Locks:
 * 1) global lock (BPF_LOCK).  Sx lock, used to protect interface addition/removal,
 * some global counters and every bpf_if reference.
 * 2) Interface lock.  Rwlock, used to protect list of BPF descriptors and their filters.
 * 3) Descriptor lock.  Mutex, used to protect BPF buffers and various structure fields
 *   used by bpf_mtap code.
 *
 * Lock order:
 *
 * Global lock, interface lock, descriptor lock
 *
 * We have to acquire the interface lock before the descriptor main lock due to
 * the BPF_MTAP[2] working model.  In many places (like bpf_detachd) we start
 * with a BPF descriptor (and we need to at least rlock it to get a reliable
 * interface pointer).  This gives us a potential LOR.  As a result, we use the
 * global lock to protect from bpf_if changes in every such place.
 *
 * Changing d->bd_bif is protected by 1) global lock, 2) interface lock and
 * 3) descriptor main wlock.
 * Reading bd_bif can be protected by any of these locks, typically global lock.
 *
 * Changing the read/write BPF filter is protected by the same three locks;
 * the same applies for reading.
 *
 * Sleeping in global lock is not allowed due to bpfdetach() using it.
 */
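
/*
 * Illustrative sketch only (not an actual code path): when all three
 * locks are needed they nest in the documented order, e.g. for a
 * descriptor d being attached to interface bp:
 *
 *	BPF_LOCK();
 *	BPFIF_WLOCK(bp);
 *	BPFD_LOCK(d);
 *	...
 *	BPFD_UNLOCK(d);
 *	BPFIF_WUNLOCK(bp);
 *	BPF_UNLOCK();
 */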

/*
 * Wrapper functions for various buffering methods.  If the set of buffer
 * modes expands, we will probably want to introduce a switch data structure
 * similar to protosw, et al.
 */
static void
bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_bytes(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		d->bd_zcopy++;
		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_bytes");
	}
}

static void
bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		d->bd_zcopy++;
		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_mbuf");
	}
}

/*
 * This function gets called when the free buffer is re-assigned.
 */
static void
bpf_buf_reclaimed(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return;

	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_buf_reclaimed(d);
		return;

	default:
		panic("bpf_buf_reclaimed");
	}
}

/*
 * If the buffer mechanism has a way to decide that a held buffer can be made
 * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
 * returned if the buffer can be discarded, (0) is returned if it cannot.
 */
static int
bpf_canfreebuf(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canfreebuf(d));
	}
	return (0);
}

/*
 * Allow the buffer model to indicate that the current store buffer is
 * immutable, regardless of the appearance of space.  Return (1) if the
 * buffer is writable, and (0) if not.
 */
static int
bpf_canwritebuf(struct bpf_d *d)
{
	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canwritebuf(d));
	}
	return (1);
}

/*
 * Notify buffer model that an attempt to write to the store buffer has
 * resulted in a dropped packet, in which case the buffer may be considered
 * full.
 */
static void
bpf_buffull(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_buffull(d);
		break;
	}
}

/*
 * Notify the buffer model that a buffer has moved into the hold position.
 */
void
bpf_bufheld(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_bufheld(d);
		break;
	}
}

static void
bpf_free(struct bpf_d *d)
{

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_free(d));

	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_free(d));

	default:
		panic("bpf_buf_free");
	}
}

static int
bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_uiomove(d, buf, len, uio));
}

static int
bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_ioctl_sblen(d, i));
}

static int
bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
}

static int
bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
}

static int
bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
}

/*
 * General BPF functions.
 */
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
    struct sockaddr *sockp, int *hdrlen, struct bpf_d *d)
{
	const struct ieee80211_bpf_params *p;
	struct ether_header *eh;
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * null interface types require a 4 byte pseudo header which
		 * corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
		sockp->sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
		sockp->sa_family = AF_IEEE80211;
		sockp->sa_len = 12;	/* XXX != 0 */
		hlen = sizeof(struct ieee80211_bpf_params);
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;
	if (len < hlen || len - hlen > ifp->if_mtu)
		return (EMSGSIZE);

	m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR);
	if (m == NULL)
		return (EIO);
	m->m_pkthdr.len = m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB:
		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
			    ETHER_ADDR_LEN) == 0)
				m->m_flags |= M_BCAST;
			else
				m->m_flags |= M_MCAST;
		}
		if (d->bd_hdrcmplt == 0) {
			memcpy(eh->ether_shost, IF_LLADDR(ifp),
			    sizeof(eh->ether_shost));
		}
		break;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (sockp->sa_family == AF_IEEE80211) {
			/*
			 * Collect true length from the parameter header
			 * NB: sockp is known to be zero'd so if we do a
			 *     short copy unspecified parameters will be
			 *     zero.
			 * NB: packet may not be aligned after stripping
			 *     bpf params
			 * XXX check ibp_vers
			 */
			p = mtod(m, const struct ieee80211_bpf_params *);
			hlen = p->ibp_len;
			if (hlen > sizeof(sockp->sa_data)) {
				error = EINVAL;
				goto bad;
			}
		}
		bcopy(mtod(m, const void *), sockp->sa_data, hlen);
	}
	*hdrlen = hlen;

	return (0);
bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	int op_w;

	BPF_LOCK_ASSERT();

	/*
	 * Save the sysctl value to protect against a sysctl change
	 * between reads.
	 */
	op_w = V_bpf_optimize_writers || d->bd_writer;

	if (d->bd_bif != NULL)
		bpf_detachd_locked(d);
	/*
	 * Point d at bp, and add d to the interface's list.
	 * Since there are many applications using BPF for
	 * sending raw packets only (dhcpd, cdpd are good examples)
	 * we can delay adding d to the list of active listeners until
	 * some filter is configured.
	 */

	BPFIF_WLOCK(bp);
	BPFD_LOCK(d);

	d->bd_bif = bp;

	if (op_w != 0) {
		/* Add to writers-only list */
		LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
		/*
		 * We decrement bd_writer on every filter set operation.
		 * The first BIOCSETF is done by pcap_open_live() to set up
		 * the snap length.  After that the application usually sets
		 * its own filter.
		 */
		d->bd_writer = 2;
	} else
		LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	BPFD_UNLOCK(d);
	BPFIF_WUNLOCK(bp);

	bpf_bpfd_cnt++;

	CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
	    __func__, d->bd_pid, d->bd_writer ? "writer" : "active");

	if (op_w == 0)
		EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
}

/*
 * Check if we need to upgrade our descriptor @d from write-only mode.
 */
static int
bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen)
{
	int is_snap, need_upgrade;

	/*
	 * Check if we've already upgraded or the new filter is empty.
	 */
	if (d->bd_writer == 0 || fcode == NULL)
		return (0);

	need_upgrade = 0;

	/*
	 * Check if cmd looks like a snaplen setting from
	 * pcap_bpf.c:pcap_open_live().
	 * Note we're not checking .k value here:
	 * while pcap_open_live() definitely sets it to a non-zero value,
	 * we'd prefer to treat the k=0 (deny ALL) case the same way, i.e.
	 * do not consider upgrading immediately.
	 */
	if (cmd == BIOCSETF && flen == 1 && fcode[0].code == (BPF_RET | BPF_K))
		is_snap = 1;
	else
		is_snap = 0;

	if (is_snap == 0) {
		/*
		 * We're setting the first filter and it doesn't look like
		 * setting snaplen.  We're probably using bpf directly.
		 * Upgrade immediately.
		 */
		need_upgrade = 1;
	} else {
		/*
		 * Do not require an upgrade on the first BIOCSETF
		 * (used by pcap_open_live() to set the snaplen).
		 */

		if (--d->bd_writer == 0) {
			/*
			 * The first snaplen filter has already
			 * been set.  This is probably a catch-all
			 * filter.
			 */
			need_upgrade = 1;
		}
	}

	CTR5(KTR_NET,
	    "%s: filter function set by pid %d, "
	    "bd_writer counter %d, snap %d upgrade %d",
	    __func__, d->bd_pid, d->bd_writer,
	    is_snap, need_upgrade);

	return (need_upgrade);
}
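
/*
 * For reference, a hedged sketch (illustrative values, not kernel code)
 * of the one-instruction "snaplen" program the is_snap test above is
 * meant to recognize; this is what pcap_open_live() typically installs
 * first:
 *
 *	struct bpf_insn snap[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 65535),
 *	};
 *	struct bpf_program prog = { 1, snap };
 *	(void)ioctl(fd, BIOCSETF, &prog);
 */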

/*
 * Add d to the list of active bp filters.
 * Requires bpf_attachd() to have been called beforehand.
 */
static void
bpf_upgraded(struct bpf_d *d)
{
	struct bpf_if *bp;

	BPF_LOCK_ASSERT();

	bp = d->bd_bif;

	/*
	 * The filter can be set several times without specifying an
	 * interface.  Mark d as a reader and exit.
	 */
	if (bp == NULL) {
		BPFD_LOCK(d);
		d->bd_writer = 0;
		BPFD_UNLOCK(d);
		return;
	}

	BPFIF_WLOCK(bp);
	BPFD_LOCK(d);

	/* Remove from writers-only list */
	LIST_REMOVE(d, bd_next);
	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
	/* Mark d as reader */
	d->bd_writer = 0;

	BPFD_UNLOCK(d);
	BPFIF_WUNLOCK(bp);

	CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid);

	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	BPF_LOCK();
	bpf_detachd_locked(d);
	BPF_UNLOCK();
}

static void
bpf_detachd_locked(struct bpf_d *d)
{
	int error;
	struct bpf_if *bp;
	struct ifnet *ifp;

	CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);

	BPF_LOCK_ASSERT();

	/* Check if descriptor is attached */
	if ((bp = d->bd_bif) == NULL)
		return;

	BPFIF_WLOCK(bp);
	BPFD_LOCK(d);

	/* Save bd_writer value */
	error = d->bd_writer;

	/*
	 * Remove d from the interface's descriptor list.
	 */
	LIST_REMOVE(d, bd_next);

	ifp = bp->bif_ifp;
	d->bd_bif = NULL;
	BPFD_UNLOCK(d);
	BPFIF_WUNLOCK(bp);

	bpf_bpfd_cnt--;

	/* Call event handler iff d is attached */
	if (error == 0)
		EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		CURVNET_SET(ifp->if_vnet);
		error = ifpromisc(ifp, 0);
		CURVNET_RESTORE();
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged.
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
				"bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
static void
bpf_dtor(void *data)
{
	struct bpf_d *d = data;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	bpf_detachd(d);
#ifdef MAC
	mac_bpfdesc_destroy(d);
#endif /* MAC */
	seldrain(&d->bd_sel);
	knlist_destroy(&d->bd_sel.si_note);
	callout_drain(&d->bd_callout);
	bpf_freed(d);
	free(d, M_BPF);
}

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
static	int
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d;
	int error;

	d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	error = devfs_set_cdevpriv(d, bpf_dtor);
	if (error != 0) {
		free(d, M_BPF);
		return (error);
	}

	/*
	 * For historical reasons, perform a one-time initialization call to
	 * the buffer routines, even though we're not yet committed to a
	 * particular buffer method.
	 */
	bpf_buffer_init(d);
	if ((flags & FREAD) == 0)
		d->bd_writer = 2;
	d->bd_hbuf_in_use = 0;
	d->bd_bufmode = BPF_BUFMODE_BUFFER;
	d->bd_sig = SIGIO;
	d->bd_direction = BPF_D_INOUT;
	BPF_PID_REFRESH(d, td);
#ifdef MAC
	mac_bpfdesc_init(d);
	mac_bpfdesc_create(td->td_ucred, d);
#endif
	mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);

	return (0);
}

/*
 *  bpfread - read next chunk of packets from buffers
 */
static	int
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	int error;
	int non_block;
	int timed_out;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	non_block = ((ioflag & O_NONBLOCK) != 0);

	BPFD_LOCK(d);
	BPF_PID_REFRESH_CUR(d);
	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
		BPFD_UNLOCK(d);
		return (EOPNOTSUPP);
	}
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	while (d->bd_hbuf_in_use) {
		error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
		    PRINET|PCATCH, "bd_hbuf", 0);
		if (error != 0) {
			BPFD_UNLOCK(d);
			return (error);
		}
	}
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read or while we were asleep.
			 */
			if (d->bd_immediate || non_block || timed_out) {
				/*
				 * Rotate the buffers and return what's here
				 * if we are in immediate mode, non-blocking
				 * flag is set, or this descriptor timed out.
				 */
				ROTATE_BUFFERS(d);
				break;
			}
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (non_block) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep(d, &d->bd_lock, PRINET|PCATCH,
		     "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	d->bd_hbuf_in_use = 1;
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 *
	 * We do not have to worry about simultaneous reads because
	 * we waited for sole access to the hold buffer above.
	 */
	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	bpf_buf_reclaimed(d);
	d->bd_hbuf_in_use = 0;
	wakeup(&d->bd_hbuf_in_use);
	BPFD_UNLOCK(d);

	return (error);
}

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup(d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK_ASSERT(d);

	if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout))
		return;
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
}

static int
bpf_ready(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
		return (1);
	if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
	    d->bd_slen != 0)
		return (1);
	return (0);
}

static int
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	struct sockaddr dst;
	struct route ro;
	int error, hlen;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	BPF_PID_REFRESH_CUR(d);
	d->bd_wcount++;
	/* XXX: locking required */
	if (d->bd_bif == NULL) {
		d->bd_wdcount++;
		return (ENXIO);
	}

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0) {
		d->bd_wdcount++;
		return (ENETDOWN);
	}

	if (uio->uio_resid == 0) {
		d->bd_wdcount++;
		return (0);
	}

	bzero(&dst, sizeof(dst));
	m = NULL;
	hlen = 0;
	/* XXX: bpf_movein() can sleep */
	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
	    &m, &dst, &hlen, d);
	if (error) {
		d->bd_wdcount++;
		return (error);
	}
	d->bd_wfcount++;
	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		mc = m_dup(m, M_NOWAIT);
		if (mc != NULL)
			mc->m_pkthdr.rcvif = ifp;
		/* Set M_PROMISC for outgoing packets to be discarded. */
		if (d->bd_direction == BPF_D_INOUT)
			m->m_flags |= M_PROMISC;
	} else
		mc = NULL;

	m->m_pkthdr.len -= hlen;
	m->m_len -= hlen;
	m->m_data += hlen;	/* XXX */

	CURVNET_SET(ifp->if_vnet);
#ifdef MAC
	BPFD_LOCK(d);
	mac_bpfdesc_create_mbuf(d, m);
	if (mc != NULL)
		mac_bpfdesc_create_mbuf(d, mc);
	BPFD_UNLOCK(d);
#endif

	bzero(&ro, sizeof(ro));
	if (hlen != 0) {
		ro.ro_prepend = (u_char *)&dst.sa_data;
		ro.ro_plen = hlen;
		ro.ro_flags = RT_HAS_HEADER;
	}

	error = (*ifp->if_output)(ifp, m, &dst, &ro);
	if (error)
		d->bd_wdcount++;

	if (mc != NULL) {
		if (error == 0)
			(*ifp->if_input)(ifp, mc);
		else
			m_freem(mc);
	}
	CURVNET_RESTORE();

	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the receive
 * and drop counts.  This is doable for kernel-only buffers, but with
 * zero-copy buffers, we can't write to (or rotate) buffers that are
 * currently owned by userspace.  It would be nice if we could encapsulate
 * this logic in the buffer code rather than here.
 */
static void
reset_d(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	while (d->bd_hbuf_in_use)
		mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET,
		    "bd_hbuf", 0);
	if ((d->bd_hbuf != NULL) &&
	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
		bpf_buf_reclaimed(d);
	}
	if (bpf_canwritebuf(d))
		d->bd_slen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_fcount = 0;
	d->bd_wcount = 0;
	d->bd_wfcount = 0;
	d->bd_wdcount = 0;
	d->bd_zcopy = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set read filter.
 *  BIOCSETFNR		Set read filter without resetting descriptor.
 *  BIOCSETWF		Set write filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 *  BIOCGDIRECTION	Get packet direction flag
 *  BIOCSDIRECTION	Set packet direction flag
 *  BIOCGTSTAMP		Get time stamp format and resolution.
 *  BIOCSTSTAMP		Set time stamp format and resolution.
 *  BIOCLOCK		Set "locked" flag
 *  BIOCFEEDBACK	Set packet feedback mode.
 *  BIOCSETZBUF		Set current zero-copy buffer locations.
 *  BIOCGETZMAX		Get maximum zero-copy buffer size.
 *  BIOCROTZBUF		Force rotation of zero-copy buffer
 *  BIOCSETBUFMODE	Set buffer mode.
 *  BIOCGETBUFMODE	Get current buffer mode.
 */
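
/*
 * A minimal userland sketch of the usual open/bind/read sequence built
 * from the ioctls above.  Assumptions: an interface named "em0" exists,
 * /dev/bpf clones on open, and error checking is omitted for brevity:
 *
 *	int fd = open("/dev/bpf", O_RDWR);
 *	struct ifreq ifr;
 *	u_int blen, imm = 1;
 *
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCGBLEN, &blen);
 *	ioctl(fd, BIOCIMMEDIATE, &imm);
 *
 * Reads must then ask for exactly blen bytes (bpfread() above rejects
 * any other size), i.e. read(fd, buf, blen) into a blen-sized buffer.
 */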
/* ARGSUSED */
static	int
bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	struct bpf_d *d;
	int error;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	BPF_PID_REFRESH(d, td);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	if (d->bd_locked == 1) {
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
#ifdef COMPAT_FREEBSD32
		case BIOCGDLTLIST32:
#endif
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
		case BIOCGRTIMEOUT32:
#endif
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case BIOCSTSTAMP:
		case BIOCFEEDBACK:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
		case BIOCSRTIMEOUT32:
#endif
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCROTZBUF:
			break;
		default:
			return (EPERM);
		}
	}
#ifdef COMPAT_FREEBSD32
	/*
	 * If we see a 32-bit compat ioctl, mark the stream as 32-bit so
	 * that it will get 32-bit packet headers.
	 */
	switch (cmd) {
	case BIOCSETF32:
	case BIOCSETFNR32:
	case BIOCSETWF32:
	case BIOCGDLTLIST32:
	case BIOCGRTIMEOUT32:
	case BIOCSRTIMEOUT32:
		BPFD_LOCK(d);
		d->bd_compat32 = 1;
		BPFD_UNLOCK(d);
	}
#endif

	CURVNET_SET(TD_TO_VNET(td));
	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			while (d->bd_hbuf_in_use)
				mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
				    PRINET, "bd_hbuf", 0);
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_bufsize;
		BPFD_UNLOCK(d);
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		error = bpf_ioctl_sblen(d, (u_int *)addr);
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
	case BIOCSETFNR:
	case BIOCSETWF:
#ifdef COMPAT_FREEBSD32
	case BIOCSETF32:
	case BIOCSETFNR32:
	case BIOCSETWF32:
#endif
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get current data link type.
	 */
	case BIOCGDLT:
		BPF_LOCK();
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		BPF_UNLOCK();
		break;

	/*
	 * Get a list of supported data link types.
	 */
#ifdef COMPAT_FREEBSD32
	case BIOCGDLTLIST32:
		{
			struct bpf_dltlist32 *list32;
			struct bpf_dltlist dltlist;

			list32 = (struct bpf_dltlist32 *)addr;
			dltlist.bfl_len = list32->bfl_len;
			dltlist.bfl_list = PTRIN(list32->bfl_list);
			BPF_LOCK();
			if (d->bd_bif == NULL)
				error = EINVAL;
			else {
				error = bpf_getdltlist(d, &dltlist);
				if (error == 0)
					list32->bfl_len = dltlist.bfl_len;
			}
			BPF_UNLOCK();
			break;
		}
#endif

	case BIOCGDLTLIST:
		BPF_LOCK();
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		BPF_UNLOCK();
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		BPF_LOCK();
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		BPF_UNLOCK();
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		BPF_LOCK();
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		BPF_UNLOCK();
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		{
			int alloc_buf, size;

			/*
			 * Behavior here depends on the buffering model.  If
			 * we're using kernel memory buffers, then we can
			 * allocate them here.  If we're using zero-copy,
			 * then the user process must have registered buffers
			 * by the time we get here.
			 */
			alloc_buf = 0;
			BPFD_LOCK(d);
			if (d->bd_bufmode == BPF_BUFMODE_BUFFER &&
			    d->bd_sbuf == NULL)
				alloc_buf = 1;
			BPFD_UNLOCK(d);
			if (alloc_buf) {
				size = d->bd_bufsize;
				error = bpf_buffer_ioctl_sblen(d, &size);
				if (error != 0)
					break;
			}
			BPF_LOCK();
			error = bpf_setif(d, (struct ifreq *)addr);
			BPF_UNLOCK();
			break;
		}

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
	case BIOCSRTIMEOUT32:
#endif
		{
			struct timeval *tv = (struct timeval *)addr;
#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
			struct timeval32 *tv32;
			struct timeval tv64;

			if (cmd == BIOCSRTIMEOUT32) {
				tv32 = (struct timeval32 *)addr;
				tv = &tv64;
				tv->tv_sec = tv32->tv_sec;
				tv->tv_usec = tv32->tv_usec;
			} else
#endif
				tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}
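
	/*
	 * Worked example for the conversion above (an illustration, not
	 * verified against every hz setting): with hz = 1000, a 50 ms
	 * timeout passes itimerfix(), tvtohz() rounds it up to ticks
	 * including the current partial tick, and the "- 1" compensates,
	 * leaving bd_rtout at roughly 50 ticks.
	 */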

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
	case BIOCGRTIMEOUT32:
#endif
		{
			struct timeval *tv;
#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
			struct timeval32 *tv32;
			struct timeval tv64;

			if (cmd == BIOCGRTIMEOUT32)
				tv = &tv64;
			else
#endif
				tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
			if (cmd == BIOCGRTIMEOUT32) {
				tv32 = (struct timeval32 *)addr;
				tv32->tv_sec = tv->tv_sec;
				tv32->tv_usec = tv->tv_usec;
			}
#endif

			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			/* XXXCSJP overflow */
			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		BPFD_LOCK(d);
		d->bd_immediate = *(u_int *)addr;
		BPFD_UNLOCK(d);
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_hdrcmplt;
		BPFD_UNLOCK(d);
		break;

	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:
		BPFD_LOCK(d);
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		BPFD_UNLOCK(d);
		break;

	/*
	 * Get packet direction flag
	 */
	case BIOCGDIRECTION:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_direction;
		BPFD_UNLOCK(d);
		break;

	/*
	 * Set packet direction flag
	 */
	case BIOCSDIRECTION:
		{
			u_int	direction;

			direction = *(u_int *)addr;
			switch (direction) {
			case BPF_D_IN:
			case BPF_D_INOUT:
			case BPF_D_OUT:
				BPFD_LOCK(d);
				d->bd_direction = direction;
				BPFD_UNLOCK(d);
				break;
			default:
				error = EINVAL;
			}
		}
		break;

	/*
	 * Get packet timestamp format and resolution.
	 */
	case BIOCGTSTAMP:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_tstamp;
		BPFD_UNLOCK(d);
		break;

	/*
	 * Set packet timestamp format and resolution.
	 */
	case BIOCSTSTAMP:
		{
			u_int	func;

			func = *(u_int *)addr;
			if (BPF_T_VALID(func))
				d->bd_tstamp = func;
			else
				error = EINVAL;
		}
		break;

	case BIOCFEEDBACK:
		BPFD_LOCK(d);
		d->bd_feedback = *(u_int *)addr;
		BPFD_UNLOCK(d);
		break;

	case BIOCLOCK:
		BPFD_LOCK(d);
		d->bd_locked = 1;
		BPFD_UNLOCK(d);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		BPFD_LOCK(d);
		d->bd_async = *(int *)addr;
		BPFD_UNLOCK(d);
		break;

	case FIOSETOWN:
		/*
		 * XXX: Add some sort of locking here?
		 * fsetown() can sleep.
		 */
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		BPFD_LOCK(d);
		*(int *)addr = fgetown(&d->bd_sigio);
		BPFD_UNLOCK(d);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else {
				BPFD_LOCK(d);
				d->bd_sig = sig;
				BPFD_UNLOCK(d);
			}
			break;
		}
	case BIOCGRSIG:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_sig;
		BPFD_UNLOCK(d);
		break;

	case BIOCGETBUFMODE:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_bufmode;
		BPFD_UNLOCK(d);
		break;

	case BIOCSETBUFMODE:
		/*
		 * Allow the buffering mode to be changed as long as we
		 * haven't yet committed to a particular mode.  Our
		 * definition of commitment, for now, is whether or not a
		 * buffer has been allocated or an interface attached, since
		 * that's the point where things get tricky.
		 */
		switch (*(u_int *)addr) {
		case BPF_BUFMODE_BUFFER:
			break;

		case BPF_BUFMODE_ZBUF:
			if (bpf_zerocopy_enable)
				break;
			/* FALLTHROUGH */

		default:
			CURVNET_RESTORE();
			return (EINVAL);
		}

		BPFD_LOCK(d);
		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
			BPFD_UNLOCK(d);
			CURVNET_RESTORE();
			return (EBUSY);
		}
		d->bd_bufmode = *(u_int *)addr;
		BPFD_UNLOCK(d);
		break;

	case BIOCGETZMAX:
		error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
		break;

	case BIOCSETZBUF:
		error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
		break;

	case BIOCROTZBUF:
		error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
		break;
	}
	CURVNET_RESTORE();
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 *
 * Note we need the global lock here to serialize bpf_setf() and bpf_setif()
 * calls since reading d->bd_bif can't be protected by the descriptor or
 * interface lock due to lock order.
 *
 * Additionally, we have to acquire the interface write lock because
 * bpf_mtap() uses the interface read lock to read all filters.
 */
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
#ifdef COMPAT_FREEBSD32
	struct bpf_program fp_swab;
	struct bpf_program32 *fp32;
#endif
	struct bpf_insn *fcode, *old;
#ifdef BPF_JITTER
	bpf_jit_filter *jfunc, *ofunc;
#endif
	size_t size;
	u_int flen;
	int need_upgrade;

#ifdef COMPAT_FREEBSD32
	switch (cmd) {
	case BIOCSETF32:
	case BIOCSETWF32:
	case BIOCSETFNR32:
		fp32 = (struct bpf_program32 *)fp;
		fp_swab.bf_len = fp32->bf_len;
		fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
		fp = &fp_swab;
		switch (cmd) {
		case BIOCSETF32:
			cmd = BIOCSETF;
			break;
		case BIOCSETWF32:
			cmd = BIOCSETWF;
			break;
		}
		break;
	}
#endif

	fcode = NULL;
#ifdef BPF_JITTER
	jfunc = ofunc = NULL;
#endif
	need_upgrade = 0;

	/*
	 * Check the new filter's validity before acquiring any locks.
	 * Allocate memory for the new filter, if needed.
	 */
	flen = fp->bf_len;
	if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
		return (EINVAL);
	size = flen * sizeof(*fp->bf_insns);
	if (size > 0) {
		/* We're setting up new filter.  Copy and check actual data. */
		fcode = malloc(size, M_BPF, M_WAITOK);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    !bpf_validate(fcode, flen)) {
			free(fcode, M_BPF);
			return (EINVAL);
		}
#ifdef BPF_JITTER
		/* Filter is copied inside fcode and is perfectly valid. */
		jfunc = bpf_jitter(fcode, flen);
#endif
	}

	BPF_LOCK();

	/*
	 * Set up new filter.
	 * Protect filter change by interface lock.
	 * Additionally, we are protected by global lock here.
	 */
	if (d->bd_bif != NULL)
		BPFIF_WLOCK(d->bd_bif);
	BPFD_LOCK(d);
	if (cmd == BIOCSETWF) {
		old = d->bd_wfilter;
		d->bd_wfilter = fcode;
	} else {
		old = d->bd_rfilter;
		d->bd_rfilter = fcode;
#ifdef BPF_JITTER
		ofunc = d->bd_bfilter;
		d->bd_bfilter = jfunc;
#endif
		if (cmd == BIOCSETF)
			reset_d(d);

		need_upgrade = bpf_check_upgrade(cmd, d, fcode, flen);
	}
	BPFD_UNLOCK(d);
	if (d->bd_bif != NULL)
		BPFIF_WUNLOCK(d->bd_bif);
	if (old != NULL)
		free(old, M_BPF);
#ifdef BPF_JITTER
	if (ofunc != NULL)
		bpf_destroy_jit_filter(ofunc);
#endif

	/* Move d to active readers list. */
	if (need_upgrade != 0)
		bpf_upgraded(d);

	BPF_UNLOCK();
	return (0);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	struct ifnet *theywant;

	BPF_LOCK_ASSERT();

	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL || theywant->if_bpf == NULL)
		return (ENXIO);

	bp = theywant->if_bpf;

	/* Check if interface is not being detached from BPF */
	BPFIF_RLOCK(bp);
	if (bp->bif_flags & BPFIF_FLAG_DYING) {
		BPFIF_RUNLOCK(bp);
		return (ENXIO);
	}
	BPFIF_RUNLOCK(bp);

	/*
	 * At this point, we expect the buffer is already allocated.  If not,
	 * return an error.
	 */
	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
	case BPF_BUFMODE_ZBUF:
		if (d->bd_sbuf == NULL)
			return (EINVAL);
		break;

	default:
		panic("bpf_setif: bufmode %d", d->bd_bufmode);
	}
	if (bp != d->bd_bif)
		bpf_attachd(d, bp);
	BPFD_LOCK(d);
	reset_d(d);
	BPFD_UNLOCK(d);
	return (0);
}

/*
 * Support for select() and poll() system calls
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
static int
bpfpoll(struct cdev *dev, int events, struct thread *td)
{
	struct bpf_d *d;
	int revents;

	if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
		return (events &
		    (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));

	/*
	 * Refresh PID associated with this descriptor.
	 */
	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	BPF_PID_REFRESH(d, td);
	if (events & (POLLIN | POLLRDNORM)) {
		if (bpf_ready(d))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}

/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
int
bpfkqfilter(struct cdev *dev, struct knote *kn)
{
	struct bpf_d *d;

	if (devfs_get_cdevpriv((void **)&d) != 0 ||
	    kn->kn_filter != EVFILT_READ)
		return (1);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	BPF_PID_REFRESH_CUR(d);
	kn->kn_fop = &bpfread_filtops;
	kn->kn_hook = d;
	knlist_add(&d->bd_sel.si_note, kn, 1);
	BPFD_UNLOCK(d);

	return (0);
}

static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	knlist_remove(&d->bd_sel.si_note, kn, 0);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int ready;

	BPFD_LOCK_ASSERT(d);
	ready = bpf_ready(d);
	if (ready) {
		kn->kn_data = d->bd_slen;
		/*
		 * Ignore the hold buffer if it is being copied to user space.
		 */
		if (!d->bd_hbuf_in_use && d->bd_hbuf)
			kn->kn_data += d->bd_hlen;
	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		callout_reset(&d->bd_callout, d->bd_rtout,
		    bpf_timed_out, d);
		d->bd_state = BPF_WAITING;
	}

	return (ready);
}

#define	BPF_TSTAMP_NONE		0
#define	BPF_TSTAMP_FAST		1
#define	BPF_TSTAMP_NORMAL	2
#define	BPF_TSTAMP_EXTERN	3

static int
bpf_ts_quality(int tstype)
{

	if (tstype == BPF_T_NONE)
		return (BPF_TSTAMP_NONE);
	if ((tstype & BPF_T_FAST) != 0)
		return (BPF_TSTAMP_FAST);

	return (BPF_TSTAMP_NORMAL);
}

static int
bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m)
{
	struct m_tag *tag;
	int quality;

	quality = bpf_ts_quality(tstype);
	if (quality == BPF_TSTAMP_NONE)
		return (quality);

	if (m != NULL) {
		tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL);
		if (tag != NULL) {
			*bt = *(struct bintime *)(tag + 1);
			return (BPF_TSTAMP_EXTERN);
		}
	}
	if (quality == BPF_TSTAMP_NORMAL)
		binuptime(bt);
	else
		getbinuptime(bt);

	return (quality);
}
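
/*
 * Userland selection sketch (assuming the BPF_T_* constants from
 * net/bpf.h, e.g. BPF_T_MICROTIME_FAST): a descriptor that prefers
 * cheap timestamps could issue
 *
 *	u_int t = BPF_T_MICROTIME_FAST;
 *	ioctl(fd, BIOCSTSTAMP, &t);
 *
 * which makes bpf_ts_quality() return BPF_TSTAMP_FAST and steers
 * bpf_gettime() to the cheaper getbinuptime() path above.
 */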

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct bintime bt;
	struct bpf_d *d;
#ifdef BPF_JITTER
	bpf_jit_filter *bf;
#endif
	u_int slen;
	int gottime;

	gottime = BPF_TSTAMP_NONE;

	BPFIF_RLOCK(bp);

	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		/*
		 * We are not using any locks for d here because:
		 * 1) any filter change is protected by interface
		 * write lock
		 * 2) destroying/detaching d is protected by interface
		 * write lock, too
		 */

		/* XXX: Do not protect counter for the sake of performance. */
		++d->bd_rcount;
		/*
		 * NB: We don't call BPF_CHECK_DIRECTION() here since there is
		 * no way for the caller to indicate to us whether this packet
		 * is inbound or outbound.  In the bpf_mtap() routines, we use
		 * the interface pointers on the mbuf to figure it out.
		 */
#ifdef BPF_JITTER
		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
		if (bf != NULL)
			slen = (*(bf->func))(pkt, pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			/*
			 * Filter matches.  Let's acquire the write lock.
			 */
2170			BPFD_LOCK(d);
2171
2172			d->bd_fcount++;
2173			if (gottime < bpf_ts_quality(d->bd_tstamp))
2174				gottime = bpf_gettime(&bt, d->bd_tstamp, NULL);
2175#ifdef MAC
2176			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2177#endif
2178				catchpacket(d, pkt, pktlen, slen,
2179				    bpf_append_bytes, &bt);
2180			BPFD_UNLOCK(d);
2181		}
2182	}
2183	BPFIF_RUNLOCK(bp);
2184}

#define	BPF_CHECK_DIRECTION(d, r, i)				\
	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
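
/*
 * The macro above skips packets whose direction the descriptor does not
 * want: an inbound packet carries the tapped interface in m_pkthdr.rcvif
 * (r == i), an outbound one does not, so BPF_D_IN rejects r != i and
 * BPF_D_OUT rejects r == i.
 */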

/*
 * Incoming linkage from device drivers, when the packet is in an mbuf chain.
 * The locking model is explained in bpf_tap().
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	struct bintime bt;
	struct bpf_d *d;
#ifdef BPF_JITTER
	bpf_jit_filter *bf;
#endif
	u_int pktlen, slen;
	int gottime;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m, NULL);
	gottime = BPF_TSTAMP_NONE;

	BPFIF_RLOCK(bp);

	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
			continue;
		++d->bd_rcount;
#ifdef BPF_JITTER
		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
		/* XXX We cannot handle multiple mbufs. */
		if (bf != NULL && m->m_next == NULL)
			slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			BPFD_LOCK(d);

			d->bd_fcount++;
			if (gottime < bpf_ts_quality(d->bd_tstamp))
				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_append_mbuf, &bt);
			BPFD_UNLOCK(d);
		}
	}
	BPFIF_RUNLOCK(bp);
}
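
/*
 * Illustrative only: drivers normally reach bpf_mtap() through the
 * BPF_MTAP() macro from <net/bpf.h>, which first checks that any
 * listeners are present:
 *
 *	BPF_MTAP(ifp, m);
 */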

/*
 * Incoming linkage from device drivers, when the packet is in an mbuf
 * chain and is to be prepended by a contiguous header.
 */
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	struct bintime bt;
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m, NULL);
	/*
	 * Craft an on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only set up what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	gottime = BPF_TSTAMP_NONE;

	BPFIF_RLOCK(bp);

	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
			continue;
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			BPFD_LOCK(d);

			d->bd_fcount++;
			if (gottime < bpf_ts_quality(d->bd_tstamp))
				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_append_mbuf, &bt);
			BPFD_UNLOCK(d);
		}
	}
	BPFIF_RUNLOCK(bp);
}
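
/*
 * Illustrative only: bpf_mtap2() suits callers that keep a pseudo
 * link-layer header outside the mbuf chain, e.g. an 802.11 driver
 * prepending a radiotap header it maintains in its softc.  The field
 * names below are placeholders, not a real driver's members:
 *
 *	bpf_mtap2(ifp->if_bpf, &sc->sc_tx_th, sc->sc_tx_th_len, m);
 */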

#undef	BPF_CHECK_DIRECTION

#undef	BPF_TSTAMP_NONE
#undef	BPF_TSTAMP_FAST
#undef	BPF_TSTAMP_NORMAL
#undef	BPF_TSTAMP_EXTERN

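/*
 * Compute the amount of extra space, beyond the link-layer header, that
 * each captured packet consumes in the store buffer: the size of the
 * BPF header variant in effect for this descriptor, plus whatever
 * padding is needed to keep headers word aligned.
 */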
static int
bpf_hdrlen(struct bpf_d *d)
{
	int hdrlen;

	hdrlen = d->bd_bif->bif_hdrlen;
#ifndef BURN_BRIDGES
	if (d->bd_tstamp == BPF_T_NONE ||
	    BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME)
#ifdef COMPAT_FREEBSD32
		if (d->bd_compat32)
			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32);
		else
#endif
			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr);
	else
#endif
		hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr);
#ifdef COMPAT_FREEBSD32
	if (d->bd_compat32)
		hdrlen = BPF_WORDALIGN32(hdrlen);
	else
#endif
		hdrlen = BPF_WORDALIGN(hdrlen);

	return (hdrlen - d->bd_bif->bif_hdrlen);
}

static void
bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype)
{
	struct bintime bt2, boottimebin;
	struct timeval tsm;
	struct timespec tsn;

	if ((tstype & BPF_T_MONOTONIC) == 0) {
		bt2 = *bt;
		getboottimebin(&boottimebin);
		bintime_add(&bt2, &boottimebin);
		bt = &bt2;
	}
	switch (BPF_T_FORMAT(tstype)) {
	case BPF_T_MICROTIME:
		bintime2timeval(bt, &tsm);
		ts->bt_sec = tsm.tv_sec;
		ts->bt_frac = tsm.tv_usec;
		break;
	case BPF_T_NANOTIME:
		bintime2timespec(bt, &tsn);
		ts->bt_sec = tsn.tv_sec;
		ts->bt_frac = tsn.tv_nsec;
		break;
	case BPF_T_BINTIME:
		ts->bt_sec = bt->sec;
		ts->bt_frac = bt->frac;
		break;
	}
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bpf_append_bytes is passed in to copy contiguous chunks,
 * while bpf_append_mbuf is passed in to copy mbuf chains.  In the
 * latter case, pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
    struct bintime *bt)
{
	struct bpf_xhdr hdr;
#ifndef BURN_BRIDGES
	struct bpf_hdr hdr_old;
#ifdef COMPAT_FREEBSD32
	struct bpf_hdr32 hdr32_old;
#endif
#endif
	int caplen, curlen, hdrlen, totlen;
	int do_wakeup = 0;
	int do_timestamp;
	int tstype;

	BPFD_LOCK_ASSERT(d);

	/*
	 * Detect whether user space has released a buffer back to us, and if
	 * so, move it from being a hold buffer to a free buffer.  This may
	 * not be the best place to do it (for example, we might only want to
	 * run this check if we need the space), but for now it's a reliable
	 * spot to do it.
	 */
	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
		bpf_buf_reclaimed(d);
	}

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	hdrlen = bpf_hdrlen(d);
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 *
	 * Drop the packet if there's no room and no hope of room.
	 * If the packet would overflow the storage buffer or the storage
	 * buffer is considered immutable by the buffer model, try to rotate
	 * the buffer and wake up pending processes.
	 */
#ifdef COMPAT_FREEBSD32
	if (d->bd_compat32)
		curlen = BPF_WORDALIGN32(d->bd_slen);
	else
#endif
		curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
		if (d->bd_fbuf == NULL) {
			/*
			 * There's no room in the store buffer, and no
			 * prospect of room, so drop the packet.  Notify the
			 * buffer model.
			 */
			bpf_buffull(d);
			++d->bd_dcount;
			return;
		}
		KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use"));
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has already
		 * expired during a select call.  A packet arrived, so the
		 * reader should be woken up.
		 */
		do_wakeup = 1;
	caplen = totlen - hdrlen;
	tstype = d->bd_tstamp;
	do_timestamp = tstype != BPF_T_NONE;
#ifndef BURN_BRIDGES
	if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) {
		struct bpf_ts ts;
		if (do_timestamp)
			bpf_bintime2ts(bt, &ts, tstype);
#ifdef COMPAT_FREEBSD32
		if (d->bd_compat32) {
			bzero(&hdr32_old, sizeof(hdr32_old));
			if (do_timestamp) {
				hdr32_old.bh_tstamp.tv_sec = ts.bt_sec;
				hdr32_old.bh_tstamp.tv_usec = ts.bt_frac;
			}
			hdr32_old.bh_datalen = pktlen;
			hdr32_old.bh_hdrlen = hdrlen;
			hdr32_old.bh_caplen = caplen;
			bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old,
			    sizeof(hdr32_old));
			goto copy;
		}
#endif
		bzero(&hdr_old, sizeof(hdr_old));
		if (do_timestamp) {
			hdr_old.bh_tstamp.tv_sec = ts.bt_sec;
			hdr_old.bh_tstamp.tv_usec = ts.bt_frac;
		}
		hdr_old.bh_datalen = pktlen;
		hdr_old.bh_hdrlen = hdrlen;
		hdr_old.bh_caplen = caplen;
		bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old,
		    sizeof(hdr_old));
		goto copy;
	}
#endif

	/*
	 * Append the bpf header.  Note we append the actual header size, but
	 * move forward the length of the header plus padding.
	 */
	bzero(&hdr, sizeof(hdr));
	if (do_timestamp)
		bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype);
	hdr.bh_datalen = pktlen;
	hdr.bh_hdrlen = hdrlen;
	hdr.bh_caplen = caplen;
	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
#ifndef BURN_BRIDGES
copy:
#endif
	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen);
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{

	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it has not yet been marked
	 * free.
	 */
	bpf_free(d);
	if (d->bd_rfilter != NULL) {
		free((caddr_t)d->bd_rfilter, M_BPF);
#ifdef BPF_JITTER
		if (d->bd_bfilter != NULL)
			bpf_destroy_jit_filter(d->bd_bfilter);
#endif
	}
	if (d->bd_wfilter != NULL)
		free((caddr_t)d->bd_wfilter, M_BPF);
	mtx_destroy(&d->bd_lock);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}
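
/*
 * Illustrative only: a typical Ethernet driver does not call this
 * directly; ether_ifattach() does it on the driver's behalf, roughly:
 *
 *	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
 */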

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
 */
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;

	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));

	bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO);

	rw_init(&bp->bif_lock, "bpf interface lock");
	LIST_INIT(&bp->bif_dlist);
	LIST_INIT(&bp->bif_wlist);
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	bp->bif_hdrlen = hdrlen;
	bp->bif_bpf = driverp;
	*driverp = bp;

	BPF_LOCK();
	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
	BPF_UNLOCK();

	if (bootverbose && IS_DEFAULT_VNET(curvnet))
		if_printf(ifp, "bpf attached\n");
}

#ifdef VIMAGE
/*
 * When moving interfaces between vnet instances we need a way to
 * query the dlt and hdrlen before detach so we can re-attach the if_bpf
 * after the vmove.  We unfortunately have no device driver infrastructure
 * to query the interface for these values after creation/attach, so
 * add this as a workaround.
 */
int
bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen)
{

	if (bp == NULL)
		return (ENXIO);
	if (bif_dlt == NULL && bif_hdrlen == NULL)
		return (0);

	if (bif_dlt != NULL)
		*bif_dlt = bp->bif_dlt;
	if (bif_hdrlen != NULL)
		*bif_hdrlen = bp->bif_hdrlen;

	return (0);
}
#endif

/*
 * Detach bpf from an interface. This involves detaching each descriptor
 * associated with the interface. Notify each descriptor as it's detached
 * so that any sleepers wake up and get ENXIO.
 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if	*bp, *bp_temp;
	struct bpf_d	*d;
	int ndetached;

	ndetached = 0;

	BPF_LOCK();
	/* Find all bpf_if structs which reference ifp and detach them. */
	LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
		if (ifp != bp->bif_ifp)
			continue;

		LIST_REMOVE(bp, bif_next);
		/* Add to the to-be-freed list. */
		LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next);

		ndetached++;
		/*
		 * Delay freeing bp until the interface is detached
		 * and all routes through this interface are removed.
		 * Mark bp as detached to restrict new consumers.
		 */
		BPFIF_WLOCK(bp);
		bp->bif_flags |= BPFIF_FLAG_DYING;
		*bp->bif_bpf = (struct bpf_if *)&dead_bpf_if;
		BPFIF_WUNLOCK(bp);

		CTR4(KTR_NET, "%s: scheduling free for encap %d (%p) for if %p",
		    __func__, bp->bif_dlt, bp, ifp);

		/* Free common descriptors */
		while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
			bpf_detachd_locked(d);
			BPFD_LOCK(d);
			bpf_wakeup(d);
			BPFD_UNLOCK(d);
		}

		/* Free writer-only descriptors */
		while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
			bpf_detachd_locked(d);
			BPFD_LOCK(d);
			bpf_wakeup(d);
			BPFD_UNLOCK(d);
		}
	}
	BPF_UNLOCK();

#ifdef INVARIANTS
	if (ndetached == 0)
		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
#endif
}

/*
 * Interface departure handler.
 * Note that a departure event does not guarantee the interface is going
 * down.  Interface renaming is currently done via a departure/arrival
 * event pair.
 *
 * The departure handler is called after all routes pointing to the given
 * interface have been removed and the interface is in the down state,
 * preventing any packets from being sent or received.  We assume it is
 * now safe to free data allocated by BPF.
 */
static void
bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct bpf_if *bp, *bp_temp;
	int nmatched = 0;

	/* Ignore ifnet renaming. */
	if (ifp->if_flags & IFF_RENAMING)
		return;

	BPF_LOCK();
	/*
	 * Find matching entries in the free list.
	 * Nothing should be found if bpfdetach() was not called.
	 */
	LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) {
		if (ifp != bp->bif_ifp)
			continue;

		CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
		    __func__, bp, ifp);

		LIST_REMOVE(bp, bif_next);

		rw_destroy(&bp->bif_lock);
		free(bp, M_BPF);

		nmatched++;
	}
	BPF_UNLOCK();
}

/*
 * Get a list of the available data link types for the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	struct ifnet *ifp;
	struct bpf_if *bp;
	u_int *lst;
	int error, n, n1;

	BPF_LOCK_ASSERT();

	ifp = d->bd_bif->bif_ifp;
again:
	n1 = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp)
			n1++;
	}
	if (bfl->bfl_list == NULL) {
		bfl->bfl_len = n1;
		return (0);
	}
	if (n1 > bfl->bfl_len)
		return (ENOMEM);
	BPF_UNLOCK();
	lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
	n = 0;
	BPF_LOCK();
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (n >= n1) {
			free(lst, M_TEMP);
			goto again;
		}
		lst[n] = bp->bif_dlt;
		n++;
	}
	BPF_UNLOCK();
	error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
	free(lst, M_TEMP);
	BPF_LOCK();
	bfl->bfl_len = n;
	return (error);
}
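
/*
 * Illustrative only: user space drives the function above through the
 * BIOCGDLTLIST ioctl, usually in two passes, mirroring the NULL-list
 * case handled first.  Error handling is omitted for brevity:
 *
 *	struct bpf_dltlist bfl;
 *	u_int *dlts;
 *
 *	bfl.bfl_list = NULL;
 *	ioctl(fd, BIOCGDLTLIST, &bfl);		(fetch the count)
 *	dlts = calloc(bfl.bfl_len, sizeof(u_int));
 *	bfl.bfl_list = dlts;
 *	ioctl(fd, BIOCGDLTLIST, &bfl);		(fetch the DLTs)
 */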

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	BPF_LOCK_ASSERT();

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;

	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}

	if (bp != NULL) {
		opromisc = d->bd_promisc;
		bpf_attachd(d, bp);
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		if (opromisc) {
			error = ifpromisc(bp->bif_ifp, 1);
			if (error)
				if_printf(bp->bif_ifp,
				    "bpf_setdlt: ifpromisc failed (%d)\n",
				    error);
			else
				d->bd_promisc = 1;
		}
	}
	return (bp == NULL ? EINVAL : 0);
}
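
/*
 * Illustrative only: the matching user-space operation is the BIOCSDLT
 * ioctl, e.g. to switch a wireless tap to radiotap framing:
 *
 *	u_int dlt = DLT_IEEE802_11_RADIO;
 *
 *	ioctl(fd, BIOCSDLT, &dlt);
 */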

static void
bpf_drvinit(void *unused)
{
	struct cdev *dev;

	sx_init(&bpf_sx, "bpf global lock");
	LIST_INIT(&bpf_iflist);
	LIST_INIT(&bpf_freelist);

	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
	/* For compatibility */
	make_dev_alias(dev, "bpf0");

	/* Register interface departure handler */
	bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
	    ifnet_departure_event, bpf_ifdetach, NULL,
	    EVENTHANDLER_PRI_ANY);
}

/*
 * Zero out the various packet counters associated with all of the bpf
 * descriptors.  At some point, we will probably want to get a bit more
 * granular and allow the user to specify descriptors to be zeroed.
 */
static void
bpf_zero_counters(void)
{
	struct bpf_if *bp;
	struct bpf_d *bd;

	BPF_LOCK();
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_RLOCK(bp);
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			BPFD_LOCK(bd);
			bd->bd_rcount = 0;
			bd->bd_dcount = 0;
			bd->bd_fcount = 0;
			bd->bd_wcount = 0;
			bd->bd_wfcount = 0;
			bd->bd_zcopy = 0;
			BPFD_UNLOCK(bd);
		}
		BPFIF_RUNLOCK(bp);
	}
	BPF_UNLOCK();
}

/*
 * Fill filter statistics into an xbpf_d structure.
 */
static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	bzero(d, sizeof(*d));
	BPFD_LOCK_ASSERT(bd);
	d->bd_structsize = sizeof(*d);
	/* XXX: reading should be protected by the global lock */
	d->bd_immediate = bd->bd_immediate;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_hdrcmplt;
	d->bd_direction = bd->bd_direction;
	d->bd_feedback = bd->bd_feedback;
	d->bd_async = bd->bd_async;
	d->bd_rcount = bd->bd_rcount;
	d->bd_dcount = bd->bd_dcount;
	d->bd_fcount = bd->bd_fcount;
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname,
	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	d->bd_locked = bd->bd_locked;
	d->bd_wcount = bd->bd_wcount;
	d->bd_wdcount = bd->bd_wdcount;
	d->bd_wfcount = bd->bd_wfcount;
	d->bd_zcopy = bd->bd_zcopy;
	d->bd_bufmode = bd->bd_bufmode;
}

/*
 * Handle `netstat -B' stats request.
 */
static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	static const struct xbpf_d zerostats;
	struct xbpf_d *xbdbuf, *xbd, tempstats;
	int index, error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct.  It is possible for
	 * unprivileged users to open bpf devices.  It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = priv_check(req->td, PRIV_NET_BPF);
	if (error)
		return (error);
	/*
	 * Check to see if the user is requesting that the counters be
	 * zeroed out.  Explicitly check that the supplied data is zeroed,
	 * as we aren't allowing the user to set the counters currently.
	 */
	if (req->newptr != NULL) {
		if (req->newlen != sizeof(tempstats))
			return (EINVAL);
		memset(&tempstats, 0, sizeof(tempstats));
		error = SYSCTL_IN(req, &tempstats, sizeof(tempstats));
		if (error)
			return (error);
		if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0)
			return (EINVAL);
		bpf_zero_counters();
		return (0);
	}
	if (req->oldptr == NULL)
		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
	if (bpf_bpfd_cnt == 0)
		return (SYSCTL_OUT(req, 0, 0));
	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
	BPF_LOCK();
	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
		BPF_UNLOCK();
		free(xbdbuf, M_BPF);
		return (ENOMEM);
	}
	index = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_RLOCK(bp);
		/* Send writer-only descriptors first. */
		LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
		BPFIF_RUNLOCK(bp);
	}
	BPF_UNLOCK();
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}
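
/*
 * Illustrative only: "netstat -B" consumes this handler through the
 * net.bpf.stats sysctl; a minimal reader follows the usual two-call
 * sysctl pattern (error handling omitted):
 *
 *	size_t len;
 *	struct xbpf_d *buf;
 *
 *	sysctlbyname("net.bpf.stats", NULL, &len, NULL, 0);
 *	buf = malloc(len);
 *	sysctlbyname("net.bpf.stats", buf, &len, NULL, 0);
 *
 * Writing an all-zero struct xbpf_d back to the same node zeroes the
 * counters, as checked above.
 */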

SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL);

#else /* !DEV_BPF && !NETGRAPH_BPF */

/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */

void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}

void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}

void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}

void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{

	*driverp = (struct bpf_if *)&dead_bpf_if;
}

void
bpfdetach(struct ifnet *ifp)
{
}

u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{
	return -1;	/* "no filter" behaviour */
}

int
bpf_validate(const struct bpf_insn *f, int len)
{
	return 0;		/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */

#ifdef DDB
static void
bpf_show_bpf_if(struct bpf_if *bpf_if)
{

	if (bpf_if == NULL)
		return;
	db_printf("%p:\n", bpf_if);
#define	BPF_DB_PRINTF(f, e)	db_printf("   %s = " f "\n", #e, bpf_if->e);
	/* bif_ext.bif_next */
	/* bif_ext.bif_dlist */
	BPF_DB_PRINTF("%#x", bif_dlt);
	BPF_DB_PRINTF("%u", bif_hdrlen);
	BPF_DB_PRINTF("%p", bif_ifp);
	/* bif_lock */
	/* bif_wlist */
	BPF_DB_PRINTF("%#x", bif_flags);
}

DB_SHOW_COMMAND(bpf_if, db_show_bpf_if)
{

	if (!have_addr) {
		db_printf("usage: show bpf_if <struct bpf_if *>\n");
		return;
	}

	bpf_show_bpf_if((struct bpf_if *)addr);
}
#endif
