/*-
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/net/bpf.c 250945 2013-05-23 21:33:10Z ghelmer $");

#include "opt_bpf.h"
#include "opt_compat.h"
#include "opt_netgraph.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/uio.h>

#include <sys/event.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/proc.h>

#include <sys/socket.h>

#include <net/if.h>
#define	BPF_INTERNAL
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
#include <net/bpf_jitter.h>
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <net80211/ieee80211_freebsd.h>

#include <security/mac/mac_framework.h>

MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define PRINET  26			/* interruptible */

#define	SIZEOF_BPF_HDR(type)	\
    (offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen))
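/*
 * Illustrative note (not part of the upstream source): for the 32-bit
 * compat header defined below, SIZEOF_BPF_HDR(struct bpf_hdr32) evaluates
 * to offsetof(struct bpf_hdr32, bh_hdrlen) + sizeof(uint16_t), i.e. the
 * size of the header up to and including bh_hdrlen, with any trailing
 * structure padding excluded.
 */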

#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>
#define BPF_ALIGNMENT32 sizeof(int32_t)
#define BPF_WORDALIGN32(x) (((x)+(BPF_ALIGNMENT32-1))&~(BPF_ALIGNMENT32-1))

#ifndef BURN_BRIDGES
/*
 * 32-bit version of structure prepended to each packet.  We use this header
 * instead of the standard one for 32-bit streams.  We mark a stream as
 * 32-bit the first time we see a 32-bit compat ioctl request.
 */
struct bpf_hdr32 {
	struct timeval32 bh_tstamp;	/* time stamp */
	uint32_t	bh_caplen;	/* length of captured portion */
	uint32_t	bh_datalen;	/* original length of packet */
	uint16_t	bh_hdrlen;	/* length of bpf header (this struct
					   plus alignment padding) */
};
#endif

struct bpf_program32 {
	u_int bf_len;
	uint32_t bf_insns;
};

struct bpf_dltlist32 {
	u_int	bfl_len;
	u_int	bfl_list;
};

#define	BIOCSETF32	_IOW('B', 103, struct bpf_program32)
#define	BIOCSRTIMEOUT32	_IOW('B', 109, struct timeval32)
#define	BIOCGRTIMEOUT32	_IOR('B', 110, struct timeval32)
#define	BIOCGDLTLIST32	_IOWR('B', 121, struct bpf_dltlist32)
#define	BIOCSETWF32	_IOW('B', 123, struct bpf_program32)
#define	BIOCSETFNR32	_IOW('B', 130, struct bpf_program32)
#endif

/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (e.g., 802.11
 * frames, Ethernet frames, etc.).
 */
static LIST_HEAD(, bpf_if)	bpf_iflist, bpf_freelist;
static struct mtx	bpf_mtx;		/* bpf global lock */
static int		bpf_bpfd_cnt;

static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static void	bpf_detachd_locked(struct bpf_d *);
static void	bpf_freed(struct bpf_d *);
static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
		    struct sockaddr *, int *, struct bpf_insn *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
		    struct bintime *);
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static void	bpf_drvinit(void *);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
static int bpf_zerocopy_enable = 0;
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
    &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");

static VNET_DEFINE(int, bpf_optimize_writers) = 0;
#define	V_bpf_optimize_writers VNET(bpf_optimize_writers)
SYSCTL_VNET_INT(_net_bpf, OID_AUTO, optimize_writers,
    CTLFLAG_RW, &VNET_NAME(bpf_optimize_writers), 0,
    "Do not send packets until BPF program is set");

static	d_open_t	bpfopen;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	bpfopen,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};

static struct filterops bpfread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfread,
};

eventhandler_tag	bpf_ifdetach_cookie = NULL;

/*
 * LOCKING MODEL USED BY BPF:
 * Locks:
 * 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal,
 * some global counters and every bpf_if reference.
 * 2) Interface lock. Rwlock, used to protect the list of BPF descriptors and their filters.
 * 3) Descriptor lock. Mutex, used to protect BPF buffers and various structure fields
 *   used by bpf_mtap code.
 *
 * Lock order:
 *
 * Global lock, interface lock, descriptor lock
 *
 * We have to acquire the interface lock before the descriptor main lock due to
 * the BPF_MTAP[2] working model. In many places (like bpf_detachd) we start with
 * the BPF descriptor (and we need to at least rlock it to get a reliable interface
 * pointer). This gives us a potential LOR. As a result, we use the global lock to
 * protect against bpf_if changes in every such place.
 *
 * Changing d->bd_bif is protected by 1) the global lock, 2) the interface lock and
 * 3) the descriptor main wlock.
 * Reading bd_bif can be protected by any of these locks, typically the global lock.
 *
 * Changing the read/write BPF filter is protected by the same three locks,
 * and the same applies for reading.
 *
 * Sleeping while holding the global lock is not allowed, because bpfdetach() uses it.
 */
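/*
 * Illustrative sketch of the canonical acquisition order described above,
 * assuming a descriptor d attached to interface bp (uses the locking
 * macros referenced throughout this file):
 *
 *	BPF_LOCK();
 *	BPFIF_WLOCK(bp);
 *	BPFD_LOCK(d);
 *	... modify d->bd_bif, filters or buffers ...
 *	BPFD_UNLOCK(d);
 *	BPFIF_WUNLOCK(bp);
 *	BPF_UNLOCK();
 */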

/*
 * Wrapper functions for various buffering methods.  If the set of buffer
 * modes expands, we will probably want to introduce a switch data structure
 * similar to protosw, etc.
 */
static void
bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_bytes(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		d->bd_zcopy++;
		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_bytes");
	}
}
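/*
 * Hypothetical sketch of the "switch data structure" mentioned above; the
 * names below are illustrative only and do not exist in this file.  Each
 * buffer mode would supply an operations table and the wrappers would
 * indirect through it instead of switching on bd_bufmode:
 *
 *	struct bpf_bufops {
 *		void (*bo_append_bytes)(struct bpf_d *, caddr_t, u_int,
 *		    void *, u_int);
 *		void (*bo_append_mbuf)(struct bpf_d *, caddr_t, u_int,
 *		    void *, u_int);
 *		int  (*bo_canfreebuf)(struct bpf_d *);
 *		int  (*bo_canwritebuf)(struct bpf_d *);
 *	};
 */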

static void
bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		d->bd_zcopy++;
		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_mbuf");
	}
}

/*
 * This function gets called when the free buffer is re-assigned.
 */
static void
bpf_buf_reclaimed(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return;

	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_buf_reclaimed(d);
		return;

	default:
		panic("bpf_buf_reclaimed");
	}
}

/*
 * If the buffer mechanism has a way to decide that a held buffer can be made
 * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
 * returned if the buffer can be discarded, (0) is returned if it cannot.
 */
static int
bpf_canfreebuf(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canfreebuf(d));
	}
	return (0);
}

/*
 * Allow the buffer model to indicate that the current store buffer is
 * immutable, regardless of the appearance of space.  Return (1) if the
 * buffer is writable, and (0) if not.
 */
static int
bpf_canwritebuf(struct bpf_d *d)
{
	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canwritebuf(d));
	}
	return (1);
}

/*
 * Notify buffer model that an attempt to write to the store buffer has
 * resulted in a dropped packet, in which case the buffer may be considered
 * full.
 */
static void
bpf_buffull(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_buffull(d);
		break;
	}
}

/*
 * Notify the buffer model that a buffer has moved into the hold position.
 */
void
bpf_bufheld(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_bufheld(d);
		break;
	}
}

static void
bpf_free(struct bpf_d *d)
{

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_free(d));

	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_free(d));

	default:
		panic("bpf_buf_free");
	}
}

static int
bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_uiomove(d, buf, len, uio));
}

static int
bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_ioctl_sblen(d, i));
}

static int
bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
}

static int
bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
}

static int
bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
}

/*
 * General BPF functions.
 */
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
    struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
{
	const struct ieee80211_bpf_params *p;
	struct ether_header *eh;
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * null interface types require a 4 byte pseudo header which
		 * corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
		sockp->sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
		sockp->sa_family = AF_IEEE80211;
		sockp->sa_len = 12;	/* XXX != 0 */
		hlen = sizeof(struct ieee80211_bpf_params);
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;
	if (len < hlen || len - hlen > ifp->if_mtu)
		return (EMSGSIZE);

	m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR);
	if (m == NULL)
		return (EIO);
	m->m_pkthdr.len = m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB:
		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
			    ETHER_ADDR_LEN) == 0)
				m->m_flags |= M_BCAST;
			else
				m->m_flags |= M_MCAST;
		}
		break;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (sockp->sa_family == AF_IEEE80211) {
			/*
			 * Collect true length from the parameter header
			 * NB: sockp is known to be zero'd so if we do a
			 *     short copy unspecified parameters will be
			 *     zero.
			 * NB: packet may not be aligned after stripping
			 *     bpf params
			 * XXX check ibp_vers
			 */
			p = mtod(m, const struct ieee80211_bpf_params *);
			hlen = p->ibp_len;
			if (hlen > sizeof(sockp->sa_data)) {
				error = EINVAL;
				goto bad;
			}
		}
		bcopy(m->m_data, sockp->sa_data, hlen);
	}
	*hdrlen = hlen;

	return (0);
bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	int op_w;

	BPF_LOCK_ASSERT();

	/*
	 * Save sysctl value to protect from sysctl change
	 * between reads
	 */
	op_w = V_bpf_optimize_writers;

	if (d->bd_bif != NULL)
		bpf_detachd_locked(d);
	/*
	 * Point d at bp, and add d to the interface's list.
	 * Since many applications use BPF only for sending raw packets
	 * (dhcpd and cdpd are good examples), we can delay adding d to the
	 * list of active listeners until some filter is configured.
	 */

	BPFIF_WLOCK(bp);
	BPFD_LOCK(d);

	d->bd_bif = bp;

	if (op_w != 0) {
		/* Add to writers-only list */
		LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
		/*
		 * We decrement bd_writer on every filter set operation.
		 * The first BIOCSETF is done by pcap_open_live() to set up
		 * the snap length.  After that, the application usually sets
		 * its own filter.
		 */
		d->bd_writer = 2;
	} else
		LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	BPFD_UNLOCK(d);
	BPFIF_WUNLOCK(bp);

	bpf_bpfd_cnt++;

	CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
	    __func__, d->bd_pid, d->bd_writer ? "writer" : "active");

	if (op_w == 0)
		EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
}
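
/*
 * Illustrative example of the writers-only optimization handled above,
 * assuming the sysctl name follows from the SYSCTL_VNET_INT declaration
 * earlier in this file:
 *
 *	# sysctl net.bpf.optimize_writers=1
 *
 * With this set, a freshly attached descriptor lands on bif_wlist and
 * receives no packets; bpf_upgraded() below moves it to the active
 * bif_dlist once a filter is installed and bd_writer drops to zero.
 */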

/*
 * Add d to the list of active bp filters.
 * Requires bpf_attachd() to be called beforehand.
 */
static void
bpf_upgraded(struct bpf_d *d)
{
	struct bpf_if *bp;

	BPF_LOCK_ASSERT();

	bp = d->bd_bif;

	/*
	 * Filter can be set several times without specifying interface.
	 * Mark d as reader and exit.
	 */
	if (bp == NULL) {
		BPFD_LOCK(d);
		d->bd_writer = 0;
		BPFD_UNLOCK(d);
		return;
	}

	BPFIF_WLOCK(bp);
	BPFD_LOCK(d);

	/* Remove from writers-only list */
	LIST_REMOVE(d, bd_next);
	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
	/* Mark d as reader */
	d->bd_writer = 0;

	BPFD_UNLOCK(d);
	BPFIF_WUNLOCK(bp);

	CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid);

	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	BPF_LOCK();
	bpf_detachd_locked(d);
	BPF_UNLOCK();
}

static void
bpf_detachd_locked(struct bpf_d *d)
{
	int error;
	struct bpf_if *bp;
	struct ifnet *ifp;

	CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);

	BPF_LOCK_ASSERT();

	/* Check if descriptor is attached */
	if ((bp = d->bd_bif) == NULL)
		return;

	BPFIF_WLOCK(bp);
	BPFD_LOCK(d);

	/* Save bd_writer value */
	error = d->bd_writer;

	/*
	 * Remove d from the interface's descriptor list.
	 */
	LIST_REMOVE(d, bd_next);

	ifp = bp->bif_ifp;
	d->bd_bif = NULL;
	BPFD_UNLOCK(d);
	BPFIF_WUNLOCK(bp);

	bpf_bpfd_cnt--;

	/* Call event handler iff d is attached */
	if (error == 0)
		EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		CURVNET_SET(ifp->if_vnet);
		error = ifpromisc(ifp, 0);
		CURVNET_RESTORE();
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
				"bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
static void
bpf_dtor(void *data)
{
	struct bpf_d *d = data;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	bpf_detachd(d);
#ifdef MAC
	mac_bpfdesc_destroy(d);
#endif /* MAC */
	seldrain(&d->bd_sel);
	knlist_destroy(&d->bd_sel.si_note);
	callout_drain(&d->bd_callout);
	bpf_freed(d);
	free(d, M_BPF);
}

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
static	int
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d;
	int error, size;

	d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	error = devfs_set_cdevpriv(d, bpf_dtor);
	if (error != 0) {
		free(d, M_BPF);
		return (error);
	}

	/*
	 * For historical reasons, perform a one-time initialization call to
	 * the buffer routines, even though we're not yet committed to a
	 * particular buffer method.
	 */
	bpf_buffer_init(d);
	d->bd_hbuf_in_use = 0;
	d->bd_bufmode = BPF_BUFMODE_BUFFER;
	d->bd_sig = SIGIO;
	d->bd_direction = BPF_D_INOUT;
	BPF_PID_REFRESH(d, td);
#ifdef MAC
	mac_bpfdesc_init(d);
	mac_bpfdesc_create(td->td_ucred, d);
#endif
	mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);

	/* Allocate default buffers */
	size = d->bd_bufsize;
	bpf_buffer_ioctl_sblen(d, &size);

	return (0);
}
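
/*
 * Illustrative userland sketch (not part of this file; error handling
 * omitted, interface name hypothetical) of the open/bind/read cycle these
 * cdevsw methods implement:
 *
 *	int fd = open("/dev/bpf", O_RDWR);
 *	struct ifreq ifr = { .ifr_name = "em0" };
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	u_int blen;
 *	ioctl(fd, BIOCGBLEN, &blen);
 *	char *buf = malloc(blen);
 *	ssize_t n = read(fd, buf, blen);
 *
 * The read(2) size must equal the descriptor's buffer size; bpfread()
 * below enforces this.
 */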

/*
 *  bpfread - read next chunk of packets from buffers
 */
static	int
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	int error;
	int non_block;
	int timed_out;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	non_block = ((ioflag & O_NONBLOCK) != 0);

	BPFD_LOCK(d);
	BPF_PID_REFRESH_CUR(d);
	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
		BPFD_UNLOCK(d);
		return (EOPNOTSUPP);
	}
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	while (d->bd_hbuf_in_use) {
		error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
		    PRINET|PCATCH, "bd_hbuf", 0);
		if (error != 0) {
			BPFD_UNLOCK(d);
			return (error);
		}
	}
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 */
			if (d->bd_immediate || non_block || timed_out) {
				/*
				 * Rotate the buffers and return what's here
				 * if we are in immediate mode, non-blocking
				 * flag is set, or this descriptor timed out.
				 */
				ROTATE_BUFFERS(d);
				break;
			}
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (non_block) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep(d, &d->bd_lock, PRINET|PCATCH,
		     "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	d->bd_hbuf_in_use = 1;
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 *
	 * We do not have to worry about simultaneous reads because
	 * we waited for sole access to the hold buffer above.
	 */
	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	bpf_buf_reclaimed(d);
	d->bd_hbuf_in_use = 0;
	wakeup(&d->bd_hbuf_in_use);
	BPFD_UNLOCK(d);

	return (error);
}

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup(d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK_ASSERT(d);

	if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout))
		return;
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
}

static int
bpf_ready(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
		return (1);
	if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
	    d->bd_slen != 0)
		return (1);
	return (0);
}

static int
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	struct sockaddr dst;
	int error, hlen;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	BPF_PID_REFRESH_CUR(d);
	d->bd_wcount++;
	/* XXX: locking required */
	if (d->bd_bif == NULL) {
		d->bd_wdcount++;
		return (ENXIO);
	}

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0) {
		d->bd_wdcount++;
		return (ENETDOWN);
	}

	if (uio->uio_resid == 0) {
		d->bd_wdcount++;
		return (0);
	}

	bzero(&dst, sizeof(dst));
	m = NULL;
	hlen = 0;
	/* XXX: bpf_movein() can sleep */
	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
	    &m, &dst, &hlen, d->bd_wfilter);
	if (error) {
		d->bd_wdcount++;
		return (error);
	}
	d->bd_wfcount++;
	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		mc = m_dup(m, M_NOWAIT);
		if (mc != NULL)
			mc->m_pkthdr.rcvif = ifp;
		/* Set M_PROMISC for outgoing packets to be discarded. */
		if (d->bd_direction == BPF_D_INOUT)
			m->m_flags |= M_PROMISC;
	} else
		mc = NULL;

	m->m_pkthdr.len -= hlen;
	m->m_len -= hlen;
	m->m_data += hlen;	/* XXX */

	CURVNET_SET(ifp->if_vnet);
#ifdef MAC
	BPFD_LOCK(d);
	mac_bpfdesc_create_mbuf(d, m);
	if (mc != NULL)
		mac_bpfdesc_create_mbuf(d, mc);
	BPFD_UNLOCK(d);
#endif

	error = (*ifp->if_output)(ifp, m, &dst, NULL);
	if (error)
		d->bd_wdcount++;

	if (mc != NULL) {
		if (error == 0)
			(*ifp->if_input)(ifp, mc);
		else
			m_freem(mc);
	}
	CURVNET_RESTORE();

	return (error);
}
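
/*
 * Illustrative userland sketch of the feedback path handled above
 * (assumes the descriptor is already bound with BIOCSETIF; frame and
 * framelen are hypothetical):
 *
 *	u_int on = 1;
 *	ioctl(fd, BIOCFEEDBACK, &on);	// loop injected packets back
 *	write(fd, frame, framelen);	// duplicate also fed to if_input()
 */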
109536079Swollman
1096127870Srwatson/*
1097139222Srwatson * Reset a descriptor by flushing its packet buffer and clearing the receive
109836079Swollman * and drop counts.  This is doable for kernel-only buffers, but with
109936079Swollman * zero-copy buffers, we can't write to (or rotate) buffers that are
110036079Swollman * currently owned by userspace.  It would be nice if we could encapsulate
110136079Swollman * this logic in the buffer code rather than here.
1102181803Sbz */
1103181803Sbzstatic void
1104181803Sbzreset_d(struct bpf_d *d)
1105181803Sbz{
110636079Swollman
1107171605Ssilby	BPFD_LOCK_ASSERT(d);
1108171605Ssilby
1109126253Struckman	while (d->bd_hbuf_in_use)
1110171605Ssilby		mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET,
1111126253Struckman		    "bd_hbuf", 0);
1112126253Struckman	if ((d->bd_hbuf != NULL) &&
1113100831Struckman	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
111436079Swollman		/* Free the hold buffer. */
1115171605Ssilby		d->bd_fbuf = d->bd_hbuf;
111636079Swollman		d->bd_hbuf = NULL;
111736079Swollman		d->bd_hlen = 0;
111836079Swollman		bpf_buf_reclaimed(d);
111936079Swollman	}
1120139222Srwatson	if (bpf_canwritebuf(d))
112136079Swollman		d->bd_slen = 0;
1122171605Ssilby	d->bd_rcount = 0;
1123171605Ssilby	d->bd_dcount = 0;
1124171605Ssilby	d->bd_fcount = 0;
1125171605Ssilby	d->bd_wcount = 0;
1126111119Simp	d->bd_wfcount = 0;
1127127870Srwatson	d->bd_wdcount = 0;
1128139222Srwatson	d->bd_zcopy = 0;
1129133874Srwatson}
1130181803Sbz
1131181887Sjulian/*
1132181888Sjulian *  FIONREAD		Check for read packet available.
1133205251Sbz *  SIOCGIFADDR		Get interface address - convenient hook to driver.
1134113345Srwatson *  BIOCGBLEN		Get buffer len [for read()].
1135113345Srwatson *  BIOCSETF		Set read filter.
1136113345Srwatson *  BIOCSETFNR		Set read filter without resetting descriptor.
1137113345Srwatson *  BIOCSETWF		Set write filter.
1138113345Srwatson *  BIOCFLUSH		Flush read packet buffer.
1139113345Srwatson *  BIOCPROMISC		Put interface into promiscuous mode.
1140189848Srwatson *  BIOCGDLT		Get link layer type.
1141157474Srwatson *  BIOCGETIF		Get interface name.
1142157474Srwatson *  BIOCSETIF		Set interface.
1143157474Srwatson *  BIOCSRTIMEOUT	Set read timeout.
1144157474Srwatson *  BIOCGRTIMEOUT	Get read timeout.
1145157474Srwatson *  BIOCGSTATS		Get packet stats.
1146157474Srwatson *  BIOCIMMEDIATE	Set immediate mode.
1147183982Sbz *  BIOCVERSION		Get filter language version.
1148205251Sbz *  BIOCGHDRCMPLT	Get "header already complete" flag
1149205251Sbz *  BIOCSHDRCMPLT	Set "header already complete" flag
1150113345Srwatson *  BIOCGDIRECTION	Get packet direction flag
1151205251Sbz *  BIOCSDIRECTION	Set packet direction flag
1152113345Srwatson *  BIOCGTSTAMP		Get time stamp format and resolution.
1153205251Sbz *  BIOCSTSTAMP		Set time stamp format and resolution.
115436079Swollman *  BIOCLOCK		Set "locked" flag
1155181803Sbz *  BIOCFEEDBACK	Set packet feedback mode.
115636079Swollman *  BIOCSETZBUF		Set current zero-copy buffer locations.
115736079Swollman *  BIOCGETZMAX		Get maximum zero-copy buffer size.
115836079Swollman *  BIOCROTZBUF		Force rotation of zero-copy buffer
115936079Swollman *  BIOCSETBUFMODE	Set buffer mode.
116036079Swollman *  BIOCGETBUFMODE	Get current buffer mode.
1161179414Srwatson */
116236079Swollman/* ARGSUSED */
116336079Swollmanstatic	int
1164157432Srwatsonbpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
1165145978Scperciva    struct thread *td)
1166145978Scperciva{
116736079Swollman	struct bpf_d *d;
116836079Swollman	int error;
116936079Swollman
117047960Stegge	error = devfs_get_cdevpriv((void **)&d);
1171111145Sjlemon	if (error != 0)
1172111145Sjlemon		return (error);
1173189848Srwatson
1174111145Sjlemon	/*
1175111145Sjlemon	 * Refresh PID associated with this descriptor.
1176197244Ssilby	 */
117747960Stegge	BPFD_LOCK(d);
1178197244Ssilby	BPF_PID_REFRESH(d, td);
1179197244Ssilby	if (d->bd_state == BPF_WAITING)
1180197244Ssilby		callout_stop(&d->bd_callout);
1181127870Srwatson	d->bd_state = BPF_IDLE;
118236079Swollman	BPFD_UNLOCK(d);
1183111145Sjlemon
1184111145Sjlemon	if (d->bd_locked == 1) {
1185111145Sjlemon		switch (cmd) {
1186111145Sjlemon		case BIOCGBLEN:
1187110896Shsu		case BIOCFLUSH:
1188179414Srwatson		case BIOCGDLT:
118936079Swollman		case BIOCGDLTLIST:
1190160491Sups#ifdef COMPAT_FREEBSD32
1191179414Srwatson		case BIOCGDLTLIST32:
119236079Swollman#endif
1193205251Sbz		case BIOCGETIF:
1194205251Sbz		case BIOCGRTIMEOUT:
1195205251Sbz#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
1196222488Srwatson		case BIOCGRTIMEOUT32:
1197222488Srwatson#endif
1198222488Srwatson		case BIOCGSTATS:
1199205251Sbz		case BIOCVERSION:
1200205251Sbz		case BIOCGRSIG:
1201205251Sbz		case BIOCGHDRCMPLT:
120236079Swollman		case BIOCSTSTAMP:
120336079Swollman		case BIOCFEEDBACK:
120436079Swollman		case FIONREAD:
120536079Swollman		case BIOCLOCK:
120636079Swollman		case BIOCSRTIMEOUT:
120736079Swollman#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
120836079Swollman		case BIOCSRTIMEOUT32:
120936079Swollman#endif
1210181803Sbz		case BIOCIMMEDIATE:
1211181803Sbz		case TIOCGPGRP:
121236079Swollman		case BIOCROTZBUF:
1213181803Sbz			break;
1214181803Sbz		default:
121536079Swollman			return (EPERM);
121636079Swollman		}
121736079Swollman	}
1218139222Srwatson#ifdef COMPAT_FREEBSD32
121936079Swollman	/*
122036079Swollman	 * If we see a 32-bit compat ioctl, mark the stream as 32-bit so
1221217554Smdf	 * that it will get 32-bit packet headers.
1222217554Smdf	 */
1223167785Sandre	switch (cmd) {
122436079Swollman	case BIOCSETF32:
1225221250Sbz	case BIOCSETFNR32:
122648758Sgreen	case BIOCSETWF32:
122762573Sphk	case BIOCGDLTLIST32:
122848758Sgreen	case BIOCGRTIMEOUT32:
122972650Sgreen	case BIOCSRTIMEOUT32:
123048758Sgreen		BPFD_LOCK(d);
123148758Sgreen		d->bd_compat32 = 1;
1232148156Srwatson		BPFD_UNLOCK(d);
123348758Sgreen	}
1234170587Srwatson#endif
123548758Sgreen
123648758Sgreen	CURVNET_SET(TD_TO_VNET(td));
123748758Sgreen	switch (cmd) {
123848758Sgreen
123948758Sgreen	default:
1240222488Srwatson		error = EINVAL;
1241222488Srwatson		break;
1242179414Srwatson
1243179414Srwatson	/*
1244179414Srwatson	 * Check for read packet available.
1245179414Srwatson	 */
1246183982Sbz	case FIONREAD:
1247179414Srwatson		{
1248183606Sbz			int n;
1249179414Srwatson
1250222488Srwatson			BPFD_LOCK(d);
125148758Sgreen			n = d->bd_slen;
125299838Struckman			while (d->bd_hbuf_in_use)
125399838Struckman				mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
125448758Sgreen				    PRINET, "bd_hbuf", 0);
125548758Sgreen			if (d->bd_hbuf)
125648758Sgreen				n += d->bd_hlen;
125778697Sdwmalone			BPFD_UNLOCK(d);
125878697Sdwmalone
125978697Sdwmalone			*(int *)addr = n;
1260221250Sbz			break;
126148758Sgreen		}
126255679Sshin
126355679Sshin	case SIOCGIFADDR:
126462573Sphk		{
126555679Sshin			struct ifnet *ifp;
126672650Sgreen
126755679Sshin			if (d->bd_bif == NULL)
126855679Sshin				error = EINVAL;
1269221250Sbz			else {
1270221250Sbz				ifp = d->bd_bif->bif_ifp;
1271221250Sbz				error = (*ifp->if_ioctl)(ifp, cmd, addr);
1272221250Sbz			}
127355679Sshin			break;
1274170587Srwatson		}
127555679Sshin
127655679Sshin	/*
127755679Sshin	 * Get buffer len [for read()].
127855679Sshin	 */
127955679Sshin	case BIOCGBLEN:
1280181803Sbz		BPFD_LOCK(d);
1281181803Sbz		*(u_int *)addr = d->bd_bufsize;
1282148385Sume		BPFD_UNLOCK(d);
1283148385Sume		break;
128455679Sshin
1285221250Sbz	/*
128655679Sshin	 * Set buffer length.
128755679Sshin	 */
128855679Sshin	case BIOCSBLEN:
1289221250Sbz		error = bpf_ioctl_sblen(d, (u_int *)addr);
129055679Sshin		break;
129155679Sshin
1292148385Sume	/*
1293221250Sbz	 * Set link layer read filter.
129455679Sshin	 */
1295222488Srwatson	case BIOCSETF:
129655679Sshin	case BIOCSETFNR:
129755679Sshin	case BIOCSETWF:
129855679Sshin#ifdef COMPAT_FREEBSD32
1299222488Srwatson	case BIOCSETF32:
130055679Sshin	case BIOCSETFNR32:
1301221250Sbz	case BIOCSETWF32:
1302222488Srwatson#endif
1303151254Sphilip		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
1304222488Srwatson		break;
1305222488Srwatson
1306179414Srwatson	/*
1307179414Srwatson	 * Flush read packet buffer.
1308179414Srwatson	 */
1309179414Srwatson	case BIOCFLUSH:
1310183982Sbz		BPFD_LOCK(d);
1311179414Srwatson		reset_d(d);
1312183606Sbz		BPFD_UNLOCK(d);
1313179414Srwatson		break;
1314222488Srwatson
131555679Sshin	/*
131699838Struckman	 * Put interface into promiscuous mode.
131799838Struckman	 */
131855679Sshin	case BIOCPROMISC:
131955679Sshin		if (d->bd_bif == NULL) {
132055679Sshin			/*
132178697Sdwmalone			 * No interface attached yet.
132278697Sdwmalone			 */
132378697Sdwmalone			error = EINVAL;
1324221250Sbz			break;
132555679Sshin		}
132655679Sshin		if (d->bd_promisc == 0) {
1327221250Sbz			error = ifpromisc(d->bd_bif->bif_ifp, 1);
13281541Srgrimes			if (error == 0)
1329157431Srwatson				d->bd_promisc = 1;
13301541Srgrimes		}
133172959Sjlemon		break;
133272959Sjlemon
133373109Sjlemon	/*
133473109Sjlemon	 * Get current data link type.
133573109Sjlemon	 */
133698211Shsu	case BIOCGDLT:
1337145360Sandre		BPF_LOCK();
1338145360Sandre		if (d->bd_bif == NULL)
1339145360Sandre			error = EINVAL;
1340148156Srwatson		else
13411541Srgrimes			*(u_int *)addr = d->bd_bif->bif_dlt;
134273109Sjlemon		BPF_UNLOCK();
134373109Sjlemon		break;
134473109Sjlemon
134573109Sjlemon	/*
1346145355Sandre	 * Get a list of supported data link types.
1347235051Sglebius	 */
1348181803Sbz#ifdef COMPAT_FREEBSD32
134999156Sjesper	case BIOCGDLTLIST32:
135072959Sjlemon		{
1351122922Sandre			struct bpf_dltlist32 *list32;
1352122922Sandre			struct bpf_dltlist dltlist;
1353122922Sandre
1354122922Sandre			list32 = (struct bpf_dltlist32 *)addr;
1355122922Sandre			dltlist.bfl_len = list32->bfl_len;
1356122922Sandre			dltlist.bfl_list = PTRIN(list32->bfl_list);
1357145355Sandre			BPF_LOCK();
1358145355Sandre			if (d->bd_bif == NULL)
1359145355Sandre				error = EINVAL;
1360145355Sandre			else {
1361145355Sandre				error = bpf_getdltlist(d, &dltlist);
1362122922Sandre				if (error == 0)
1363122922Sandre					list32->bfl_len = dltlist.bfl_len;
1364122922Sandre			}
1365122922Sandre			BPF_UNLOCK();
1366122922Sandre			break;
1367127870Srwatson		}
1368119995Sru#endif
13691541Srgrimes
1370127870Srwatson	case BIOCGDLTLIST:
1371145360Sandre		BPF_LOCK();
1372145360Sandre		if (d->bd_bif == NULL)
1373133874Srwatson			error = EINVAL;
1374105586Sphk		else
1375181803Sbz			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
1376222488Srwatson		BPF_UNLOCK();
1377222488Srwatson		break;
137898102Shsu
1379189848Srwatson	/*
1380189848Srwatson	 * Set data link type.
1381157433Srwatson	 */
1382145360Sandre	case BIOCSDLT:
138398102Shsu		BPF_LOCK();
1384145360Sandre		if (d->bd_bif == NULL)
1385145360Sandre			error = EINVAL;
1386145360Sandre		else
1387145360Sandre			error = bpf_setdlt(d, *(u_int *)addr);
1388145360Sandre		BPF_UNLOCK();
1389145360Sandre		break;
1390145360Sandre
1391145360Sandre	/*
1392145360Sandre	 * Get interface name.
1393145360Sandre	 */
1394145360Sandre	case BIOCGETIF:
1395178888Sjulian		BPF_LOCK();
1396178888Sjulian		if (d->bd_bif == NULL)
1397145360Sandre			error = EINVAL;
1398145360Sandre		else {
1399145869Sandre			struct ifnet *const ifp = d->bd_bif->bif_ifp;
1400145869Sandre			struct ifreq *const ifr = (struct ifreq *)addr;
1401145869Sandre
1402149929Sandre			strlcpy(ifr->ifr_name, ifp->if_xname,
1403149929Sandre			    sizeof(ifr->ifr_name));
1404145869Sandre		}
1405145360Sandre		BPF_UNLOCK();
1406149929Sandre		break;
1407145869Sandre
1408211333Sandre	/*
1409211333Sandre	 * Set interface.
1410211333Sandre	 */
1411145869Sandre	case BIOCSETIF:
1412145869Sandre		BPF_LOCK();
1413218909Sbrucec		error = bpf_setif(d, (struct ifreq *)addr);
1414145869Sandre		BPF_UNLOCK();
1415145869Sandre		break;
1416145869Sandre
1417145869Sandre	/*
1418162084Sandre	 * Set read timeout.
1419145360Sandre	 */
1420235051Sglebius	case BIOCSRTIMEOUT:
1421235051Sglebius#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
1422235051Sglebius	case BIOCSRTIMEOUT32:
1423235051Sglebius#endif
1424145360Sandre		{
142598102Shsu			struct timeval *tv = (struct timeval *)addr;
1426127870Srwatson#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
1427178285Srwatson			struct timeval32 *tv32;
142886764Sjlemon			struct timeval tv64;
1429186222Sbz
143086764Sjlemon			if (cmd == BIOCSRTIMEOUT32) {
143186764Sjlemon				tv32 = (struct timeval32 *)addr;
143286764Sjlemon				tv = &tv64;
143386764Sjlemon				tv->tv_sec = tv32->tv_sec;
143486764Sjlemon				tv->tv_usec = tv32->tv_usec;
143573109Sjlemon			} else
1436181803Sbz#endif
14371541Srgrimes				tv = (struct timeval *)addr;
1438181803Sbz
14391541Srgrimes			/*
1440221250Sbz			 * Subtract 1 tick from tvtohz() since this isn't
14411541Srgrimes			 * a one-shot timer.
144255679Sshin			 */
144355679Sshin			if ((error = itimerfix(tv)) == 0)
1444157431Srwatson				d->bd_rtout = tvtohz(tv) - 1;
144555679Sshin			break;
144655679Sshin		}
144798211Shsu
144855679Sshin	/*
144955679Sshin	 * Get read timeout.
145078064Sume	 */
145178064Sume	case BIOCGRTIMEOUT:
145255679Sshin#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
145378064Sume	case BIOCGRTIMEOUT32:
145478064Sume#endif
145578064Sume		{
145678064Sume			struct timeval *tv;
145755679Sshin#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
145855679Sshin			struct timeval32 *tv32;
145955679Sshin			struct timeval tv64;
146055679Sshin
146155679Sshin			if (cmd == BIOCGRTIMEOUT32)
1462145355Sandre				tv = &tv64;
1463235051Sglebius			else
146455679Sshin#endif
1465119995Sru				tv = (struct timeval *)addr;
146655679Sshin
1467145355Sandre			tv->tv_sec = d->bd_rtout / hz;
1468145355Sandre			tv->tv_usec = (d->bd_rtout % hz) * tick;
1469145355Sandre#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
147055679Sshin			if (cmd == BIOCGRTIMEOUT32) {
147155679Sshin				tv32 = (struct timeval32 *)addr;
147255679Sshin				tv32->tv_sec = tv->tv_sec;
147378064Sume				tv32->tv_usec = tv->tv_usec;
147455679Sshin			}
147555679Sshin#endif
147655679Sshin
147778064Sume			break;
147855679Sshin		}
147955679Sshin
148055679Sshin	/*
148167456Sitojun	 * Get packet stats.
148278064Sume	 */
148355679Sshin	case BIOCGSTATS:
148455679Sshin		{
1485127870Srwatson			struct bpf_stat *bs = (struct bpf_stat *)addr;
148686764Sjlemon
148755679Sshin			/* XXXCSJP overflow */
148855679Sshin			bs->bs_recv = d->bd_rcount;
148955679Sshin			bs->bs_drop = d->bd_dcount;
149055679Sshin			break;
149155679Sshin		}
149267456Sitojun
149378064Sume	/*
149467456Sitojun	 * Set immediate mode.
149567456Sitojun	 */
149678064Sume	case BIOCIMMEDIATE:
149778064Sume		BPFD_LOCK(d);
149878064Sume		d->bd_immediate = *(u_int *)addr;
1499181803Sbz		BPFD_UNLOCK(d);
150078064Sume		break;
1501125776Sume
150286764Sjlemon	case BIOCVERSION:
1503186222Sbz		{
150486764Sjlemon			struct bpf_version *bv = (struct bpf_version *)addr;
150586764Sjlemon
150686764Sjlemon			bv->bv_major = BPF_MAJOR_VERSION;
150786764Sjlemon			bv->bv_minor = BPF_MINOR_VERSION;
1508186222Sbz			break;
1509181803Sbz		}
151086764Sjlemon
1511181803Sbz	/*
151255679Sshin	 * Get "header already complete" flag
1513181803Sbz	 */
1514125776Sume	case BIOCGHDRCMPLT:
151555679Sshin		BPFD_LOCK(d);
151655679Sshin		*(u_int *)addr = d->bd_hdrcmplt;
151755679Sshin		BPFD_UNLOCK(d);
151880428Speter		break;
151982122Ssilby
152082122Ssilby	/*
152182122Ssilby	 * Set "header already complete" flag
152282122Ssilby	 */
152382122Ssilby	case BIOCSHDRCMPLT:
152482122Ssilby		BPFD_LOCK(d);
152582122Ssilby		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
152694390Ssilby		BPFD_UNLOCK(d);
152794390Ssilby		break;
152882122Ssilby
152982122Ssilby	/*
153082122Ssilby	 * Get packet direction flag
153182122Ssilby	 */
153282122Ssilby	case BIOCGDIRECTION:
1533133874Srwatson		BPFD_LOCK(d);
153482122Ssilby		*(u_int *)addr = d->bd_direction;
153582122Ssilby		BPFD_UNLOCK(d);
153682122Ssilby		break;
153782122Ssilby
153882122Ssilby	/*
153982122Ssilby	 * Set packet direction flag
154082122Ssilby	 */
154182122Ssilby	case BIOCSDIRECTION:
1542128452Ssilby		{
1543128452Ssilby			u_int	direction;
1544128452Ssilby
1545128452Ssilby			direction = *(u_int *)addr;
1546128452Ssilby			switch (direction) {
1547128452Ssilby			case BPF_D_IN:
1548128452Ssilby			case BPF_D_INOUT:
1549128452Ssilby			case BPF_D_OUT:
1550128452Ssilby				BPFD_LOCK(d);
1551128452Ssilby				d->bd_direction = direction;
1552128452Ssilby				BPFD_UNLOCK(d);
1553128452Ssilby				break;
155482122Ssilby			default:
155582122Ssilby				error = EINVAL;
155682122Ssilby			}
155782122Ssilby		}
1558138018Srwatson		break;
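	/*
	 * Illustrative userland sketch (not part of this file): restricting
	 * a descriptor to inbound traffic with BIOCSDIRECTION, using the
	 * same values validated above.
	 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <err.h>

static void
example_capture_inbound_only(int fd)
{
	u_int dir = BPF_D_IN;	/* BPF_D_OUT and BPF_D_INOUT also accepted */

	if (ioctl(fd, BIOCSDIRECTION, &dir) == -1)
		err(1, "BIOCSDIRECTION");
}
#endif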
1559138018Srwatson
1560138018Srwatson	/*
156182122Ssilby	 * Get packet timestamp format and resolution.
156279413Ssilby	 */
156382122Ssilby	case BIOCGTSTAMP:
1564128452Ssilby		BPFD_LOCK(d);
1565128452Ssilby		*(u_int *)addr = d->bd_tstamp;
156679413Ssilby		BPFD_UNLOCK(d);
1567215701Sdim		break;
1568221690Smav
1569215701Sdim	/*
1570215701Sdim	 * Set packet timestamp format and resolution.
1571215701Sdim	 */
157275619Skris	case BIOCSTSTAMP:
1573195727Srwatson		{
1574221690Smav			u_int	func;
1575195727Srwatson
1576195727Srwatson			func = *(u_int *)addr;
1577195727Srwatson			if (BPF_T_VALID(func))
1578195699Srwatson				d->bd_tstamp = func;
157975619Skris			else
1580157431Srwatson				error = EINVAL;
158175619Skris		}
1582186057Sbz		break;
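	/*
	 * Illustrative userland sketch (not part of this file): selecting a
	 * timestamp format with BIOCSTSTAMP.  Note that anything other than
	 * the default microsecond format makes the kernel prepend the larger
	 * struct bpf_xhdr to captured packets (see bpf_hdrlen() and
	 * catchpacket() below), so the reader must expect that layout.
	 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <err.h>

static void
example_set_timestamp_format(int fd)
{
	/* Nanosecond resolution, offset from boot rather than wall clock. */
	u_int fmt = BPF_T_NANOTIME | BPF_T_MONOTONIC;

	if (ioctl(fd, BIOCSTSTAMP, &fmt) == -1)
		err(1, "BIOCSTSTAMP");
}
#endif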
158382122Ssilby
158482122Ssilby	case BIOCFEEDBACK:
1585221690Smav		BPFD_LOCK(d);
158675619Skris		d->bd_feedback = *(u_int *)addr;
1587178285Srwatson		BPFD_UNLOCK(d);
1588138018Srwatson		break;
1589157977Srwatson
159082122Ssilby	case BIOCLOCK:
1591181803Sbz		BPFD_LOCK(d);
1592181803Sbz		d->bd_locked = 1;
159382122Ssilby		BPFD_UNLOCK(d);
1594181803Sbz		break;
1595181803Sbz
159682122Ssilby	case FIONBIO:		/* Non-blocking I/O */
1597133874Srwatson		break;
159882122Ssilby
1599186057Sbz	case FIOASYNC:		/* Send signal on receive packets */
1600186057Sbz		BPFD_LOCK(d);
1601186057Sbz		d->bd_async = *(int *)addr;
160282122Ssilby		BPFD_UNLOCK(d);
160382122Ssilby		break;
1604186057Sbz
160582122Ssilby	case FIOSETOWN:
1606186057Sbz		/*
160782122Ssilby		 * XXX: Add some sort of locking here?
160882122Ssilby		 * fsetown() can sleep.
160982122Ssilby		 */
161082122Ssilby		error = fsetown(*(int *)addr, &d->bd_sigio);
1611186057Sbz		break;
161282122Ssilby
1613186057Sbz	case FIOGETOWN:
161482122Ssilby		BPFD_LOCK(d);
161582122Ssilby		*(int *)addr = fgetown(&d->bd_sigio);
1616186057Sbz		BPFD_UNLOCK(d);
1617186057Sbz		break;
161882122Ssilby
1619181803Sbz	/* This is deprecated, FIOSETOWN should be used instead. */
1620128452Ssilby	case TIOCSPGRP:
1621221690Smav		error = fsetown(-(*(int *)addr), &d->bd_sigio);
1622221690Smav		break;
1623221690Smav
1624183550Szec	/* This is deprecated, FIOGETOWN should be used instead. */
1625183550Szec	case TIOCGPGRP:
1626183550Szec		*(int *)addr = -fgetown(&d->bd_sigio);
1627221690Smav		break;
1628183550Szec
1629221690Smav	case BIOCSRSIG:		/* Set receive signal */
1630190787Szec		{
1631221690Smav			u_int sig;
1632128452Ssilby
1633128452Ssilby			sig = *(u_int *)addr;
1634128452Ssilby
163572959Sjlemon			if (sig >= NSIG)
163672959Sjlemon				error = EINVAL;
163772959Sjlemon			else {
163870103Sphk				BPFD_LOCK(d);
163998211Shsu				d->bd_sig = sig;
1640157431Srwatson				BPFD_UNLOCK(d);
164170103Sphk			}
1642157432Srwatson			break;
164370103Sphk		}
1644181803Sbz	case BIOCGRSIG:
1645178285Srwatson		BPFD_LOCK(d);
1646157433Srwatson		*(u_int *)addr = d->bd_sig;
1647189848Srwatson		BPFD_UNLOCK(d);
1648189848Srwatson		break;
1649157433Srwatson
1650157433Srwatson	case BIOCGETBUFMODE:
1651157432Srwatson		BPFD_LOCK(d);
1652157433Srwatson		*(u_int *)addr = d->bd_bufmode;
1653157433Srwatson		BPFD_UNLOCK(d);
1654146864Srwatson		break;
1655157433Srwatson
1656157433Srwatson	case BIOCSETBUFMODE:
1657157433Srwatson		/*
1658157433Srwatson		 * Allow the buffering mode to be changed as long as we
1659157433Srwatson		 * haven't yet committed to a particular mode.  Our
166072638Sphk		 * definition of commitment, for now, is whether or not a
166172638Sphk		 * buffer has been allocated or an interface attached, since
166272638Sphk		 * that's the point where things get tricky.
166310881Swollman		 */
1664235051Sglebius		switch (*(u_int *)addr) {
1665235051Sglebius		case BPF_BUFMODE_BUFFER:
166610930Swollman			break;
166710881Swollman
1668235051Sglebius		case BPF_BUFMODE_ZBUF:
1669235051Sglebius			if (bpf_zerocopy_enable)
1670235051Sglebius				break;
			/* FALLTHROUGH */
1672235051Sglebius
1673235051Sglebius		default:
1674235051Sglebius			CURVNET_RESTORE();
167598211Shsu			return (EINVAL);
1676235051Sglebius		}
167710881Swollman
1678157432Srwatson		BPFD_LOCK(d);
1679182851Sbz		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
168010881Swollman		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
1681178285Srwatson			BPFD_UNLOCK(d);
1682189848Srwatson			CURVNET_RESTORE();
1683189848Srwatson			return (EBUSY);
1684157433Srwatson		}
1685157433Srwatson		d->bd_bufmode = *(u_int *)addr;
1686157432Srwatson		BPFD_UNLOCK(d);
1687157433Srwatson		break;
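	/*
	 * Illustrative userland sketch (not part of this file): opting into
	 * zero-copy buffers.  Per the EBUSY check above this has to happen
	 * right after open(2), before any buffer is allocated or an
	 * interface is attached, and it only succeeds when the
	 * bpf_zerocopy_enable knob is non-zero.  A real consumer would
	 * follow up with BIOCGETZMAX and BIOCSETZBUF (handled just below)
	 * to register its shared-memory buffers.
	 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <err.h>

static void
example_request_zerocopy(int fd)
{
	u_int mode = BPF_BUFMODE_ZBUF;

	if (ioctl(fd, BIOCSETBUFMODE, &mode) == -1)
		err(1, "BIOCSETBUFMODE");
}
#endif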
1688157433Srwatson
1689235051Sglebius	case BIOCGETZMAX:
1690182851Sbz		error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
1691182851Sbz		break;
1692182851Sbz
1693182851Sbz	case BIOCSETZBUF:
1694182851Sbz		error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
1695182851Sbz		break;
1696182851Sbz
169755679Sshin	case BIOCROTZBUF:
1698190948Srwatson		error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
1699157433Srwatson		break;
1700157433Srwatson	}
1701161645Smohans	CURVNET_RESTORE();
1702161645Smohans	return (error);
1703169317Sandre}
1704215166Slstewart
1705252555Snp/*
1706139222Srwatson * Set d's packet filter program to fp.  If this file already has a filter,
170710881Swollman * free it and replace it.  Returns EINVAL for bogus requests.
170810881Swollman *
 * Note that we need the global lock here to serialize bpf_setf() and
 * bpf_setif() calls, since reading d->bd_bif cannot be protected by the
 * descriptor or interface lock due to lock order.
 *
 * Additionally, we have to acquire the interface write lock because
 * bpf_mtap() uses the interface read lock to read all filters.
17156283Swollman *
1716133874Srwatson */
1717252781Sandrestatic int
17186283Swollmanbpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
1719122922Sandre{
1720122922Sandre#ifdef COMPAT_FREEBSD32
1721122922Sandre	struct bpf_program fp_swab;
1722122922Sandre	struct bpf_program32 *fp32;
17236283Swollman#endif
1724122922Sandre	struct bpf_insn *fcode, *old;
1725122922Sandre#ifdef BPF_JITTER
1726122996Sandre	bpf_jit_filter *jfunc, *ofunc;
1727122922Sandre#endif
1728122922Sandre	size_t size;
1729122922Sandre	u_int flen;
1730122922Sandre	int need_upgrade;
1731122922Sandre
1732186119Sqingli#ifdef COMPAT_FREEBSD32
17336283Swollman	switch (cmd) {
1734122922Sandre	case BIOCSETF32:
1735122922Sandre	case BIOCSETWF32:
1736122922Sandre	case BIOCSETFNR32:
1737122922Sandre		fp32 = (struct bpf_program32 *)fp;
1738122922Sandre		fp_swab.bf_len = fp32->bf_len;
1739122922Sandre		fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
1740162084Sandre		fp = &fp_swab;
1741162084Sandre		switch (cmd) {
1742252781Sandre		case BIOCSETF32:
1743162084Sandre			cmd = BIOCSETF;
1744265357Srmacklem			break;
1745252781Sandre		case BIOCSETWF32:
1746252781Sandre			cmd = BIOCSETWF;
1747265357Srmacklem			break;
1748162084Sandre		}
1749122922Sandre		break;
1750122922Sandre	}
1751122922Sandre#endif
17526283Swollman
1753221250Sbz	fcode = NULL;
17546283Swollman#ifdef BPF_JITTER
175555679Sshin	jfunc = ofunc = NULL;
1756122922Sandre#endif
1757252781Sandre	need_upgrade = 0;
175855679Sshin
1759122922Sandre	/*
	 * Check the new filter for validity before acquiring any locks.
	 * Allocate memory for the new filter, if needed.
176255679Sshin	 */
1763122922Sandre	flen = fp->bf_len;
1764122922Sandre	if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
1765122996Sandre		return (EINVAL);
1766122922Sandre	size = flen * sizeof(*fp->bf_insns);
1767122922Sandre	if (size > 0) {
1768122922Sandre		/* We're setting up new filter.  Copy and check actual data. */
1769122922Sandre		fcode = malloc(size, M_BPF, M_WAITOK);
1770232292Sbz		if (copyin(fp->bf_insns, fcode, size) != 0 ||
177155679Sshin		    !bpf_validate(fcode, flen)) {
1772122922Sandre			free(fcode, M_BPF);
1773122922Sandre			return (EINVAL);
1774122922Sandre		}
1775122922Sandre#ifdef BPF_JITTER
1776122922Sandre		/* Filter is copied inside fcode and is perfectly valid. */
1777122922Sandre		jfunc = bpf_jitter(fcode, flen);
1778122922Sandre#endif
1779162084Sandre	}
1780162084Sandre
1781252781Sandre	BPF_LOCK();
1782162084Sandre
1783265357Srmacklem	/*
1784252781Sandre	 * Set up new filter.
1785252781Sandre	 * Protect filter change by interface lock.
1786265357Srmacklem	 * Additionally, we are protected by global lock here.
1787162084Sandre	 */
1788122922Sandre	if (d->bd_bif != NULL)
1789122922Sandre		BPFIF_WLOCK(d->bd_bif);
1790122922Sandre	BPFD_LOCK(d);
1791122922Sandre	if (cmd == BIOCSETWF) {
179255679Sshin		old = d->bd_wfilter;
179355679Sshin		d->bd_wfilter = fcode;
179455679Sshin	} else {
1795171167Sgnn		old = d->bd_rfilter;
179655679Sshin		d->bd_rfilter = fcode;
179755679Sshin#ifdef BPF_JITTER
1798157431Srwatson		ofunc = d->bd_bfilter;
179955679Sshin		d->bd_bfilter = jfunc;
180055679Sshin#endif
180155679Sshin		if (cmd == BIOCSETF)
180255679Sshin			reset_d(d);
180355679Sshin
180455679Sshin		if (fcode != NULL) {
180555679Sshin			/*
1806111145Sjlemon			 * Do not require upgrade by first BIOCSETF
			 * Do not require an upgrade on the first BIOCSETF
			 * (used by pcap_open_live() to set the snaplen).
180978642Ssilby			if (d->bd_writer != 0 && --d->bd_writer == 0)
1810139222Srwatson				need_upgrade = 1;
1811111119Simp			CTR4(KTR_NET, "%s: filter function set by pid %d, "
181255679Sshin			    "bd_writer counter %d, need_upgrade %d",
1813139222Srwatson			    __func__, d->bd_pid, d->bd_writer, need_upgrade);
181455679Sshin		}
181555679Sshin	}
181655679Sshin	BPFD_UNLOCK(d);
181755679Sshin	if (d->bd_bif != NULL)
181855679Sshin		BPFIF_WUNLOCK(d->bd_bif);
181955679Sshin	if (old != NULL)
182055679Sshin		free(old, M_BPF);
1821111144Sjlemon#ifdef BPF_JITTER
1822188306Sbz	if (ofunc != NULL)
182355679Sshin		bpf_destroy_jit_filter(ofunc);
182455679Sshin#endif
1825133874Srwatson
1826133874Srwatson	/* Move d to active readers list. */
1827133874Srwatson	if (need_upgrade)
1828133874Srwatson		bpf_upgraded(d);
1829133874Srwatson
1830188306Sbz	BPF_UNLOCK();
1831133874Srwatson	return (0);
183255679Sshin}
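/*
 * Illustrative userland sketch (not part of this file): a classic filter
 * program that accepts only IPv4 Ethernet frames, installed with BIOCSETF.
 * As implemented above, BIOCSETF also calls reset_d(), discarding anything
 * already buffered; BIOCSETFNR installs a filter without that reset.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <err.h>

static void
example_install_ipv4_filter(int fd)
{
	/* Load the Ethernet type field (offset 12) and return the whole
	 * packet for ETHERTYPE_IP, zero (reject) otherwise. */
	struct bpf_insn insns[] = {
		BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 1),
		BPF_STMT(BPF_RET + BPF_K, (u_int)-1),
		BPF_STMT(BPF_RET + BPF_K, 0),
	};
	struct bpf_program prog = {
		.bf_len = sizeof(insns) / sizeof(insns[0]),
		.bf_insns = insns,
	};

	if (ioctl(fd, BIOCSETF, &prog) == -1)
		err(1, "BIOCSETF");
}
#endif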
183355679Sshin
1834139222Srwatson/*
183555679Sshin * Detach a file from its current interface (if attached at all) and attach
1836171167Sgnn * to the interface indicated by the name stored in ifr.
183755679Sshin * Return an errno or 0.
1838125680Sbms */
1839125680Sbmsstatic int
1840125783Sbmsbpf_setif(struct bpf_d *d, struct ifreq *ifr)
1841125783Sbms{
1842125783Sbms	struct bpf_if *bp;
1843125783Sbms	struct ifnet *theywant;
1844125783Sbms
1845125783Sbms	BPF_LOCK_ASSERT();
1846125783Sbms
1847125819Sbms	theywant = ifunit(ifr->ifr_name);
1848125783Sbms	if (theywant == NULL || theywant->if_bpf == NULL)
1849125783Sbms		return (ENXIO);
1850125783Sbms
1851125783Sbms	bp = theywant->if_bpf;
1852183001Sbz
1853125680Sbms	/* Check if interface is not being detached from BPF */
1854125741Sbms	BPFIF_RLOCK(bp);
1855125741Sbms	if (bp->flags & BPFIF_FLAG_DYING) {
1856183001Sbz		BPFIF_RUNLOCK(bp);
1857125741Sbms		return (ENXIO);
1858125741Sbms	}
1859125741Sbms	BPFIF_RUNLOCK(bp);
1860125741Sbms
1861125741Sbms	/*
1862125680Sbms	 * Behavior here depends on the buffering model.  If we're using
1863125680Sbms	 * kernel memory buffers, then we can allocate them here.  If we're
1864125680Sbms	 * using zero-copy, then the user process must have registered
1865125680Sbms	 * buffers by the time we get here.  If not, return an error.
1866125680Sbms	 */
1867125680Sbms	switch (d->bd_bufmode) {
1868125680Sbms	case BPF_BUFMODE_BUFFER:
1869125680Sbms	case BPF_BUFMODE_ZBUF:
1870125680Sbms		if (d->bd_sbuf == NULL)
1871125680Sbms			return (EINVAL);
1872125680Sbms		break;
1873125680Sbms
1874125680Sbms	default:
1875183001Sbz		panic("bpf_setif: bufmode %d", d->bd_bufmode);
1876125741Sbms	}
1877125680Sbms	if (bp != d->bd_bif)
1878125680Sbms		bpf_attachd(d, bp);
1879221250Sbz	BPFD_LOCK(d);
1880125680Sbms	reset_d(d);
1881221250Sbz	BPFD_UNLOCK(d);
1882125680Sbms	return (0);
1883125680Sbms}
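/*
 * Illustrative userland sketch (not part of this file): binding a
 * descriptor to an interface via BIOCSETIF, which lands in bpf_setif()
 * above.  "em0" is just a placeholder name.  Per bpf(4), a non-default
 * buffer size (BIOCSBLEN) must be requested before this call.
 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <net/bpf.h>
#include <err.h>
#include <string.h>

static void
example_bind_interface(int fd)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
	if (ioctl(fd, BIOCSETIF, &ifr) == -1)
		err(1, "BIOCSETIF");
}
#endif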
1884125680Sbms
1885221250Sbz/*
1886125680Sbms * Support for select() and poll() system calls
1887221250Sbz *
1888125680Sbms * Return true iff the specific operation will not block indefinitely.
1889125680Sbms * Otherwise, return false but make a note that a selwakeup() must be done.
1890183001Sbz */
1891183001Sbzstatic int
1892183001Sbzbpfpoll(struct cdev *dev, int events, struct thread *td)
1893183001Sbz{
1894183001Sbz	struct bpf_d *d;
1895183001Sbz	int revents;
1896183001Sbz
1897125680Sbms	if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
1898125680Sbms		return (events &
1899125741Sbms		    (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));
1900125741Sbms
1901125741Sbms	/*
1902125741Sbms	 * Refresh PID associated with this descriptor.
1903183001Sbz	 */
1904125680Sbms	revents = events & (POLLOUT | POLLWRNORM);
1905183001Sbz	BPFD_LOCK(d);
1906183001Sbz	BPF_PID_REFRESH(d, td);
1907183001Sbz	if (events & (POLLIN | POLLRDNORM)) {
1908183001Sbz		if (bpf_ready(d))
1909221250Sbz			revents |= events & (POLLIN | POLLRDNORM);
1910183001Sbz		else {
1911183001Sbz			selrecord(td, &d->bd_sel);
1912183001Sbz			/* Start the read timeout if necessary. */
1913183001Sbz			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1914183001Sbz				callout_reset(&d->bd_callout, d->bd_rtout,
1915183001Sbz				    bpf_timed_out, d);
1916221250Sbz				d->bd_state = BPF_WAITING;
1917183001Sbz			}
1918183001Sbz		}
1919183001Sbz	}
1920183001Sbz	BPFD_UNLOCK(d);
1921183001Sbz	return (revents);
1922183001Sbz}
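/*
 * Illustrative userland sketch (not part of this file): waiting for
 * captured data with poll(2), which ends up in bpfpoll() above.  The
 * buffer size "buflen" is assumed to have been obtained with BIOCGBLEN.
 */
#if 0
#include <sys/types.h>
#include <err.h>
#include <poll.h>
#include <unistd.h>

static void
example_poll_loop(int fd, void *buf, size_t buflen)
{
	struct pollfd pfd;
	ssize_t cc;

	pfd.fd = fd;
	pfd.events = POLLIN;
	for (;;) {
		if (poll(&pfd, 1, -1) == -1)
			err(1, "poll");
		if ((pfd.revents & POLLIN) == 0)
			continue;
		/* One read(2) returns a whole store buffer's worth of
		 * packets; see the record-walking sketch further below. */
		cc = read(fd, buf, buflen);
		if (cc == -1)
			err(1, "read");
	}
}
#endif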
1923183001Sbz
1924183001Sbz/*
1925183001Sbz * Support for kevent() system call.  Register EVFILT_READ filters and
1926183001Sbz * reject all others.
1927183001Sbz */
1928183001Sbzint
1929183001Sbzbpfkqfilter(struct cdev *dev, struct knote *kn)
1930183001Sbz{
1931125741Sbms	struct bpf_d *d;
1932125741Sbms
1933125680Sbms	if (devfs_get_cdevpriv((void **)&d) != 0 ||
1934125680Sbms	    kn->kn_filter != EVFILT_READ)
1935183001Sbz		return (1);
1936183001Sbz
1937183001Sbz	/*
1938183001Sbz	 * Refresh PID associated with this descriptor.
1939183001Sbz	 */
1940183001Sbz	BPFD_LOCK(d);
1941183001Sbz	BPF_PID_REFRESH_CUR(d);
1942125680Sbms	kn->kn_fop = &bpfread_filtops;
1943125680Sbms	kn->kn_hook = d;
1944125741Sbms	knlist_add(&d->bd_sel.si_note, kn, 1);
1945125680Sbms	BPFD_UNLOCK(d);
1946125680Sbms
1947183001Sbz	return (0);
1948125680Sbms}
1949125680Sbms
1950125680Sbmsstatic void
1951125680Sbmsfilt_bpfdetach(struct knote *kn)
1952125680Sbms{
1953125680Sbms	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1954125680Sbms
1955183001Sbz	knlist_remove(&d->bd_sel.si_note, kn, 0);
1956221250Sbz}
1957183001Sbz
1958183001Sbzstatic int
1959183001Sbzfilt_bpfread(struct knote *kn, long hint)
1960183001Sbz{
1961183001Sbz	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1962183001Sbz	int ready;
1963183001Sbz
1964183001Sbz	BPFD_LOCK_ASSERT(d);
1965183001Sbz	ready = bpf_ready(d);
1966125741Sbms	if (ready) {
1967183001Sbz		kn->kn_data = d->bd_slen;
1968183001Sbz		while (d->bd_hbuf_in_use)
1969183001Sbz			mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
1970221250Sbz			    PRINET, "bd_hbuf", 0);
1971183001Sbz		if (d->bd_hbuf)
1972125680Sbms			kn->kn_data += d->bd_hlen;
1973183001Sbz	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1974183001Sbz		callout_reset(&d->bd_callout, d->bd_rtout,
1975183001Sbz		    bpf_timed_out, d);
1976183001Sbz		d->bd_state = BPF_WAITING;
1977183001Sbz	}
1978183001Sbz
1979183001Sbz	return (ready);
1980183001Sbz}
1981183001Sbz
1982183001Sbz#define	BPF_TSTAMP_NONE		0
1983183001Sbz#define	BPF_TSTAMP_FAST		1
1984183001Sbz#define	BPF_TSTAMP_NORMAL	2
1985183001Sbz#define	BPF_TSTAMP_EXTERN	3
1986183001Sbz
1987183001Sbzstatic int
1988183001Sbzbpf_ts_quality(int tstype)
1989183001Sbz{
1990183001Sbz
1991183001Sbz	if (tstype == BPF_T_NONE)
1992183001Sbz		return (BPF_TSTAMP_NONE);
1993183001Sbz	if ((tstype & BPF_T_FAST) != 0)
1994183001Sbz		return (BPF_TSTAMP_FAST);
1995183001Sbz
1996183001Sbz	return (BPF_TSTAMP_NORMAL);
1997183001Sbz}
1998183001Sbz
1999183001Sbzstatic int
2000183001Sbzbpf_gettime(struct bintime *bt, int tstype, struct mbuf *m)
2001183001Sbz{
2002183001Sbz	struct m_tag *tag;
2003183001Sbz	int quality;
2004183001Sbz
2005183001Sbz	quality = bpf_ts_quality(tstype);
2006183001Sbz	if (quality == BPF_TSTAMP_NONE)
2007183001Sbz		return (quality);
2008125680Sbms
2009125680Sbms	if (m != NULL) {
2010125680Sbms		tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL);
2011125680Sbms		if (tag != NULL) {
2012125680Sbms			*bt = *(struct bintime *)(tag + 1);
2013125680Sbms			return (BPF_TSTAMP_EXTERN);
2014125680Sbms		}
2015125741Sbms	}
2016125680Sbms	if (quality == BPF_TSTAMP_NORMAL)
2017125680Sbms		binuptime(bt);
2018125680Sbms	else
2019125680Sbms		getbinuptime(bt);
2020125680Sbms
2021125783Sbms	return (quality);
2022125741Sbms}
2023125680Sbms
2024125680Sbms/*
2025125680Sbms * Incoming linkage from device drivers.  Process the packet pkt, of length
2026174022Sbz * pktlen, which is stored in a contiguous buffer.  The packet is parsed
2027125680Sbms * by each process' filter, and if accepted, stashed into the corresponding
2028125741Sbms * buffer.
2029125680Sbms */
2030125680Sbmsvoid
2031125680Sbmsbpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
2032125680Sbms{
2033221023Sattilio	struct bintime bt;
2034221023Sattilio	struct bpf_d *d;
2035221023Sattilio#ifdef BPF_JITTER
2036221023Sattilio	bpf_jit_filter *bf;
2037221023Sattilio#endif
2038221023Sattilio	u_int slen;
2039221023Sattilio	int gottime;
2040221023Sattilio
2041221023Sattilio	gottime = BPF_TSTAMP_NONE;
2042221023Sattilio
2043221023Sattilio	BPFIF_RLOCK(bp);
2044221023Sattilio
2045221023Sattilio	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2046221023Sattilio		/*
2047221023Sattilio		 * We are not using any locks for d here because:
2048221023Sattilio		 * 1) any filter change is protected by interface
2049221023Sattilio		 * write lock
2050221023Sattilio		 * 2) destroying/detaching d is protected by interface
2051221023Sattilio		 * write lock, too
2052221023Sattilio		 */
2053221023Sattilio
2054221023Sattilio		/* XXX: Do not protect counter for the sake of performance. */
2055221023Sattilio		++d->bd_rcount;
2056221023Sattilio		/*
		 * NB: We don't call BPF_CHECK_DIRECTION() here since there is
		 * no way for the caller to indicate to us whether this packet
2059221023Sattilio		 * is inbound or outbound.  In the bpf_mtap() routines, we use
2060221023Sattilio		 * the interface pointers on the mbuf to figure it out.
2061221023Sattilio		 */
2062221023Sattilio#ifdef BPF_JITTER
2063221023Sattilio		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
2064221023Sattilio		if (bf != NULL)
2065221023Sattilio			slen = (*(bf->func))(pkt, pktlen, pktlen);
2066221023Sattilio		else
2067221023Sattilio#endif
2068221023Sattilio		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
2069221023Sattilio		if (slen != 0) {
2070221023Sattilio			/*
			 * Filter matches.  Let's acquire the write lock.
2072221023Sattilio			 */
2073221023Sattilio			BPFD_LOCK(d);
2074221023Sattilio
2075221023Sattilio			d->bd_fcount++;
2076221023Sattilio			if (gottime < bpf_ts_quality(d->bd_tstamp))
2077221023Sattilio				gottime = bpf_gettime(&bt, d->bd_tstamp, NULL);
2078221023Sattilio#ifdef MAC
2079221023Sattilio			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2080221023Sattilio#endif
2081221023Sattilio				catchpacket(d, pkt, pktlen, slen,
2082221023Sattilio				    bpf_append_bytes, &bt);
2083221023Sattilio			BPFD_UNLOCK(d);
2084221023Sattilio		}
2085221023Sattilio	}
2086221023Sattilio	BPFIF_RUNLOCK(bp);
2087221023Sattilio}
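/*
 * Illustrative kernel-side sketch (not part of this file): how a driver
 * with a frame in a single contiguous buffer might hand it to the routine
 * above.  The BPF_TAP() convenience macro from net/bpf.h is assumed here;
 * drivers working with mbufs use bpf_mtap() below instead.
 */
#if 0
static void
example_driver_rx(struct ifnet *ifp, u_char *frame, u_int framelen)
{

	/* The macro checks that at least one listener is attached before
	 * calling bpf_tap(ifp->if_bpf, frame, framelen). */
	BPF_TAP(ifp, frame, framelen);
}
#endif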
2088221023Sattilio
2089221023Sattilio#define	BPF_CHECK_DIRECTION(d, r, i)				\
2090221023Sattilio	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
2091221023Sattilio	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
2092221023Sattilio
2093125680Sbms/*
2094141886Smaxim * Incoming linkage from device drivers, when packet is in an mbuf chain.
2095141886Smaxim * Locking model is explained in bpf_tap().
2096141886Smaxim */
2097141886Smaximvoid
2098141886Smaximbpf_mtap(struct bpf_if *bp, struct mbuf *m)
2099141886Smaxim{
2100141886Smaxim	struct bintime bt;
2101141886Smaxim	struct bpf_d *d;
2102150804Smaxim#ifdef BPF_JITTER
2103141886Smaxim	bpf_jit_filter *bf;
2104141886Smaxim#endif
2105141886Smaxim	u_int pktlen, slen;
2106141886Smaxim	int gottime;
2107141886Smaxim
2108141886Smaxim	/* Skip outgoing duplicate packets. */
2109141886Smaxim	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
2110141886Smaxim		m->m_flags &= ~M_PROMISC;
2111141886Smaxim		return;
2112141886Smaxim	}
2113141886Smaxim
2114141886Smaxim	pktlen = m_length(m, NULL);
2115141886Smaxim	gottime = BPF_TSTAMP_NONE;
2116141886Smaxim
2117141886Smaxim	BPFIF_RLOCK(bp);
2118141886Smaxim
2119141886Smaxim	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2120141886Smaxim		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
2121141886Smaxim			continue;
2122141886Smaxim		++d->bd_rcount;
2123141886Smaxim#ifdef BPF_JITTER
2124141886Smaxim		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
2125141886Smaxim		/* XXX We cannot handle multiple mbufs. */
2126141886Smaxim		if (bf != NULL && m->m_next == NULL)
2127141886Smaxim			slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen);
2128141886Smaxim		else
2129141886Smaxim#endif
2130141886Smaxim		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
2131141886Smaxim		if (slen != 0) {
2132141886Smaxim			BPFD_LOCK(d);
2133141886Smaxim
2134141886Smaxim			d->bd_fcount++;
2135141886Smaxim			if (gottime < bpf_ts_quality(d->bd_tstamp))
2136141886Smaxim				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
2137141886Smaxim#ifdef MAC
2138141886Smaxim			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2139141886Smaxim#endif
2140141886Smaxim				catchpacket(d, (u_char *)m, pktlen, slen,
2141141886Smaxim				    bpf_append_mbuf, &bt);
2142141886Smaxim			BPFD_UNLOCK(d);
2143181803Sbz		}
2144141886Smaxim	}
2145148385Sume	BPFIF_RUNLOCK(bp);
2146181803Sbz}
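/*
 * Illustrative kernel-side sketch (not part of this file): the common
 * pattern for mbuf-based drivers, using the BPF_MTAP() macro from
 * net/bpf.h, which skips the call entirely when no listener is attached.
 */
#if 0
static void
example_driver_tx_tap(struct ifnet *ifp, struct mbuf *m)
{

	BPF_MTAP(ifp, m);
}
#endif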
2147148616Sume
2148148385Sume/*
2149141886Smaxim * Incoming linkage from device drivers, when packet is in
2150141886Smaxim * an mbuf chain and to be prepended by a contiguous header.
2151221250Sbz */
2152141886Smaximvoid
2153141886Smaximbpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
2154141886Smaxim{
2155141886Smaxim	struct bintime bt;
2156141886Smaxim	struct mbuf mb;
2157141886Smaxim	struct bpf_d *d;
2158141886Smaxim	u_int pktlen, slen;
2159221250Sbz	int gottime;
2160141886Smaxim
2161141886Smaxim	/* Skip outgoing duplicate packets. */
2162141886Smaxim	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
2163181803Sbz		m->m_flags &= ~M_PROMISC;
2164141886Smaxim		return;
2165141886Smaxim	}
2166141886Smaxim
2167222488Srwatson	pktlen = m_length(m, NULL);
2168222488Srwatson	/*
2169222488Srwatson	 * Craft on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only set up what's
2171141886Smaxim	 * absolutely needed--this mbuf should never go anywhere else.
2172221250Sbz	 */
2173141886Smaxim	mb.m_next = m;
2174222488Srwatson	mb.m_data = data;
2175222488Srwatson	mb.m_len = dlen;
2176141886Smaxim	pktlen += dlen;
2177221250Sbz
2178141886Smaxim	gottime = BPF_TSTAMP_NONE;
2179141886Smaxim
2180189848Srwatson	BPFIF_RLOCK(bp);
2181157474Srwatson
2182157474Srwatson	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
2183157474Srwatson		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
2184157474Srwatson			continue;
2185157474Srwatson		++d->bd_rcount;
2186157474Srwatson		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
2187157427Srwatson		if (slen != 0) {
2188157474Srwatson			BPFD_LOCK(d);
2189157474Srwatson
2190173884Srwatson			d->bd_fcount++;
2191178285Srwatson			if (gottime < bpf_ts_quality(d->bd_tstamp))
2192189848Srwatson				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
2193157427Srwatson#ifdef MAC
2194157427Srwatson			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
2195173884Srwatson#endif
2196173884Srwatson				catchpacket(d, (u_char *)&mb, pktlen, slen,
2197178285Srwatson				    bpf_append_mbuf, &bt);
2198173884Srwatson			BPFD_UNLOCK(d);
2199178285Srwatson		}
2200141886Smaxim	}
2201141886Smaxim	BPFIF_RUNLOCK(bp);
2202181803Sbz}
2203141886Smaxim
2204141886Smaxim#undef	BPF_CHECK_DIRECTION
2205141886Smaxim
2206250744Szec#undef	BPF_TSTAMP_NONE
2207141886Smaxim#undef	BPF_TSTAMP_FAST
2208141886Smaxim#undef	BPF_TSTAMP_NORMAL
2209169683Sandre#undef	BPF_TSTAMP_EXTERN
2210169683Sandre
2211169683Sandrestatic int
2212169683Sandrebpf_hdrlen(struct bpf_d *d)
2213169683Sandre{
2214169683Sandre	int hdrlen;
2215169683Sandre
2216169683Sandre	hdrlen = d->bd_bif->bif_hdrlen;
2217169683Sandre#ifndef BURN_BRIDGES
2218169683Sandre	if (d->bd_tstamp == BPF_T_NONE ||
2219169683Sandre	    BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME)
2220169683Sandre#ifdef COMPAT_FREEBSD32
2221169683Sandre		if (d->bd_compat32)
2222211462Sandre			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32);
2223211462Sandre		else
2224211462Sandre#endif
2225211462Sandre			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr);
2226211462Sandre	else
2227211462Sandre#endif
2228211462Sandre		hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr);
2229211462Sandre#ifdef COMPAT_FREEBSD32
2230211462Sandre	if (d->bd_compat32)
2231211462Sandre		hdrlen = BPF_WORDALIGN32(hdrlen);
2232211462Sandre	else
2233211462Sandre#endif
2234169683Sandre		hdrlen = BPF_WORDALIGN(hdrlen);
2235171229Speter
2236169683Sandre	return (hdrlen - d->bd_bif->bif_hdrlen);
2237211462Sandre}
2238211462Sandre
2239211462Sandrestatic void
2240211462Sandrebpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype)
2241211462Sandre{
2242211462Sandre	struct bintime bt2;
2243211462Sandre	struct timeval tsm;
2244211462Sandre	struct timespec tsn;
2245211462Sandre
2246211462Sandre	if ((tstype & BPF_T_MONOTONIC) == 0) {
2247211462Sandre		bt2 = *bt;
2248211462Sandre		bintime_add(&bt2, &boottimebin);
2249169683Sandre		bt = &bt2;
2250169683Sandre	}
2251169683Sandre	switch (BPF_T_FORMAT(tstype)) {
2252169683Sandre	case BPF_T_MICROTIME:
2253171230Speter		bintime2timeval(bt, &tsm);
2254169683Sandre		ts->bt_sec = tsm.tv_sec;
2255171230Speter		ts->bt_frac = tsm.tv_usec;
2256169683Sandre		break;
2257169683Sandre	case BPF_T_NANOTIME:
2258169683Sandre		bintime2timespec(bt, &tsn);
2259169683Sandre		ts->bt_sec = tsn.tv_sec;
2260169913Sandre		ts->bt_frac = tsn.tv_nsec;
2261169913Sandre		break;
2262169683Sandre	case BPF_T_BINTIME:
2263169913Sandre		ts->bt_sec = bt->sec;
2264169913Sandre		ts->bt_frac = bt->frac;
2265169683Sandre		break;
2266169913Sandre	}
2267169683Sandre}
2268169913Sandre
2269169683Sandre/*
2270169683Sandre * Move the packet data from interface memory (pkt) into the
2271170019Srwatson * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bpf_append_bytes is passed in to copy contiguous chunks,
 * while bpf_append_mbuf is passed in to copy mbuf chains.  In the
 * latter case, pkt is really an mbuf.
2275169683Sandre */
2276169683Sandrestatic void
2277169683Sandrecatchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
2278186222Sbz    void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
2279169683Sandre    struct bintime *bt)
2280169683Sandre{
2281169683Sandre	struct bpf_xhdr hdr;
2282169683Sandre#ifndef BURN_BRIDGES
2283169683Sandre	struct bpf_hdr hdr_old;
2284169683Sandre#ifdef COMPAT_FREEBSD32
2285169683Sandre	struct bpf_hdr32 hdr32_old;
2286169683Sandre#endif
2287169683Sandre#endif
2288169683Sandre	int caplen, curlen, hdrlen, totlen;
2289169683Sandre	int do_wakeup = 0;
2290169683Sandre	int do_timestamp;
2291169683Sandre	int tstype;
2292169683Sandre
2293169683Sandre	BPFD_LOCK_ASSERT(d);
2294169683Sandre
2295169683Sandre	/*
2296169683Sandre	 * Detect whether user space has released a buffer back to us, and if
2297169683Sandre	 * so, move it from being a hold buffer to a free buffer.  This may
2298169683Sandre	 * not be the best place to do it (for example, we might only want to
2299169683Sandre	 * run this check if we need the space), but for now it's a reliable
2300169683Sandre	 * spot to do it.
2301169683Sandre	 */
2302169683Sandre	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
2303169683Sandre		while (d->bd_hbuf_in_use)
2304221250Sbz			mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
2305169683Sandre			    PRINET, "bd_hbuf", 0);
2306169683Sandre		d->bd_fbuf = d->bd_hbuf;
2307169683Sandre		d->bd_hbuf = NULL;
2308169683Sandre		d->bd_hlen = 0;
2309169683Sandre		bpf_buf_reclaimed(d);
2310169683Sandre	}
2311169683Sandre
2312169683Sandre	/*
2313221250Sbz	 * Figure out how many bytes to move.  If the packet is
2314169683Sandre	 * greater or equal to the snapshot length, transfer that
2315169683Sandre	 * much.  Otherwise, transfer the whole packet (unless
2316169683Sandre	 * we hit the buffer size limit).
2317169683Sandre	 */
2318169683Sandre	hdrlen = bpf_hdrlen(d);
2319169683Sandre	totlen = hdrlen + min(snaplen, pktlen);
2320169683Sandre	if (totlen > d->bd_bufsize)
2321169913Sandre		totlen = d->bd_bufsize;
2322169683Sandre
2323169683Sandre	/*
2324169683Sandre	 * Round up the end of the previous packet to the next longword.
2325	 *
2326	 * Drop the packet if there's no room and no hope of room
2327	 * If the packet would overflow the storage buffer or the storage
2328	 * buffer is considered immutable by the buffer model, try to rotate
2329	 * the buffer and wakeup pending processes.
2330	 */
2331#ifdef COMPAT_FREEBSD32
2332	if (d->bd_compat32)
2333		curlen = BPF_WORDALIGN32(d->bd_slen);
2334	else
2335#endif
2336		curlen = BPF_WORDALIGN(d->bd_slen);
2337	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
2338		if (d->bd_fbuf == NULL) {
2339			/*
2340			 * There's no room in the store buffer, and no
2341			 * prospect of room, so drop the packet.  Notify the
2342			 * buffer model.
2343			 */
2344			bpf_buffull(d);
2345			++d->bd_dcount;
2346			return;
2347		}
2348		while (d->bd_hbuf_in_use)
2349			mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
2350			    PRINET, "bd_hbuf", 0);
2351		ROTATE_BUFFERS(d);
2352		do_wakeup = 1;
2353		curlen = 0;
2354	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
2355		/*
2356		 * Immediate mode is set, or the read timeout has already
2357		 * expired during a select call.  A packet arrived, so the
2358		 * reader should be woken up.
2359		 */
2360		do_wakeup = 1;
2361	caplen = totlen - hdrlen;
2362	tstype = d->bd_tstamp;
2363	do_timestamp = tstype != BPF_T_NONE;
2364#ifndef BURN_BRIDGES
2365	if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) {
2366		struct bpf_ts ts;
2367		if (do_timestamp)
2368			bpf_bintime2ts(bt, &ts, tstype);
2369#ifdef COMPAT_FREEBSD32
2370		if (d->bd_compat32) {
2371			bzero(&hdr32_old, sizeof(hdr32_old));
2372			if (do_timestamp) {
2373				hdr32_old.bh_tstamp.tv_sec = ts.bt_sec;
2374				hdr32_old.bh_tstamp.tv_usec = ts.bt_frac;
2375			}
2376			hdr32_old.bh_datalen = pktlen;
2377			hdr32_old.bh_hdrlen = hdrlen;
2378			hdr32_old.bh_caplen = caplen;
2379			bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old,
2380			    sizeof(hdr32_old));
2381			goto copy;
2382		}
2383#endif
2384		bzero(&hdr_old, sizeof(hdr_old));
2385		if (do_timestamp) {
2386			hdr_old.bh_tstamp.tv_sec = ts.bt_sec;
2387			hdr_old.bh_tstamp.tv_usec = ts.bt_frac;
2388		}
2389		hdr_old.bh_datalen = pktlen;
2390		hdr_old.bh_hdrlen = hdrlen;
2391		hdr_old.bh_caplen = caplen;
2392		bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old,
2393		    sizeof(hdr_old));
2394		goto copy;
2395	}
2396#endif
2397
2398	/*
2399	 * Append the bpf header.  Note we append the actual header size, but
2400	 * move forward the length of the header plus padding.
2401	 */
2402	bzero(&hdr, sizeof(hdr));
2403	if (do_timestamp)
2404		bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype);
2405	hdr.bh_datalen = pktlen;
2406	hdr.bh_hdrlen = hdrlen;
2407	hdr.bh_caplen = caplen;
2408	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));
2409
2410	/*
2411	 * Copy the packet data into the store buffer and update its length.
2412	 */
2413#ifndef BURN_BRIDGES
2414copy:
2415#endif
2416	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen);
2417	d->bd_slen = curlen + totlen;
2418
2419	if (do_wakeup)
2420		bpf_wakeup(d);
2421}
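/*
 * Illustrative userland sketch (not part of this file): walking the
 * records that catchpacket() lays out in the store buffer.  This assumes
 * the default microsecond timestamp format, so each record starts with
 * the classic struct bpf_hdr; with a non-default BIOCSTSTAMP setting the
 * header would be struct bpf_xhdr instead.
 */
#if 0
#include <sys/types.h>
#include <net/bpf.h>

static void
example_walk_records(char *buf, ssize_t cc,
    void (*handler)(const u_char *, u_int, u_int))
{
	char *p = buf;

	while (p < buf + cc) {
		const struct bpf_hdr *bh = (const struct bpf_hdr *)(void *)p;

		/* The captured bytes start bh_hdrlen bytes into the
		 * record; bh_datalen is the original wire length. */
		handler((const u_char *)p + bh->bh_hdrlen, bh->bh_caplen,
		    bh->bh_datalen);
		/* Records are padded to word alignment, exactly as
		 * catchpacket() rounds curlen above. */
		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
	}
}
#endif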
2422
2423/*
2424 * Free buffers currently in use by a descriptor.
2425 * Called on close.
2426 */
2427static void
2428bpf_freed(struct bpf_d *d)
2429{
2430
2431	/*
2432	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
2434	 * free.
2435	 */
2436	bpf_free(d);
2437	if (d->bd_rfilter != NULL) {
2438		free((caddr_t)d->bd_rfilter, M_BPF);
2439#ifdef BPF_JITTER
2440		if (d->bd_bfilter != NULL)
2441			bpf_destroy_jit_filter(d->bd_bfilter);
2442#endif
2443	}
2444	if (d->bd_wfilter != NULL)
2445		free((caddr_t)d->bd_wfilter, M_BPF);
2446	mtx_destroy(&d->bd_lock);
2447}
2448
2449/*
2450 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
2451 * fixed size of the link header (variable length headers not yet supported).
2452 */
2453void
2454bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2455{
2456
2457	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
2458}
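/*
 * Illustrative kernel-side sketch (not part of this file): a hypothetical
 * Ethernet-like driver registering with BPF at attach time and tearing
 * down at detach.  Real Ethernet drivers get this for free from
 * ether_ifattach()/ether_ifdetach().
 */
#if 0
static void
example_driver_attach(struct ifnet *ifp)
{

	/* DLT_EN10MB with a fixed 14-byte link header. */
	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
}

static void
example_driver_detach(struct ifnet *ifp)
{

	bpfdetach(ifp);
}
#endif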
2459
2460/*
2461 * Attach an interface to bpf.  ifp is a pointer to the structure
2462 * defining the interface to be attached, dlt is the link layer type,
2463 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
2465 */
2466void
2467bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
2468{
2469	struct bpf_if *bp;
2470
2471	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
2472	if (bp == NULL)
2473		panic("bpfattach");
2474
2475	LIST_INIT(&bp->bif_dlist);
2476	LIST_INIT(&bp->bif_wlist);
2477	bp->bif_ifp = ifp;
2478	bp->bif_dlt = dlt;
2479	rw_init(&bp->bif_lock, "bpf interface lock");
2480	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
2481	*driverp = bp;
2482
2483	BPF_LOCK();
2484	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
2485	BPF_UNLOCK();
2486
2487	bp->bif_hdrlen = hdrlen;
2488
2489	if (bootverbose)
2490		if_printf(ifp, "bpf attached\n");
2491}
2492
2493/*
2494 * Detach bpf from an interface. This involves detaching each descriptor
2495 * associated with the interface. Notify each descriptor as it's detached
2496 * so that any sleepers wake up and get ENXIO.
2497 */
2498void
2499bpfdetach(struct ifnet *ifp)
2500{
2501	struct bpf_if	*bp, *bp_temp;
2502	struct bpf_d	*d;
2503	int ndetached;
2504
2505	ndetached = 0;
2506
2507	BPF_LOCK();
2508	/* Find all bpf_if struct's which reference ifp and detach them. */
2509	LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
2510		if (ifp != bp->bif_ifp)
2511			continue;
2512
2513		LIST_REMOVE(bp, bif_next);
2514		/* Add to to-be-freed list */
2515		LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next);
2516
2517		ndetached++;
2518		/*
2519		 * Delay freeing bp till interface is detached
2520		 * and all routes through this interface are removed.
2521		 * Mark bp as detached to restrict new consumers.
2522		 */
2523		BPFIF_WLOCK(bp);
2524		bp->flags |= BPFIF_FLAG_DYING;
2525		BPFIF_WUNLOCK(bp);
2526
		CTR4(KTR_NET, "%s: scheduling free for encap %d (%p) for if %p",
2528		    __func__, bp->bif_dlt, bp, ifp);
2529
2530		/* Free common descriptors */
2531		while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
2532			bpf_detachd_locked(d);
2533			BPFD_LOCK(d);
2534			bpf_wakeup(d);
2535			BPFD_UNLOCK(d);
2536		}
2537
2538		/* Free writer-only descriptors */
2539		while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
2540			bpf_detachd_locked(d);
2541			BPFD_LOCK(d);
2542			bpf_wakeup(d);
2543			BPFD_UNLOCK(d);
2544		}
2545	}
2546	BPF_UNLOCK();
2547
2548#ifdef INVARIANTS
2549	if (ndetached == 0)
2550		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
2551#endif
2552}
2553
2554/*
2555 * Interface departure handler.
2556 * Note departure event does not guarantee interface is going down.
2557 * Interface renaming is currently done via departure/arrival event set.
2558 *
 * The departure handler is called after all routes pointing to the
 * given interface have been removed and the interface is in the down
 * state, so no packets can be sent or received.  We assume it is now
 * safe to free the data allocated by BPF.
2563 */
2564static void
2565bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
2566{
2567	struct bpf_if *bp, *bp_temp;
2568	int nmatched = 0;
2569
2570	BPF_LOCK();
2571	/*
2572	 * Find matching entries in free list.
2573	 * Nothing should be found if bpfdetach() was not called.
2574	 */
2575	LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) {
2576		if (ifp != bp->bif_ifp)
2577			continue;
2578
2579		CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
2580		    __func__, bp, ifp);
2581
2582		LIST_REMOVE(bp, bif_next);
2583
2584		rw_destroy(&bp->bif_lock);
2585		free(bp, M_BPF);
2586
2587		nmatched++;
2588	}
2589	BPF_UNLOCK();
2590
2591	/*
2592	 * Note that we cannot zero other pointers to
	 * custom DLTs possibly used by the given interface.
2594	 */
2595	if (nmatched != 0)
2596		ifp->if_bpf = NULL;
2597}
2598
2599/*
2600 * Get a list of available data link type of the interface.
2601 */
2602static int
2603bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
2604{
2605	int n, error;
2606	struct ifnet *ifp;
2607	struct bpf_if *bp;
2608
2609	BPF_LOCK_ASSERT();
2610
2611	ifp = d->bd_bif->bif_ifp;
2612	n = 0;
2613	error = 0;
2614	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2615		if (bp->bif_ifp != ifp)
2616			continue;
2617		if (bfl->bfl_list != NULL) {
2618			if (n >= bfl->bfl_len)
2619				return (ENOMEM);
2620			error = copyout(&bp->bif_dlt,
2621			    bfl->bfl_list + n, sizeof(u_int));
2622		}
2623		n++;
2624	}
2625	bfl->bfl_len = n;
2626	return (error);
2627}
2628
2629/*
2630 * Set the data link type of a BPF instance.
2631 */
2632static int
2633bpf_setdlt(struct bpf_d *d, u_int dlt)
2634{
2635	int error, opromisc;
2636	struct ifnet *ifp;
2637	struct bpf_if *bp;
2638
2639	BPF_LOCK_ASSERT();
2640
2641	if (d->bd_bif->bif_dlt == dlt)
2642		return (0);
2643	ifp = d->bd_bif->bif_ifp;
2644
2645	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2646		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
2647			break;
2648	}
2649
2650	if (bp != NULL) {
2651		opromisc = d->bd_promisc;
2652		bpf_attachd(d, bp);
2653		BPFD_LOCK(d);
2654		reset_d(d);
2655		BPFD_UNLOCK(d);
2656		if (opromisc) {
2657			error = ifpromisc(bp->bif_ifp, 1);
2658			if (error)
2659				if_printf(bp->bif_ifp,
2660					"bpf_setdlt: ifpromisc failed (%d)\n",
2661					error);
2662			else
2663				d->bd_promisc = 1;
2664		}
2665	}
2666	return (bp == NULL ? EINVAL : 0);
2667}
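/*
 * Illustrative userland sketch (not part of this file): the two-pass
 * BIOCGDLTLIST pattern supported by bpf_getdltlist() above (a NULL
 * bfl_list just returns the count), followed by switching the descriptor
 * to one of the advertised DLTs with BIOCSDLT, which ends up in
 * bpf_setdlt().
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <err.h>
#include <stdlib.h>
#include <string.h>

static void
example_pick_dlt(int fd, u_int wanted_dlt)
{
	struct bpf_dltlist bfl;
	u_int i;

	/* First pass: ask only for the number of available DLTs. */
	memset(&bfl, 0, sizeof(bfl));
	if (ioctl(fd, BIOCGDLTLIST, &bfl) == -1)
		err(1, "BIOCGDLTLIST (count)");

	/* Second pass: fetch the list itself. */
	bfl.bfl_list = calloc(bfl.bfl_len, sizeof(u_int));
	if (bfl.bfl_list == NULL)
		err(1, "calloc");
	if (ioctl(fd, BIOCGDLTLIST, &bfl) == -1)
		err(1, "BIOCGDLTLIST");

	for (i = 0; i < bfl.bfl_len; i++)
		if (bfl.bfl_list[i] == wanted_dlt)
			break;
	if (i < bfl.bfl_len && ioctl(fd, BIOCSDLT, &wanted_dlt) == -1)
		err(1, "BIOCSDLT");
	free(bfl.bfl_list);
}
#endif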
2668
2669static void
2670bpf_drvinit(void *unused)
2671{
2672	struct cdev *dev;
2673
2674	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
2675	LIST_INIT(&bpf_iflist);
2676	LIST_INIT(&bpf_freelist);
2677
2678	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
2679	/* For compatibility */
2680	make_dev_alias(dev, "bpf0");
2681
2682	/* Register interface departure handler */
2683	bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
2684		    ifnet_departure_event, bpf_ifdetach, NULL,
2685		    EVENTHANDLER_PRI_ANY);
2686}
2687
2688/*
2689 * Zero out the various packet counters associated with all of the bpf
2690 * descriptors.  At some point, we will probably want to get a bit more
2691 * granular and allow the user to specify descriptors to be zeroed.
2692 */
2693static void
2694bpf_zero_counters(void)
2695{
2696	struct bpf_if *bp;
2697	struct bpf_d *bd;
2698
2699	BPF_LOCK();
2700	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2701		BPFIF_RLOCK(bp);
2702		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
2703			BPFD_LOCK(bd);
2704			bd->bd_rcount = 0;
2705			bd->bd_dcount = 0;
2706			bd->bd_fcount = 0;
2707			bd->bd_wcount = 0;
2708			bd->bd_wfcount = 0;
2709			bd->bd_zcopy = 0;
2710			BPFD_UNLOCK(bd);
2711		}
2712		BPFIF_RUNLOCK(bp);
2713	}
2714	BPF_UNLOCK();
2715}
2716
2717/*
2718 * Fill filter statistics
2719 */
2720static void
2721bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
2722{
2723
2724	bzero(d, sizeof(*d));
2725	BPFD_LOCK_ASSERT(bd);
2726	d->bd_structsize = sizeof(*d);
2727	/* XXX: reading should be protected by global lock */
2728	d->bd_immediate = bd->bd_immediate;
2729	d->bd_promisc = bd->bd_promisc;
2730	d->bd_hdrcmplt = bd->bd_hdrcmplt;
2731	d->bd_direction = bd->bd_direction;
2732	d->bd_feedback = bd->bd_feedback;
2733	d->bd_async = bd->bd_async;
2734	d->bd_rcount = bd->bd_rcount;
2735	d->bd_dcount = bd->bd_dcount;
2736	d->bd_fcount = bd->bd_fcount;
2737	d->bd_sig = bd->bd_sig;
2738	d->bd_slen = bd->bd_slen;
2739	d->bd_hlen = bd->bd_hlen;
2740	d->bd_bufsize = bd->bd_bufsize;
2741	d->bd_pid = bd->bd_pid;
2742	strlcpy(d->bd_ifname,
2743	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
2744	d->bd_locked = bd->bd_locked;
2745	d->bd_wcount = bd->bd_wcount;
2746	d->bd_wdcount = bd->bd_wdcount;
2747	d->bd_wfcount = bd->bd_wfcount;
2748	d->bd_zcopy = bd->bd_zcopy;
2749	d->bd_bufmode = bd->bd_bufmode;
2750}
2751
2752/*
2753 * Handle `netstat -B' stats request
2754 */
2755static int
2756bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
2757{
2758	struct xbpf_d *xbdbuf, *xbd, zerostats;
2759	int index, error;
2760	struct bpf_if *bp;
2761	struct bpf_d *bd;
2762
2763	/*
	 * XXX This is not technically correct.  It is possible for
	 * unprivileged users to open bpf devices.  It would make sense
2766	 * if the users who opened the devices were able to retrieve
2767	 * the statistics for them, too.
2768	 */
2769	error = priv_check(req->td, PRIV_NET_BPF);
2770	if (error)
2771		return (error);
2772	/*
2773	 * Check to see if the user is requesting that the counters be
2774	 * zeroed out.  Explicitly check that the supplied data is zeroed,
2775	 * as we aren't allowing the user to set the counters currently.
2776	 */
2777	if (req->newptr != NULL) {
2778		if (req->newlen != sizeof(zerostats))
2779			return (EINVAL);
2780		bzero(&zerostats, sizeof(zerostats));
2781		xbd = req->newptr;
2782		if (bcmp(xbd, &zerostats, sizeof(*xbd)) != 0)
2783			return (EINVAL);
2784		bpf_zero_counters();
2785		return (0);
2786	}
2787	if (req->oldptr == NULL)
2788		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
2789	if (bpf_bpfd_cnt == 0)
2790		return (SYSCTL_OUT(req, 0, 0));
2791	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
2792	BPF_LOCK();
2793	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
2794		BPF_UNLOCK();
2795		free(xbdbuf, M_BPF);
2796		return (ENOMEM);
2797	}
2798	index = 0;
2799	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2800		BPFIF_RLOCK(bp);
2801		/* Send writers-only first */
2802		LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
2803			xbd = &xbdbuf[index++];
2804			BPFD_LOCK(bd);
2805			bpfstats_fill_xbpf(xbd, bd);
2806			BPFD_UNLOCK(bd);
2807		}
2808		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
2809			xbd = &xbdbuf[index++];
2810			BPFD_LOCK(bd);
2811			bpfstats_fill_xbpf(xbd, bd);
2812			BPFD_UNLOCK(bd);
2813		}
2814		BPFIF_RUNLOCK(bp);
2815	}
2816	BPF_UNLOCK();
2817	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
2818	free(xbdbuf, M_BPF);
2819	return (error);
2820}
2821
2822SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL);
2823
2824#else /* !DEV_BPF && !NETGRAPH_BPF */
2825/*
2826 * NOP stubs to allow bpf-using drivers to load and function.
2827 *
2828 * A 'better' implementation would allow the core bpf functionality
2829 * to be loaded at runtime.
2830 */
2831static struct bpf_if bp_null;
2832
2833void
2834bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
2835{
2836}
2837
2838void
2839bpf_mtap(struct bpf_if *bp, struct mbuf *m)
2840{
2841}
2842
2843void
2844bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
2845{
2846}
2847
2848void
2849bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2850{
2851
2852	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
2853}
2854
2855void
2856bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
2857{
2858
2859	*driverp = &bp_null;
2860}
2861
2862void
2863bpfdetach(struct ifnet *ifp)
2864{
2865}
2866
2867u_int
2868bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
2869{
2870	return -1;	/* "no filter" behaviour */
2871}
2872
2873int
2874bpf_validate(const struct bpf_insn *f, int len)
2875{
2876	return 0;		/* false */
2877}
2878
2879#endif /* !DEV_BPF && !NETGRAPH_BPF */
2880