/* if_tap.c revision 156783 */
/*-
 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * BASED ON:
 * -------------------------------------------------------------------------
 *
 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
 * Nottingham University 1987.
 */

/*
 * $FreeBSD: head/sys/net/if_tap.c 156783 2006-03-16 18:22:01Z emax $
 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
 */
3762216Sdes
3893257Sbde#include "opt_inet.h"
3962216Sdes
40200462Sdelphij#include <sys/param.h>
4162216Sdes#include <sys/conf.h>
4262216Sdes#include <sys/fcntl.h>
43253680Sdes#include <sys/filio.h>
4463235Sdes#include <sys/kernel.h>
45125976Sdes#include <sys/malloc.h>
4662216Sdes#include <sys/mbuf.h>
4762216Sdes#include <sys/module.h>
4862216Sdes#include <sys/poll.h>
4977241Sdes#include <sys/proc.h>
5062216Sdes#include <sys/selinfo.h>
5162216Sdes#include <sys/signalvar.h>
5262216Sdes#include <sys/socket.h>
5362216Sdes#include <sys/sockio.h>
54261234Sdes#include <sys/sysctl.h>
55187361Sdes#include <sys/systm.h>
5662216Sdes#include <sys/ttycom.h>
5762216Sdes#include <sys/uio.h>
58241737Sed#include <sys/queue.h>
59241737Sed
60241737Sed#include <net/bpf.h>
61241737Sed#include <net/ethernet.h>
62241737Sed#include <net/if.h>
63241737Sed#include <net/if_dl.h>
64241737Sed#include <net/route.h>
65241737Sed#include <net/if_types.h>
66241737Sed
67241737Sed#include <netinet/in.h>
68241737Sed
69241737Sed#include <net/if_tapvar.h>
70241737Sed#include <net/if_tap.h>
71241737Sed
72241737Sed
/* Name used for the character device switch and for the malloc type. */
#define CDEV_NAME	"tap"

/*
 * Debug printf, active only while the tapdebug flag is non-zero.
 *
 * Wrapped in do { } while (0) so that an invocation is a single statement:
 * the bare "if (tapdebug) printf" form would capture a following "else"
 * when used as the body of an unbraced if.
 */
#define TAPDEBUG(...)	do {						\
	if (tapdebug)							\
		printf(__VA_ARGS__);					\
} while (0)

/* Device name prefixes recognized by the clone handler. */
#define TAP		"tap"
#define VMNET		"vmnet"

/* Largest unit number; also used as the mask that strips flag bits. */
#define TAPMAXUNIT	0x7fff

/* Flag bit carried in the unit number that marks a vmnet device. */
#define VMNET_DEV_MASK	CLONE_FLAG0
80241737Sed
81241737Sed/* module */
82241737Sedstatic int		tapmodevent(module_t, int, void *);
83241737Sed
84241737Sed/* device */
85241737Sedstatic void		tapclone(void *, struct ucred *, char *, int,
86241737Sed			    struct cdev **);
87241737Sedstatic void		tapcreate(struct cdev *);
88339250Sdes
89241737Sed/* network interface */
90241737Sedstatic void		tapifstart(struct ifnet *);
91241737Sedstatic int		tapifioctl(struct ifnet *, u_long, caddr_t);
9262216Sdesstatic void		tapifinit(void *);
93241737Sed
94241737Sed/* character device */
95241737Sedstatic d_open_t		tapopen;
9662216Sdesstatic d_close_t	tapclose;
97241737Sedstatic d_read_t		tapread;
98241737Sedstatic d_write_t	tapwrite;
99241737Sedstatic d_ioctl_t	tapioctl;
10062216Sdesstatic d_poll_t		tappoll;
101253680Sdesstatic d_kqfilter_t	tapkqfilter;
102253680Sdes
103253680Sdes/* kqueue(2) */
104253680Sdesstatic int		tapkqread(struct knote *, long);
105253680Sdesstatic int		tapkqwrite(struct knote *, long);
106253680Sdesstatic void		tapkqdetach(struct knote *);
107253680Sdes
108253680Sdesstatic struct filterops	tap_read_filterops = {
109253680Sdes	.f_isfd =	1,
110253680Sdes	.f_attach =	NULL,
111253680Sdes	.f_detach =	tapkqdetach,
112253680Sdes	.f_event =	tapkqread,
113253680Sdes};
114261233Sdes
115253680Sdesstatic struct filterops	tap_write_filterops = {
116253680Sdes	.f_isfd =	1,
117253680Sdes	.f_attach =	NULL,
11862216Sdes	.f_detach =	tapkqdetach,
119253680Sdes	.f_event =	tapkqwrite,
120253680Sdes};
121253680Sdes
122253680Sdesstatic struct cdevsw	tap_cdevsw = {
123253680Sdes	.d_version =	D_VERSION,
124253680Sdes	.d_flags =	D_PSEUDO | D_NEEDGIANT,
125253680Sdes	.d_open =	tapopen,
126253680Sdes	.d_close =	tapclose,
127253680Sdes	.d_read =	tapread,
128253680Sdes	.d_write =	tapwrite,
129253680Sdes	.d_ioctl =	tapioctl,
130253680Sdes	.d_poll =	tappoll,
131253680Sdes	.d_name =	CDEV_NAME,
132253680Sdes	.d_kqfilter =	tapkqfilter,
133253680Sdes};
134253680Sdes
135253680Sdes/*
136253680Sdes * All global variables in if_tap.c are locked with tapmtx, with the
137253680Sdes * exception of tapdebug, which is accessed unlocked; tapclones is
138253680Sdes * static at runtime.
139253680Sdes */
140253680Sdesstatic struct mtx		tapmtx;
141253680Sdesstatic int			tapdebug = 0;        /* debug flag   */
142253680Sdesstatic int			tapuopen = 0;        /* allow user open() */
143253680Sdesstatic SLIST_HEAD(, tap_softc)	taphead;             /* first device */
144253680Sdesstatic struct clonedevs 	*tapclones;
145253680Sdes
146253680SdesMALLOC_DECLARE(M_TAP);
147253680SdesMALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
148253680SdesSYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
149253680Sdes
150253680SdesSYSCTL_DECL(_net_link);
151253680SdesSYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
152261233Sdes    "Ethernet tunnel software network interface");
153253680SdesSYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
154253680Sdes	"Allow user to open /dev/tap (based on node permissions)");
155253680SdesSYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");
156253680Sdes
157253680SdesDEV_MODULE(if_tap, tapmodevent, NULL);
158253680Sdes
159253680Sdes/*
160253680Sdes * tapmodevent
161253680Sdes *
162253680Sdes * module event handler
163253680Sdes */
164253680Sdesstatic int
165253680Sdestapmodevent(module_t mod, int type, void *data)
166253680Sdes{
167253680Sdes	static eventhandler_tag	 eh_tag = NULL;
168253680Sdes	struct tap_softc	*tp = NULL;
169253680Sdes	struct ifnet		*ifp = NULL;
170253680Sdes	int			 s;
171253680Sdes
17281863Sdes	switch (type) {
17381863Sdes	case MOD_LOAD:
17481863Sdes
17579837Sdes		/* intitialize device */
17662216Sdes
17762216Sdes		mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF);
17879837Sdes		SLIST_INIT(&taphead);
17979837Sdes
18079837Sdes		clone_setup(&tapclones);
18179837Sdes		eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000);
18279837Sdes		if (eh_tag == NULL) {
18379837Sdes			clone_cleanup(&tapclones);
18479837Sdes			mtx_destroy(&tapmtx);
18579837Sdes			return (ENOMEM);
18679837Sdes		}
18779837Sdes		return (0);
18879837Sdes
18962216Sdes	case MOD_UNLOAD:
19062216Sdes		/*
19162216Sdes		 * The EBUSY algorithm here can't quite atomically
192125965Sdes		 * guarantee that this is race-free since we have to
193243147Sandre		 * release the tap mtx to deregister the clone handler.
194243147Sandre		 */
195243147Sandre		mtx_lock(&tapmtx);
196243147Sandre		SLIST_FOREACH(tp, &taphead, tap_next) {
197243147Sandre			mtx_lock(&tp->tap_mtx);
198243147Sandre			if (tp->tap_flags & TAP_OPEN) {
199243147Sandre				mtx_unlock(&tp->tap_mtx);
20062216Sdes				mtx_unlock(&tapmtx);
20162216Sdes				return (EBUSY);
20281863Sdes			}
203339250Sdes			mtx_unlock(&tp->tap_mtx);
204339250Sdes		}
205339250Sdes		mtx_unlock(&tapmtx);
206339250Sdes
207339250Sdes		EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);
208339250Sdes
209339250Sdes		mtx_lock(&tapmtx);
210339250Sdes		while ((tp = SLIST_FIRST(&taphead)) != NULL) {
211339250Sdes			SLIST_REMOVE_HEAD(&taphead, tap_next);
212339250Sdes			mtx_unlock(&tapmtx);
213339250Sdes
214339250Sdes			ifp = tp->tap_ifp;
215339250Sdes
216339250Sdes			TAPDEBUG("detaching %s\n", ifp->if_xname);
217339250Sdes
218339250Sdes			/* Unlocked read. */
219339250Sdes			KASSERT(!(tp->tap_flags & TAP_OPEN),
220339250Sdes				("%s flags is out of sync", ifp->if_xname));
221339250Sdes
222339250Sdes			destroy_dev(tp->tap_dev);
223339250Sdes			s = splimp();
224339250Sdes			ether_ifdetach(ifp);
225109702Sdes			if_free_type(ifp, IFT_ETHER);
226109702Sdes			splx(s);
227339250Sdes
228339250Sdes			mtx_destroy(&tp->tap_mtx);
229109702Sdes			free(tp, M_TAP);
230125976Sdes			mtx_lock(&tapmtx);
231125976Sdes		}
232109702Sdes		mtx_unlock(&tapmtx);
233109702Sdes		clone_cleanup(&tapclones);
234112083Sdes
235112083Sdes		mtx_destroy(&tapmtx);
236112114Sdes
237339250Sdes		break;
238339250Sdes
239125965Sdes	default:
240339250Sdes		return (EOPNOTSUPP);
241125965Sdes	}
242125965Sdes
243125965Sdes	return (0);
244125965Sdes} /* tapmodevent */
245125965Sdes
246125965Sdes
247339250Sdes/*
248339250Sdes * DEVFS handler
249125965Sdes *
250125965Sdes * We need to support two kind of devices - tap and vmnet
251125965Sdes */
252125965Sdesstatic void
253125965Sdestapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev)
254125965Sdes{
255109702Sdes	u_int		extra;
256339250Sdes	int		i, unit;
257109702Sdes	char		*device_name = name;
258109702Sdes
259109702Sdes	if (*dev != NULL)
260109702Sdes		return;
261109702Sdes
262339250Sdes	device_name = TAP;
263339250Sdes	extra = 0;
264109702Sdes	if (strcmp(name, TAP) == 0) {
265339250Sdes		unit = -1;
266109735Sdes	} else if (strcmp(name, VMNET) == 0) {
267109702Sdes		device_name = VMNET;
268339250Sdes		extra = VMNET_DEV_MASK;
269339250Sdes		unit = -1;
270243147Sandre	} else if (dev_stdclone(name, NULL, device_name, &unit) != 1) {
271109735Sdes		device_name = VMNET;
272339250Sdes		extra = VMNET_DEV_MASK;
273125965Sdes		if (dev_stdclone(name, NULL, device_name, &unit) != 1)
274244058Sandre			return;
275339250Sdes	}
276339250Sdes
277109735Sdes	/* find any existing device, or allocate new unit number */
278109702Sdes	i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra);
279109702Sdes	if (i) {
280109702Sdes		*dev = make_dev(&tap_cdevsw, unit2minor(unit | extra),
28181863Sdes		     UID_ROOT, GID_WHEEL, 0600, "%s%d", device_name, unit);
28281863Sdes		if (*dev != NULL) {
28379837Sdes			dev_ref(*dev);
28463046Sdes			(*dev)->si_flags |= SI_CHEAPCLONE;
28562216Sdes		}
286339250Sdes	}
28779837Sdes} /* tapclone */
28883863Sdes
28979837Sdes
29083863Sdes/*
291339250Sdes * tapcreate
29283863Sdes *
29383863Sdes * to create interface
294106043Sdes */
29579837Sdesstatic void
29679837Sdestapcreate(struct cdev *dev)
29779837Sdes{
298243147Sandre	struct ifnet		*ifp = NULL;
29979837Sdes	struct tap_softc	*tp = NULL;
30079837Sdes	unsigned short		 macaddr_hi;
301131615Sdes	int			 unit, s;
302339250Sdes	char			*name = NULL;
303339250Sdes	u_char			eaddr[6];
304339250Sdes
305339250Sdes	dev->si_flags &= ~SI_CHEAPCLONE;
306106041Sdes
307339250Sdes	/* allocate driver storage and create device */
308153894Sdes	MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
309153894Sdes	mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
310339250Sdes	mtx_lock(&tapmtx);
311125965Sdes	SLIST_INSERT_HEAD(&taphead, tp, tap_next);
312125965Sdes	mtx_unlock(&tapmtx);
313339250Sdes
314106041Sdes	unit = dev2unit(dev);
315243147Sandre
316243147Sandre	/* select device: tap or vmnet */
317243147Sandre	if (unit & VMNET_DEV_MASK) {
318243147Sandre		name = VMNET;
319339250Sdes		tp->tap_flags |= TAP_VMNET;
320339250Sdes	} else
321243147Sandre		name = TAP;
322243147Sandre
323339250Sdes	unit &= TAPMAXUNIT;
324339250Sdes
325339250Sdes	TAPDEBUG("tapcreate(%s%d). minor = %#x\n", name, unit, minor(dev));
326339250Sdes
327243147Sandre	/* generate fake MAC address: 00 bd xx xx xx unit_no */
32862216Sdes	macaddr_hi = htons(0x00bd);
32962216Sdes	bcopy(&macaddr_hi, eaddr, sizeof(short));
33081863Sdes	bcopy(&ticks, &eaddr[2], sizeof(long));
33181863Sdes	eaddr[5] = (u_char)unit;
33281863Sdes
33379837Sdes	/* fill the rest and attach interface */
33479837Sdes	ifp = tp->tap_ifp = if_alloc(IFT_ETHER);
33563046Sdes	if (ifp == NULL)
336339250Sdes		panic("%s%d: can not if_alloc()", name, unit);
337339250Sdes	ifp->if_softc = tp;
33879837Sdes	if_initname(ifp, name, unit);
33979837Sdes	ifp->if_init = tapifinit;
340339250Sdes	ifp->if_start = tapifstart;
34179837Sdes	ifp->if_ioctl = tapifioctl;
34279837Sdes	ifp->if_mtu = ETHERMTU;
34379837Sdes	ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
344243147Sandre	ifp->if_snd.ifq_maxlen = ifqmaxlen;
345339250Sdes
346125965Sdes	dev->si_drv1 = tp;
347125965Sdes	tp->tap_dev = dev;
348125965Sdes
34963046Sdes	s = splimp();
35063046Sdes	ether_ifattach(ifp, eaddr);
35181863Sdes	splx(s);
35281863Sdes
35381863Sdes	mtx_lock(&tp->tap_mtx);
35479837Sdes	tp->tap_flags |= TAP_INITED;
35579837Sdes	mtx_unlock(&tp->tap_mtx);
35663046Sdes
357339250Sdes	TAPDEBUG("interface %s is created. minor = %#x\n",
35879837Sdes		ifp->if_xname, minor(dev));
359339250Sdes} /* tapcreate */
360125965Sdes
36163046Sdes
36263046Sdes/*
36381863Sdes * tapopen
36481863Sdes *
36581863Sdes * to open tunnel. must be superuser
36679837Sdes */
36762216Sdesstatic int
36862216Sdestapopen(struct cdev *dev, int flag, int mode, struct thread *td)
369339250Sdes{
370339250Sdes	struct tap_softc	*tp = NULL;
371109735Sdes	struct ifnet		*ifp = NULL;
372339250Sdes	int			 s;
373243147Sandre
374125965Sdes	if (tapuopen == 0 && suser(td) != 0)
375125965Sdes		return (EPERM);
376339250Sdes
377339250Sdes	if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT)
378339250Sdes		return (ENXIO);
379339250Sdes
380125965Sdes	/*
38162216Sdes	 * XXXRW: Non-atomic test-and-set of si_drv1.  Currently protected
38262216Sdes	 * by Giant, but the race actually exists under memory pressure as
38381863Sdes	 * well even when running with Giant, as malloc() may sleep.
38481863Sdes	 */
38581863Sdes	tp = dev->si_drv1;
38679837Sdes	if (tp == NULL) {
38777241Sdes		tapcreate(dev);
38877241Sdes		tp = dev->si_drv1;
38979837Sdes	}
39079837Sdes
39179837Sdes	mtx_lock(&tp->tap_mtx);
39277241Sdes	if (tp->tap_flags & TAP_OPEN) {
39379837Sdes		mtx_unlock(&tp->tap_mtx);
39486242Siedowse		return (EBUSY);
39579837Sdes	}
39679837Sdes
39779837Sdes	bcopy(IF_LLADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr));
398132695Sdes	tp->tap_pid = td->td_proc->p_pid;
399132696Sdes	tp->tap_flags |= TAP_OPEN;
400132696Sdes	ifp = tp->tap_ifp;
40179837Sdes	mtx_unlock(&tp->tap_mtx);
40279837Sdes
40379837Sdes	s = splimp();
40479837Sdes	ifp->if_drv_flags |= IFF_DRV_RUNNING;
40579837Sdes	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
40679837Sdes	splx(s);
40779837Sdes
40879837Sdes	knlist_init(&tp->tap_rsel.si_note, NULL, NULL, NULL, NULL);
40979837Sdes
41079837Sdes	TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, minor(dev));
41179837Sdes
41279837Sdes	return (0);
41379837Sdes} /* tapopen */
41479837Sdes
41579837Sdes
416132695Sdes/*
417132696Sdes * tapclose
418132696Sdes *
419132696Sdes * close the device - mark i/f down & delete routing info
42079837Sdes */
421132695Sdesstatic int
42277241Sdestapclose(struct cdev *dev, int foo, int bar, struct thread *td)
42377241Sdes{
42481863Sdes	struct ifaddr		*ifa;
42581863Sdes	struct tap_softc	*tp = dev->si_drv1;
42681863Sdes	struct ifnet		*ifp = tp->tap_ifp;
42779837Sdes	int			s;
42879837Sdes
42962216Sdes	/* junk all pending output */
43079837Sdes	IF_DRAIN(&ifp->if_snd);
43179837Sdes
43283217Sdes	/*
43379837Sdes	 * do not bring the interface down, and do not anything with
43479837Sdes	 * interface, if we are in VMnet mode. just close the device.
435230307Sdes	 */
43679837Sdes
43779837Sdes	mtx_lock(&tp->tap_mtx);
43883217Sdes	if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) {
43983217Sdes		mtx_unlock(&tp->tap_mtx);
44079837Sdes		s = splimp();
441125976Sdes		if_down(ifp);
442125976Sdes		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
44362216Sdes			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
44479837Sdes				rtinit(ifa, (int)RTM_DELETE, 0);
44583217Sdes			}
44662216Sdes			if_purgeaddrs(ifp);
447109702Sdes			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
448109702Sdes		}
449109702Sdes		splx(s);
450109702Sdes	} else
451109702Sdes		mtx_unlock(&tp->tap_mtx);
452109702Sdes
453109702Sdes	funsetown(&tp->tap_sigio);
454109702Sdes	selwakeuppri(&tp->tap_rsel, PZERO+1);
455109702Sdes	KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0);
456109702Sdes
45779837Sdes	mtx_lock(&tp->tap_mtx);
458201290Sru	tp->tap_flags &= ~TAP_OPEN;
459201290Sru	tp->tap_pid = 0;
460201290Sru	mtx_unlock(&tp->tap_mtx);
461201290Sru
462201290Sru	knlist_destroy(&tp->tap_rsel.si_note);
46379837Sdes
46479837Sdes	TAPDEBUG("%s is closed. minor = %#x\n",
46579837Sdes		ifp->if_xname, minor(dev));
46679837Sdes
46762216Sdes	return (0);
46879837Sdes} /* tapclose */
46979837Sdes
47079837Sdes
47179837Sdes/*
47279837Sdes * tapifinit
47379837Sdes *
47479837Sdes * network interface initialization function
47579837Sdes */
47679837Sdesstatic void
47769976Sdestapifinit(void *xtp)
47879837Sdes{
47979837Sdes	struct tap_softc	*tp = (struct tap_softc *)xtp;
48079837Sdes	struct ifnet		*ifp = tp->tap_ifp;
48179837Sdes
48279837Sdes	TAPDEBUG("initializing %s\n", ifp->if_xname);
48379837Sdes
48479837Sdes	ifp->if_drv_flags |= IFF_DRV_RUNNING;
48579837Sdes	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
48679837Sdes
48762216Sdes	/* attempt to start output */
48879837Sdes	tapifstart(ifp);
489181962Sobrien} /* tapifinit */
49079837Sdes
49179837Sdes
49279837Sdes/*
49379837Sdes * tapifioctl
49479837Sdes *
49579837Sdes * Process an ioctl request on network interface
49679837Sdes */
49779837Sdesstatic int
49862216Sdestapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
49979837Sdes{
500185912Sdes	struct tap_softc	*tp = (struct tap_softc *)(ifp->if_softc);
501185912Sdes	struct ifstat		*ifs = NULL;
50279837Sdes	int			 s, dummy;
50379837Sdes
50479837Sdes	switch (cmd) {
50579837Sdes		case SIOCSIFFLAGS: /* XXX -- just like vmnet does */
50679837Sdes		case SIOCADDMULTI:
507186124Smurray		case SIOCDELMULTI:
508186124Smurray			break;
509186124Smurray
510186124Smurray		case SIOCGIFSTATUS:
511186124Smurray			s = splimp();
512186124Smurray			ifs = (struct ifstat *)data;
513186124Smurray			dummy = strlen(ifs->ascii);
514186124Smurray			mtx_lock(&tp->tap_mtx);
51579837Sdes			if (tp->tap_pid != 0 && dummy < sizeof(ifs->ascii))
51662216Sdes				snprintf(ifs->ascii + dummy,
51779837Sdes					sizeof(ifs->ascii) - dummy,
51879837Sdes					"\tOpened by PID %d\n", tp->tap_pid);
51962216Sdes			mtx_unlock(&tp->tap_mtx);
52079837Sdes			splx(s);
52179837Sdes			break;
522106041Sdes
523106041Sdes		default:
524106041Sdes			s = splimp();
525106041Sdes			dummy = ether_ioctl(ifp, cmd, data);
526106043Sdes			splx(s);
527106041Sdes			return (dummy);
528106041Sdes			/* NOT REACHED */
529106041Sdes	}
530106041Sdes
53179837Sdes	return (0);
532106041Sdes} /* tapifioctl */
53379837Sdes
53479837Sdes
53579837Sdes/*
536125976Sdes * tapifstart
53779837Sdes *
53863345Sdes * queue packets from higher level ready to put out
53979837Sdes */
54079837Sdesstatic void
54179837Sdestapifstart(struct ifnet *ifp)
54279837Sdes{
54379837Sdes	struct tap_softc	*tp = ifp->if_softc;
54479837Sdes	int			 s;
54579837Sdes
54679837Sdes	TAPDEBUG("%s starting\n", ifp->if_xname);
54779837Sdes
54879837Sdes	/*
54979837Sdes	 * do not junk pending output if we are in VMnet mode.
55079837Sdes	 * XXX: can this do any harm because of queue overflow?
55179837Sdes	 */
55279837Sdes
55383217Sdes	mtx_lock(&tp->tap_mtx);
554133779Sdes	if (((tp->tap_flags & TAP_VMNET) == 0) &&
555133779Sdes	    ((tp->tap_flags & TAP_READY) != TAP_READY)) {
556134350Sdes		struct mbuf	*m = NULL;
557133779Sdes
558153919Sdes		mtx_unlock(&tp->tap_mtx);
559133779Sdes
560133779Sdes		/* Unlocked read. */
561133779Sdes		TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname,
562133779Sdes		    tp->tap_flags);
563133779Sdes
564133779Sdes		s = splimp();
565133779Sdes		do {
566153919Sdes			IF_DEQUEUE(&ifp->if_snd, m);
567153919Sdes			if (m != NULL)
568153919Sdes				m_freem(m);
569153919Sdes			ifp->if_oerrors ++;
57062216Sdes		} while (m != NULL);
57162216Sdes		splx(s);
57279837Sdes
573106041Sdes		return;
574106041Sdes	}
575106041Sdes	mtx_unlock(&tp->tap_mtx);
576106042Sdes
577106042Sdes	s = splimp();
578106041Sdes	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
579106041Sdes
580106041Sdes	if (ifp->if_snd.ifq_len != 0) {
581107353Sdes		mtx_lock(&tp->tap_mtx);
582339250Sdes		if (tp->tap_flags & TAP_RWAIT) {
583339250Sdes			tp->tap_flags &= ~TAP_RWAIT;
584339250Sdes			wakeup(tp);
585339250Sdes		}
586186124Smurray
587186124Smurray		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
588186124Smurray			mtx_unlock(&tp->tap_mtx);
589186124Smurray			pgsigio(&tp->tap_sigio, SIGIO, 0);
590186124Smurray		} else
59179837Sdes			mtx_unlock(&tp->tap_mtx);
59279837Sdes
59363345Sdes		selwakeuppri(&tp->tap_rsel, PZERO+1);
59479837Sdes		KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0);
59579837Sdes		ifp->if_opackets ++; /* obytes are counted in ether_output */
59679837Sdes	}
59779837Sdes
598107353Sdes	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
59979837Sdes	splx(s);
600125976Sdes} /* tapifstart */
601125976Sdes
60279837Sdes
60379837Sdes/*
60479837Sdes * tapioctl
60579837Sdes *
60679837Sdes * the cdevsw interface is now pretty minimal
60779837Sdes */
60879837Sdesstatic int
60979837Sdestapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
61079837Sdes{
61179837Sdes	struct tap_softc	*tp = dev->si_drv1;
61263568Sdes	struct ifnet		*ifp = tp->tap_ifp;
61363345Sdes	struct tapinfo		*tapp = NULL;
61479837Sdes	int			 s;
615106051Sdes	int			 f;
616107353Sdes
61779837Sdes	switch (cmd) {
61879837Sdes		case TAPSIFINFO:
619125976Sdes			s = splimp();
620125976Sdes			tapp = (struct tapinfo *)data;
62179837Sdes			ifp->if_mtu = tapp->mtu;
622125976Sdes			ifp->if_type = tapp->type;
623125976Sdes			ifp->if_baudrate = tapp->baudrate;
62462216Sdes			splx(s);
62562216Sdes			break;
62679837Sdes
62779837Sdes		case TAPGIFINFO:
62879837Sdes			tapp = (struct tapinfo *)data;
62979837Sdes			tapp->mtu = ifp->if_mtu;
63083217Sdes			tapp->type = ifp->if_type;
63179837Sdes			tapp->baudrate = ifp->if_baudrate;
63279837Sdes			break;
63379837Sdes
63479837Sdes		case TAPSDEBUG:
63579837Sdes			tapdebug = *(intptr_t *)data;
63679837Sdes			break;
63779837Sdes
63879837Sdes		case TAPGDEBUG:
63979837Sdes			*(intptr_t *)data = tapdebug;
64079837Sdes			break;
641225599Sdes
642225599Sdes		case FIONBIO:
643225599Sdes			break;
644225599Sdes
645225599Sdes		case FIOASYNC:
646225599Sdes			s = splimp();
647127941Sdes			mtx_lock(&tp->tap_mtx);
64879837Sdes			if (*(intptr_t *)data)
64979837Sdes				tp->tap_flags |= TAP_ASYNC;
65079837Sdes			else
65179837Sdes				tp->tap_flags &= ~TAP_ASYNC;
65279837Sdes			mtx_unlock(&tp->tap_mtx);
653125976Sdes			splx(s);
654125976Sdes			break;
655125976Sdes
65679837Sdes		case FIONREAD:
65779837Sdes			s = splimp();
65883217Sdes			if (ifp->if_snd.ifq_head) {
659225800Sdes				struct mbuf	*mb = ifp->if_snd.ifq_head;
66079837Sdes
66179837Sdes				for(*(intptr_t *)data = 0;mb != NULL;mb = mb->m_next)
66279837Sdes					*(intptr_t *)data += mb->m_len;
66383217Sdes			} else
66483217Sdes				*(intptr_t *)data = 0;
66583217Sdes			splx(s);
66683217Sdes			break;
66779837Sdes
66879837Sdes		case FIOSETOWN:
66983217Sdes			return (fsetown(*(intptr_t *)data, &tp->tap_sigio));
670251262Seadler
67183217Sdes		case FIOGETOWN:
672107353Sdes			*(intptr_t *)data = fgetown(&tp->tap_sigio);
67383217Sdes			return (0);
67483217Sdes
67583217Sdes		/* this is deprecated, FIOSETOWN should be used instead */
676225599Sdes		case TIOCSPGRP:
67783217Sdes			return (fsetown(-(*(intptr_t *)data), &tp->tap_sigio));
67879837Sdes
679225800Sdes		/* this is deprecated, FIOGETOWN should be used instead */
680225805Sdes		case TIOCGPGRP:
681225805Sdes			*(intptr_t *)data = -fgetown(&tp->tap_sigio);
682225800Sdes			return (0);
683225800Sdes
684225800Sdes		/* VMware/VMnet port ioctl's */
685225800Sdes
68683217Sdes		case SIOCGIFFLAGS:	/* get ifnet flags */
68779837Sdes			bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
68879837Sdes			break;
68979837Sdes
69079837Sdes		case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
691106043Sdes			f = *(intptr_t *)data;
69283217Sdes			f &= 0x0fff;
69379837Sdes			f &= ~IFF_CANTCHANGE;
69479837Sdes			f |= IFF_UP;
69579837Sdes
69679837Sdes			s = splimp();
69779837Sdes			ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
698106043Sdes			splx(s);
699109702Sdes			break;
70083217Sdes
70183217Sdes		case OSIOCGIFADDR:	/* get MAC address of the remote side */
70283217Sdes		case SIOCGIFADDR:
70383217Sdes			mtx_lock(&tp->tap_mtx);
70483217Sdes			bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
70583217Sdes			mtx_unlock(&tp->tap_mtx);
70683217Sdes			break;
707107353Sdes
70883217Sdes		case SIOCSIFADDR:	/* set MAC address of the remote side */
70983217Sdes			mtx_lock(&tp->tap_mtx);
71083217Sdes			bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
71183217Sdes			mtx_unlock(&tp->tap_mtx);
71283217Sdes			break;
71383217Sdes
71483217Sdes		default:
71583217Sdes			return (ENOTTY);
71683217Sdes	}
71783217Sdes	return (0);
71883217Sdes} /* tapioctl */
71983217Sdes
72083217Sdes
72183307Smike/*
722100834Sdes * tapread
723244037Seadler *
724244037Seadler * the cdevsw read interface - reads a packet at a time, or at
725244037Seadler * least as much of a packet as can be read
726244037Seadler */
727100834Sdesstatic int
728164152Sdestapread(struct cdev *dev, struct uio *uio, int flag)
729164152Sdes{
730100834Sdes	struct tap_softc	*tp = dev->si_drv1;
73183217Sdes	struct ifnet		*ifp = tp->tap_ifp;
732100834Sdes	struct mbuf		*m = NULL;
73383217Sdes	int			 error = 0, len, s;
73483217Sdes
73579837Sdes	TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, minor(dev));
73679837Sdes
73779837Sdes	mtx_lock(&tp->tap_mtx);
73879837Sdes	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
73979837Sdes		mtx_unlock(&tp->tap_mtx);
74062216Sdes
74179837Sdes		/* Unlocked read. */
74279837Sdes		TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n",
74363046Sdes			ifp->if_xname, minor(dev), tp->tap_flags);
74479837Sdes
74579837Sdes		return (EHOSTDOWN);
74679837Sdes	}
747261234Sdes
74879837Sdes	tp->tap_flags &= ~TAP_RWAIT;
749106041Sdes	mtx_unlock(&tp->tap_mtx);
750137854Scperciva
751137854Scperciva	/* sleep until we get a packet */
75279837Sdes	do {
75379837Sdes		s = splimp();
75479837Sdes		IF_DEQUEUE(&ifp->if_snd, m);
75579837Sdes		splx(s);
75679837Sdes
75779837Sdes		if (m == NULL) {
75879837Sdes			if (flag & O_NONBLOCK)
759230307Sdes				return (EWOULDBLOCK);
760230307Sdes
761230307Sdes			mtx_lock(&tp->tap_mtx);
762230307Sdes			tp->tap_flags |= TAP_RWAIT;
763230307Sdes			mtx_unlock(&tp->tap_mtx);
764106041Sdes			error = tsleep(tp,PCATCH|(PZERO+1),"taprd",0);
76579837Sdes			if (error)
766230307Sdes				return (error);
76779837Sdes		}
76879837Sdes	} while (m == NULL);
769230307Sdes
770230307Sdes	/* feed packet to bpf */
771230307Sdes	BPF_MTAP(ifp, m);
772230307Sdes
773106041Sdes	/* xfer packet to user space */
77479837Sdes	while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) {
77579837Sdes		len = min(uio->uio_resid, m->m_len);
77679837Sdes		if (len == 0)
77779837Sdes			break;
778230307Sdes
77979837Sdes		error = uiomove(mtod(m, void *), len, uio);
78077241Sdes		m = m_free(m);
781106041Sdes	}
782106041Sdes
78379837Sdes	if (m != NULL) {
78479837Sdes		TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname,
78579837Sdes			minor(dev));
78662216Sdes		m_freem(m);
787106041Sdes	}
788106041Sdes
789106041Sdes	return (error);
790106041Sdes} /* tapread */
791106041Sdes
792106041Sdes
79379837Sdes/*
794106857Sdes * tapwrite
795106857Sdes *
79679837Sdes * the cdevsw write interface - an atomic write is a packet - or else!
797106043Sdes */
79879837Sdesstatic int
79979837Sdestapwrite(struct cdev *dev, struct uio *uio, int flag)
80079837Sdes{
80179837Sdes	struct tap_softc	*tp = dev->si_drv1;
80290729Sdes	struct ifnet		*ifp = tp->tap_ifp;
80390729Sdes	struct mbuf		*m;
80479837Sdes	int			 error = 0;
80563015Sdes
80679837Sdes	TAPDEBUG("%s writting, minor = %#x\n",
80779837Sdes		ifp->if_xname, minor(dev));
80879837Sdes
80979837Sdes	if (uio->uio_resid == 0)
81079837Sdes		return (0);
81179837Sdes
81279837Sdes	if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
81363046Sdes		TAPDEBUG("%s invalid packet len = %d, minor = %#x\n",
814106586Sfenner			ifp->if_xname, uio->uio_resid, minor(dev));
815106586Sfenner
816106586Sfenner		return (EIO);
817106586Sfenner	}
81879837Sdes
81979837Sdes	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, ETHER_ALIGN)) == NULL) {
82079837Sdes		ifp->if_ierrors ++;
82179837Sdes		return (error);
82279837Sdes	}
82379837Sdes
82479837Sdes	m->m_pkthdr.rcvif = ifp;
82579837Sdes
82679837Sdes	/* Pass packet up to parent. */
82779837Sdes	(*ifp->if_input)(ifp, m);
82879837Sdes	ifp->if_ipackets ++; /* ibytes are counted in parent */
82979837Sdes
830125976Sdes	return (0);
831125976Sdes} /* tapwrite */
83279837Sdes
83379837Sdes
83479837Sdes/*
83579837Sdes * tappoll
83679837Sdes *
83779837Sdes * the poll interface, this is only useful on reads
83879837Sdes * really. the write detect always returns true, write never blocks
83979837Sdes * anyway, it either accepts the packet or drops it
84079837Sdes */
84179837Sdesstatic int
84279837Sdestappoll(struct cdev *dev, int events, struct thread *td)
84379837Sdes{
84462216Sdes	struct tap_softc	*tp = dev->si_drv1;
84579837Sdes	struct ifnet		*ifp = tp->tap_ifp;
84683217Sdes	int			 s, revents = 0;
84783217Sdes
84883217Sdes	TAPDEBUG("%s polling, minor = %#x\n",
84983217Sdes		ifp->if_xname, minor(dev));
85079837Sdes
85162216Sdes	s = splimp();
85279837Sdes	if (events & (POLLIN | POLLRDNORM)) {
85379837Sdes		if (ifp->if_snd.ifq_len > 0) {
85483217Sdes			TAPDEBUG("%s have data in queue. len = %d, " \
85583217Sdes				"minor = %#x\n", ifp->if_xname,
85683217Sdes				ifp->if_snd.ifq_len, minor(dev));
85763046Sdes
85879837Sdes			revents |= (events & (POLLIN | POLLRDNORM));
85979837Sdes		} else {
86062216Sdes			TAPDEBUG("%s waiting for data, minor = %#x\n",
86179837Sdes				ifp->if_xname, minor(dev));
86279837Sdes
86379837Sdes			selrecord(td, &tp->tap_rsel);
86479837Sdes		}
86579837Sdes	}
86679837Sdes
86783217Sdes	if (events & (POLLOUT | POLLWRNORM))
86883217Sdes		revents |= (events & (POLLOUT | POLLWRNORM));
869132695Sdes
87062216Sdes	splx(s);
87162216Sdes	return (revents);
87279837Sdes} /* tappoll */
87362216Sdes
87462216Sdes
875253680Sdes/*
876280630Sjkim * tap_kqfilter
877280630Sjkim *
878280630Sjkim * support for kevent() system call
879280630Sjkim */
880280630Sjkimstatic int
881253680Sdestapkqfilter(struct cdev *dev, struct knote *kn)
882280630Sjkim{
883280630Sjkim    	int			 s;
884280630Sjkim	struct tap_softc	*tp = dev->si_drv1;
885280630Sjkim	struct ifnet		*ifp = tp->tap_ifp;
886280630Sjkim
887253680Sdes	s = splimp();
88862216Sdes	switch (kn->kn_filter) {
88962216Sdes	case EVFILT_READ:
89062216Sdes		TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n",
89181863Sdes			ifp->if_xname, minor(dev));
89281863Sdes		kn->kn_fop = &tap_read_filterops;
89381863Sdes		break;
89462216Sdes
89562216Sdes	case EVFILT_WRITE:
89662216Sdes		TAPDEBUG("%s kqfilter: EVFILT_WRITE, minor = %#x\n",
89779837Sdes			ifp->if_xname, minor(dev));
89879837Sdes		kn->kn_fop = &tap_write_filterops;
89979837Sdes		break;
900100834Sdes
90179837Sdes	default:
90262216Sdes		TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n",
903253680Sdes			ifp->if_xname, minor(dev));
904253680Sdes		splx(s);
905253680Sdes		return (EINVAL);
906253680Sdes		/* NOT REACHED */
90779837Sdes	}
90879837Sdes	splx(s);
90979837Sdes
91079837Sdes	kn->kn_hook = (caddr_t) dev;
91179837Sdes	knlist_add(&tp->tap_rsel.si_note, kn, 0);
91279837Sdes
91379837Sdes	return (0);
91479837Sdes} /* tapkqfilter */
91579837Sdes
91679837Sdes
91779837Sdes/*
91879837Sdes * tap_kqread
91979837Sdes *
92079837Sdes * Return true if there is data in the interface queue
92179837Sdes */
92279837Sdesstatic int
92379837Sdestapkqread(struct knote *kn, long hint)
924100834Sdes{
925100834Sdes	int			 ret, s;
92680521Sse	struct cdev		*dev = (struct cdev *)(kn->kn_hook);
92779837Sdes	struct tap_softc	*tp = dev->si_drv1;
92879837Sdes	struct ifnet		*ifp = tp->tap_ifp;
92979837Sdes
93079837Sdes	s = splimp();
93179837Sdes	if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
93279837Sdes		TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n",
93379837Sdes			ifp->if_xname, ifp->if_snd.ifq_len, minor(dev));
93479837Sdes		ret = 1;
93579837Sdes	} else {
93679837Sdes		TAPDEBUG("%s waiting for data, minor = %#x\n",
93779837Sdes			ifp->if_xname, minor(dev));
93879837Sdes		ret = 0;
93979837Sdes	}
94079837Sdes	splx(s);
94179837Sdes
94279837Sdes	return (ret);
94379837Sdes} /* tapkqread */
94479837Sdes
94593213Scharnier
94679837Sdes/*
94779837Sdes * tap_kqwrite
94879837Sdes *
94979837Sdes * Always can write. Return the MTU in kn->data
95079837Sdes */
951186124Smurraystatic int
952186124Smurraytapkqwrite(struct knote *kn, long hint)
953186124Smurray{
954186124Smurray	int			 s;
95579837Sdes	struct tap_softc	*tp = ((struct cdev *) kn->kn_hook)->si_drv1;
95679837Sdes	struct ifnet		*ifp = tp->tap_ifp;
95779837Sdes
95879837Sdes	s = splimp();
95979837Sdes	kn->kn_data = ifp->if_mtu;
96079837Sdes	splx(s);
96179837Sdes
96279837Sdes	return (1);
96379837Sdes} /* tapkqwrite */
96479837Sdes
96579837Sdes
96679837Sdesstatic void
96779837Sdestapkqdetach(struct knote *kn)
96879837Sdes{
969109702Sdes	struct tap_softc	*tp = ((struct cdev *) kn->kn_hook)->si_drv1;
970109702Sdes
971109702Sdes	knlist_remove(&tp->tap_rsel.si_note, kn, 0);
97279837Sdes} /* tapkqdetach */
97379837Sdes
97479837Sdes