/* if_tap.c revision 164033 */
1/*-
2 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * BASED ON:
27 * -------------------------------------------------------------------------
28 *
29 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
30 * Nottingham University 1987.
31 */
32
33/*
34 * $FreeBSD: head/sys/net/if_tap.c 164033 2006-11-06 13:42:10Z rwatson $
35 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
36 */
37
38#include "opt_compat.h"
39#include "opt_inet.h"
40
41#include <sys/param.h>
42#include <sys/conf.h>
43#include <sys/fcntl.h>
44#include <sys/filio.h>
45#include <sys/kernel.h>
46#include <sys/malloc.h>
47#include <sys/mbuf.h>
48#include <sys/module.h>
49#include <sys/poll.h>
50#include <sys/priv.h>
51#include <sys/proc.h>
52#include <sys/selinfo.h>
53#include <sys/signalvar.h>
54#include <sys/socket.h>
55#include <sys/sockio.h>
56#include <sys/sysctl.h>
57#include <sys/systm.h>
58#include <sys/ttycom.h>
59#include <sys/uio.h>
60#include <sys/queue.h>
61
62#include <net/bpf.h>
63#include <net/ethernet.h>
64#include <net/if.h>
65#include <net/if_dl.h>
66#include <net/route.h>
67#include <net/if_types.h>
68
69#include <netinet/in.h>
70
71#include <net/if_tapvar.h>
72#include <net/if_tap.h>
73
74
#define CDEV_NAME	"tap"
/* printf-style debug macro; always compiled in, gated at runtime by tapdebug */
#define TAPDEBUG	if (tapdebug) printf

#define TAP		"tap"
#define VMNET		"vmnet"
/* largest unit number representable in the clone unit space */
#define TAPMAXUNIT	0x7fff
/* clone-flag bit distinguishing vmnet devices from plain tap devices */
#define VMNET_DEV_MASK	CLONE_FLAG0

/* module */
static int		tapmodevent(module_t, int, void *);

/* device */
static void		tapclone(void *, struct ucred *, char *, int,
			    struct cdev **);
static void		tapcreate(struct cdev *);

/* network interface */
static void		tapifstart(struct ifnet *);
static int		tapifioctl(struct ifnet *, u_long, caddr_t);
static void		tapifinit(void *);

/* character device */
static d_open_t		tapopen;
static d_close_t	tapclose;
static d_read_t		tapread;
static d_write_t	tapwrite;
static d_ioctl_t	tapioctl;
static d_poll_t		tappoll;
static d_kqfilter_t	tapkqfilter;

/* kqueue(2) */
static int		tapkqread(struct knote *, long);
static int		tapkqwrite(struct knote *, long);
static void		tapkqdetach(struct knote *);
109
/* kqueue(2) read filter: fires when packets are queued on if_snd */
static struct filterops	tap_read_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	tapkqdetach,
	.f_event =	tapkqread,
};
116
/* kqueue(2) write filter: writes never block, so this is always ready */
static struct filterops	tap_write_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	tapkqdetach,
	.f_event =	tapkqwrite,
};
123
/* character device switch for /dev/tapN and /dev/vmnetN; runs under Giant */
static struct cdevsw	tap_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_PSEUDO | D_NEEDGIANT,
	.d_open =	tapopen,
	.d_close =	tapclose,
	.d_read =	tapread,
	.d_write =	tapwrite,
	.d_ioctl =	tapioctl,
	.d_poll =	tappoll,
	.d_name =	CDEV_NAME,
	.d_kqfilter =	tapkqfilter,
};
136
137/*
138 * All global variables in if_tap.c are locked with tapmtx, with the
139 * exception of tapdebug, which is accessed unlocked; tapclones is
140 * static at runtime.
141 */
static struct mtx		tapmtx;              /* protects taphead and global state */
static int			tapdebug = 0;        /* debug flag   */
static int			tapuopen = 0;        /* allow user open() */
static SLIST_HEAD(, tap_softc)	taphead;             /* first device */
static struct clonedevs 	*tapclones;

MALLOC_DECLARE(M_TAP);
MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
/* debug.if_tap_debug: legacy alias for net.link.tap.debug (same variable) */
SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");

SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
    "Ethernet tunnel software network interface");
/* net.link.tap.user_open: permit non-superuser open() when non-zero */
SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
	"Allow user to open /dev/tap (based on node permissions)");
SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");

DEV_MODULE(if_tap, tapmodevent, NULL);
160
/*
 * tapmodevent
 *
 * Module event handler.  MOD_LOAD initializes global state and hooks
 * the devfs clone handler; MOD_UNLOAD refuses (EBUSY) if any device is
 * open, then detaches and frees every created interface/device.
 */
static int
tapmodevent(module_t mod, int type, void *data)
{
	static eventhandler_tag	 eh_tag = NULL;
	struct tap_softc	*tp = NULL;
	struct ifnet		*ifp = NULL;
	int			 s;

	switch (type) {
	case MOD_LOAD:

		/* initialize device */

		mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF);
		SLIST_INIT(&taphead);

		clone_setup(&tapclones);
		eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000);
		if (eh_tag == NULL) {
			/* undo partial setup before failing the load */
			clone_cleanup(&tapclones);
			mtx_destroy(&tapmtx);
			return (ENOMEM);
		}
		return (0);

	case MOD_UNLOAD:
		/*
		 * The EBUSY algorithm here can't quite atomically
		 * guarantee that this is race-free since we have to
		 * release the tap mtx to deregister the clone handler.
		 */
		mtx_lock(&tapmtx);
		SLIST_FOREACH(tp, &taphead, tap_next) {
			mtx_lock(&tp->tap_mtx);
			if (tp->tap_flags & TAP_OPEN) {
				mtx_unlock(&tp->tap_mtx);
				mtx_unlock(&tapmtx);
				return (EBUSY);
			}
			mtx_unlock(&tp->tap_mtx);
		}
		mtx_unlock(&tapmtx);

		EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);

		/* drop tapmtx around the detach work; re-take to pop the next entry */
		mtx_lock(&tapmtx);
		while ((tp = SLIST_FIRST(&taphead)) != NULL) {
			SLIST_REMOVE_HEAD(&taphead, tap_next);
			mtx_unlock(&tapmtx);

			ifp = tp->tap_ifp;

			TAPDEBUG("detaching %s\n", ifp->if_xname);

			/* Unlocked read. */
			KASSERT(!(tp->tap_flags & TAP_OPEN),
				("%s flags is out of sync", ifp->if_xname));

			knlist_destroy(&tp->tap_rsel.si_note);
			destroy_dev(tp->tap_dev);
			s = splimp();
			ether_ifdetach(ifp);
			if_free_type(ifp, IFT_ETHER);
			splx(s);

			mtx_destroy(&tp->tap_mtx);
			free(tp, M_TAP);
			mtx_lock(&tapmtx);
		}
		mtx_unlock(&tapmtx);
		clone_cleanup(&tapclones);

		mtx_destroy(&tapmtx);

		break;

	default:
		return (EOPNOTSUPP);
	}

	return (0);
} /* tapmodevent */
248
249
/*
 * DEVFS handler
 *
 * We need to support two kinds of devices - tap and vmnet.  Accepts
 * the bare names "tap"/"vmnet" (next free unit) or "tapN"/"vmnetN",
 * and creates the character device on demand.
 */
static void
tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev)
{
	u_int		extra;
	int		i, unit;
	char		*device_name = name;

	/* another clone handler already satisfied this lookup */
	if (*dev != NULL)
		return;

	device_name = TAP;
	extra = 0;
	if (strcmp(name, TAP) == 0) {
		/* bare "tap": let clone_create() pick the unit */
		unit = -1;
	} else if (strcmp(name, VMNET) == 0) {
		/* bare "vmnet": tag the unit with the vmnet flag bit */
		device_name = VMNET;
		extra = VMNET_DEV_MASK;
		unit = -1;
	} else if (dev_stdclone(name, NULL, device_name, &unit) != 1) {
		/* not "tapN"; try "vmnetN" before giving up */
		device_name = VMNET;
		extra = VMNET_DEV_MASK;
		if (dev_stdclone(name, NULL, device_name, &unit) != 1)
			return;
	}

	/* find any existing device, or allocate new unit number */
	i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra);
	if (i) {
		*dev = make_dev(&tap_cdevsw, unit2minor(unit | extra),
		     UID_ROOT, GID_WHEEL, 0600, "%s%d", device_name, unit);
		if (*dev != NULL) {
			dev_ref(*dev);
			/* cheap clone: reclaimable until tapcreate() clears the flag */
			(*dev)->si_flags |= SI_CHEAPCLONE;
		}
	}
} /* tapclone */
291
292
293/*
294 * tapcreate
295 *
296 * to create interface
297 */
298static void
299tapcreate(struct cdev *dev)
300{
301	struct ifnet		*ifp = NULL;
302	struct tap_softc	*tp = NULL;
303	unsigned short		 macaddr_hi;
304	int			 unit, s;
305	char			*name = NULL;
306	u_char			eaddr[6];
307
308	dev->si_flags &= ~SI_CHEAPCLONE;
309
310	/* allocate driver storage and create device */
311	MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
312	mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
313	mtx_lock(&tapmtx);
314	SLIST_INSERT_HEAD(&taphead, tp, tap_next);
315	mtx_unlock(&tapmtx);
316
317	unit = dev2unit(dev);
318
319	/* select device: tap or vmnet */
320	if (unit & VMNET_DEV_MASK) {
321		name = VMNET;
322		tp->tap_flags |= TAP_VMNET;
323	} else
324		name = TAP;
325
326	unit &= TAPMAXUNIT;
327
328	TAPDEBUG("tapcreate(%s%d). minor = %#x\n", name, unit, minor(dev));
329
330	/* generate fake MAC address: 00 bd xx xx xx unit_no */
331	macaddr_hi = htons(0x00bd);
332	bcopy(&macaddr_hi, eaddr, sizeof(short));
333	bcopy(&ticks, &eaddr[2], sizeof(long));
334	eaddr[5] = (u_char)unit;
335
336	/* fill the rest and attach interface */
337	ifp = tp->tap_ifp = if_alloc(IFT_ETHER);
338	if (ifp == NULL)
339		panic("%s%d: can not if_alloc()", name, unit);
340	ifp->if_softc = tp;
341	if_initname(ifp, name, unit);
342	ifp->if_init = tapifinit;
343	ifp->if_start = tapifstart;
344	ifp->if_ioctl = tapifioctl;
345	ifp->if_mtu = ETHERMTU;
346	ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
347	ifp->if_snd.ifq_maxlen = ifqmaxlen;
348
349	dev->si_drv1 = tp;
350	tp->tap_dev = dev;
351
352	s = splimp();
353	ether_ifattach(ifp, eaddr);
354	splx(s);
355
356	mtx_lock(&tp->tap_mtx);
357	tp->tap_flags |= TAP_INITED;
358	mtx_unlock(&tp->tap_mtx);
359
360	knlist_init(&tp->tap_rsel.si_note, NULL, NULL, NULL, NULL);
361
362	TAPDEBUG("interface %s is created. minor = %#x\n",
363		ifp->if_xname, minor(dev));
364} /* tapcreate */
365
366
/*
 * tapopen
 *
 * Open the tunnel device.  Unless net.link.tap.user_open is set, the
 * caller must pass the PRIV_NET_TAP privilege check.  Creates the
 * backing interface on first open; a second concurrent open returns
 * EBUSY.
 */
static int
tapopen(struct cdev *dev, int flag, int mode, struct thread *td)
{
	struct tap_softc	*tp = NULL;
	struct ifnet		*ifp = NULL;
	int			 error, s;

	if (tapuopen == 0) {
		error = priv_check(td, PRIV_NET_TAP);
		if (error)
			return (error);
	}

	if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT)
		return (ENXIO);

	/*
	 * XXXRW: Non-atomic test-and-set of si_drv1.  Currently protected
	 * by Giant, but the race actually exists under memory pressure as
	 * well even when running with Giant, as malloc() may sleep.
	 */
	tp = dev->si_drv1;
	if (tp == NULL) {
		tapcreate(dev);
		tp = dev->si_drv1;
	}

	mtx_lock(&tp->tap_mtx);
	if (tp->tap_flags & TAP_OPEN) {
		mtx_unlock(&tp->tap_mtx);
		return (EBUSY);
	}

	/* snapshot the interface MAC so SIOCGIFADDR works on the control fd */
	bcopy(IF_LLADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr));
	tp->tap_pid = td->td_proc->p_pid;
	tp->tap_flags |= TAP_OPEN;
	ifp = tp->tap_ifp;
	mtx_unlock(&tp->tap_mtx);

	s = splimp();
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	splx(s);

	TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, minor(dev));

	return (0);
} /* tapopen */
420
421
/*
 * tapclose
 *
 * Close the device - mark i/f down & delete routing info.  Pending
 * output is discarded, waiters are woken, and the open/PID state is
 * cleared.
 */
static int
tapclose(struct cdev *dev, int foo, int bar, struct thread *td)
{
	struct ifaddr		*ifa;
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = tp->tap_ifp;
	int			s;

	/* junk all pending output */
	IF_DRAIN(&ifp->if_snd);

	/*
	 * do not bring the interface down, and do not do anything with
	 * the interface, if we are in VMnet mode. just close the device.
	 */

	mtx_lock(&tp->tap_mtx);
	if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) {
		mtx_unlock(&tp->tap_mtx);
		s = splimp();
		if_down(ifp);
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			/* flush routes and addresses before marking stopped */
			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
				rtinit(ifa, (int)RTM_DELETE, 0);
			}
			if_purgeaddrs(ifp);
			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		}
		splx(s);
	} else
		mtx_unlock(&tp->tap_mtx);

	/* release SIGIO ownership and wake select()/kevent() waiters */
	funsetown(&tp->tap_sigio);
	selwakeuppri(&tp->tap_rsel, PZERO+1);
	KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0);

	mtx_lock(&tp->tap_mtx);
	tp->tap_flags &= ~TAP_OPEN;
	tp->tap_pid = 0;
	mtx_unlock(&tp->tap_mtx);

	TAPDEBUG("%s is closed. minor = %#x\n",
		ifp->if_xname, minor(dev));

	return (0);
} /* tapclose */
473
474
475/*
476 * tapifinit
477 *
478 * network interface initialization function
479 */
480static void
481tapifinit(void *xtp)
482{
483	struct tap_softc	*tp = (struct tap_softc *)xtp;
484	struct ifnet		*ifp = tp->tap_ifp;
485
486	TAPDEBUG("initializing %s\n", ifp->if_xname);
487
488	ifp->if_drv_flags |= IFF_DRV_RUNNING;
489	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
490
491	/* attempt to start output */
492	tapifstart(ifp);
493} /* tapifinit */
494
495
/*
 * tapifioctl
 *
 * Process an ioctl request on the network interface.  Flag and
 * multicast requests are accepted as no-ops (as vmnet does);
 * SIOCGIFSTATUS appends the controlling PID; everything else is
 * handed to ether_ioctl().
 */
static int
tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tap_softc	*tp = ifp->if_softc;
	struct ifstat		*ifs = NULL;
	int			 s, dummy;

	switch (cmd) {
		case SIOCSIFFLAGS: /* XXX -- just like vmnet does */
		case SIOCADDMULTI:
		case SIOCDELMULTI:
			break;

		case SIOCGIFSTATUS:
			s = splimp();
			ifs = (struct ifstat *)data;
			dummy = strlen(ifs->ascii);
			mtx_lock(&tp->tap_mtx);
			/* append owning PID only if there is room left in ascii[] */
			if (tp->tap_pid != 0 && dummy < sizeof(ifs->ascii))
				snprintf(ifs->ascii + dummy,
					sizeof(ifs->ascii) - dummy,
					"\tOpened by PID %d\n", tp->tap_pid);
			mtx_unlock(&tp->tap_mtx);
			splx(s);
			break;

		default:
			s = splimp();
			dummy = ether_ioctl(ifp, cmd, data);
			splx(s);
			return (dummy);
			/* NOT REACHED */
	}

	return (0);
} /* tapifioctl */
537
538
539/*
540 * tapifstart
541 *
542 * queue packets from higher level ready to put out
543 */
544static void
545tapifstart(struct ifnet *ifp)
546{
547	struct tap_softc	*tp = ifp->if_softc;
548	int			 s;
549
550	TAPDEBUG("%s starting\n", ifp->if_xname);
551
552	/*
553	 * do not junk pending output if we are in VMnet mode.
554	 * XXX: can this do any harm because of queue overflow?
555	 */
556
557	mtx_lock(&tp->tap_mtx);
558	if (((tp->tap_flags & TAP_VMNET) == 0) &&
559	    ((tp->tap_flags & TAP_READY) != TAP_READY)) {
560		struct mbuf	*m = NULL;
561
562		mtx_unlock(&tp->tap_mtx);
563
564		/* Unlocked read. */
565		TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname,
566		    tp->tap_flags);
567
568		s = splimp();
569		do {
570			IF_DEQUEUE(&ifp->if_snd, m);
571			if (m != NULL)
572				m_freem(m);
573			ifp->if_oerrors ++;
574		} while (m != NULL);
575		splx(s);
576
577		return;
578	}
579	mtx_unlock(&tp->tap_mtx);
580
581	s = splimp();
582	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
583
584	if (ifp->if_snd.ifq_len != 0) {
585		mtx_lock(&tp->tap_mtx);
586		if (tp->tap_flags & TAP_RWAIT) {
587			tp->tap_flags &= ~TAP_RWAIT;
588			wakeup(tp);
589		}
590
591		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
592			mtx_unlock(&tp->tap_mtx);
593			pgsigio(&tp->tap_sigio, SIGIO, 0);
594		} else
595			mtx_unlock(&tp->tap_mtx);
596
597		selwakeuppri(&tp->tap_rsel, PZERO+1);
598		KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0);
599		ifp->if_opackets ++; /* obytes are counted in ether_output */
600	}
601
602	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
603	splx(s);
604} /* tapifstart */
605
606
/*
 * tapioctl
 *
 * The cdevsw (control device) ioctl interface: tap parameter get/set,
 * async-I/O ownership, and the VMware/VMnet compatibility requests.
 */
static int
tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = tp->tap_ifp;
	struct tapinfo		*tapp = NULL;
	int			 s;
	int			 f;
#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD4)
	int			 ival;
#endif

	switch (cmd) {
		case TAPSIFINFO:	/* set interface mtu/type/baudrate */
			s = splimp();
			tapp = (struct tapinfo *)data;
			ifp->if_mtu = tapp->mtu;
			ifp->if_type = tapp->type;
			ifp->if_baudrate = tapp->baudrate;
			splx(s);
			break;

		case TAPGIFINFO:	/* get interface mtu/type/baudrate */
			tapp = (struct tapinfo *)data;
			tapp->mtu = ifp->if_mtu;
			tapp->type = ifp->if_type;
			tapp->baudrate = ifp->if_baudrate;
			break;

		case TAPSDEBUG:		/* set (global) debug flag */
			tapdebug = *(int *)data;
			break;

		case TAPGDEBUG:		/* get (global) debug flag */
			*(int *)data = tapdebug;
			break;

		case FIONBIO:
			/* non-blocking handled via O_NONBLOCK in tapread() */
			break;

		case FIOASYNC:		/* enable/disable SIGIO delivery */
			s = splimp();
			mtx_lock(&tp->tap_mtx);
			if (*(int *)data)
				tp->tap_flags |= TAP_ASYNC;
			else
				tp->tap_flags &= ~TAP_ASYNC;
			mtx_unlock(&tp->tap_mtx);
			splx(s);
			break;

		case FIONREAD:
			/* report byte length of the first queued frame only */
			s = splimp();
			if (ifp->if_snd.ifq_head) {
				struct mbuf	*mb = ifp->if_snd.ifq_head;

				for(*(int *)data = 0;mb != NULL;mb = mb->m_next)
					*(int *)data += mb->m_len;
			} else
				*(int *)data = 0;
			splx(s);
			break;

		case FIOSETOWN:
			return (fsetown(*(int *)data, &tp->tap_sigio));

		case FIOGETOWN:
			*(int *)data = fgetown(&tp->tap_sigio);
			return (0);

		/* this is deprecated, FIOSETOWN should be used instead */
		case TIOCSPGRP:
			return (fsetown(-(*(int *)data), &tp->tap_sigio));

		/* this is deprecated, FIOGETOWN should be used instead */
		case TIOCGPGRP:
			*(int *)data = -fgetown(&tp->tap_sigio);
			return (0);

		/* VMware/VMnet port ioctl's */

		case SIOCGIFFLAGS:	/* get ifnet flags */
			bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
			break;

#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD4)
		case _IO('V', 0):
			/* old VMIO_SIOCSIFFLAGS encoded the value in the cmd */
			ival = IOCPARM_IVAL(data);
			data = (caddr_t)&ival;
			/* FALLTHROUGH */
#endif
		case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
			/* sanitize caller-supplied flags; always force IFF_UP */
			f = *(int *)data;
			f &= 0x0fff;
			f &= ~IFF_CANTCHANGE;
			f |= IFF_UP;

			s = splimp();
			ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
			splx(s);
			break;

		case OSIOCGIFADDR:	/* get MAC address of the remote side */
		case SIOCGIFADDR:
			mtx_lock(&tp->tap_mtx);
			bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
			mtx_unlock(&tp->tap_mtx);
			break;

		case SIOCSIFADDR:	/* set MAC address of the remote side */
			mtx_lock(&tp->tap_mtx);
			bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
			mtx_unlock(&tp->tap_mtx);
			break;

		default:
			return (ENOTTY);
	}
	return (0);
} /* tapioctl */
734
735
/*
 * tapread
 *
 * The cdevsw read interface - returns one queued frame per call, or
 * as much of it as fits in the user buffer (any remainder is
 * dropped).  Blocks until a frame arrives unless O_NONBLOCK is set.
 */
static int
tapread(struct cdev *dev, struct uio *uio, int flag)
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = tp->tap_ifp;
	struct mbuf		*m = NULL;
	int			 error = 0, len, s;

	TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, minor(dev));

	mtx_lock(&tp->tap_mtx);
	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
		mtx_unlock(&tp->tap_mtx);

		/* Unlocked read. */
		TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n",
			ifp->if_xname, minor(dev), tp->tap_flags);

		return (EHOSTDOWN);
	}

	tp->tap_flags &= ~TAP_RWAIT;
	mtx_unlock(&tp->tap_mtx);

	/* sleep until we get a packet */
	do {
		s = splimp();
		IF_DEQUEUE(&ifp->if_snd, m);
		splx(s);

		if (m == NULL) {
			if (flag & O_NONBLOCK)
				return (EWOULDBLOCK);

			/* announce we are waiting; tapifstart() will wake us */
			mtx_lock(&tp->tap_mtx);
			tp->tap_flags |= TAP_RWAIT;
			mtx_unlock(&tp->tap_mtx);
			error = tsleep(tp,PCATCH|(PZERO+1),"taprd",0);
			if (error)
				return (error);
		}
	} while (m == NULL);

	/* feed packet to bpf */
	BPF_MTAP(ifp, m);

	/* xfer packet to user space */
	while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) {
		len = min(uio->uio_resid, m->m_len);
		if (len == 0)
			break;

		error = uiomove(mtod(m, void *), len, uio);
		m = m_free(m);
	}

	if (m != NULL) {
		/* user buffer was smaller than the frame; tail is discarded */
		TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname,
			minor(dev));
		m_freem(m);
	}

	return (error);
} /* tapread */
806
807
808/*
809 * tapwrite
810 *
811 * the cdevsw write interface - an atomic write is a packet - or else!
812 */
813static int
814tapwrite(struct cdev *dev, struct uio *uio, int flag)
815{
816	struct tap_softc	*tp = dev->si_drv1;
817	struct ifnet		*ifp = tp->tap_ifp;
818	struct mbuf		*m;
819
820	TAPDEBUG("%s writting, minor = %#x\n",
821		ifp->if_xname, minor(dev));
822
823	if (uio->uio_resid == 0)
824		return (0);
825
826	if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
827		TAPDEBUG("%s invalid packet len = %d, minor = %#x\n",
828			ifp->if_xname, uio->uio_resid, minor(dev));
829
830		return (EIO);
831	}
832
833	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, ETHER_ALIGN,
834	    M_PKTHDR)) == NULL) {
835		ifp->if_ierrors ++;
836		return (ENOBUFS);
837	}
838
839	m->m_pkthdr.rcvif = ifp;
840
841	/* Pass packet up to parent. */
842	(*ifp->if_input)(ifp, m);
843	ifp->if_ipackets ++; /* ibytes are counted in parent */
844
845	return (0);
846} /* tapwrite */
847
848
/*
 * tappoll
 *
 * The poll interface, this is only useful on reads really.  The write
 * detect always returns true; write never blocks anyway, it either
 * accepts the packet or drops it.
 */
static int
tappoll(struct cdev *dev, int events, struct thread *td)
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = tp->tap_ifp;
	int			 s, revents = 0;

	TAPDEBUG("%s polling, minor = %#x\n",
		ifp->if_xname, minor(dev));

	s = splimp();
	if (events & (POLLIN | POLLRDNORM)) {
		if (ifp->if_snd.ifq_len > 0) {
			/* readable: at least one frame queued */
			TAPDEBUG("%s have data in queue. len = %d, " \
				"minor = %#x\n", ifp->if_xname,
				ifp->if_snd.ifq_len, minor(dev));

			revents |= (events & (POLLIN | POLLRDNORM));
		} else {
			/* nothing queued; register for wakeup on arrival */
			TAPDEBUG("%s waiting for data, minor = %#x\n",
				ifp->if_xname, minor(dev));

			selrecord(td, &tp->tap_rsel);
		}
	}

	/* writes never block */
	if (events & (POLLOUT | POLLWRNORM))
		revents |= (events & (POLLOUT | POLLWRNORM));

	splx(s);
	return (revents);
} /* tappoll */
888
889
/*
 * tap_kqfilter
 *
 * support for kevent() system call: pick the read or write filterops
 * and attach the knote to the softc's note list.
 */
static int
tapkqfilter(struct cdev *dev, struct knote *kn)
{
    	int			 s;
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = tp->tap_ifp;

	s = splimp();
	switch (kn->kn_filter) {
	case EVFILT_READ:
		TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n",
			ifp->if_xname, minor(dev));
		kn->kn_fop = &tap_read_filterops;
		break;

	case EVFILT_WRITE:
		TAPDEBUG("%s kqfilter: EVFILT_WRITE, minor = %#x\n",
			ifp->if_xname, minor(dev));
		kn->kn_fop = &tap_write_filterops;
		break;

	default:
		TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n",
			ifp->if_xname, minor(dev));
		splx(s);
		return (EINVAL);
		/* NOT REACHED */
	}
	splx(s);

	/* both filters hang off the read select/note list */
	kn->kn_hook = (caddr_t) dev;
	knlist_add(&tp->tap_rsel.si_note, kn, 0);

	return (0);
} /* tapkqfilter */
930
931
/*
 * tap_kqread
 *
 * Return true if there is data in the interface queue; kn_data is set
 * to the number of queued frames.
 */
static int
tapkqread(struct knote *kn, long hint)
{
	int			 ret, s;
	struct cdev		*dev = (struct cdev *)(kn->kn_hook);
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = tp->tap_ifp;

	s = splimp();
	if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
		TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n",
			ifp->if_xname, ifp->if_snd.ifq_len, minor(dev));
		ret = 1;
	} else {
		TAPDEBUG("%s waiting for data, minor = %#x\n",
			ifp->if_xname, minor(dev));
		ret = 0;
	}
	splx(s);

	return (ret);
} /* tapkqread */
959
960
961/*
962 * tap_kqwrite
963 *
964 * Always can write. Return the MTU in kn->data
965 */
966static int
967tapkqwrite(struct knote *kn, long hint)
968{
969	int			 s;
970	struct tap_softc	*tp = ((struct cdev *) kn->kn_hook)->si_drv1;
971	struct ifnet		*ifp = tp->tap_ifp;
972
973	s = splimp();
974	kn->kn_data = ifp->if_mtu;
975	splx(s);
976
977	return (1);
978} /* tapkqwrite */
979
980
981static void
982tapkqdetach(struct knote *kn)
983{
984	struct tap_softc	*tp = ((struct cdev *) kn->kn_hook)->si_drv1;
985
986	knlist_remove(&tp->tap_rsel.si_note, kn, 0);
987} /* tapkqdetach */
988
989