if_tap.c revision 147256
/*-
 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * BASED ON:
 * -------------------------------------------------------------------------
 *
 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
 * Nottingham University 1987.
 */

/*
 * $FreeBSD: head/sys/net/if_tap.c 147256 2005-06-10 16:49:24Z brooks $
 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
 */

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/selinfo.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/ttycom.h>
#include <sys/uio.h>
#include <sys/queue.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/route.h>
#include <net/if_types.h>

#include <netinet/in.h>

#include <net/if_tapvar.h>
#include <net/if_tap.h>


#define CDEV_NAME	"tap"
#define TAPDEBUG	if (tapdebug) printf

#define TAP		"tap"
#define VMNET		"vmnet"
#define TAPMAXUNIT	0x7fff
#define VMNET_DEV_MASK	CLONE_FLAG0

/* module */
static int		tapmodevent(module_t, int, void *);

/* device */
static void		tapclone(void *, char *, int, struct cdev **);
static void		tapcreate(struct cdev *);

/* network interface */
static void		tapifstart(struct ifnet *);
static int		tapifioctl(struct ifnet *, u_long, caddr_t);
static void		tapifinit(void *);

/* character device */
static d_open_t		tapopen;
static d_close_t	tapclose;
static d_read_t		tapread;
static d_write_t	tapwrite;
static d_ioctl_t	tapioctl;
static d_poll_t		tappoll;

static struct cdevsw	tap_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_PSEUDO | D_NEEDGIANT,
	.d_open =	tapopen,
	.d_close =	tapclose,
	.d_read =	tapread,
	.d_write =	tapwrite,
	.d_ioctl =	tapioctl,
	.d_poll =	tappoll,
	.d_name =	CDEV_NAME,
};

/*
 * All global variables in if_tap.c are locked with tapmtx, with the
 * exception of tapdebug, which is accessed unlocked; tapclones is
 * static at runtime.
 */
static struct mtx		tapmtx;
static int			tapdebug = 0;        /* debug flag   */
static int			tapuopen = 0;        /* allow user open() */
static SLIST_HEAD(, tap_softc)	taphead;             /* first device */
static struct clonedevs 	*tapclones;

MALLOC_DECLARE(M_TAP);
MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");

SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
    "Ethernet tunnel software network interface");
SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
	"Allow user to open /dev/tap (based on node permissions)");
SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");

DEV_MODULE(if_tap, tapmodevent, NULL);

/*
 * tapmodevent
 *
 * module event handler
 */
static int
tapmodevent(mod, type, data)
	module_t	 mod;
	int		 type;
	void		*data;
{
	static eventhandler_tag	 eh_tag = NULL;
	struct tap_softc	*tp = NULL;
	struct ifnet		*ifp = NULL;
	int			 s;

	switch (type) {
	case MOD_LOAD:

		/* initialize device */

		mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF);
		SLIST_INIT(&taphead);

		clone_setup(&tapclones);
		eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000);
		if (eh_tag == NULL) {
			clone_cleanup(&tapclones);
			mtx_destroy(&tapmtx);
			return (ENOMEM);
		}
		return (0);

	case MOD_UNLOAD:
		/*
		 * The EBUSY algorithm here can't quite atomically
		 * guarantee that this is race-free since we have to
		 * release the tap mtx to deregister the clone handler.
		 */
		mtx_lock(&tapmtx);
		SLIST_FOREACH(tp, &taphead, tap_next) {
			mtx_lock(&tp->tap_mtx);
			if (tp->tap_flags & TAP_OPEN) {
				mtx_unlock(&tp->tap_mtx);
				mtx_unlock(&tapmtx);
				return (EBUSY);
			}
			mtx_unlock(&tp->tap_mtx);
		}
		mtx_unlock(&tapmtx);

		EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);

		mtx_lock(&tapmtx);
		while ((tp = SLIST_FIRST(&taphead)) != NULL) {
			SLIST_REMOVE_HEAD(&taphead, tap_next);
			mtx_unlock(&tapmtx);

			ifp = tp->tap_ifp;

			TAPDEBUG("detaching %s\n", ifp->if_xname);

			/* Unlocked read. */
			KASSERT(!(tp->tap_flags & TAP_OPEN),
				("%s flags is out of sync", ifp->if_xname));

			destroy_dev(tp->tap_dev);
			s = splimp();
			ether_ifdetach(ifp);
			if_free_type(ifp, IFT_ETHER);
			splx(s);

			mtx_destroy(&tp->tap_mtx);
			free(tp, M_TAP);
			mtx_lock(&tapmtx);
		}
		mtx_unlock(&tapmtx);
		clone_cleanup(&tapclones);

		mtx_destroy(&tapmtx);

		break;

	default:
		return (EOPNOTSUPP);
	}

	return (0);
} /* tapmodevent */


/*
 * DEVFS handler
 *
 * We need to support two kinds of devices - tap and vmnet
 */
static void
tapclone(arg, name, namelen, dev)
	void	*arg;
	char	*name;
	int	 namelen;
	struct cdev **dev;
{
	u_int		extra;
	int		i, unit;
	char		*device_name = name;

	if (*dev != NULL)
		return;

	device_name = TAP;
	extra = 0;
	if (strcmp(name, TAP) == 0) {
		unit = -1;
	} else if (strcmp(name, VMNET) == 0) {
		device_name = VMNET;
		extra = VMNET_DEV_MASK;
		unit = -1;
	} else if (dev_stdclone(name, NULL, device_name, &unit) != 1) {
		device_name = VMNET;
		extra = VMNET_DEV_MASK;
		if (dev_stdclone(name, NULL, device_name, &unit) != 1)
			return;
	}

	/* find any existing device, or allocate new unit number */
	i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra);
	if (i) {
		*dev = make_dev(&tap_cdevsw, unit2minor(unit | extra),
		     UID_ROOT, GID_WHEEL, 0600, "%s%d", device_name, unit);
		if (*dev != NULL) {
			dev_ref(*dev);
			(*dev)->si_flags |= SI_CHEAPCLONE;
		}
	}
} /* tapclone */
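
/*
 * Worked example of the name handling above (illustrative, not an
 * exhaustive list):
 *
 *	"tap"    -> next free tap unit (unit stays -1)
 *	"vmnet"  -> next free vmnet unit, minor flagged with VMNET_DEV_MASK
 *	"tap5"   -> tap unit 5
 *	"vmnet3" -> vmnet unit 3, minor flagged with VMNET_DEV_MASK
 *
 * Any other name fails both dev_stdclone() calls and no node is created.
 */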


/*
 * tapcreate
 *
 * to create the interface
 */
static void
tapcreate(dev)
	struct cdev *dev;
{
	struct ifnet		*ifp = NULL;
	struct tap_softc	*tp = NULL;
	unsigned short		 macaddr_hi;
	int			 unit, s;
	char			*name = NULL;
	u_char			eaddr[6];

	dev->si_flags &= ~SI_CHEAPCLONE;

	/* allocate driver storage and create device */
	MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
	mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
	mtx_lock(&tapmtx);
	SLIST_INSERT_HEAD(&taphead, tp, tap_next);
	mtx_unlock(&tapmtx);

	unit = dev2unit(dev);

	/* select device: tap or vmnet */
	if (unit & VMNET_DEV_MASK) {
		name = VMNET;
		tp->tap_flags |= TAP_VMNET;
	} else
		name = TAP;

	unit &= TAPMAXUNIT;

	TAPDEBUG("tapcreate(%s%d). minor = %#x\n", name, unit, minor(dev));

	/* generate fake MAC address: 00 bd xx xx xx unit_no */
	macaddr_hi = htons(0x00bd);
	bcopy(&macaddr_hi, eaddr, sizeof(short));
	bcopy(&ticks, &eaddr[2], 3);	/* 3 bytes only; sizeof(long) would overrun eaddr[] on 64-bit */
	eaddr[5] = (u_char)unit;

	/* fill the rest and attach interface */
	ifp = tp->tap_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL)
		panic("%s%d: can not if_alloc()", name, unit);
	ifp->if_softc = tp;
	if_initname(ifp, name, unit);
	ifp->if_init = tapifinit;
	ifp->if_start = tapifstart;
	ifp->if_ioctl = tapifioctl;
	ifp->if_mtu = ETHERMTU;
	ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
	ifp->if_snd.ifq_maxlen = ifqmaxlen;

	dev->si_drv1 = tp;
	tp->tap_dev = dev;

	s = splimp();
	ether_ifattach(ifp, eaddr);
	splx(s);

	mtx_lock(&tp->tap_mtx);
	tp->tap_flags |= TAP_INITED;
	mtx_unlock(&tp->tap_mtx);

	TAPDEBUG("interface %s is created. minor = %#x\n",
		ifp->if_xname, minor(dev));
} /* tapcreate */
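
/*
 * Example of the address generated above (illustrative 'ticks' bytes):
 * unit 3 might end up with 00:bd:12:34:56:03 - the fixed 00:bd prefix,
 * three bytes taken from 'ticks' at creation time, and the unit number
 * in the last byte.
 */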


/*
 * tapopen
 *
 * to open the tunnel device; must be superuser unless the
 * net.link.tap.user_open sysctl is set
 */
static int
tapopen(dev, flag, mode, td)
	struct cdev *dev;
	int		 flag;
	int		 mode;
	struct thread	*td;
{
	struct tap_softc	*tp = NULL;
	struct ifnet		*ifp = NULL;
	int			 s;

	if (tapuopen == 0 && suser(td) != 0)
		return (EPERM);

	if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT)
		return (ENXIO);

	/*
	 * XXXRW: Non-atomic test-and-set of si_drv1.  Currently protected
	 * by Giant, but the race actually exists under memory pressure as
	 * well even when running with Giant, as malloc() may sleep.
	 */
	tp = dev->si_drv1;
	if (tp == NULL) {
		tapcreate(dev);
		tp = dev->si_drv1;
	}

	mtx_lock(&tp->tap_mtx);
	if (tp->tap_flags & TAP_OPEN) {
		mtx_unlock(&tp->tap_mtx);
		return (EBUSY);
	}

	bcopy(IFP2ENADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr));
	tp->tap_pid = td->td_proc->p_pid;
	tp->tap_flags |= TAP_OPEN;
	ifp = tp->tap_ifp;
	mtx_unlock(&tp->tap_mtx);

	s = splimp();
	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;
	splx(s);

	TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, minor(dev));

	return (0);
} /* tapopen */
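
/*
 * Illustrative userland sketch (not part of the driver; the device
 * name and buffer size are assumptions): opening the control device
 * creates the matching interface and marks it running, and each
 * read(2) returns at most one queued Ethernet frame.  Closing the
 * descriptor marks a non-VMnet interface down again (see tapclose()).
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	char frame[2048];
 *	int fd = open("/dev/tap0", O_RDWR);	// creates/attaches tap0
 *	if (fd >= 0) {
 *		ssize_t n = read(fd, frame, sizeof(frame));  // one frame
 *		// ... process n bytes of Ethernet frame ...
 *		close(fd);
 *	}
 */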


/*
 * tapclose
 *
 * close the device - mark i/f down & delete routing info
 */
static int
tapclose(dev, foo, bar, td)
	struct cdev *dev;
	int		 foo;
	int		 bar;
	struct thread	*td;
{
	struct ifaddr *ifa;
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = tp->tap_ifp;
	int			s;

	/* junk all pending output */
	IF_DRAIN(&ifp->if_snd);

	/*
	 * do not bring the interface down, and do not do anything else
	 * with the interface, if we are in VMnet mode; just close the device.
	 */

	mtx_lock(&tp->tap_mtx);
	if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) {
		mtx_unlock(&tp->tap_mtx);
		s = splimp();
		if_down(ifp);
		if (ifp->if_flags & IFF_RUNNING) {
			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
				rtinit(ifa, (int)RTM_DELETE, 0);
			}
			if_purgeaddrs(ifp);
			ifp->if_flags &= ~IFF_RUNNING;
		}
		splx(s);
	} else
		mtx_unlock(&tp->tap_mtx);

	funsetown(&tp->tap_sigio);
	selwakeuppri(&tp->tap_rsel, PZERO+1);

	mtx_lock(&tp->tap_mtx);
	tp->tap_flags &= ~TAP_OPEN;
	tp->tap_pid = 0;
	mtx_unlock(&tp->tap_mtx);

	TAPDEBUG("%s is closed. minor = %#x\n",
		ifp->if_xname, minor(dev));

	return (0);
} /* tapclose */


/*
 * tapifinit
 *
 * network interface initialization function
 */
static void
tapifinit(xtp)
	void	*xtp;
{
	struct tap_softc	*tp = (struct tap_softc *)xtp;
	struct ifnet		*ifp = tp->tap_ifp;

	TAPDEBUG("initializing %s\n", ifp->if_xname);

	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;

	/* attempt to start output */
	tapifstart(ifp);
} /* tapifinit */


/*
 * tapifioctl
 *
 * Process an ioctl request on network interface
 */
static int
tapifioctl(ifp, cmd, data)
	struct ifnet	*ifp;
	u_long		 cmd;
	caddr_t		 data;
{
	struct tap_softc	*tp = (struct tap_softc *)(ifp->if_softc);
	struct ifstat		*ifs = NULL;
	int			 s, dummy;

	switch (cmd) {
		case SIOCSIFFLAGS: /* XXX -- just like vmnet does */
		case SIOCADDMULTI:
		case SIOCDELMULTI:
			break;

		case SIOCGIFSTATUS:
			s = splimp();
			ifs = (struct ifstat *)data;
			dummy = strlen(ifs->ascii);
			mtx_lock(&tp->tap_mtx);
			if (tp->tap_pid != 0 && dummy < sizeof(ifs->ascii))
				snprintf(ifs->ascii + dummy,
					sizeof(ifs->ascii) - dummy,
					"\tOpened by PID %d\n", tp->tap_pid);
			mtx_unlock(&tp->tap_mtx);
			splx(s);
			break;

		default:
			s = splimp();
			dummy = ether_ioctl(ifp, cmd, data);
			splx(s);
			return (dummy);
	}

	return (0);
} /* tapifioctl */


/*
 * tapifstart
 *
 * queue packets from the higher level, ready to be put out
 */
static void
tapifstart(ifp)
	struct ifnet	*ifp;
{
	struct tap_softc	*tp = ifp->if_softc;
	int			 s;

	TAPDEBUG("%s starting\n", ifp->if_xname);

	/*
	 * do not junk pending output if we are in VMnet mode.
	 * XXX: can this do any harm because of queue overflow?
	 */

	mtx_lock(&tp->tap_mtx);
	if (((tp->tap_flags & TAP_VMNET) == 0) &&
	    ((tp->tap_flags & TAP_READY) != TAP_READY)) {
		struct mbuf	*m = NULL;

		mtx_unlock(&tp->tap_mtx);

		/* Unlocked read. */
		TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname,
		    tp->tap_flags);

		s = splimp();
		do {
			IF_DEQUEUE(&ifp->if_snd, m);
			if (m != NULL) {
				m_freem(m);
				ifp->if_oerrors++;
			}
		} while (m != NULL);
		splx(s);

		return;
	}
	mtx_unlock(&tp->tap_mtx);

	s = splimp();
	ifp->if_flags |= IFF_OACTIVE;

	if (ifp->if_snd.ifq_len != 0) {
		mtx_lock(&tp->tap_mtx);
		if (tp->tap_flags & TAP_RWAIT) {
			tp->tap_flags &= ~TAP_RWAIT;
			wakeup(tp);
		}

		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
			mtx_unlock(&tp->tap_mtx);
			pgsigio(&tp->tap_sigio, SIGIO, 0);
		} else
			mtx_unlock(&tp->tap_mtx);

		selwakeuppri(&tp->tap_rsel, PZERO+1);
		ifp->if_opackets++; /* obytes are counted in ether_output */
	}

	ifp->if_flags &= ~IFF_OACTIVE;
	splx(s);
} /* tapifstart */


/*
 * tapioctl
 *
 * the cdevsw interface is now pretty minimal
 */
static int
tapioctl(dev, cmd, data, flag, td)
	struct cdev *dev;
	u_long		 cmd;
	caddr_t		 data;
	int		 flag;
	struct thread	*td;
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = tp->tap_ifp;
	struct tapinfo		*tapp = NULL;
	int			 s;
	int			 f;

	switch (cmd) {
		case TAPSIFINFO:
			s = splimp();
			tapp = (struct tapinfo *)data;
			ifp->if_mtu = tapp->mtu;
			ifp->if_type = tapp->type;
			ifp->if_baudrate = tapp->baudrate;
			splx(s);
			break;

		case TAPGIFINFO:
			tapp = (struct tapinfo *)data;
			tapp->mtu = ifp->if_mtu;
			tapp->type = ifp->if_type;
			tapp->baudrate = ifp->if_baudrate;
			break;

		case TAPSDEBUG:
			tapdebug = *(int *)data;
			break;

		case TAPGDEBUG:
			*(int *)data = tapdebug;
			break;

		case FIONBIO:
			break;

		case FIOASYNC:
			s = splimp();
			mtx_lock(&tp->tap_mtx);
			if (*(int *)data)
				tp->tap_flags |= TAP_ASYNC;
			else
				tp->tap_flags &= ~TAP_ASYNC;
			mtx_unlock(&tp->tap_mtx);
			splx(s);
			break;

		case FIONREAD:
			s = splimp();
			if (ifp->if_snd.ifq_head) {
				struct mbuf	*mb = ifp->if_snd.ifq_head;

				for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
					*(int *)data += mb->m_len;
			} else
				*(int *)data = 0;
			splx(s);
			break;

		case FIOSETOWN:
			return (fsetown(*(int *)data, &tp->tap_sigio));

		case FIOGETOWN:
			*(int *)data = fgetown(&tp->tap_sigio);
			return (0);

		/* this is deprecated, FIOSETOWN should be used instead */
		case TIOCSPGRP:
			return (fsetown(-(*(int *)data), &tp->tap_sigio));

		/* this is deprecated, FIOGETOWN should be used instead */
		case TIOCGPGRP:
			*(int *)data = -fgetown(&tp->tap_sigio);
			return (0);

		/* VMware/VMnet port ioctl's */

		case SIOCGIFFLAGS:	/* get ifnet flags */
			bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
			break;

		case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
			f = *(int *)data;
			f &= 0x0fff;
			f &= ~IFF_CANTCHANGE;
			f |= IFF_UP;

			s = splimp();
			ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
			splx(s);
			break;

		case OSIOCGIFADDR:	/* get MAC address of the remote side */
		case SIOCGIFADDR:
			mtx_lock(&tp->tap_mtx);
			bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
			mtx_unlock(&tp->tap_mtx);
			break;

		case SIOCSIFADDR:	/* set MAC address of the remote side */
			mtx_lock(&tp->tap_mtx);
			bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
			mtx_unlock(&tp->tap_mtx);
			break;

		default:
			return (ENOTTY);
	}
	return (0);
} /* tapioctl */
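
/*
 * Illustrative userland sketch (assumes a descriptor obtained by
 * opening /dev/tapN as in the tapopen() example): TAPGIFINFO and
 * TAPSIFINFO exchange a struct tapinfo carrying the interface's mtu,
 * type and baudrate; the new MTU value below is purely hypothetical.
 *
 *	#include <sys/ioctl.h>
 *	#include <net/if_tap.h>
 *
 *	struct tapinfo ti;
 *	if (ioctl(fd, TAPGIFINFO, &ti) == 0) {
 *		ti.mtu = 1400;			// hypothetical new MTU
 *		(void)ioctl(fd, TAPSIFINFO, &ti);
 *	}
 */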


/*
 * tapread
 *
 * the cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read
 */
static int
tapread(dev, uio, flag)
	struct cdev *dev;
	struct uio	*uio;
	int		 flag;
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = tp->tap_ifp;
	struct mbuf		*m = NULL;
	int			 error = 0, len, s;

	TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, minor(dev));

	mtx_lock(&tp->tap_mtx);
	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
		mtx_unlock(&tp->tap_mtx);

		/* Unlocked read. */
		TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n",
			ifp->if_xname, minor(dev), tp->tap_flags);

		return (EHOSTDOWN);
	}

	tp->tap_flags &= ~TAP_RWAIT;
	mtx_unlock(&tp->tap_mtx);

	/* sleep until we get a packet */
	do {
		s = splimp();
		IF_DEQUEUE(&ifp->if_snd, m);
		splx(s);

		if (m == NULL) {
			if (flag & O_NONBLOCK)
				return (EWOULDBLOCK);

			mtx_lock(&tp->tap_mtx);
			tp->tap_flags |= TAP_RWAIT;
			mtx_unlock(&tp->tap_mtx);
			error = tsleep(tp, PCATCH|(PZERO+1), "taprd", 0);
			if (error)
				return (error);
		}
	} while (m == NULL);

	/* feed packet to bpf */
	BPF_MTAP(ifp, m);

	/* xfer packet to user space */
	while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) {
		len = min(uio->uio_resid, m->m_len);
		if (len == 0)
			break;

		error = uiomove(mtod(m, void *), len, uio);
		m = m_free(m);
	}

	if (m != NULL) {
		TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname,
			minor(dev));
		m_freem(m);
	}

	return (error);
} /* tapread */


/*
 * tapwrite
 *
 * the cdevsw write interface - an atomic write is a packet - or else!
 */
static int
tapwrite(dev, uio, flag)
	struct cdev *dev;
	struct uio	*uio;
	int		 flag;
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = tp->tap_ifp;
	struct mbuf		*m;

	TAPDEBUG("%s writing, minor = %#x\n",
		ifp->if_xname, minor(dev));

	if (uio->uio_resid == 0)
		return (0);

	if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
		TAPDEBUG("%s invalid packet len = %d, minor = %#x\n",
			ifp->if_xname, uio->uio_resid, minor(dev));

		return (EIO);
	}

	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, ETHER_ALIGN)) == NULL) {
		ifp->if_ierrors++;
		return (ENOBUFS);
	}

	m->m_pkthdr.rcvif = ifp;

	/* Pass packet up to parent. */
	(*ifp->if_input)(ifp, m);
	ifp->if_ipackets++; /* ibytes are counted in parent */

	return (0);
} /* tapwrite */
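
/*
 * Illustrative userland sketch (reuses 'fd' and 'frame' from the
 * tapopen() example; frame contents and 'len' are the caller's
 * responsibility): each write(2) must carry one complete Ethernet
 * frame of at most TAPMRU bytes, which the driver injects into the
 * stack as if it had been received on the interface.
 *
 *	#include <net/ethernet.h>
 *	#include <string.h>
 *
 *	struct ether_header *eh = (struct ether_header *)frame;
 *	memset(eh->ether_dhost, 0xff, ETHER_ADDR_LEN);	// broadcast dst
 *	// ... fill in ether_shost, ether_type and the payload, 'len' bytes total ...
 *	if (write(fd, frame, len) != (ssize_t)len)
 *		;				// frame was not injected
 */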


/*
 * tappoll
 *
 * the poll interface; this is really only useful for reads. the write
 * detect always returns true: a write never blocks anyway, it either
 * accepts the packet or drops it
 */
static int
tappoll(dev, events, td)
	struct cdev *dev;
	int		 events;
	struct thread	*td;
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = tp->tap_ifp;
	int			 s, revents = 0;

	TAPDEBUG("%s polling, minor = %#x\n",
		ifp->if_xname, minor(dev));

	s = splimp();
	if (events & (POLLIN | POLLRDNORM)) {
		if (ifp->if_snd.ifq_len > 0) {
			TAPDEBUG("%s has data in queue. len = %d, " \
				"minor = %#x\n", ifp->if_xname,
				ifp->if_snd.ifq_len, minor(dev));

			revents |= (events & (POLLIN | POLLRDNORM));
		} else {
			TAPDEBUG("%s waiting for data, minor = %#x\n",
				ifp->if_xname, minor(dev));

			selrecord(td, &tp->tap_rsel);
		}
	}

	if (events & (POLLOUT | POLLWRNORM))
		revents |= (events & (POLLOUT | POLLWRNORM));

	splx(s);
	return (revents);
} /* tappoll */
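
/*
 * Illustrative userland sketch (reuses 'fd' from the tapopen()
 * example; the timeout value is arbitrary): poll(2) reports the
 * descriptor readable only when the interface send queue holds at
 * least one frame, while POLLOUT is always reported, matching the
 * comment above.
 *
 *	#include <poll.h>
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	if (poll(&pfd, 1, 1000) > 0 && (pfd.revents & POLLIN)) {
 *		// a frame is queued; read(fd, ...) will not block
 *	}
 */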
879