if_tap.c revision 148868
1174993Srafan/*-
250276Speter * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
350276Speter * All rights reserved.
4262685Sdelphij *
550276Speter * Redistribution and use in source and binary forms, with or without
650276Speter * modification, are permitted provided that the following conditions
750276Speter * are met:
856639Speter * 1. Redistributions of source code must retain the above copyright
950276Speter *    notice, this list of conditions and the following disclaimer.
1050276Speter * 2. Redistributions in binary form must reproduce the above copyright
1150276Speter *    notice, this list of conditions and the following disclaimer in the
1250276Speter *    documentation and/or other materials provided with the distribution.
1350276Speter *
1450276Speter * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1550276Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1650276Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1756639Speter * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1850276Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1950276Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2050276Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2150276Speter * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2250276Speter * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2350276Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2450276Speter * SUCH DAMAGE.
2550276Speter *
2650276Speter * BASED ON:
2750276Speter * -------------------------------------------------------------------------
2850276Speter *
2950276Speter * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
3050276Speter * Nottingham University 1987.
3150276Speter */
3250276Speter
3350276Speter/*
34166124Srafan * $FreeBSD: head/sys/net/if_tap.c 148868 2005-08-08 19:55:32Z rwatson $
3550276Speter * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
3650276Speter */
37262685Sdelphij
3850276Speter#include "opt_inet.h"
3950276Speter
4050276Speter#include <sys/param.h>
4150276Speter#include <sys/conf.h>
4250276Speter#include <sys/fcntl.h>
4376726Speter#include <sys/filio.h>
4476726Speter#include <sys/kernel.h>
4550276Speter#include <sys/malloc.h>
4650276Speter#include <sys/mbuf.h>
4750276Speter#include <sys/module.h>
4850276Speter#include <sys/poll.h>
4976726Speter#include <sys/proc.h>
5076726Speter#include <sys/selinfo.h>
5150276Speter#include <sys/signalvar.h>
5250276Speter#include <sys/socket.h>
5350276Speter#include <sys/sockio.h>
5450276Speter#include <sys/sysctl.h>
5550276Speter#include <sys/systm.h>
5697049Speter#include <sys/ttycom.h>
5750276Speter#include <sys/uio.h>
5850276Speter#include <sys/queue.h>
5950276Speter
6050276Speter#include <net/bpf.h>
6150276Speter#include <net/ethernet.h>
62166124Srafan#include <net/if.h>
63166124Srafan#include <net/if_arp.h>
64166124Srafan#include <net/route.h>
65262685Sdelphij#include <net/if_types.h>
66262685Sdelphij
67262685Sdelphij#include <netinet/in.h>
68262685Sdelphij
69262685Sdelphij#include <net/if_tapvar.h>
70262685Sdelphij#include <net/if_tap.h>
7150276Speter
7250276Speter
/* Name used for the cdevsw entry and for the M_TAP malloc type. */
#define CDEV_NAME	"tap"
/*
 * Debug printf, gated on the tapdebug flag.  The macro expands to an
 * unbraced `if (tapdebug) printf`, so the caller's argument list becomes
 * the printf call.  Because of the bare `if`, do not use it as the sole,
 * unbraced body of an if/else.
 */
#define TAPDEBUG	if (tapdebug) printf

/* Name stems for the two device flavours handled by this driver. */
#define TAP		"tap"
#define VMNET		"vmnet"
/* Largest accepted unit number; also used as a mask on the unit. */
#define TAPMAXUNIT	0x7fff
/* Clone flag carried in the unit number to mark a vmnet device. */
#define VMNET_DEV_MASK	CLONE_FLAG0
8050276Speter
/* module: load/unload event handler passed to DEV_MODULE() */
static int		tapmodevent(module_t, int, void *);

/* device: dev_clone event handler and per-device softc/ifnet creation */
static void		tapclone(void *, struct ucred *, char *, int,
			    struct cdev **);
static void		tapcreate(struct cdev *);

/* network interface: methods installed on the ifnet by tapcreate() */
static void		tapifstart(struct ifnet *);
static int		tapifioctl(struct ifnet *, u_long, caddr_t);
static void		tapifinit(void *);

/* character device: entry points referenced from tap_cdevsw */
static d_open_t		tapopen;
static d_close_t	tapclose;
static d_read_t		tapread;
static d_write_t	tapwrite;
static d_ioctl_t	tapioctl;
static d_poll_t		tappoll;
10150276Speter
/*
 * Character device switch shared by the tap and vmnet device nodes.
 * D_NEEDGIANT is set; tapopen() notes that its si_drv1 test-and-set
 * currently depends on Giant.
 */
static struct cdevsw	tap_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_PSEUDO | D_NEEDGIANT,
	.d_open =	tapopen,
	.d_close =	tapclose,
	.d_read =	tapread,
	.d_write =	tapwrite,
	.d_ioctl =	tapioctl,
	.d_poll =	tappoll,
	.d_name =	CDEV_NAME,
};
11350276Speter
/*
 * All global variables in if_tap.c are locked with tapmtx, with the
 * exception of tapdebug, which is accessed unlocked; tapclones is
 * static at runtime.
 */
static struct mtx		tapmtx;
static int			tapdebug = 0;        /* debug flag   */
static int			tapuopen = 0;        /* allow user open() */
static SLIST_HEAD(, tap_softc)	taphead;             /* first device */
static struct clonedevs 	*tapclones;

/* Malloc type used for the per-device tap_softc allocations. */
MALLOC_DECLARE(M_TAP);
MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
/* debug.if_tap_debug: read/write view of tapdebug. */
SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");

/*
 * net.link.tap node.  user_open exposes tapuopen (checked in tapopen());
 * debug is a second name for the same tapdebug variable exported above.
 */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
    "Ethernet tunnel software network interface");
SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
	"Allow user to open /dev/tap (based on node permissions)");
SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");

/* Register the module with tapmodevent() as its event handler. */
DEV_MODULE(if_tap, tapmodevent, NULL);
13750276Speter
13850276Speter/*
13950276Speter * tapmodevent
14050276Speter *
14150276Speter * module event handler
14250276Speter */
14350276Speterstatic int
14450276Spetertapmodevent(mod, type, data)
14550276Speter	module_t	 mod;
14650276Speter	int		 type;
14762449Speter	void		*data;
14862449Speter{
14950276Speter	static eventhandler_tag	 eh_tag = NULL;
15050276Speter	struct tap_softc	*tp = NULL;
15150276Speter	struct ifnet		*ifp = NULL;
15250276Speter	int			 s;
15350276Speter
15450276Speter	switch (type) {
15550276Speter	case MOD_LOAD:
15650276Speter
15750276Speter		/* intitialize device */
15850276Speter
15950276Speter		mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF);
16050276Speter		SLIST_INIT(&taphead);
16150276Speter
16250276Speter		clone_setup(&tapclones);
16350276Speter		eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000);
16450276Speter		if (eh_tag == NULL) {
16550276Speter			clone_cleanup(&tapclones);
16650276Speter			mtx_destroy(&tapmtx);
16750276Speter			return (ENOMEM);
16850276Speter		}
16950276Speter		return (0);
17050276Speter
17150276Speter	case MOD_UNLOAD:
17262449Speter		/*
17362449Speter		 * The EBUSY algorithm here can't quite atomically
17462449Speter		 * guarantee that this is race-free since we have to
17550276Speter		 * release the tap mtx to deregister the clone handler.
17650276Speter		 */
17750276Speter		mtx_lock(&tapmtx);
178166124Srafan		SLIST_FOREACH(tp, &taphead, tap_next) {
17950276Speter			mtx_lock(&tp->tap_mtx);
18050276Speter			if (tp->tap_flags & TAP_OPEN) {
18150276Speter				mtx_unlock(&tp->tap_mtx);
18250276Speter				mtx_unlock(&tapmtx);
18350276Speter				return (EBUSY);
18450276Speter			}
18550276Speter			mtx_unlock(&tp->tap_mtx);
18650276Speter		}
18750276Speter		mtx_unlock(&tapmtx);
18850276Speter
18950276Speter		EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);
19050276Speter
19150276Speter		mtx_lock(&tapmtx);
19250276Speter		while ((tp = SLIST_FIRST(&taphead)) != NULL) {
19350276Speter			SLIST_REMOVE_HEAD(&taphead, tap_next);
19450276Speter			mtx_unlock(&tapmtx);
19550276Speter
19650276Speter			ifp = tp->tap_ifp;
19750276Speter
19850276Speter			TAPDEBUG("detaching %s\n", ifp->if_xname);
19997049Speter
20050276Speter			/* Unlocked read. */
20150276Speter			KASSERT(!(tp->tap_flags & TAP_OPEN),
20250276Speter				("%s flags is out of sync", ifp->if_xname));
203166124Srafan
20450276Speter			destroy_dev(tp->tap_dev);
20550276Speter			s = splimp();
206262629Sdelphij			ether_ifdetach(ifp);
207262629Sdelphij			if_free_type(ifp, IFT_ETHER);
208262629Sdelphij			splx(s);
209262629Sdelphij
210262629Sdelphij			mtx_destroy(&tp->tap_mtx);
211262629Sdelphij			free(tp, M_TAP);
212262629Sdelphij			mtx_lock(&tapmtx);
213262629Sdelphij		}
214262629Sdelphij		mtx_unlock(&tapmtx);
215174993Srafan		clone_cleanup(&tapclones);
216174993Srafan
217174993Srafan		mtx_destroy(&tapmtx);
218174993Srafan
219174993Srafan		break;
220174993Srafan
221174993Srafan	default:
222174993Srafan		return (EOPNOTSUPP);
223174993Srafan	}
224174993Srafan
22550276Speter	return (0);
226174993Srafan} /* tapmodevent */
227174993Srafan
228174993Srafan
229174993Srafan/*
230174993Srafan * DEVFS handler
231174993Srafan *
232174993Srafan * We need to support two kind of devices - tap and vmnet
233174993Srafan */
234174993Srafanstatic void
23550276Spetertapclone(arg, cred, name, namelen, dev)
23650276Speter	void	*arg;
23750276Speter	struct ucred *cred;
23876726Speter	char	*name;
23976726Speter	int	 namelen;
24076726Speter	struct cdev **dev;
24176726Speter{
24276726Speter	u_int		extra;
24376726Speter	int		i, unit;
24476726Speter	char		*device_name = name;
24576726Speter
24676726Speter	if (*dev != NULL)
24750276Speter		return;
24850276Speter
24950276Speter	device_name = TAP;
25050276Speter	extra = 0;
25176726Speter	if (strcmp(name, TAP) == 0) {
25276726Speter		unit = -1;
25376726Speter	} else if (strcmp(name, VMNET) == 0) {
25476726Speter		device_name = VMNET;
255262685Sdelphij		extra = VMNET_DEV_MASK;
256166124Srafan		unit = -1;
25776726Speter	} else if (dev_stdclone(name, NULL, device_name, &unit) != 1) {
25876726Speter		device_name = VMNET;
25976726Speter		extra = VMNET_DEV_MASK;
26050276Speter		if (dev_stdclone(name, NULL, device_name, &unit) != 1)
26150276Speter			return;
26276726Speter	}
26376726Speter
26450276Speter	/* find any existing device, or allocate new unit number */
26550276Speter	i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra);
26676726Speter	if (i) {
26776726Speter		*dev = make_dev(&tap_cdevsw, unit2minor(unit | extra),
26850276Speter		     UID_ROOT, GID_WHEEL, 0600, "%s%d", device_name, unit);
26950276Speter		if (*dev != NULL) {
27050276Speter			dev_ref(*dev);
27176726Speter			(*dev)->si_flags |= SI_CHEAPCLONE;
27276726Speter		}
27376726Speter	}
27476726Speter} /* tapclone */
27576726Speter
276166124Srafan
277166124Srafan/*
278174993Srafan * tapcreate
279166124Srafan *
280174993Srafan * to create interface
281174993Srafan */
282166124Srafanstatic void
283166124Srafantapcreate(dev)
284262629Sdelphij	struct cdev *dev;
285262629Sdelphij{
28650276Speter	struct ifnet		*ifp = NULL;
28750276Speter	struct tap_softc	*tp = NULL;
28850276Speter	unsigned short		 macaddr_hi;
28976726Speter	int			 unit, s;
29076726Speter	char			*name = NULL;
29176726Speter	u_char			eaddr[6];
29276726Speter
29376726Speter	dev->si_flags &= ~SI_CHEAPCLONE;
29476726Speter
29576726Speter	/* allocate driver storage and create device */
29676726Speter	MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
29750276Speter	mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
298262629Sdelphij	mtx_lock(&tapmtx);
299262629Sdelphij	SLIST_INSERT_HEAD(&taphead, tp, tap_next);
300262629Sdelphij	mtx_unlock(&tapmtx);
301262629Sdelphij
302262629Sdelphij	unit = dev2unit(dev);
303262629Sdelphij
304262629Sdelphij	/* select device: tap or vmnet */
305262629Sdelphij	if (unit & VMNET_DEV_MASK) {
306262629Sdelphij		name = VMNET;
307262629Sdelphij		tp->tap_flags |= TAP_VMNET;
308262629Sdelphij	} else
309262629Sdelphij		name = TAP;
310262629Sdelphij
311262629Sdelphij	unit &= TAPMAXUNIT;
312262629Sdelphij
313262629Sdelphij	TAPDEBUG("tapcreate(%s%d). minor = %#x\n", name, unit, minor(dev));
314262629Sdelphij
315262629Sdelphij	/* generate fake MAC address: 00 bd xx xx xx unit_no */
316262629Sdelphij	macaddr_hi = htons(0x00bd);
317262629Sdelphij	bcopy(&macaddr_hi, eaddr, sizeof(short));
318262629Sdelphij	bcopy(&ticks, &eaddr[2], sizeof(long));
319262629Sdelphij	eaddr[5] = (u_char)unit;
320262629Sdelphij
321262629Sdelphij	/* fill the rest and attach interface */
322262629Sdelphij	ifp = tp->tap_ifp = if_alloc(IFT_ETHER);
323262629Sdelphij	if (ifp == NULL)
324262629Sdelphij		panic("%s%d: can not if_alloc()", name, unit);
325262629Sdelphij	ifp->if_softc = tp;
326262629Sdelphij	if_initname(ifp, name, unit);
327262629Sdelphij	ifp->if_init = tapifinit;
328262629Sdelphij	ifp->if_start = tapifstart;
32950276Speter	ifp->if_ioctl = tapifioctl;
33050276Speter	ifp->if_mtu = ETHERMTU;
33150276Speter	ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
33250276Speter	ifp->if_snd.ifq_maxlen = ifqmaxlen;
33376726Speter
33450276Speter	dev->si_drv1 = tp;
335	tp->tap_dev = dev;
336
337	s = splimp();
338	ether_ifattach(ifp, eaddr);
339	splx(s);
340
341	mtx_lock(&tp->tap_mtx);
342	tp->tap_flags |= TAP_INITED;
343	mtx_unlock(&tp->tap_mtx);
344
345	TAPDEBUG("interface %s is created. minor = %#x\n",
346		ifp->if_xname, minor(dev));
347} /* tapcreate */
348
349
350/*
351 * tapopen
352 *
353 * to open tunnel. must be superuser
354 */
355static int
356tapopen(dev, flag, mode, td)
357	struct cdev *dev;
358	int		 flag;
359	int		 mode;
360	struct thread	*td;
361{
362	struct tap_softc	*tp = NULL;
363	struct ifnet		*ifp = NULL;
364	int			 s;
365
366	if (tapuopen == 0 && suser(td) != 0)
367		return (EPERM);
368
369	if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT)
370		return (ENXIO);
371
372	/*
373	 * XXXRW: Non-atomic test-and-set of si_drv1.  Currently protected
374	 * by Giant, but the race actually exists under memory pressure as
375	 * well even when running with Giant, as malloc() may sleep.
376	 */
377	tp = dev->si_drv1;
378	if (tp == NULL) {
379		tapcreate(dev);
380		tp = dev->si_drv1;
381	}
382
383	mtx_lock(&tp->tap_mtx);
384	if (tp->tap_flags & TAP_OPEN) {
385		mtx_unlock(&tp->tap_mtx);
386		return (EBUSY);
387	}
388
389	bcopy(IFP2ENADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr));
390	tp->tap_pid = td->td_proc->p_pid;
391	tp->tap_flags |= TAP_OPEN;
392	ifp = tp->tap_ifp;
393	mtx_unlock(&tp->tap_mtx);
394
395	s = splimp();
396	ifp->if_flags |= IFF_RUNNING;
397	ifp->if_flags &= ~IFF_OACTIVE;
398	splx(s);
399
400	TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, minor(dev));
401
402	return (0);
403} /* tapopen */
404
405
406/*
407 * tapclose
408 *
409 * close the device - mark i/f down & delete routing info
410 */
411static int
412tapclose(dev, foo, bar, td)
413	struct cdev *dev;
414	int		 foo;
415	int		 bar;
416	struct thread	*td;
417{
418	struct ifaddr *ifa;
419	struct tap_softc	*tp = dev->si_drv1;
420	struct ifnet		*ifp = tp->tap_ifp;
421	int			s;
422
423	/* junk all pending output */
424	IF_DRAIN(&ifp->if_snd);
425
426	/*
427	 * do not bring the interface down, and do not anything with
428	 * interface, if we are in VMnet mode. just close the device.
429	 */
430
431	mtx_lock(&tp->tap_mtx);
432	if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) {
433		mtx_unlock(&tp->tap_mtx);
434		s = splimp();
435		if_down(ifp);
436		if (ifp->if_flags & IFF_RUNNING) {
437			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
438				rtinit(ifa, (int)RTM_DELETE, 0);
439			}
440			if_purgeaddrs(ifp);
441			ifp->if_flags &= ~IFF_RUNNING;
442		}
443		splx(s);
444	} else
445		mtx_unlock(&tp->tap_mtx);
446
447	funsetown(&tp->tap_sigio);
448	selwakeuppri(&tp->tap_rsel, PZERO+1);
449
450	mtx_lock(&tp->tap_mtx);
451	tp->tap_flags &= ~TAP_OPEN;
452	tp->tap_pid = 0;
453	mtx_unlock(&tp->tap_mtx);
454
455	TAPDEBUG("%s is closed. minor = %#x\n",
456		ifp->if_xname, minor(dev));
457
458	return (0);
459} /* tapclose */
460
461
462/*
463 * tapifinit
464 *
465 * network interface initialization function
466 */
467static void
468tapifinit(xtp)
469	void	*xtp;
470{
471	struct tap_softc	*tp = (struct tap_softc *)xtp;
472	struct ifnet		*ifp = tp->tap_ifp;
473
474	TAPDEBUG("initializing %s\n", ifp->if_xname);
475
476	ifp->if_flags |= IFF_RUNNING;
477	ifp->if_flags &= ~IFF_OACTIVE;
478
479	/* attempt to start output */
480	tapifstart(ifp);
481} /* tapifinit */
482
483
484/*
485 * tapifioctl
486 *
487 * Process an ioctl request on network interface
488 */
489static int
490tapifioctl(ifp, cmd, data)
491	struct ifnet	*ifp;
492	u_long		 cmd;
493	caddr_t		 data;
494{
495	struct tap_softc	*tp = (struct tap_softc *)(ifp->if_softc);
496	struct ifstat		*ifs = NULL;
497	int			 s, dummy;
498
499	switch (cmd) {
500		case SIOCSIFFLAGS: /* XXX -- just like vmnet does */
501		case SIOCADDMULTI:
502		case SIOCDELMULTI:
503			break;
504
505		case SIOCGIFSTATUS:
506			s = splimp();
507			ifs = (struct ifstat *)data;
508			dummy = strlen(ifs->ascii);
509			mtx_lock(&tp->tap_mtx);
510			if (tp->tap_pid != 0 && dummy < sizeof(ifs->ascii))
511				snprintf(ifs->ascii + dummy,
512					sizeof(ifs->ascii) - dummy,
513					"\tOpened by PID %d\n", tp->tap_pid);
514			mtx_unlock(&tp->tap_mtx);
515			splx(s);
516			break;
517
518		default:
519			s = splimp();
520			dummy = ether_ioctl(ifp, cmd, data);
521			splx(s);
522			return (dummy);
523	}
524
525	return (0);
526} /* tapifioctl */
527
528
529/*
530 * tapifstart
531 *
532 * queue packets from higher level ready to put out
533 */
534static void
535tapifstart(ifp)
536	struct ifnet	*ifp;
537{
538	struct tap_softc	*tp = ifp->if_softc;
539	int			 s;
540
541	TAPDEBUG("%s starting\n", ifp->if_xname);
542
543	/*
544	 * do not junk pending output if we are in VMnet mode.
545	 * XXX: can this do any harm because of queue overflow?
546	 */
547
548	mtx_lock(&tp->tap_mtx);
549	if (((tp->tap_flags & TAP_VMNET) == 0) &&
550	    ((tp->tap_flags & TAP_READY) != TAP_READY)) {
551		struct mbuf	*m = NULL;
552
553		mtx_unlock(&tp->tap_mtx);
554
555		/* Unlocked read. */
556		TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname,
557		    tp->tap_flags);
558
559		s = splimp();
560		do {
561			IF_DEQUEUE(&ifp->if_snd, m);
562			if (m != NULL)
563				m_freem(m);
564			ifp->if_oerrors ++;
565		} while (m != NULL);
566		splx(s);
567
568		return;
569	}
570	mtx_unlock(&tp->tap_mtx);
571
572	s = splimp();
573	ifp->if_flags |= IFF_OACTIVE;
574
575	if (ifp->if_snd.ifq_len != 0) {
576		mtx_lock(&tp->tap_mtx);
577		if (tp->tap_flags & TAP_RWAIT) {
578			tp->tap_flags &= ~TAP_RWAIT;
579			wakeup(tp);
580		}
581
582		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
583			mtx_unlock(&tp->tap_mtx);
584			pgsigio(&tp->tap_sigio, SIGIO, 0);
585		} else
586			mtx_unlock(&tp->tap_mtx);
587
588		selwakeuppri(&tp->tap_rsel, PZERO+1);
589		ifp->if_opackets ++; /* obytes are counted in ether_output */
590	}
591
592	ifp->if_flags &= ~IFF_OACTIVE;
593	splx(s);
594} /* tapifstart */
595
596
597/*
598 * tapioctl
599 *
600 * the cdevsw interface is now pretty minimal
601 */
602static int
603tapioctl(dev, cmd, data, flag, td)
604	struct cdev *dev;
605	u_long		 cmd;
606	caddr_t		 data;
607	int		 flag;
608	struct thread	*td;
609{
610	struct tap_softc	*tp = dev->si_drv1;
611	struct ifnet		*ifp = tp->tap_ifp;
612	struct tapinfo		*tapp = NULL;
613	int			 s;
614	int			 f;
615
616	switch (cmd) {
617		case TAPSIFINFO:
618			s = splimp();
619			tapp = (struct tapinfo *)data;
620			ifp->if_mtu = tapp->mtu;
621			ifp->if_type = tapp->type;
622			ifp->if_baudrate = tapp->baudrate;
623			splx(s);
624			break;
625
626		case TAPGIFINFO:
627			tapp = (struct tapinfo *)data;
628			tapp->mtu = ifp->if_mtu;
629			tapp->type = ifp->if_type;
630			tapp->baudrate = ifp->if_baudrate;
631			break;
632
633		case TAPSDEBUG:
634			tapdebug = *(int *)data;
635			break;
636
637		case TAPGDEBUG:
638			*(int *)data = tapdebug;
639			break;
640
641		case FIONBIO:
642			break;
643
644		case FIOASYNC:
645			s = splimp();
646			mtx_lock(&tp->tap_mtx);
647			if (*(int *)data)
648				tp->tap_flags |= TAP_ASYNC;
649			else
650				tp->tap_flags &= ~TAP_ASYNC;
651			mtx_unlock(&tp->tap_mtx);
652			splx(s);
653			break;
654
655		case FIONREAD:
656			s = splimp();
657			if (ifp->if_snd.ifq_head) {
658				struct mbuf	*mb = ifp->if_snd.ifq_head;
659
660				for(*(int *)data = 0;mb != NULL;mb = mb->m_next)
661					*(int *)data += mb->m_len;
662			} else
663				*(int *)data = 0;
664			splx(s);
665			break;
666
667		case FIOSETOWN:
668			return (fsetown(*(int *)data, &tp->tap_sigio));
669
670		case FIOGETOWN:
671			*(int *)data = fgetown(&tp->tap_sigio);
672			return (0);
673
674		/* this is deprecated, FIOSETOWN should be used instead */
675		case TIOCSPGRP:
676			return (fsetown(-(*(int *)data), &tp->tap_sigio));
677
678		/* this is deprecated, FIOGETOWN should be used instead */
679		case TIOCGPGRP:
680			*(int *)data = -fgetown(&tp->tap_sigio);
681			return (0);
682
683		/* VMware/VMnet port ioctl's */
684
685		case SIOCGIFFLAGS:	/* get ifnet flags */
686			bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
687			break;
688
689		case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
690			f = *(int *)data;
691			f &= 0x0fff;
692			f &= ~IFF_CANTCHANGE;
693			f |= IFF_UP;
694
695			s = splimp();
696			ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
697			splx(s);
698			break;
699
700		case OSIOCGIFADDR:	/* get MAC address of the remote side */
701		case SIOCGIFADDR:
702			mtx_lock(&tp->tap_mtx);
703			bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
704			mtx_unlock(&tp->tap_mtx);
705			break;
706
707		case SIOCSIFADDR:	/* set MAC address of the remote side */
708			mtx_lock(&tp->tap_mtx);
709			bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
710			mtx_unlock(&tp->tap_mtx);
711			break;
712
713		default:
714			return (ENOTTY);
715	}
716	return (0);
717} /* tapioctl */
718
719
720/*
721 * tapread
722 *
723 * the cdevsw read interface - reads a packet at a time, or at
724 * least as much of a packet as can be read
725 */
726static int
727tapread(dev, uio, flag)
728	struct cdev *dev;
729	struct uio	*uio;
730	int		 flag;
731{
732	struct tap_softc	*tp = dev->si_drv1;
733	struct ifnet		*ifp = tp->tap_ifp;
734	struct mbuf		*m = NULL;
735	int			 error = 0, len, s;
736
737	TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, minor(dev));
738
739	mtx_lock(&tp->tap_mtx);
740	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
741		mtx_unlock(&tp->tap_mtx);
742
743		/* Unlocked read. */
744		TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n",
745			ifp->if_xname, minor(dev), tp->tap_flags);
746
747		return (EHOSTDOWN);
748	}
749
750	tp->tap_flags &= ~TAP_RWAIT;
751	mtx_unlock(&tp->tap_mtx);
752
753	/* sleep until we get a packet */
754	do {
755		s = splimp();
756		IF_DEQUEUE(&ifp->if_snd, m);
757		splx(s);
758
759		if (m == NULL) {
760			if (flag & O_NONBLOCK)
761				return (EWOULDBLOCK);
762
763			mtx_lock(&tp->tap_mtx);
764			tp->tap_flags |= TAP_RWAIT;
765			mtx_unlock(&tp->tap_mtx);
766			error = tsleep(tp,PCATCH|(PZERO+1),"taprd",0);
767			if (error)
768				return (error);
769		}
770	} while (m == NULL);
771
772	/* feed packet to bpf */
773	BPF_MTAP(ifp, m);
774
775	/* xfer packet to user space */
776	while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) {
777		len = min(uio->uio_resid, m->m_len);
778		if (len == 0)
779			break;
780
781		error = uiomove(mtod(m, void *), len, uio);
782		m = m_free(m);
783	}
784
785	if (m != NULL) {
786		TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname,
787			minor(dev));
788		m_freem(m);
789	}
790
791	return (error);
792} /* tapread */
793
794
795/*
796 * tapwrite
797 *
798 * the cdevsw write interface - an atomic write is a packet - or else!
799 */
800static int
801tapwrite(dev, uio, flag)
802	struct cdev *dev;
803	struct uio	*uio;
804	int		 flag;
805{
806	struct tap_softc	*tp = dev->si_drv1;
807	struct ifnet		*ifp = tp->tap_ifp;
808	struct mbuf		*m;
809	int			 error = 0;
810
811	TAPDEBUG("%s writting, minor = %#x\n",
812		ifp->if_xname, minor(dev));
813
814	if (uio->uio_resid == 0)
815		return (0);
816
817	if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
818		TAPDEBUG("%s invalid packet len = %d, minor = %#x\n",
819			ifp->if_xname, uio->uio_resid, minor(dev));
820
821		return (EIO);
822	}
823
824	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, ETHER_ALIGN)) == NULL) {
825		ifp->if_ierrors ++;
826		return (error);
827	}
828
829	m->m_pkthdr.rcvif = ifp;
830
831	/* Pass packet up to parent. */
832	(*ifp->if_input)(ifp, m);
833	ifp->if_ipackets ++; /* ibytes are counted in parent */
834
835	return (0);
836} /* tapwrite */
837
838
839/*
840 * tappoll
841 *
842 * the poll interface, this is only useful on reads
843 * really. the write detect always returns true, write never blocks
844 * anyway, it either accepts the packet or drops it
845 */
846static int
847tappoll(dev, events, td)
848	struct cdev *dev;
849	int		 events;
850	struct thread	*td;
851{
852	struct tap_softc	*tp = dev->si_drv1;
853	struct ifnet		*ifp = tp->tap_ifp;
854	int			 s, revents = 0;
855
856	TAPDEBUG("%s polling, minor = %#x\n",
857		ifp->if_xname, minor(dev));
858
859	s = splimp();
860	if (events & (POLLIN | POLLRDNORM)) {
861		if (ifp->if_snd.ifq_len > 0) {
862			TAPDEBUG("%s have data in queue. len = %d, " \
863				"minor = %#x\n", ifp->if_xname,
864				ifp->if_snd.ifq_len, minor(dev));
865
866			revents |= (events & (POLLIN | POLLRDNORM));
867		} else {
868			TAPDEBUG("%s waiting for data, minor = %#x\n",
869				ifp->if_xname, minor(dev));
870
871			selrecord(td, &tp->tap_rsel);
872		}
873	}
874
875	if (events & (POLLOUT | POLLWRNORM))
876		revents |= (events & (POLLOUT | POLLWRNORM));
877
878	splx(s);
879	return (revents);
880} /* tappoll */
881