/*-
 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * BASED ON:
 * -------------------------------------------------------------------------
 *
 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
 * Nottingham University 1987.
 */

/*
 * $FreeBSD: head/sys/net/if_tap.c 144979 2005-04-13 00:30:19Z mdodd $
 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
 */

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/selinfo.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/ttycom.h>
#include <sys/uio.h>
#include <sys/queue.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/route.h>

#include <netinet/in.h>

#include <net/if_tapvar.h>
#include <net/if_tap.h>


#define CDEV_NAME	"tap"
#define TAPDEBUG	if (tapdebug) printf

#define TAP		"tap"
#define VMNET		"vmnet"
#define TAPMAXUNIT	0x7fff
#define VMNET_DEV_MASK	CLONE_FLAG0
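
/*
 * A single character device switch serves both "tap" and "vmnet" devices.
 * A vmnet device is distinguished by folding the CLONE_FLAG0 bit
 * (VMNET_DEV_MASK) into its clone unit number; tapcreate() tests that bit
 * and then masks the unit with TAPMAXUNIT to recover the real unit number.
 */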

/* module */
static int		tapmodevent(module_t, int, void *);

/* device */
static void		tapclone(void *, char *, int, struct cdev **);
static void		tapcreate(struct cdev *);

/* network interface */
static void		tapifstart(struct ifnet *);
static int		tapifioctl(struct ifnet *, u_long, caddr_t);
static void		tapifinit(void *);

/* character device */
static d_open_t		tapopen;
static d_close_t	tapclose;
static d_read_t		tapread;
static d_write_t	tapwrite;
static d_ioctl_t	tapioctl;
static d_poll_t		tappoll;

static struct cdevsw	tap_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_PSEUDO | D_NEEDGIANT,
	.d_open =	tapopen,
	.d_close =	tapclose,
	.d_read =	tapread,
	.d_write =	tapwrite,
	.d_ioctl =	tapioctl,
	.d_poll =	tappoll,
	.d_name =	CDEV_NAME,
};

/*
 * All global variables in if_tap.c are locked with tapmtx, with the
 * exception of tapdebug, which is accessed unlocked; tapclones is
 * static at runtime.
 */
static struct mtx		tapmtx;
static int			tapdebug = 0;        /* debug flag   */
static int			tapuopen = 0;        /* allow user open() */
static SLIST_HEAD(, tap_softc)	taphead;             /* first device */
static struct clonedevs 	*tapclones;

MALLOC_DECLARE(M_TAP);
MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");

SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
    "Ethernet tunnel software network interface");
SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
	"Allow user to open /dev/tap (based on node permissions)");
SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");

DEV_MODULE(if_tap, tapmodevent, NULL);

/*
 * tapmodevent
 *
 * module event handler
 */
static int
tapmodevent(mod, type, data)
	module_t	 mod;
	int		 type;
	void		*data;
{
	static eventhandler_tag	 eh_tag = NULL;
	struct tap_softc	*tp = NULL;
	struct ifnet		*ifp = NULL;
	int			 s;

	switch (type) {
	case MOD_LOAD:

		/* initialize device */

		mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF);
		SLIST_INIT(&taphead);

		clone_setup(&tapclones);
		eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000);
		if (eh_tag == NULL) {
			clone_cleanup(&tapclones);
			mtx_destroy(&tapmtx);
			return (ENOMEM);
		}
		return (0);

	case MOD_UNLOAD:
		/*
		 * The EBUSY algorithm here can't quite atomically
		 * guarantee that this is race-free since we have to
		 * release the tap mtx to deregister the clone handler.
		 */
		mtx_lock(&tapmtx);
		SLIST_FOREACH(tp, &taphead, tap_next) {
			mtx_lock(&tp->tap_mtx);
			if (tp->tap_flags & TAP_OPEN) {
				mtx_unlock(&tp->tap_mtx);
				mtx_unlock(&tapmtx);
				return (EBUSY);
			}
			mtx_unlock(&tp->tap_mtx);
		}
		mtx_unlock(&tapmtx);

		EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);

		mtx_lock(&tapmtx);
		while ((tp = SLIST_FIRST(&taphead)) != NULL) {
			SLIST_REMOVE_HEAD(&taphead, tap_next);
			mtx_unlock(&tapmtx);

			ifp = &tp->tap_if;

			TAPDEBUG("detaching %s\n", ifp->if_xname);

			/* Unlocked read. */
			KASSERT(!(tp->tap_flags & TAP_OPEN),
				("%s flags is out of sync", ifp->if_xname));

			destroy_dev(tp->tap_dev);
			s = splimp();
			ether_ifdetach(ifp);
			splx(s);

			mtx_destroy(&tp->tap_mtx);
			free(tp, M_TAP);
			mtx_lock(&tapmtx);
		}
		mtx_unlock(&tapmtx);
		clone_cleanup(&tapclones);

		mtx_destroy(&tapmtx);

		break;

	default:
		return (EOPNOTSUPP);
	}

	return (0);
} /* tapmodevent */


/*
 * DEVFS handler
 *
 * We need to support two kinds of devices - tap and vmnet
 */
static void
tapclone(arg, name, namelen, dev)
	void	*arg;
	char	*name;
	int	 namelen;
	struct cdev **dev;
{
	u_int		extra;
	int		i, unit;
	char		*device_name = name;

	if (*dev != NULL)
		return;

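	/*
	 * Accepted names are a bare "tap" or "vmnet", for which
	 * clone_create() picks the next free unit, or "tapN"/"vmnetN"
	 * for a specific unit; vmnet devices carry VMNET_DEV_MASK in
	 * their unit number.
	 */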
	device_name = TAP;
	extra = 0;
	if (strcmp(name, TAP) == 0) {
		unit = -1;
	} else if (strcmp(name, VMNET) == 0) {
		device_name = VMNET;
		extra = VMNET_DEV_MASK;
		unit = -1;
	} else if (dev_stdclone(name, NULL, device_name, &unit) != 1) {
		device_name = VMNET;
		extra = VMNET_DEV_MASK;
		if (dev_stdclone(name, NULL, device_name, &unit) != 1)
			return;
	}

	/* find any existing device, or allocate new unit number */
	i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra);
	if (i) {
		*dev = make_dev(&tap_cdevsw, unit2minor(unit | extra),
		     UID_ROOT, GID_WHEEL, 0600, "%s%d", device_name, unit);
		if (*dev != NULL) {
			dev_ref(*dev);
			(*dev)->si_flags |= SI_CHEAPCLONE;
		}
	}
} /* tapclone */


/*
 * tapcreate
 *
 * to create interface
 */
static void
tapcreate(dev)
	struct cdev *dev;
{
	struct ifnet		*ifp = NULL;
	struct tap_softc	*tp = NULL;
	unsigned short		 macaddr_hi;
	int			 unit, s;
	char			*name = NULL;

	dev->si_flags &= ~SI_CHEAPCLONE;

	/* allocate driver storage and create device */
	MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
	mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
	mtx_lock(&tapmtx);
	SLIST_INSERT_HEAD(&taphead, tp, tap_next);
	mtx_unlock(&tapmtx);

	unit = dev2unit(dev);

	/* select device: tap or vmnet */
	if (unit & VMNET_DEV_MASK) {
		name = VMNET;
		tp->tap_flags |= TAP_VMNET;
	} else
		name = TAP;

	unit &= TAPMAXUNIT;

	TAPDEBUG("tapcreate(%s%d). minor = %#x\n", name, unit, minor(dev));

	/* generate fake MAC address: 00 bd xx xx xx unit_no */
	macaddr_hi = htons(0x00bd);
	bcopy(&macaddr_hi, &tp->arpcom.ac_enaddr[0], sizeof(short));
	bcopy(&ticks, &tp->arpcom.ac_enaddr[2], sizeof(long));
	tp->arpcom.ac_enaddr[5] = (u_char)unit;

	/* fill the rest and attach interface */
	ifp = &tp->tap_if;
	ifp->if_softc = tp;
	if_initname(ifp, name, unit);
	ifp->if_init = tapifinit;
	ifp->if_start = tapifstart;
	ifp->if_ioctl = tapifioctl;
	ifp->if_mtu = ETHERMTU;
	ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
	ifp->if_snd.ifq_maxlen = ifqmaxlen;

	dev->si_drv1 = tp;
	tp->tap_dev = dev;

	s = splimp();
	ether_ifattach(ifp, tp->arpcom.ac_enaddr);
	splx(s);

	mtx_lock(&tp->tap_mtx);
	tp->tap_flags |= TAP_INITED;
	mtx_unlock(&tp->tap_mtx);

	TAPDEBUG("interface %s is created. minor = %#x\n",
		ifp->if_xname, minor(dev));
} /* tapcreate */


/*
 * tapopen
 *
 * to open tunnel. must be superuser
 */
static int
tapopen(dev, flag, mode, td)
	struct cdev *dev;
	int		 flag;
	int		 mode;
	struct thread	*td;
{
	struct tap_softc	*tp = NULL;
	struct ifnet		*ifp = NULL;
	int			 s;

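	/*
	 * Unless the net.link.tap.user_open sysctl is set, only the
	 * superuser may open the device.
	 */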
	if (tapuopen == 0 && suser(td) != 0)
		return (EPERM);

	if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT)
		return (ENXIO);

	/*
	 * XXXRW: Non-atomic test-and-set of si_drv1.  Currently protected
	 * by Giant, but the race actually exists under memory pressure as
	 * well even when running with Giant, as malloc() may sleep.
	 */
	tp = dev->si_drv1;
	if (tp == NULL) {
		tapcreate(dev);
		tp = dev->si_drv1;
	}

	mtx_lock(&tp->tap_mtx);
	if (tp->tap_flags & TAP_OPEN) {
		mtx_unlock(&tp->tap_mtx);
		return (EBUSY);
	}

	bcopy(tp->arpcom.ac_enaddr, tp->ether_addr, sizeof(tp->ether_addr));
	tp->tap_pid = td->td_proc->p_pid;
	tp->tap_flags |= TAP_OPEN;
	ifp = &tp->tap_if;
	mtx_unlock(&tp->tap_mtx);

	s = splimp();
	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;
	splx(s);

	TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, minor(dev));

	return (0);
} /* tapopen */


/*
 * tapclose
 *
 * close the device - mark i/f down & delete routing info
 */
static int
tapclose(dev, foo, bar, td)
	struct cdev *dev;
	int		 foo;
	int		 bar;
	struct thread	*td;
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	int			s;

	/* junk all pending output */
	IF_DRAIN(&ifp->if_snd);

	/*
	 * do not bring the interface down, and do not do anything with
	 * the interface, if we are in VMnet mode; just close the device.
	 */

	mtx_lock(&tp->tap_mtx);
	if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) {
		mtx_unlock(&tp->tap_mtx);
		s = splimp();
		if_down(ifp);
		if (ifp->if_flags & IFF_RUNNING) {
			/* find internet addresses and delete routes */
			struct ifaddr	*ifa = NULL;

			/* In desperate need of ifaddr locking. */
			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
				if (ifa->ifa_addr->sa_family == AF_INET) {
					rtinit(ifa, (int)RTM_DELETE, 0);

					/* remove address from interface */
					bzero(ifa->ifa_addr,
						   sizeof(*(ifa->ifa_addr)));
					bzero(ifa->ifa_dstaddr,
						   sizeof(*(ifa->ifa_dstaddr)));
					bzero(ifa->ifa_netmask,
						   sizeof(*(ifa->ifa_netmask)));
				}
			}

			ifp->if_flags &= ~IFF_RUNNING;
		}
		splx(s);
	} else
		mtx_unlock(&tp->tap_mtx);

	funsetown(&tp->tap_sigio);
	selwakeuppri(&tp->tap_rsel, PZERO+1);

	mtx_lock(&tp->tap_mtx);
	tp->tap_flags &= ~TAP_OPEN;
	tp->tap_pid = 0;
	mtx_unlock(&tp->tap_mtx);

	TAPDEBUG("%s is closed. minor = %#x\n",
		ifp->if_xname, minor(dev));

	return (0);
} /* tapclose */


/*
 * tapifinit
 *
 * network interface initialization function
 */
static void
tapifinit(xtp)
	void	*xtp;
{
	struct tap_softc	*tp = (struct tap_softc *)xtp;
	struct ifnet		*ifp = &tp->tap_if;

	TAPDEBUG("initializing %s\n", ifp->if_xname);

	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;

	/* attempt to start output */
	tapifstart(ifp);
} /* tapifinit */


/*
 * tapifioctl
 *
 * Process an ioctl request on network interface
 */
static int
tapifioctl(ifp, cmd, data)
	struct ifnet	*ifp;
	u_long		 cmd;
	caddr_t		 data;
{
	struct tap_softc	*tp = (struct tap_softc *)(ifp->if_softc);
	struct ifstat		*ifs = NULL;
	int			 s, dummy;

	switch (cmd) {
		case SIOCSIFFLAGS: /* XXX -- just like vmnet does */
		case SIOCADDMULTI:
		case SIOCDELMULTI:
			break;

		case SIOCGIFSTATUS:
			s = splimp();
			ifs = (struct ifstat *)data;
			dummy = strlen(ifs->ascii);
			mtx_lock(&tp->tap_mtx);
			if (tp->tap_pid != 0 && dummy < sizeof(ifs->ascii))
				snprintf(ifs->ascii + dummy,
					sizeof(ifs->ascii) - dummy,
					"\tOpened by PID %d\n", tp->tap_pid);
			mtx_unlock(&tp->tap_mtx);
			splx(s);
			break;

		default:
			s = splimp();
			dummy = ether_ioctl(ifp, cmd, data);
			splx(s);
			return (dummy);
	}

	return (0);
} /* tapifioctl */


/*
 * tapifstart
 *
 * queue packets from higher level ready to put out
 */
static void
tapifstart(ifp)
	struct ifnet	*ifp;
{
	struct tap_softc	*tp = ifp->if_softc;
	int			 s;

	TAPDEBUG("%s starting\n", ifp->if_xname);

	/*
	 * do not junk pending output if we are in VMnet mode.
	 * XXX: can this do any harm because of queue overflow?
	 */

	mtx_lock(&tp->tap_mtx);
	if (((tp->tap_flags & TAP_VMNET) == 0) &&
	    ((tp->tap_flags & TAP_READY) != TAP_READY)) {
		struct mbuf	*m = NULL;

		mtx_unlock(&tp->tap_mtx);

		/* Unlocked read. */
		TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname,
		    tp->tap_flags);

		s = splimp();
		do {
			IF_DEQUEUE(&ifp->if_snd, m);
			if (m != NULL)
				m_freem(m);
			ifp->if_oerrors ++;
		} while (m != NULL);
		splx(s);

		return;
	}
	mtx_unlock(&tp->tap_mtx);

	s = splimp();
	ifp->if_flags |= IFF_OACTIVE;

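	/*
	 * Packets stay on the if_snd queue; tapread() dequeues them.
	 * All this routine does is notify a sleeping reader, deliver
	 * SIGIO to async listeners and wake up pollers.
	 */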
	if (ifp->if_snd.ifq_len != 0) {
		mtx_lock(&tp->tap_mtx);
		if (tp->tap_flags & TAP_RWAIT) {
			tp->tap_flags &= ~TAP_RWAIT;
			wakeup(tp);
		}

		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
			mtx_unlock(&tp->tap_mtx);
			pgsigio(&tp->tap_sigio, SIGIO, 0);
		} else
			mtx_unlock(&tp->tap_mtx);

		selwakeuppri(&tp->tap_rsel, PZERO+1);
		ifp->if_opackets ++; /* obytes are counted in ether_output */
	}

	ifp->if_flags &= ~IFF_OACTIVE;
	splx(s);
} /* tapifstart */


/*
 * tapioctl
 *
 * the cdevsw interface is now pretty minimal
 */
static int
tapioctl(dev, cmd, data, flag, td)
	struct cdev *dev;
	u_long		 cmd;
	caddr_t		 data;
	int		 flag;
	struct thread	*td;
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	struct tapinfo		*tapp = NULL;
	int			 s;
	int			 f;

	switch (cmd) {
		case TAPSIFINFO:
			s = splimp();
			tapp = (struct tapinfo *)data;
			ifp->if_mtu = tapp->mtu;
			ifp->if_type = tapp->type;
			ifp->if_baudrate = tapp->baudrate;
			splx(s);
			break;

		case TAPGIFINFO:
			tapp = (struct tapinfo *)data;
			tapp->mtu = ifp->if_mtu;
			tapp->type = ifp->if_type;
			tapp->baudrate = ifp->if_baudrate;
			break;

		case TAPSDEBUG:
			tapdebug = *(int *)data;
			break;

		case TAPGDEBUG:
			*(int *)data = tapdebug;
			break;

		case FIONBIO:
			break;

		case FIOASYNC:
			s = splimp();
			mtx_lock(&tp->tap_mtx);
			if (*(int *)data)
				tp->tap_flags |= TAP_ASYNC;
			else
				tp->tap_flags &= ~TAP_ASYNC;
			mtx_unlock(&tp->tap_mtx);
			splx(s);
			break;

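		/*
		 * FIONREAD reports the number of bytes in the first packet
		 * awaiting a read, i.e. the sum of the mbuf lengths in the
		 * chain at the head of the if_snd queue.
		 */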
		case FIONREAD:
			s = splimp();
			if (ifp->if_snd.ifq_head) {
				struct mbuf	*mb = ifp->if_snd.ifq_head;

				for(*(int *)data = 0;mb != NULL;mb = mb->m_next)
					*(int *)data += mb->m_len;
			} else
				*(int *)data = 0;
			splx(s);
			break;

		case FIOSETOWN:
			return (fsetown(*(int *)data, &tp->tap_sigio));

		case FIOGETOWN:
			*(int *)data = fgetown(&tp->tap_sigio);
			return (0);

		/* this is deprecated, FIOSETOWN should be used instead */
		case TIOCSPGRP:
			return (fsetown(-(*(int *)data), &tp->tap_sigio));

		/* this is deprecated, FIOGETOWN should be used instead */
		case TIOCGPGRP:
			*(int *)data = -fgetown(&tp->tap_sigio);
			return (0);

		/* VMware/VMnet port ioctl's */

		case SIOCGIFFLAGS:	/* get ifnet flags */
			bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
			break;

		case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
			f = *(int *)data;
			f &= 0x0fff;
			f &= ~IFF_CANTCHANGE;
			f |= IFF_UP;

			s = splimp();
			ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
			splx(s);
			break;

		case OSIOCGIFADDR:	/* get MAC address of the remote side */
		case SIOCGIFADDR:
			mtx_lock(&tp->tap_mtx);
			bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
			mtx_unlock(&tp->tap_mtx);
			break;

		case SIOCSIFADDR:	/* set MAC address of the remote side */
			mtx_lock(&tp->tap_mtx);
			bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
			mtx_unlock(&tp->tap_mtx);
			break;

		default:
			return (ENOTTY);
	}
	return (0);
} /* tapioctl */


/*
 * tapread
 *
 * the cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read
 */
static int
tapread(dev, uio, flag)
	struct cdev *dev;
	struct uio	*uio;
	int		 flag;
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	struct mbuf		*m = NULL;
	int			 error = 0, len, s;

	TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, minor(dev));

	mtx_lock(&tp->tap_mtx);
	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
		mtx_unlock(&tp->tap_mtx);

		/* Unlocked read. */
		TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n",
			ifp->if_xname, minor(dev), tp->tap_flags);

		return (EHOSTDOWN);
	}

	tp->tap_flags &= ~TAP_RWAIT;
	mtx_unlock(&tp->tap_mtx);

	/* sleep until we get a packet */
	do {
		s = splimp();
		IF_DEQUEUE(&ifp->if_snd, m);
		splx(s);

		if (m == NULL) {
			if (flag & O_NONBLOCK)
				return (EWOULDBLOCK);

			mtx_lock(&tp->tap_mtx);
			tp->tap_flags |= TAP_RWAIT;
			mtx_unlock(&tp->tap_mtx);
			error = tsleep(tp,PCATCH|(PZERO+1),"taprd",0);
			if (error)
				return (error);
		}
	} while (m == NULL);

	/* feed packet to bpf */
	BPF_MTAP(ifp, m);

	/* xfer packet to user space */
	while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) {
		len = min(uio->uio_resid, m->m_len);
		if (len == 0)
			break;

		error = uiomove(mtod(m, void *), len, uio);
		m = m_free(m);
	}

	if (m != NULL) {
		TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname,
			minor(dev));
		m_freem(m);
	}

	return (error);
} /* tapread */


/*
 * tapwrite
 *
 * the cdevsw write interface - an atomic write is a packet - or else!
 */
static int
tapwrite(dev, uio, flag)
	struct cdev *dev;
	struct uio	*uio;
	int		 flag;
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	struct mbuf		*m;
	int			 error = 0;

	TAPDEBUG("%s writing, minor = %#x\n",
		ifp->if_xname, minor(dev));

	if (uio->uio_resid == 0)
		return (0);

	if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
		TAPDEBUG("%s invalid packet len = %d, minor = %#x\n",
			ifp->if_xname, uio->uio_resid, minor(dev));

		return (EIO);
	}

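	/*
	 * The whole write must fit in a single packet of at most TAPMRU
	 * bytes; m_uiotombuf() copies the user buffer into an mbuf chain,
	 * which is then handed to the stack via if_input() as if it had
	 * arrived on the wire.
	 */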
	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0)) == NULL) {
		ifp->if_ierrors ++;
		return (error);
	}

	m->m_pkthdr.rcvif = ifp;

	/* Pass packet up to parent. */
	(*ifp->if_input)(ifp, m);
	ifp->if_ipackets ++; /* ibytes are counted in parent */

	return (0);
} /* tapwrite */


/*
 * tappoll
 *
 * the poll interface, this is only useful on reads
 * really. the write detect always returns true, write never blocks
 * anyway, it either accepts the packet or drops it
 */
static int
tappoll(dev, events, td)
	struct cdev *dev;
	int		 events;
	struct thread	*td;
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	int			 s, revents = 0;

	TAPDEBUG("%s polling, minor = %#x\n",
		ifp->if_xname, minor(dev));

	s = splimp();
	if (events & (POLLIN | POLLRDNORM)) {
		if (ifp->if_snd.ifq_len > 0) {
			TAPDEBUG("%s have data in queue. len = %d, " \
				"minor = %#x\n", ifp->if_xname,
				ifp->if_snd.ifq_len, minor(dev));

			revents |= (events & (POLLIN | POLLRDNORM));
		} else {
			TAPDEBUG("%s waiting for data, minor = %#x\n",
				ifp->if_xname, minor(dev));

			selrecord(td, &tp->tap_rsel);
		}
	}

	if (events & (POLLOUT | POLLWRNORM))
		revents |= (events & (POLLOUT | POLLWRNORM));

	splx(s);
	return (revents);
} /* tappoll */
887