/*
 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * BASED ON:
 * -------------------------------------------------------------------------
 *
 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
 * Nottingham University 1987.
 */

/*
 * $FreeBSD: head/sys/net/if_tap.c 135354 2004-09-17 03:55:50Z rwatson $
 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
 */

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/ttycom.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#include <sys/queue.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/route.h>

#include <netinet/in.h>

#include <net/if_tapvar.h>
#include <net/if_tap.h>

#define CDEV_NAME	"tap"
#define TAPDEBUG	if (tapdebug) printf

#define TAP		"tap"
#define VMNET		"vmnet"
#define TAPMAXUNIT	0x7fff
#define VMNET_DEV_MASK	CLONE_FLAG0

/* module */
static int		tapmodevent(module_t, int, void *);

/* device */
static void		tapclone(void *, char *, int, struct cdev **);
static void		tapcreate(struct cdev *);

/* network interface */
static void		tapifstart(struct ifnet *);
static int		tapifioctl(struct ifnet *, u_long, caddr_t);
static void		tapifinit(void *);

/* character device */
static d_open_t		tapopen;
static d_close_t	tapclose;
static d_read_t		tapread;
static d_write_t	tapwrite;
static d_ioctl_t	tapioctl;
static d_poll_t		tappoll;

static struct cdevsw	tap_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_PSEUDO | D_NEEDGIANT,
	.d_open =	tapopen,
	.d_close =	tapclose,
	.d_read =	tapread,
	.d_write =	tapwrite,
	.d_ioctl =	tapioctl,
	.d_poll =	tappoll,
	.d_name =	CDEV_NAME,
};

/*
 * All global variables in if_tap.c are locked with tapmtx, with the
 * exception of tapdebug, which is accessed unlocked; tapclones is
 * static at runtime.
 */
static struct mtx		tapmtx;
static int			tapdebug = 0;        /* debug flag   */
static SLIST_HEAD(, tap_softc)	taphead;             /* first device */
static struct clonedevs 	*tapclones;

MALLOC_DECLARE(M_TAP);
MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
DEV_MODULE(if_tap, tapmodevent, NULL);

/*
 * tapmodevent
 *
 * module event handler
 */
static int
tapmodevent(mod, type, data)
	module_t	 mod;
	int		 type;
	void		*data;
{
	static eventhandler_tag	 eh_tag = NULL;
	struct tap_softc	*tp = NULL;
	struct ifnet		*ifp = NULL;
	int			 s;

	switch (type) {
	case MOD_LOAD:

		/* initialize device */

		mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF);
		SLIST_INIT(&taphead);

		clone_setup(&tapclones);
		eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000);
		if (eh_tag == NULL) {
			clone_cleanup(&tapclones);
			mtx_destroy(&tapmtx);
			return (ENOMEM);
		}
		return (0);

	case MOD_UNLOAD:
		/*
		 * The EBUSY algorithm here can't quite atomically
		 * guarantee that this is race-free since we have to
		 * release the tap mtx to deregister the clone handler.
		 */
		mtx_lock(&tapmtx);
		SLIST_FOREACH(tp, &taphead, tap_next) {
			mtx_lock(&tp->tap_mtx);
			if (tp->tap_flags & TAP_OPEN) {
				mtx_unlock(&tp->tap_mtx);
				mtx_unlock(&tapmtx);
				return (EBUSY);
			}
			mtx_unlock(&tp->tap_mtx);
		}
		mtx_unlock(&tapmtx);

		EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);

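		/*
		 * No tap device is open at this point; walk the list again
		 * and detach and destroy every interface we created.
		 */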
		mtx_lock(&tapmtx);
		while ((tp = SLIST_FIRST(&taphead)) != NULL) {
			SLIST_REMOVE_HEAD(&taphead, tap_next);
			mtx_unlock(&tapmtx);

			ifp = &tp->tap_if;

			TAPDEBUG("detaching %s\n", ifp->if_xname);

			/* Unlocked read. */
			KASSERT(!(tp->tap_flags & TAP_OPEN),
				("%s flags is out of sync", ifp->if_xname));

			destroy_dev(tp->tap_dev);
			s = splimp();
			ether_ifdetach(ifp);
			splx(s);

			mtx_destroy(&tp->tap_mtx);
			free(tp, M_TAP);
			mtx_lock(&tapmtx);
		}
		mtx_unlock(&tapmtx);
		clone_cleanup(&tapclones);

		mtx_destroy(&tapmtx);

		break;

	default:
		return (EOPNOTSUPP);
	}

	return (0);
} /* tapmodevent */

/*
 * DEVFS handler
 *
 * We need to support two kinds of devices - tap and vmnet
 */
static void
tapclone(arg, name, namelen, dev)
	void	*arg;
	char	*name;
	int	 namelen;
	struct cdev **dev;
{
	u_int		extra;
	int		i, unit;
	char		*device_name = name;

	if (*dev != NULL)
		return;

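	/*
	 * Determine the device class and unit from the name being looked
	 * up: a bare "tap" or "vmnet" means "pick any free unit",
	 * otherwise parse the trailing unit number from the name.
	 */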
	device_name = TAP;
	extra = 0;
	if (strcmp(name, TAP) == 0) {
		unit = -1;
	} else if (strcmp(name, VMNET) == 0) {
		device_name = VMNET;
		extra = VMNET_DEV_MASK;
		unit = -1;
	} else if (dev_stdclone(name, NULL, device_name, &unit) != 1) {
		device_name = VMNET;
		extra = VMNET_DEV_MASK;
		if (dev_stdclone(name, NULL, device_name, &unit) != 1)
			return;
	}

	/* find any existing device, or allocate new unit number */
	i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra);
	if (i) {
		*dev = make_dev(&tap_cdevsw, unit2minor(unit | extra),
		     UID_ROOT, GID_WHEEL, 0600, "%s%d", device_name, unit);
		if (*dev != NULL)
			(*dev)->si_flags |= SI_CHEAPCLONE;
	}
} /* tapclone */

/*
 * tapcreate
 *
 * to create interface
 */
static void
tapcreate(dev)
	struct cdev *dev;
{
	struct ifnet		*ifp = NULL;
	struct tap_softc	*tp = NULL;
	unsigned short		 macaddr_hi;
	int			 unit, s;
	char			*name = NULL;

	dev->si_flags &= ~SI_CHEAPCLONE;

	/* allocate driver storage and create device */
	MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
	mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
	mtx_lock(&tapmtx);
	SLIST_INSERT_HEAD(&taphead, tp, tap_next);
	mtx_unlock(&tapmtx);

	unit = dev2unit(dev);

	/* select device: tap or vmnet */
	if (unit & VMNET_DEV_MASK) {
		name = VMNET;
		tp->tap_flags |= TAP_VMNET;
	} else
		name = TAP;

	unit &= TAPMAXUNIT;

	TAPDEBUG("tapcreate(%s%d). minor = %#x\n", name, unit, minor(dev));

	/* generate fake MAC address: 00 bd xx xx xx unit_no */
	macaddr_hi = htons(0x00bd);
	bcopy(&macaddr_hi, &tp->arpcom.ac_enaddr[0], sizeof(short));
	bcopy(&ticks, &tp->arpcom.ac_enaddr[2], sizeof(long));
	tp->arpcom.ac_enaddr[5] = (u_char)unit;

	/* fill the rest and attach interface */
	ifp = &tp->tap_if;
	ifp->if_softc = tp;
	if_initname(ifp, name, unit);
	ifp->if_init = tapifinit;
	ifp->if_start = tapifstart;
	ifp->if_ioctl = tapifioctl;
	ifp->if_mtu = ETHERMTU;
	ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
	ifp->if_snd.ifq_maxlen = ifqmaxlen;

	dev->si_drv1 = tp;
	tp->tap_dev = dev;

	s = splimp();
	ether_ifattach(ifp, tp->arpcom.ac_enaddr);
	splx(s);

	mtx_lock(&tp->tap_mtx);
	tp->tap_flags |= TAP_INITED;
	mtx_unlock(&tp->tap_mtx);

	TAPDEBUG("interface %s is created. minor = %#x\n",
		ifp->if_xname, minor(dev));
} /* tapcreate */

/*
 * tapopen
 *
 * to open tunnel. must be superuser
 */
static int
tapopen(dev, flag, mode, td)
	struct cdev *dev;
	int		 flag;
	int		 mode;
	struct thread	*td;
{
	struct tap_softc	*tp = NULL;
	struct ifnet		*ifp = NULL;
	int			 error, s;

	if ((error = suser(td)) != 0)
		return (error);

	if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT)
		return (ENXIO);

	/*
	 * XXXRW: Non-atomic test-and-set of si_drv1.  Currently protected
	 * by Giant, but the race actually exists under memory pressure as
	 * well even when running with Giant, as malloc() may sleep.
	 */
	tp = dev->si_drv1;
	if (tp == NULL) {
		tapcreate(dev);
		tp = dev->si_drv1;
	}

	mtx_lock(&tp->tap_mtx);
	if (tp->tap_flags & TAP_OPEN) {
		mtx_unlock(&tp->tap_mtx);
		return (EBUSY);
	}

	bcopy(tp->arpcom.ac_enaddr, tp->ether_addr, sizeof(tp->ether_addr));
	tp->tap_pid = td->td_proc->p_pid;
	tp->tap_flags |= TAP_OPEN;
	ifp = &tp->tap_if;
	mtx_unlock(&tp->tap_mtx);

	s = splimp();
	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;
	splx(s);

	TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, minor(dev));

	return (0);
} /* tapopen */

/*
 * tapclose
 *
 * close the device - mark i/f down & delete routing info
 */
static int
tapclose(dev, foo, bar, td)
	struct cdev *dev;
	int		 foo;
	int		 bar;
	struct thread	*td;
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	int			s;

	/* junk all pending output */
	IF_DRAIN(&ifp->if_snd);

	/*
	 * Do not bring the interface down, and do not do anything with
	 * the interface, if we are in VMnet mode.  Just close the device.
	 */

	mtx_lock(&tp->tap_mtx);
	if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) {
		mtx_unlock(&tp->tap_mtx);
		s = splimp();
		if_down(ifp);
		if (ifp->if_flags & IFF_RUNNING) {
			/* find internet addresses and delete routes */
			struct ifaddr	*ifa = NULL;

			/* In desperate need of ifaddr locking. */
			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
				if (ifa->ifa_addr->sa_family == AF_INET) {
					rtinit(ifa, (int)RTM_DELETE, 0);

					/* remove address from interface */
					bzero(ifa->ifa_addr,
						   sizeof(*(ifa->ifa_addr)));
					bzero(ifa->ifa_dstaddr,
						   sizeof(*(ifa->ifa_dstaddr)));
					bzero(ifa->ifa_netmask,
						   sizeof(*(ifa->ifa_netmask)));
				}
			}

			ifp->if_flags &= ~IFF_RUNNING;
		}
		splx(s);
	} else
		mtx_unlock(&tp->tap_mtx);

	funsetown(&tp->tap_sigio);
	selwakeuppri(&tp->tap_rsel, PZERO+1);

	mtx_lock(&tp->tap_mtx);
	tp->tap_flags &= ~TAP_OPEN;
	tp->tap_pid = 0;
	mtx_unlock(&tp->tap_mtx);

	TAPDEBUG("%s is closed. minor = %#x\n",
		ifp->if_xname, minor(dev));

	return (0);
} /* tapclose */

/*
 * tapifinit
 *
 * network interface initialization function
 */
static void
tapifinit(xtp)
	void	*xtp;
{
	struct tap_softc	*tp = (struct tap_softc *)xtp;
	struct ifnet		*ifp = &tp->tap_if;

	TAPDEBUG("initializing %s\n", ifp->if_xname);

	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;

	/* attempt to start output */
	tapifstart(ifp);
} /* tapifinit */

/*
 * tapifioctl
 *
 * Process an ioctl request on network interface
 */
static int
tapifioctl(ifp, cmd, data)
	struct ifnet	*ifp;
	u_long		 cmd;
	caddr_t		 data;
{
	struct tap_softc	*tp = (struct tap_softc *)(ifp->if_softc);
	struct ifstat		*ifs = NULL;
	int			 s, dummy;

	switch (cmd) {
		case SIOCSIFFLAGS: /* XXX -- just like vmnet does */
		case SIOCADDMULTI:
		case SIOCDELMULTI:
			break;

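		/*
		 * Append the PID of the controlling process to the textual
		 * interface status, if the buffer has room for it.
		 */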
		case SIOCGIFSTATUS:
			s = splimp();
			ifs = (struct ifstat *)data;
			dummy = strlen(ifs->ascii);
			mtx_lock(&tp->tap_mtx);
			if (tp->tap_pid != 0 && dummy < sizeof(ifs->ascii))
				snprintf(ifs->ascii + dummy,
					sizeof(ifs->ascii) - dummy,
					"\tOpened by PID %d\n", tp->tap_pid);
			mtx_unlock(&tp->tap_mtx);
			splx(s);
			break;

		default:
			s = splimp();
			dummy = ether_ioctl(ifp, cmd, data);
			splx(s);
			return (dummy);
	}

	return (0);
} /* tapifioctl */

/*
 * tapifstart
 *
 * queue packets from higher level ready to put out
 */
static void
tapifstart(ifp)
	struct ifnet	*ifp;
{
	struct tap_softc	*tp = ifp->if_softc;
	int			 s;

	TAPDEBUG("%s starting\n", ifp->if_xname);

	/*
	 * do not junk pending output if we are in VMnet mode.
	 * XXX: can this do any harm because of queue overflow?
	 */

	mtx_lock(&tp->tap_mtx);
	if (((tp->tap_flags & TAP_VMNET) == 0) &&
	    ((tp->tap_flags & TAP_READY) != TAP_READY)) {
		struct mbuf	*m = NULL;

		mtx_unlock(&tp->tap_mtx);

		/* Unlocked read. */
		TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname,
		    tp->tap_flags);

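		/*
		 * The device is not open, so there is nobody to read
		 * these frames; drop everything queued for output.
		 */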
		s = splimp();
		do {
			IF_DEQUEUE(&ifp->if_snd, m);
			if (m != NULL)
				m_freem(m);
			ifp->if_oerrors ++;
		} while (m != NULL);
		splx(s);

		return;
	}
	mtx_unlock(&tp->tap_mtx);

	s = splimp();
	ifp->if_flags |= IFF_OACTIVE;

	if (ifp->if_snd.ifq_len != 0) {
		mtx_lock(&tp->tap_mtx);
		if (tp->tap_flags & TAP_RWAIT) {
			tp->tap_flags &= ~TAP_RWAIT;
			wakeup(tp);
		}

		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
			mtx_unlock(&tp->tap_mtx);
			pgsigio(&tp->tap_sigio, SIGIO, 0);
		} else
			mtx_unlock(&tp->tap_mtx);

		selwakeuppri(&tp->tap_rsel, PZERO+1);
		ifp->if_opackets ++; /* obytes are counted in ether_output */
	}

	ifp->if_flags &= ~IFF_OACTIVE;
	splx(s);
} /* tapifstart */

/*
 * tapioctl
 *
 * the cdevsw interface is now pretty minimal
 */
static int
tapioctl(dev, cmd, data, flag, td)
	struct cdev *dev;
	u_long		 cmd;
	caddr_t		 data;
	int		 flag;
	struct thread	*td;
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	struct tapinfo		*tapp = NULL;
	int			 s;
	int			 f;

	switch (cmd) {
		case TAPSIFINFO:
			s = splimp();
			tapp = (struct tapinfo *)data;
			ifp->if_mtu = tapp->mtu;
			ifp->if_type = tapp->type;
			ifp->if_baudrate = tapp->baudrate;
			splx(s);
			break;

		case TAPGIFINFO:
			tapp = (struct tapinfo *)data;
			tapp->mtu = ifp->if_mtu;
			tapp->type = ifp->if_type;
			tapp->baudrate = ifp->if_baudrate;
			break;

		case TAPSDEBUG:
			tapdebug = *(int *)data;
			break;

		case TAPGDEBUG:
			*(int *)data = tapdebug;
			break;

		case FIONBIO:
			break;

		case FIOASYNC:
			s = splimp();
			mtx_lock(&tp->tap_mtx);
			if (*(int *)data)
				tp->tap_flags |= TAP_ASYNC;
			else
				tp->tap_flags &= ~TAP_ASYNC;
			mtx_unlock(&tp->tap_mtx);
			splx(s);
			break;

		case FIONREAD:
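			/*
			 * Report the number of bytes in the first packet
			 * waiting on the output queue; a read returns at
			 * most one packet at a time.
			 */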
			s = splimp();
			if (ifp->if_snd.ifq_head) {
				struct mbuf	*mb = ifp->if_snd.ifq_head;

				for(*(int *)data = 0;mb != NULL;mb = mb->m_next)
					*(int *)data += mb->m_len;
			} else
				*(int *)data = 0;
			splx(s);
			break;

		case FIOSETOWN:
			return (fsetown(*(int *)data, &tp->tap_sigio));

		case FIOGETOWN:
			*(int *)data = fgetown(&tp->tap_sigio);
			return (0);

		/* this is deprecated, FIOSETOWN should be used instead */
		case TIOCSPGRP:
			return (fsetown(-(*(int *)data), &tp->tap_sigio));

		/* this is deprecated, FIOGETOWN should be used instead */
		case TIOCGPGRP:
			*(int *)data = -fgetown(&tp->tap_sigio);
			return (0);

		/* VMware/VMnet port ioctl's */

		case SIOCGIFFLAGS:	/* get ifnet flags */
			bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
			break;

		case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
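			/*
			 * Sanitize the flags handed in by VMware: keep only
			 * the low 12 bits, never let userland set any of the
			 * IFF_CANTCHANGE bits and always force IFF_UP.
			 */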
			f = *(int *)data;
			f &= 0x0fff;
			f &= ~IFF_CANTCHANGE;
			f |= IFF_UP;

			s = splimp();
			ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
			splx(s);
			break;

		case OSIOCGIFADDR:	/* get MAC address of the remote side */
		case SIOCGIFADDR:
			mtx_lock(&tp->tap_mtx);
			bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
			mtx_unlock(&tp->tap_mtx);
			break;

		case SIOCSIFADDR:	/* set MAC address of the remote side */
			mtx_lock(&tp->tap_mtx);
			bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
			mtx_unlock(&tp->tap_mtx);
			break;

		default:
			return (ENOTTY);
	}
	return (0);
} /* tapioctl */

/*
 * tapread
 *
 * the cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read
 */
static int
tapread(dev, uio, flag)
	struct cdev *dev;
	struct uio	*uio;
	int		 flag;
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	struct mbuf		*m = NULL;
	int			 error = 0, len, s;

	TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, minor(dev));

	mtx_lock(&tp->tap_mtx);
	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
		mtx_unlock(&tp->tap_mtx);

		/* Unlocked read. */
		TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n",
			ifp->if_xname, minor(dev), tp->tap_flags);

		return (EHOSTDOWN);
	}

	tp->tap_flags &= ~TAP_RWAIT;
	mtx_unlock(&tp->tap_mtx);

	/* sleep until we get a packet */
	do {
		s = splimp();
		IF_DEQUEUE(&ifp->if_snd, m);
		splx(s);

		if (m == NULL) {
			if (flag & IO_NDELAY)
				return (EWOULDBLOCK);

			mtx_lock(&tp->tap_mtx);
			tp->tap_flags |= TAP_RWAIT;
			mtx_unlock(&tp->tap_mtx);
			error = tsleep(tp,PCATCH|(PZERO+1),"taprd",0);
			if (error)
				return (error);
		}
	} while (m == NULL);

	/* feed packet to bpf */
	BPF_MTAP(ifp, m);

	/* xfer packet to user space */
	while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) {
		len = min(uio->uio_resid, m->m_len);
		if (len == 0)
			break;

		error = uiomove(mtod(m, void *), len, uio);
		m = m_free(m);
	}

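	/*
	 * Whatever did not fit into the caller's buffer is dropped rather
	 * than requeued: a read always consumes the whole packet.
	 */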
	if (m != NULL) {
		TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname,
			minor(dev));
		m_freem(m);
	}

	return (error);
} /* tapread */

/*
 * tapwrite
 *
 * the cdevsw write interface - an atomic write is a packet - or else!
 */
static int
tapwrite(dev, uio, flag)
	struct cdev *dev;
	struct uio	*uio;
	int		 flag;
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	struct mbuf		*top = NULL, **mp = NULL, *m = NULL;
	int			 error = 0, tlen, mlen;

	TAPDEBUG("%s writing, minor = %#x\n",
		ifp->if_xname, minor(dev));

	if (uio->uio_resid == 0)
		return (0);

	if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
		TAPDEBUG("%s invalid packet len = %d, minor = %#x\n",
			ifp->if_xname, uio->uio_resid, minor(dev));

		return (EIO);
	}
	tlen = uio->uio_resid;

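	/*
	 * Copy the user buffer into a chain of mbufs: one header mbuf
	 * followed by as many ordinary mbufs as needed; a single write
	 * must carry exactly one Ethernet frame.
	 */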
	/* get a header mbuf */
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (ENOBUFS);
	mlen = MHLEN;

	top = 0;
	mp = &top;
	while ((error == 0) && (uio->uio_resid > 0)) {
		m->m_len = min(mlen, uio->uio_resid);
		error = uiomove(mtod(m, void *), m->m_len, uio);
		*mp = m;
		mp = &m->m_next;
		if (uio->uio_resid > 0) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				error = ENOBUFS;
				break;
			}
			mlen = MLEN;
		}
	}
	if (error) {
		ifp->if_ierrors ++;
		if (top)
			m_freem(top);
		return (error);
	}

	top->m_pkthdr.len = tlen;
	top->m_pkthdr.rcvif = ifp;

	/* Pass packet up to parent. */
	(*ifp->if_input)(ifp, top);
	ifp->if_ipackets ++; /* ibytes are counted in parent */

	return (0);
} /* tapwrite */

/*
 * tappoll
 *
 * the poll interface, this is only useful on reads
 * really. the write detect always returns true, write never blocks
 * anyway, it either accepts the packet or drops it
 */
static int
tappoll(dev, events, td)
	struct cdev *dev;
	int		 events;
	struct thread	*td;
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	int			 s, revents = 0;

	TAPDEBUG("%s polling, minor = %#x\n",
		ifp->if_xname, minor(dev));

	s = splimp();
	if (events & (POLLIN | POLLRDNORM)) {
		if (ifp->if_snd.ifq_len > 0) {
			TAPDEBUG("%s have data in queue. len = %d, " \
				"minor = %#x\n", ifp->if_xname,
				ifp->if_snd.ifq_len, minor(dev));

			revents |= (events & (POLLIN | POLLRDNORM));
		} else {
			TAPDEBUG("%s waiting for data, minor = %#x\n",
				ifp->if_xname, minor(dev));

			selrecord(td, &tp->tap_rsel);
		}
	}

	if (events & (POLLOUT | POLLWRNORM))
		revents |= (events & (POLLOUT | POLLWRNORM));

	splx(s);
	return (revents);
} /* tappoll */