/* if_tap.c — FreeBSD revision 159079 */
1/*-
2 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * BASED ON:
27 * -------------------------------------------------------------------------
28 *
29 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
30 * Nottingham University 1987.
31 */
32
33/*
34 * $FreeBSD: head/sys/net/if_tap.c 159079 2006-05-30 20:08:12Z marius $
35 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
36 */
37
38#include "opt_inet.h"
39
40#include <sys/param.h>
41#include <sys/conf.h>
42#include <sys/fcntl.h>
43#include <sys/filio.h>
44#include <sys/kernel.h>
45#include <sys/malloc.h>
46#include <sys/mbuf.h>
47#include <sys/module.h>
48#include <sys/poll.h>
49#include <sys/proc.h>
50#include <sys/selinfo.h>
51#include <sys/signalvar.h>
52#include <sys/socket.h>
53#include <sys/sockio.h>
54#include <sys/sysctl.h>
55#include <sys/systm.h>
56#include <sys/ttycom.h>
57#include <sys/uio.h>
58#include <sys/queue.h>
59
60#include <net/bpf.h>
61#include <net/ethernet.h>
62#include <net/if.h>
63#include <net/if_dl.h>
64#include <net/route.h>
65#include <net/if_types.h>
66
67#include <netinet/in.h>
68
69#include <net/if_tapvar.h>
70#include <net/if_tap.h>
71
72
#define CDEV_NAME	"tap"
/* Debug printf: emits output only when the tapdebug sysctl is non-zero. */
#define TAPDEBUG	if (tapdebug) printf

#define TAP		"tap"		/* device-name prefix for tapN nodes */
#define VMNET		"vmnet"		/* device-name prefix for vmnetN nodes */
#define TAPMAXUNIT	0x7fff		/* highest unit number accepted at open */
#define VMNET_DEV_MASK	CLONE_FLAG0	/* unit-number flag marking a vmnet device */
80
/* module load/unload handler */
static int		tapmodevent(module_t, int, void *);

/* devfs cloning: create device nodes on demand, and the per-node setup */
static void		tapclone(void *, struct ucred *, char *, int,
			    struct cdev **);
static void		tapcreate(struct cdev *);

/* network interface methods hooked into struct ifnet */
static void		tapifstart(struct ifnet *);
static int		tapifioctl(struct ifnet *, u_long, caddr_t);
static void		tapifinit(void *);

/* character device methods hooked into tap_cdevsw */
static d_open_t		tapopen;
static d_close_t	tapclose;
static d_read_t		tapread;
static d_write_t	tapwrite;
static d_ioctl_t	tapioctl;
static d_poll_t		tappoll;
static d_kqfilter_t	tapkqfilter;

/* kqueue(2) filter callbacks */
static int		tapkqread(struct knote *, long);
static int		tapkqwrite(struct knote *, long);
static void		tapkqdetach(struct knote *);
107
/*
 * EVFILT_READ filter: the event fires when the interface send queue
 * holds at least one packet (see tapkqread).
 */
static struct filterops	tap_read_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	tapkqdetach,
	.f_event =	tapkqread,
};

/*
 * EVFILT_WRITE filter: the device never blocks on write, so the event
 * is always true (see tapkqwrite).
 */
static struct filterops	tap_write_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	tapkqdetach,
	.f_event =	tapkqwrite,
};
121
/*
 * Character device switch for /dev/tapN and /dev/vmnetN nodes.
 * D_NEEDGIANT: the driver still depends on Giant for some of its
 * synchronization (see the XXXRW comment in tapopen).
 */
static struct cdevsw	tap_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_PSEUDO | D_NEEDGIANT,
	.d_open =	tapopen,
	.d_close =	tapclose,
	.d_read =	tapread,
	.d_write =	tapwrite,
	.d_ioctl =	tapioctl,
	.d_poll =	tappoll,
	.d_name =	CDEV_NAME,
	.d_kqfilter =	tapkqfilter,
};
134
/*
 * All global variables in if_tap.c are locked with tapmtx, with the
 * exception of tapdebug, which is accessed unlocked; tapclones is
 * static at runtime.
 */
static struct mtx		tapmtx;
static int			tapdebug = 0;        /* debug flag   */
static int			tapuopen = 0;        /* allow user open() */
static SLIST_HEAD(, tap_softc)	taphead;             /* first device */
static struct clonedevs 	*tapclones;          /* clone-unit bookkeeping */

MALLOC_DECLARE(M_TAP);
MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");

/* net.link.tap sysctl tree: user_open policy and debug knob */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
    "Ethernet tunnel software network interface");
SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
	"Allow user to open /dev/tap (based on node permissions)");
SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");

DEV_MODULE(if_tap, tapmodevent, NULL);
158
159/*
160 * tapmodevent
161 *
162 * module event handler
163 */
static int
tapmodevent(module_t mod, int type, void *data)
{
	static eventhandler_tag	 eh_tag = NULL;
	struct tap_softc	*tp = NULL;
	struct ifnet		*ifp = NULL;
	int			 s;

	switch (type) {
	case MOD_LOAD:

		/* intitialize device */

		mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF);
		SLIST_INIT(&taphead);

		clone_setup(&tapclones);
		/* register for dev_clone events so open("/dev/tapN") can
		 * create device nodes on demand */
		eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000);
		if (eh_tag == NULL) {
			/* undo partial setup before failing the load */
			clone_cleanup(&tapclones);
			mtx_destroy(&tapmtx);
			return (ENOMEM);
		}
		return (0);

	case MOD_UNLOAD:
		/*
		 * The EBUSY algorithm here can't quite atomically
		 * guarantee that this is race-free since we have to
		 * release the tap mtx to deregister the clone handler.
		 */
		/* first pass: refuse to unload while any device is open */
		mtx_lock(&tapmtx);
		SLIST_FOREACH(tp, &taphead, tap_next) {
			mtx_lock(&tp->tap_mtx);
			if (tp->tap_flags & TAP_OPEN) {
				mtx_unlock(&tp->tap_mtx);
				mtx_unlock(&tapmtx);
				return (EBUSY);
			}
			mtx_unlock(&tp->tap_mtx);
		}
		mtx_unlock(&tapmtx);

		/* stop new devices from being cloned into existence */
		EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);

		/*
		 * second pass: tear each softc down.  tapmtx is dropped
		 * around the per-device teardown because ether_ifdetach
		 * and friends may sleep; the list head is re-read each
		 * iteration after reacquiring the lock.
		 */
		mtx_lock(&tapmtx);
		while ((tp = SLIST_FIRST(&taphead)) != NULL) {
			SLIST_REMOVE_HEAD(&taphead, tap_next);
			mtx_unlock(&tapmtx);

			ifp = tp->tap_ifp;

			TAPDEBUG("detaching %s\n", ifp->if_xname);

			/* Unlocked read. */
			KASSERT(!(tp->tap_flags & TAP_OPEN),
				("%s flags is out of sync", ifp->if_xname));

			knlist_destroy(&tp->tap_rsel.si_note);
			destroy_dev(tp->tap_dev);
			s = splimp();
			ether_ifdetach(ifp);
			if_free_type(ifp, IFT_ETHER);
			splx(s);

			mtx_destroy(&tp->tap_mtx);
			free(tp, M_TAP);
			mtx_lock(&tapmtx);
		}
		mtx_unlock(&tapmtx);
		clone_cleanup(&tapclones);

		mtx_destroy(&tapmtx);

		break;

	default:
		return (EOPNOTSUPP);
	}

	return (0);
} /* tapmodevent */
246
247
248/*
249 * DEVFS handler
250 *
251 * We need to support two kind of devices - tap and vmnet
252 */
static void
tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev)
{
	u_int		extra;
	int		i, unit;
	char		*device_name = name;

	/* another dev_clone handler already satisfied this lookup */
	if (*dev != NULL)
		return;

	/*
	 * Decide which flavor is being asked for and which unit.
	 * A bare "tap" or "vmnet" means "any free unit" (unit = -1);
	 * otherwise parse "tapN", and failing that, "vmnetN".
	 */
	device_name = TAP;
	extra = 0;
	if (strcmp(name, TAP) == 0) {
		unit = -1;
	} else if (strcmp(name, VMNET) == 0) {
		device_name = VMNET;
		extra = VMNET_DEV_MASK;	/* vmnet units carry this flag bit */
		unit = -1;
	} else if (dev_stdclone(name, NULL, device_name, &unit) != 1) {
		device_name = VMNET;
		extra = VMNET_DEV_MASK;
		if (dev_stdclone(name, NULL, device_name, &unit) != 1)
			return;	/* name is neither tapN nor vmnetN */
	}

	/* find any existing device, or allocate new unit number */
	i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra);
	if (i) {
		/* non-zero means a fresh unit: create the devfs node */
		*dev = make_dev(&tap_cdevsw, unit2minor(unit | extra),
		     UID_ROOT, GID_WHEEL, 0600, "%s%d", device_name, unit);
		if (*dev != NULL) {
			dev_ref(*dev);
			(*dev)->si_flags |= SI_CHEAPCLONE;
		}
	}
} /* tapclone */
289
290
291/*
292 * tapcreate
293 *
294 * to create interface
295 */
296static void
297tapcreate(struct cdev *dev)
298{
299	struct ifnet		*ifp = NULL;
300	struct tap_softc	*tp = NULL;
301	unsigned short		 macaddr_hi;
302	int			 unit, s;
303	char			*name = NULL;
304	u_char			eaddr[6];
305
306	dev->si_flags &= ~SI_CHEAPCLONE;
307
308	/* allocate driver storage and create device */
309	MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
310	mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
311	mtx_lock(&tapmtx);
312	SLIST_INSERT_HEAD(&taphead, tp, tap_next);
313	mtx_unlock(&tapmtx);
314
315	unit = dev2unit(dev);
316
317	/* select device: tap or vmnet */
318	if (unit & VMNET_DEV_MASK) {
319		name = VMNET;
320		tp->tap_flags |= TAP_VMNET;
321	} else
322		name = TAP;
323
324	unit &= TAPMAXUNIT;
325
326	TAPDEBUG("tapcreate(%s%d). minor = %#x\n", name, unit, minor(dev));
327
328	/* generate fake MAC address: 00 bd xx xx xx unit_no */
329	macaddr_hi = htons(0x00bd);
330	bcopy(&macaddr_hi, eaddr, sizeof(short));
331	bcopy(&ticks, &eaddr[2], sizeof(long));
332	eaddr[5] = (u_char)unit;
333
334	/* fill the rest and attach interface */
335	ifp = tp->tap_ifp = if_alloc(IFT_ETHER);
336	if (ifp == NULL)
337		panic("%s%d: can not if_alloc()", name, unit);
338	ifp->if_softc = tp;
339	if_initname(ifp, name, unit);
340	ifp->if_init = tapifinit;
341	ifp->if_start = tapifstart;
342	ifp->if_ioctl = tapifioctl;
343	ifp->if_mtu = ETHERMTU;
344	ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
345	ifp->if_snd.ifq_maxlen = ifqmaxlen;
346
347	dev->si_drv1 = tp;
348	tp->tap_dev = dev;
349
350	s = splimp();
351	ether_ifattach(ifp, eaddr);
352	splx(s);
353
354	mtx_lock(&tp->tap_mtx);
355	tp->tap_flags |= TAP_INITED;
356	mtx_unlock(&tp->tap_mtx);
357
358	knlist_init(&tp->tap_rsel.si_note, NULL, NULL, NULL, NULL);
359
360	TAPDEBUG("interface %s is created. minor = %#x\n",
361		ifp->if_xname, minor(dev));
362} /* tapcreate */
363
364
365/*
366 * tapopen
367 *
368 * to open tunnel. must be superuser
369 */
static int
tapopen(struct cdev *dev, int flag, int mode, struct thread *td)
{
	struct tap_softc	*tp = NULL;
	struct ifnet		*ifp = NULL;
	int			 s;

	/* unless net.link.tap.user_open is set, only the superuser may open */
	if (tapuopen == 0 && suser(td) != 0)
		return (EPERM);

	if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT)
		return (ENXIO);

	/*
	 * XXXRW: Non-atomic test-and-set of si_drv1.  Currently protected
	 * by Giant, but the race actually exists under memory pressure as
	 * well even when running with Giant, as malloc() may sleep.
	 */
	tp = dev->si_drv1;
	if (tp == NULL) {
		/* first open of this node: build the softc and ifnet */
		tapcreate(dev);
		tp = dev->si_drv1;
	}

	/* exclusive open: only one descriptor may hold the device */
	mtx_lock(&tp->tap_mtx);
	if (tp->tap_flags & TAP_OPEN) {
		mtx_unlock(&tp->tap_mtx);
		return (EBUSY);
	}

	/* snapshot the interface MAC and record the opening process */
	bcopy(IF_LLADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr));
	tp->tap_pid = td->td_proc->p_pid;
	tp->tap_flags |= TAP_OPEN;
	ifp = tp->tap_ifp;
	mtx_unlock(&tp->tap_mtx);

	s = splimp();
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	splx(s);

	TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, minor(dev));

	return (0);
} /* tapopen */
415
416
417/*
418 * tapclose
419 *
420 * close the device - mark i/f down & delete routing info
421 */
static int
tapclose(struct cdev *dev, int foo, int bar, struct thread *td)
{
	struct ifaddr		*ifa;
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = tp->tap_ifp;
	int			s;

	/* junk all pending output */
	IF_DRAIN(&ifp->if_snd);

	/*
	 * do not bring the interface down, and do not anything with
	 * interface, if we are in VMnet mode. just close the device.
	 */

	mtx_lock(&tp->tap_mtx);
	if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) {
		/* tap mode with the interface up: mark it down and
		 * flush routes/addresses, as the link is now gone */
		mtx_unlock(&tp->tap_mtx);
		s = splimp();
		if_down(ifp);
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
				rtinit(ifa, (int)RTM_DELETE, 0);
			}
			if_purgeaddrs(ifp);
			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		}
		splx(s);
	} else
		mtx_unlock(&tp->tap_mtx);

	/* drop SIGIO registration and wake any poll/kevent waiters */
	funsetown(&tp->tap_sigio);
	selwakeuppri(&tp->tap_rsel, PZERO+1);
	KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0);

	/* mark closed; the softc itself persists until module unload */
	mtx_lock(&tp->tap_mtx);
	tp->tap_flags &= ~TAP_OPEN;
	tp->tap_pid = 0;
	mtx_unlock(&tp->tap_mtx);

	TAPDEBUG("%s is closed. minor = %#x\n",
		ifp->if_xname, minor(dev));

	return (0);
} /* tapclose */
468
469
470/*
471 * tapifinit
472 *
473 * network interface initialization function
474 */
475static void
476tapifinit(void *xtp)
477{
478	struct tap_softc	*tp = (struct tap_softc *)xtp;
479	struct ifnet		*ifp = tp->tap_ifp;
480
481	TAPDEBUG("initializing %s\n", ifp->if_xname);
482
483	ifp->if_drv_flags |= IFF_DRV_RUNNING;
484	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
485
486	/* attempt to start output */
487	tapifstart(ifp);
488} /* tapifinit */
489
490
491/*
492 * tapifioctl
493 *
494 * Process an ioctl request on network interface
495 */
static int
tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tap_softc	*tp = (struct tap_softc *)(ifp->if_softc);
	struct ifstat		*ifs = NULL;
	int			 s, dummy;

	switch (cmd) {
		case SIOCSIFFLAGS: /* XXX -- just like vmnet does */
		case SIOCADDMULTI:
		case SIOCDELMULTI:
			/* accepted but require no driver action */
			break;

		case SIOCGIFSTATUS:
			/* append "Opened by PID n" to any status text
			 * already placed in ifs->ascii by upper layers */
			s = splimp();
			ifs = (struct ifstat *)data;
			dummy = strlen(ifs->ascii);
			mtx_lock(&tp->tap_mtx);
			if (tp->tap_pid != 0 && dummy < sizeof(ifs->ascii))
				snprintf(ifs->ascii + dummy,
					sizeof(ifs->ascii) - dummy,
					"\tOpened by PID %d\n", tp->tap_pid);
			mtx_unlock(&tp->tap_mtx);
			splx(s);
			break;

		default:
			/* everything else is generic Ethernet handling */
			s = splimp();
			dummy = ether_ioctl(ifp, cmd, data);
			splx(s);
			return (dummy);
			/* NOT REACHED */
	}

	return (0);
} /* tapifioctl */
532
533
534/*
535 * tapifstart
536 *
537 * queue packets from higher level ready to put out
538 */
539static void
540tapifstart(struct ifnet *ifp)
541{
542	struct tap_softc	*tp = ifp->if_softc;
543	int			 s;
544
545	TAPDEBUG("%s starting\n", ifp->if_xname);
546
547	/*
548	 * do not junk pending output if we are in VMnet mode.
549	 * XXX: can this do any harm because of queue overflow?
550	 */
551
552	mtx_lock(&tp->tap_mtx);
553	if (((tp->tap_flags & TAP_VMNET) == 0) &&
554	    ((tp->tap_flags & TAP_READY) != TAP_READY)) {
555		struct mbuf	*m = NULL;
556
557		mtx_unlock(&tp->tap_mtx);
558
559		/* Unlocked read. */
560		TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname,
561		    tp->tap_flags);
562
563		s = splimp();
564		do {
565			IF_DEQUEUE(&ifp->if_snd, m);
566			if (m != NULL)
567				m_freem(m);
568			ifp->if_oerrors ++;
569		} while (m != NULL);
570		splx(s);
571
572		return;
573	}
574	mtx_unlock(&tp->tap_mtx);
575
576	s = splimp();
577	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
578
579	if (ifp->if_snd.ifq_len != 0) {
580		mtx_lock(&tp->tap_mtx);
581		if (tp->tap_flags & TAP_RWAIT) {
582			tp->tap_flags &= ~TAP_RWAIT;
583			wakeup(tp);
584		}
585
586		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
587			mtx_unlock(&tp->tap_mtx);
588			pgsigio(&tp->tap_sigio, SIGIO, 0);
589		} else
590			mtx_unlock(&tp->tap_mtx);
591
592		selwakeuppri(&tp->tap_rsel, PZERO+1);
593		KNOTE_UNLOCKED(&tp->tap_rsel.si_note, 0);
594		ifp->if_opackets ++; /* obytes are counted in ether_output */
595	}
596
597	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
598	splx(s);
599} /* tapifstart */
600
601
602/*
603 * tapioctl
604 *
605 * the cdevsw interface is now pretty minimal
606 */
static int
tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = tp->tap_ifp;
	struct tapinfo		*tapp = NULL;
	int			 s;
	int			 f;

	switch (cmd) {
		case TAPSIFINFO:
			/* set interface mtu/type/baudrate from userland;
			 * NOTE(review): values are not validated here */
			s = splimp();
			tapp = (struct tapinfo *)data;
			ifp->if_mtu = tapp->mtu;
			ifp->if_type = tapp->type;
			ifp->if_baudrate = tapp->baudrate;
			splx(s);
			break;

		case TAPGIFINFO:
			/* report current interface mtu/type/baudrate */
			tapp = (struct tapinfo *)data;
			tapp->mtu = ifp->if_mtu;
			tapp->type = ifp->if_type;
			tapp->baudrate = ifp->if_baudrate;
			break;

		case TAPSDEBUG:
			tapdebug = *(int *)data;
			break;

		case TAPGDEBUG:
			*(int *)data = tapdebug;
			break;

		case FIONBIO:
			/* non-blocking mode is handled via the open-file
			 * flag in tapread(); nothing to store here */
			break;

		case FIOASYNC:
			/* enable/disable SIGIO delivery on arriving packets */
			s = splimp();
			mtx_lock(&tp->tap_mtx);
			if (*(int *)data)
				tp->tap_flags |= TAP_ASYNC;
			else
				tp->tap_flags &= ~TAP_ASYNC;
			mtx_unlock(&tp->tap_mtx);
			splx(s);
			break;

		case FIONREAD:
			/* report the byte length of the first queued packet
			 * (walks the mbuf chain of the head packet only) */
			s = splimp();
			if (ifp->if_snd.ifq_head) {
				struct mbuf	*mb = ifp->if_snd.ifq_head;

				for(*(int *)data = 0;mb != NULL;mb = mb->m_next)
					*(int *)data += mb->m_len;
			} else
				*(int *)data = 0;
			splx(s);
			break;

		case FIOSETOWN:
			return (fsetown(*(int *)data, &tp->tap_sigio));

		case FIOGETOWN:
			*(int *)data = fgetown(&tp->tap_sigio);
			return (0);

		/* this is deprecated, FIOSETOWN should be used instead */
		case TIOCSPGRP:
			return (fsetown(-(*(int *)data), &tp->tap_sigio));

		/* this is deprecated, FIOGETOWN should be used instead */
		case TIOCGPGRP:
			*(int *)data = -fgetown(&tp->tap_sigio);
			return (0);

		/* VMware/VMnet port ioctl's */

		case SIOCGIFFLAGS:	/* get ifnet flags */
			bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
			break;

		case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
			/* accept only the low 12 bits, never allow the
			 * unchangeable flags to be cleared, force IFF_UP */
			f = *(intptr_t *)data;
			f &= 0x0fff;
			f &= ~IFF_CANTCHANGE;
			f |= IFF_UP;

			s = splimp();
			ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
			splx(s);
			break;

		case OSIOCGIFADDR:	/* get MAC address of the remote side */
		case SIOCGIFADDR:
			mtx_lock(&tp->tap_mtx);
			bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
			mtx_unlock(&tp->tap_mtx);
			break;

		case SIOCSIFADDR:	/* set MAC address of the remote side */
			mtx_lock(&tp->tap_mtx);
			bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
			mtx_unlock(&tp->tap_mtx);
			break;

		default:
			return (ENOTTY);
	}
	return (0);
} /* tapioctl */
718
719
720/*
721 * tapread
722 *
723 * the cdevsw read interface - reads a packet at a time, or at
724 * least as much of a packet as can be read
725 */
static int
tapread(struct cdev *dev, struct uio *uio, int flag)
{
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = tp->tap_ifp;
	struct mbuf		*m = NULL;
	int			 error = 0, len, s;

	TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, minor(dev));

	/* reads are refused until the device is open AND initialized */
	mtx_lock(&tp->tap_mtx);
	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
		mtx_unlock(&tp->tap_mtx);

		/* Unlocked read. */
		TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n",
			ifp->if_xname, minor(dev), tp->tap_flags);

		return (EHOSTDOWN);
	}

	tp->tap_flags &= ~TAP_RWAIT;
	mtx_unlock(&tp->tap_mtx);

	/* sleep until we get a packet */
	do {
		s = splimp();
		IF_DEQUEUE(&ifp->if_snd, m);
		splx(s);

		if (m == NULL) {
			if (flag & O_NONBLOCK)
				return (EWOULDBLOCK);

			/* set TAP_RWAIT so tapifstart() knows to wake us,
			 * then sleep until a packet is queued (PCATCH
			 * allows the sleep to be interrupted by a signal) */
			mtx_lock(&tp->tap_mtx);
			tp->tap_flags |= TAP_RWAIT;
			mtx_unlock(&tp->tap_mtx);
			error = tsleep(tp,PCATCH|(PZERO+1),"taprd",0);
			if (error)
				return (error);
		}
	} while (m == NULL);

	/* feed packet to bpf */
	BPF_MTAP(ifp, m);

	/* xfer packet to user space */
	while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) {
		len = min(uio->uio_resid, m->m_len);
		if (len == 0)
			break;

		error = uiomove(mtod(m, void *), len, uio);
		m = m_free(m);
	}

	/* whatever did not fit in the user buffer is dropped, not requeued */
	if (m != NULL) {
		TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname,
			minor(dev));
		m_freem(m);
	}

	return (error);
} /* tapread */
790
791
792/*
793 * tapwrite
794 *
795 * the cdevsw write interface - an atomic write is a packet - or else!
796 */
797static int
798tapwrite(struct cdev *dev, struct uio *uio, int flag)
799{
800	struct tap_softc	*tp = dev->si_drv1;
801	struct ifnet		*ifp = tp->tap_ifp;
802	struct mbuf		*m;
803	int			 error = 0;
804
805	TAPDEBUG("%s writting, minor = %#x\n",
806		ifp->if_xname, minor(dev));
807
808	if (uio->uio_resid == 0)
809		return (0);
810
811	if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
812		TAPDEBUG("%s invalid packet len = %d, minor = %#x\n",
813			ifp->if_xname, uio->uio_resid, minor(dev));
814
815		return (EIO);
816	}
817
818	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, ETHER_ALIGN)) == NULL) {
819		ifp->if_ierrors ++;
820		return (error);
821	}
822
823	m->m_pkthdr.rcvif = ifp;
824
825	/* Pass packet up to parent. */
826	(*ifp->if_input)(ifp, m);
827	ifp->if_ipackets ++; /* ibytes are counted in parent */
828
829	return (0);
830} /* tapwrite */
831
832
833/*
834 * tappoll
835 *
836 * the poll interface, this is only useful on reads
837 * really. the write detect always returns true, write never blocks
838 * anyway, it either accepts the packet or drops it
839 */
840static int
841tappoll(struct cdev *dev, int events, struct thread *td)
842{
843	struct tap_softc	*tp = dev->si_drv1;
844	struct ifnet		*ifp = tp->tap_ifp;
845	int			 s, revents = 0;
846
847	TAPDEBUG("%s polling, minor = %#x\n",
848		ifp->if_xname, minor(dev));
849
850	s = splimp();
851	if (events & (POLLIN | POLLRDNORM)) {
852		if (ifp->if_snd.ifq_len > 0) {
853			TAPDEBUG("%s have data in queue. len = %d, " \
854				"minor = %#x\n", ifp->if_xname,
855				ifp->if_snd.ifq_len, minor(dev));
856
857			revents |= (events & (POLLIN | POLLRDNORM));
858		} else {
859			TAPDEBUG("%s waiting for data, minor = %#x\n",
860				ifp->if_xname, minor(dev));
861
862			selrecord(td, &tp->tap_rsel);
863		}
864	}
865
866	if (events & (POLLOUT | POLLWRNORM))
867		revents |= (events & (POLLOUT | POLLWRNORM));
868
869	splx(s);
870	return (revents);
871} /* tappoll */
872
873
874/*
875 * tap_kqfilter
876 *
877 * support for kevent() system call
878 */
static int
tapkqfilter(struct cdev *dev, struct knote *kn)
{
    	int			 s;
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = tp->tap_ifp;

	/* attach the appropriate filterops for the requested filter */
	s = splimp();
	switch (kn->kn_filter) {
	case EVFILT_READ:
		TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n",
			ifp->if_xname, minor(dev));
		kn->kn_fop = &tap_read_filterops;
		break;

	case EVFILT_WRITE:
		TAPDEBUG("%s kqfilter: EVFILT_WRITE, minor = %#x\n",
			ifp->if_xname, minor(dev));
		kn->kn_fop = &tap_write_filterops;
		break;

	default:
		TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n",
			ifp->if_xname, minor(dev));
		splx(s);
		return (EINVAL);
		/* NOT REACHED */
	}
	splx(s);

	/* both filters hang off the read selinfo's knote list */
	kn->kn_hook = (caddr_t) dev;
	knlist_add(&tp->tap_rsel.si_note, kn, 0);

	return (0);
} /* tapkqfilter */
914
915
916/*
917 * tap_kqread
918 *
919 * Return true if there is data in the interface queue
920 */
921static int
922tapkqread(struct knote *kn, long hint)
923{
924	int			 ret, s;
925	struct cdev		*dev = (struct cdev *)(kn->kn_hook);
926	struct tap_softc	*tp = dev->si_drv1;
927	struct ifnet		*ifp = tp->tap_ifp;
928
929	s = splimp();
930	if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
931		TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n",
932			ifp->if_xname, ifp->if_snd.ifq_len, minor(dev));
933		ret = 1;
934	} else {
935		TAPDEBUG("%s waiting for data, minor = %#x\n",
936			ifp->if_xname, minor(dev));
937		ret = 0;
938	}
939	splx(s);
940
941	return (ret);
942} /* tapkqread */
943
944
945/*
946 * tap_kqwrite
947 *
948 * Always can write. Return the MTU in kn->data
949 */
950static int
951tapkqwrite(struct knote *kn, long hint)
952{
953	int			 s;
954	struct tap_softc	*tp = ((struct cdev *) kn->kn_hook)->si_drv1;
955	struct ifnet		*ifp = tp->tap_ifp;
956
957	s = splimp();
958	kn->kn_data = ifp->if_mtu;
959	splx(s);
960
961	return (1);
962} /* tapkqwrite */
963
964
965static void
966tapkqdetach(struct knote *kn)
967{
968	struct tap_softc	*tp = ((struct cdev *) kn->kn_hook)->si_drv1;
969
970	knlist_remove(&tp->tap_rsel.si_note, kn, 0);
971} /* tapkqdetach */
972
973