/* if_tun.c revision 166497 */
1/*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/
2
3/*-
4 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
11 * user process to have its wicked way with. This driver has it's
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 *
16 * $FreeBSD: head/sys/net/if_tun.c 166497 2007-02-04 16:32:46Z bms $
17 */
18
19#include "opt_atalk.h"
20#include "opt_inet.h"
21#include "opt_inet6.h"
22#include "opt_ipx.h"
23#include "opt_mac.h"
24
25#include <sys/param.h>
26#include <sys/priv.h>
27#include <sys/proc.h>
28#include <sys/systm.h>
29#include <sys/mbuf.h>
30#include <sys/module.h>
31#include <sys/socket.h>
32#include <sys/fcntl.h>
33#include <sys/filio.h>
34#include <sys/sockio.h>
35#include <sys/ttycom.h>
36#include <sys/poll.h>
37#include <sys/selinfo.h>
38#include <sys/signalvar.h>
39#include <sys/filedesc.h>
40#include <sys/kernel.h>
41#include <sys/sysctl.h>
42#include <sys/conf.h>
43#include <sys/uio.h>
44#include <sys/malloc.h>
45#include <sys/random.h>
46
47#include <net/if.h>
48#include <net/if_clone.h>
49#include <net/if_types.h>
50#include <net/netisr.h>
51#include <net/route.h>
52#ifdef INET
53#include <netinet/in.h>
54#endif
55#include <net/bpf.h>
56#include <net/if_tun.h>
57
58#include <sys/queue.h>
59
60#include <security/mac/mac_framework.h>
61
62/*
63 * tun_list is protected by global tunmtx.  Other mutable fields are
64 * protected by tun->tun_mtx, or by their owning subsystem.  tun_dev is
65 * static for the duration of a tunnel interface.
66 */
/*
 * Per-tunnel state; one instance per /dev/tunN character device.
 */
struct tun_softc {
	TAILQ_ENTRY(tun_softc)	tun_list;	/* entry on global tunhead list */
	struct cdev *tun_dev;			/* backing character device */
	u_short	tun_flags;		/* misc flags */
#define	TUN_OPEN	0x0001		/* device is currently open */
#define	TUN_INITED	0x0002		/* softc has been initialized */
#define	TUN_RCOLL	0x0004		/* not used in this file */
#define	TUN_IASET	0x0008		/* interface address has been set */
#define	TUN_DSTADDR	0x0010		/* destination address has been set */
#define	TUN_LMODE	0x0020		/* reads prepend the dst sockaddr */
#define	TUN_RWAIT	0x0040		/* a reader is asleep in tunread() */
#define	TUN_ASYNC	0x0080		/* deliver SIGIO when readable */
#define	TUN_IFHEAD	0x0100		/* frames carry an address-family word */

#define TUN_READY       (TUN_OPEN | TUN_INITED)

	/*
	 * XXXRW: tun_pid is used to exclusively lock /dev/tun.  Is this
	 * actually needed?  Can we just return EBUSY if already open?
	 * Problem is that this involved inherent races when a tun device
	 * is handed off from one process to another, as opposed to just
	 * being slightly stale informationally.
	 */
	pid_t	tun_pid;		/* owning pid */
	struct	ifnet *tun_ifp;		/* the interface */
	struct  sigio *tun_sigio;	/* information for async I/O */
	struct	selinfo	tun_rsel;	/* read select */
	struct mtx	tun_mtx;	/* protect mutable softc fields */
};
96#define TUN2IFP(sc)	((sc)->tun_ifp)
97
98#define TUNDEBUG	if (tundebug) if_printf
99#define	TUNNAME		"tun"
100
101/*
102 * All mutable global variables in if_tun are locked using tunmtx, with
103 * the exception of tundebug, which is used unlocked, and tunclones,
104 * which is static after setup.
105 */
106static struct mtx tunmtx;
107static MALLOC_DEFINE(M_TUN, TUNNAME, "Tunnel Interface");
108static int tundebug = 0;
109static int tundclone = 1;
110static struct clonedevs *tunclones;
111static TAILQ_HEAD(,tun_softc)	tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
112SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
113
114SYSCTL_DECL(_net_link);
115SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
116    "IP tunnel software network interface.");
117SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tundclone, 0,
118    "Enable legacy devfs interface creation.");
119
120TUNABLE_INT("net.link.tun.devfs_cloning", &tundclone);
121
122static void	tunclone(void *arg, struct ucred *cred, char *name,
123		    int namelen, struct cdev **dev);
124static void	tuncreate(const char *name, struct cdev *dev);
125static int	tunifioctl(struct ifnet *, u_long, caddr_t);
126static int	tuninit(struct ifnet *);
127static int	tunmodevent(module_t, int, void *);
128static int	tunoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
129		    struct rtentry *rt);
130static void	tunstart(struct ifnet *);
131
132static int	tun_clone_create(struct if_clone *, int, caddr_t);
133static void	tun_clone_destroy(struct ifnet *);
134
135IFC_SIMPLE_DECLARE(tun, 0);
136
137static d_open_t		tunopen;
138static d_close_t	tunclose;
139static d_read_t		tunread;
140static d_write_t	tunwrite;
141static d_ioctl_t	tunioctl;
142static d_poll_t		tunpoll;
143static d_kqfilter_t	tunkqfilter;
144
145static int		tunkqread(struct knote *, long);
146static int		tunkqwrite(struct knote *, long);
147static void		tunkqdetach(struct knote *);
148
/* kqueue EVFILT_READ filter: fires when packets are queued on if_snd. */
static struct filterops tun_read_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	tunkqdetach,
	.f_event =	tunkqread,
};
155
/* kqueue EVFILT_WRITE filter: writes never block, always triggers. */
static struct filterops tun_write_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	tunkqdetach,
	.f_event =	tunkqwrite,
};
162
/* Character-device switch for /dev/tunN (Giant-locked pseudo device). */
static struct cdevsw tun_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_PSEUDO | D_NEEDGIANT,
	.d_open =	tunopen,
	.d_close =	tunclose,
	.d_read =	tunread,
	.d_write =	tunwrite,
	.d_ioctl =	tunioctl,
	.d_poll =	tunpoll,
	.d_kqfilter =	tunkqfilter,
	.d_name =	TUNNAME,
};
175
176static int
177tun_clone_create(struct if_clone *ifc, int unit, caddr_t params)
178{
179	struct cdev *dev;
180	int i;
181
182	/* find any existing device, or allocate new unit number */
183	i = clone_create(&tunclones, &tun_cdevsw, &unit, &dev, 0);
184	if (i) {
185		/* No preexisting struct cdev *, create one */
186		dev = make_dev(&tun_cdevsw, unit2minor(unit),
187		    UID_UUCP, GID_DIALER, 0600, "%s%d", ifc->ifc_name, unit);
188		if (dev != NULL) {
189			dev_ref(dev);
190			dev->si_flags |= SI_CHEAPCLONE;
191		}
192	}
193	tuncreate(ifc->ifc_name, dev);
194
195	return (0);
196}
197
/*
 * devfs dev_clone eventhandler: auto-create a tun device (and matching
 * interface) when an unrecognized "tun*" name is looked up in /dev.
 * On entry *dev is NULL unless an earlier handler satisfied the lookup.
 */
static void
tunclone(void *arg, struct ucred *cred, char *name, int namelen,
    struct cdev **dev)
{
	char devname[SPECNAMELEN + 1];
	int u, i, append_unit;

	if (*dev != NULL)
		return;

	/*
	 * If tun cloning is enabled, only the superuser can create an
	 * interface.
	 */
	if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)
		return;

	if (strcmp(name, TUNNAME) == 0) {
		u = -1;			/* bare "tun": allocate next free unit */
	} else if (dev_stdclone(name, NULL, TUNNAME, &u) != 1)
		return;	/* Don't recognise the name */
	if (u != -1 && u > IF_MAXUNIT)
		return;	/* Unit number too high */

	if (u == -1)
		append_unit = 1;
	else
		append_unit = 0;

	/* find any existing device, or allocate new unit number */
	i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0);
	if (i) {
		if (append_unit) {
			/* Rewrite bare "tun" to the concrete "tun<unit>". */
			namelen = snprintf(devname, sizeof(devname), "%s%d", name,
			    u);
			name = devname;
		}
		/* No preexisting struct cdev *, create one */
		*dev = make_dev(&tun_cdevsw, unit2minor(u),
		    UID_UUCP, GID_DIALER, 0600, "%s", name);
		if (*dev != NULL) {
			dev_ref(*dev);
			(*dev)->si_flags |= SI_CHEAPCLONE;
		}
	}

	/* Create the network interface that goes with the device node. */
	if_clone_create(name, namelen, NULL);
}
246
/*
 * Tear down one tunnel: detach BPF and the interface, destroy the
 * device node, and free the softc.  Caller must already have removed
 * tp from tunhead; the device must not be open (asserted below).
 */
static void
tun_destroy(struct tun_softc *tp)
{
	struct cdev *dev;

	/* Unlocked read. */
	KASSERT((tp->tun_flags & TUN_OPEN) == 0,
	    ("tununits is out of sync - unit %d", TUN2IFP(tp)->if_dunit));

	dev = tp->tun_dev;
	/* BPF is detached before the interface itself goes away. */
	bpfdetach(TUN2IFP(tp));
	if_detach(TUN2IFP(tp));
	if_free(TUN2IFP(tp));
	destroy_dev(dev);
	knlist_destroy(&tp->tun_rsel.si_note);
	mtx_destroy(&tp->tun_mtx);
	free(tp, M_TUN);
}
265
266static void
267tun_clone_destroy(struct ifnet *ifp)
268{
269	struct tun_softc *tp = ifp->if_softc;
270
271	mtx_lock(&tunmtx);
272	TAILQ_REMOVE(&tunhead, tp, tun_list);
273	mtx_unlock(&tunmtx);
274	tun_destroy(tp);
275}
276
/*
 * Module load/unload handler.  On load, initialize globals and hook the
 * devfs cloner; on unload, detach the cloner and destroy every tunnel
 * still on the list.
 */
static int
tunmodevent(module_t mod, int type, void *data)
{
	static eventhandler_tag tag;
	struct tun_softc *tp;

	switch (type) {
	case MOD_LOAD:
		mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
		clone_setup(&tunclones);
		tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
		if (tag == NULL)
			return (ENOMEM);
		if_clone_attach(&tun_cloner);
		break;
	case MOD_UNLOAD:
		if_clone_detach(&tun_cloner);
		EVENTHANDLER_DEREGISTER(dev_clone, tag);

		mtx_lock(&tunmtx);
		/*
		 * tunmtx is dropped around tun_destroy() (teardown may
		 * sleep), so re-fetch the list head on every pass.
		 */
		while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
			TAILQ_REMOVE(&tunhead, tp, tun_list);
			mtx_unlock(&tunmtx);
			tun_destroy(tp);
			mtx_lock(&tunmtx);
		}
		mtx_unlock(&tunmtx);
		clone_cleanup(&tunclones);
		mtx_destroy(&tunmtx);
		break;
	default:
		return EOPNOTSUPP;
	}
	return 0;
}
312
313static moduledata_t tun_mod = {
314	"if_tun",
315	tunmodevent,
316	0
317};
318
319DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
320
/*
 * if_start handler: a packet was placed on if_snd.  The packet stays
 * queued (tunread() pulls it off later); here we only wake a sleeping
 * reader, post SIGIO if async mode is on, and notify select/kqueue.
 */
static void
tunstart(struct ifnet *ifp)
{
	struct tun_softc *tp = ifp->if_softc;
	struct mbuf *m;

	TUNDEBUG(ifp,"%s starting\n", ifp->if_xname);
	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
		/* With ALTQ, only notify when a packet is really available. */
		IFQ_LOCK(&ifp->if_snd);
		IFQ_POLL_NOLOCK(&ifp->if_snd, m);
		if (m == NULL) {
			IFQ_UNLOCK(&ifp->if_snd);
			return;
		}
		IFQ_UNLOCK(&ifp->if_snd);
	}

	mtx_lock(&tp->tun_mtx);
	if (tp->tun_flags & TUN_RWAIT) {
		tp->tun_flags &= ~TUN_RWAIT;
		wakeup(tp);
	}
	/* pgsigio() is called with the softc mutex released. */
	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
		mtx_unlock(&tp->tun_mtx);
		pgsigio(&tp->tun_sigio, SIGIO, 0);
	} else
		mtx_unlock(&tp->tun_mtx);
	selwakeuppri(&tp->tun_rsel, PZERO + 1);
	KNOTE_UNLOCKED(&tp->tun_rsel.si_note, 0);
}
351
/*
 * Allocate the softc for device 'dev', create and attach the network
 * interface, and hook the softc to the device via si_drv1.
 *
 * XXX: should return an error code so it can fail.
 */
static void
tuncreate(const char *name, struct cdev *dev)
{
	struct tun_softc *sc;
	struct ifnet *ifp;

	/* The device is now permanently bound to an interface. */
	dev->si_flags &= ~SI_CHEAPCLONE;

	MALLOC(sc, struct tun_softc *, sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
	mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
	sc->tun_flags = TUN_INITED;
	sc->tun_dev = dev;
	mtx_lock(&tunmtx);
	TAILQ_INSERT_TAIL(&tunhead, sc, tun_list);
	mtx_unlock(&tunmtx);

	ifp = sc->tun_ifp = if_alloc(IFT_PPP);
	if (ifp == NULL)
		panic("%s%d: failed to if_alloc() interface.\n",
		    name, dev2unit(dev));
	if_initname(ifp, name, dev2unit(dev));
	ifp->if_mtu = TUNMTU;
	ifp->if_ioctl = tunifioctl;
	ifp->if_output = tunoutput;
	ifp->if_start = tunstart;
	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
	ifp->if_softc = sc;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	ifp->if_snd.ifq_drv_maxlen = 0;
	IFQ_SET_READY(&ifp->if_snd);
	knlist_init(&sc->tun_rsel.si_note, NULL, NULL, NULL, NULL);

	if_attach(ifp);
	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
	/* Publish the softc last so tunopen() sees a finished structure. */
	dev->si_drv1 = sc;
	TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
	    ifp->if_xname, minor(dev));
}
391
/*
 * Open the control device.  Lazily creates the interface on first open,
 * then grants exclusive ownership to the calling process via tun_pid.
 * Returns EBUSY if a different process already owns the tunnel.
 */
static int
tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
{
	struct ifnet	*ifp;
	struct tun_softc *tp;

	/*
	 * XXXRW: Non-atomic test and set of dev->si_drv1 requires
	 * synchronization.
	 */
	tp = dev->si_drv1;
	if (!tp) {
		/* First open of a device with no interface yet: create it. */
		tuncreate(TUNNAME, dev);
		tp = dev->si_drv1;
	}

	/*
	 * XXXRW: This use of tun_pid is subject to error due to the
	 * fact that a reference to the tunnel can live beyond the
	 * death of the process that created it.  Can we replace this
	 * with a simple busy flag?
	 */
	mtx_lock(&tp->tun_mtx);
	if (tp->tun_pid != 0 && tp->tun_pid != td->td_proc->p_pid) {
		mtx_unlock(&tp->tun_mtx);
		return (EBUSY);
	}
	tp->tun_pid = td->td_proc->p_pid;

	tp->tun_flags |= TUN_OPEN;
	mtx_unlock(&tp->tun_mtx);
	ifp = TUN2IFP(tp);
	TUNDEBUG(ifp, "open\n");

	return (0);
}
428
/*
 * tunclose - close the device - mark i/f down & delete
 * routing info
 */
static	int
tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
{
	struct tun_softc *tp;
	struct ifnet *ifp;
	int s;

	tp = dev->si_drv1;
	ifp = TUN2IFP(tp);

	/* Release ownership and clear the open flag. */
	mtx_lock(&tp->tun_mtx);
	tp->tun_flags &= ~TUN_OPEN;
	tp->tun_pid = 0;

	/*
	 * junk all pending output
	 */
	s = splimp();
	IFQ_PURGE(&ifp->if_snd);
	splx(s);
	mtx_unlock(&tp->tun_mtx);

	if (ifp->if_flags & IFF_UP) {
		s = splimp();
		if_down(ifp);
		splx(s);
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		struct ifaddr *ifa;

		s = splimp();
		/* find internet addresses and delete routes */
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
			if (ifa->ifa_addr->sa_family == AF_INET)
				/* Unlocked read. */
				rtinit(ifa, (int)RTM_DELETE,
				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		splx(s);
	}

	/* Drop async-I/O ownership and wake anyone still waiting. */
	funsetown(&tp->tun_sigio);
	selwakeuppri(&tp->tun_rsel, PZERO + 1);
	KNOTE_UNLOCKED(&tp->tun_rsel.si_note, 0);
	TUNDEBUG (ifp, "closed\n");
	return (0);
}
481
/*
 * Bring the interface up and running, and (for INET) record whether a
 * local and/or destination address has been configured by latching
 * TUN_IASET / TUN_DSTADDR in the softc flags.
 */
static int
tuninit(struct ifnet *ifp)
{
	struct tun_softc *tp = ifp->if_softc;
	struct ifaddr *ifa;
	int error = 0;

	TUNDEBUG(ifp, "tuninit\n");

	ifp->if_flags |= IFF_UP;
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *si;

			si = (struct sockaddr_in *)ifa->ifa_addr;
			mtx_lock(&tp->tun_mtx);
			if (si->sin_addr.s_addr)
				tp->tun_flags |= TUN_IASET;

			si = (struct sockaddr_in *)ifa->ifa_dstaddr;
			if (si && si->sin_addr.s_addr)
				tp->tun_flags |= TUN_DSTADDR;
			mtx_unlock(&tp->tun_mtx);
		}
	}
#endif
	return (error);
}
514
/*
 * Process an ioctl request.
 *
 * Network-side (SIOC*) ioctl handler for the interface; the
 * character-device ioctls live in tunioctl() below.
 */
static int
tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct ifreq *ifr = (struct ifreq *)data;
	struct tun_softc *tp = ifp->if_softc;
	struct ifstat *ifs;
	int		error = 0, s;

	s = splimp();
	switch(cmd) {
	case SIOCGIFSTATUS:
		/* Append the owning pid (if any) to the status text. */
		ifs = (struct ifstat *)data;
		mtx_lock(&tp->tun_mtx);
		if (tp->tun_pid)
			sprintf(ifs->ascii + strlen(ifs->ascii),
			    "\tOpened by PID %d\n", tp->tun_pid);
		mtx_unlock(&tp->tun_mtx);
		break;
	case SIOCSIFADDR:
		error = tuninit(ifp);
		TUNDEBUG(ifp, "address set, error=%d\n", error);
		break;
	case SIOCSIFDSTADDR:
		error = tuninit(ifp);
		TUNDEBUG(ifp, "destination address set, error=%d\n", error);
		break;
	case SIOCSIFMTU:
		ifp->if_mtu = ifr->ifr_mtu;
		TUNDEBUG(ifp, "mtu set\n");
		break;
	case SIOCSIFFLAGS:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* Accepted but nothing to do. */
		break;
	default:
		error = EINVAL;
	}
	splx(s);
	return (error);
}
558
/*
 * tunoutput - queue packets from higher level ready to put out.
 *
 * Prepends the optional link-mode sockaddr or address-family header,
 * taps BPF, and hands the packet to if_snd (which triggers tunstart()).
 */
static int
tunoutput(
	struct ifnet *ifp,
	struct mbuf *m0,
	struct sockaddr *dst,
	struct rtentry *rt)
{
	struct tun_softc *tp = ifp->if_softc;
	u_short cached_tun_flags;
	int error;
	u_int32_t af;

	TUNDEBUG (ifp, "tunoutput\n");

#ifdef MAC
	error = mac_check_ifnet_transmit(ifp, m0);
	if (error) {
		m_freem(m0);
		return (error);
	}
#endif

	/* Could be unlocked read? */
	mtx_lock(&tp->tun_mtx);
	cached_tun_flags = tp->tun_flags;
	mtx_unlock(&tp->tun_mtx);
	if ((cached_tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
		m_freem (m0);
		return (EHOSTDOWN);
	}

	if ((ifp->if_flags & IFF_UP) != IFF_UP) {
		m_freem (m0);
		return (EHOSTDOWN);
	}

	/* BPF writes need to be handled specially. */
	if (dst->sa_family == AF_UNSPEC) {
		/* The real address family is carried in sa_data. */
		bcopy(dst->sa_data, &af, sizeof(af));
		dst->sa_family = af;
	}

	if (bpf_peers_present(ifp->if_bpf)) {
		af = dst->sa_family;
		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
	}

	/* prepend sockaddr? this may abort if the mbuf allocation fails */
	if (cached_tun_flags & TUN_LMODE) {
		/* allocate space for sockaddr */
		M_PREPEND(m0, dst->sa_len, M_DONTWAIT);

		/* if allocation failed drop packet */
		if (m0 == NULL) {
			ifp->if_iqdrops++;
			ifp->if_oerrors++;
			return (ENOBUFS);
		} else {
			bcopy(dst, m0->m_data, dst->sa_len);
		}
	}

	if (cached_tun_flags & TUN_IFHEAD) {
		/* Prepend the address family */
		M_PREPEND(m0, 4, M_DONTWAIT);

		/* if allocation failed drop packet */
		if (m0 == NULL) {
			ifp->if_iqdrops++;
			ifp->if_oerrors++;
			return (ENOBUFS);
		} else
			*(u_int32_t *)m0->m_data = htonl(dst->sa_family);
	} else {
#ifdef INET
		if (dst->sa_family != AF_INET)
#endif
		{
			/* Without an AF header only IPv4 can be carried. */
			m_freem(m0);
			return (EAFNOSUPPORT);
		}
	}

	IFQ_HANDOFF(ifp, m0, error);
	if (error) {
		ifp->if_collisions++;
		return (ENOBUFS);
	}
	ifp->if_opackets++;
	return (0);
}
654
/*
 * the cdevsw interface is now pretty minimal.
 *
 * Character-device ioctl handler: tunnel configuration (TUN*), async
 * I/O setup (FIO*/TIOC*), and FIONREAD.
 */
static	int
tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
{
	int		s;
	int		error;
	struct tun_softc *tp = dev->si_drv1;
	struct tuninfo *tunp;

	switch (cmd) {
	case TUNSIFINFO:
		/* Set MTU/type/baudrate; raising MTU needs privilege. */
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < IF_MINMTU)
			return (EINVAL);
		if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
			error = priv_check(td, PRIV_NET_SETIFMTU);
			if (error)
				return (error);
		}
		TUN2IFP(tp)->if_mtu = tunp->mtu;
		TUN2IFP(tp)->if_type = tunp->type;
		TUN2IFP(tp)->if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = TUN2IFP(tp)->if_mtu;
		tunp->type = TUN2IFP(tp)->if_type;
		tunp->baudrate = TUN2IFP(tp)->if_baudrate;
		break;
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
	case TUNSLMODE:
		/* Link mode and IFHEAD mode are mutually exclusive. */
		mtx_lock(&tp->tun_mtx);
		if (*(int *)data) {
			tp->tun_flags |= TUN_LMODE;
			tp->tun_flags &= ~TUN_IFHEAD;
		} else
			tp->tun_flags &= ~TUN_LMODE;
		mtx_unlock(&tp->tun_mtx);
		break;
	case TUNSIFHEAD:
		mtx_lock(&tp->tun_mtx);
		if (*(int *)data) {
			tp->tun_flags |= TUN_IFHEAD;
			tp->tun_flags &= ~TUN_LMODE;
		} else
			tp->tun_flags &= ~TUN_IFHEAD;
		mtx_unlock(&tp->tun_mtx);
		break;
	case TUNGIFHEAD:
		/* Could be unlocked read? */
		mtx_lock(&tp->tun_mtx);
		*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
		mtx_unlock(&tp->tun_mtx);
		break;
	case TUNSIFMODE:
		/* deny this if UP */
		if (TUN2IFP(tp)->if_flags & IFF_UP)
			return(EBUSY);

		switch (*(int *)data & ~IFF_MULTICAST) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			TUN2IFP(tp)->if_flags &=
			    ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
			TUN2IFP(tp)->if_flags |= *(int *)data;
			break;
		default:
			return(EINVAL);
		}
		break;
	case TUNSIFPID:
		/* Take over ownership of the tunnel. */
		mtx_lock(&tp->tun_mtx);
		tp->tun_pid = curthread->td_proc->p_pid;
		mtx_unlock(&tp->tun_mtx);
		break;
	case FIONBIO:
		break;
	case FIOASYNC:
		mtx_lock(&tp->tun_mtx);
		if (*(int *)data)
			tp->tun_flags |= TUN_ASYNC;
		else
			tp->tun_flags &= ~TUN_ASYNC;
		mtx_unlock(&tp->tun_mtx);
		break;
	case FIONREAD:
		/* Report the byte length of the next queued packet. */
		s = splimp();
		if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
			struct mbuf *mb;
			IFQ_LOCK(&TUN2IFP(tp)->if_snd);
			IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
			for( *(int *)data = 0; mb != 0; mb = mb->m_next)
				*(int *)data += mb->m_len;
			IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
		} else
			*(int *)data = 0;
		splx(s);
		break;
	case FIOSETOWN:
		return (fsetown(*(int *)data, &tp->tun_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(&tp->tun_sigio);
		return (0);

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		return (fsetown(-(*(int *)data), &tp->tun_sigio));

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(&tp->tun_sigio);
		return (0);

	default:
		return (ENOTTY);
	}
	return (0);
}
781
/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 *
 * Blocks (unless O_NONBLOCK) until tunstart() signals a queued packet;
 * any part of the packet that does not fit in the caller's buffer is
 * dropped.
 */
static	int
tunread(struct cdev *dev, struct uio *uio, int flag)
{
	struct tun_softc *tp = dev->si_drv1;
	struct ifnet	*ifp = TUN2IFP(tp);
	struct mbuf	*m;
	int		error=0, len, s;

	TUNDEBUG (ifp, "read\n");
	mtx_lock(&tp->tun_mtx);
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		mtx_unlock(&tp->tun_mtx);
		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
		return (EHOSTDOWN);
	}

	tp->tun_flags &= ~TUN_RWAIT;
	mtx_unlock(&tp->tun_mtx);

	s = splimp();
	do {
		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			if (flag & O_NONBLOCK) {
				splx(s);
				return (EWOULDBLOCK);
			}
			/* Flag ourselves as waiting, then sleep on tp. */
			mtx_lock(&tp->tun_mtx);
			tp->tun_flags |= TUN_RWAIT;
			mtx_unlock(&tp->tun_mtx);
			if ((error = tsleep(tp, PCATCH | (PZERO + 1),
					"tunread", 0)) != 0) {
				splx(s);
				return (error);
			}
		}
	} while (m == NULL);
	splx(s);

	/* Copy out as much of the packet as the caller asked for. */
	while (m && uio->uio_resid > 0 && error == 0) {
		len = min(uio->uio_resid, m->m_len);
		if (len != 0)
			error = uiomove(mtod(m, void *), len, uio);
		m = m_free(m);
	}

	if (m) {
		/* Remainder did not fit: discard it. */
		TUNDEBUG(ifp, "Dropping mbuf\n");
		m_freem(m);
	}
	return (error);
}
838
/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 *
 * Strips the optional address-family header (TUN_IFHEAD), taps BPF,
 * and injects the packet into the appropriate netisr.
 */
static	int
tunwrite(struct cdev *dev, struct uio *uio, int flag)
{
	struct tun_softc *tp = dev->si_drv1;
	struct ifnet	*ifp = TUN2IFP(tp);
	struct mbuf	*m;
	int		error = 0;
	uint32_t	family;
	int 		isr;

	TUNDEBUG(ifp, "tunwrite\n");

	if ((ifp->if_flags & IFF_UP) != IFF_UP)
		/* ignore silently */
		return (0);

	if (uio->uio_resid == 0)
		return (0);

	if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
		TUNDEBUG(ifp, "len=%d!\n", uio->uio_resid);
		return (EIO);
	}

	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0, M_PKTHDR)) == NULL) {
		ifp->if_ierrors++;
		return (error);
	}

	m->m_pkthdr.rcvif = ifp;
#ifdef MAC
	mac_create_mbuf_from_ifnet(ifp, m);
#endif

	/* Could be unlocked read? */
	mtx_lock(&tp->tun_mtx);
	if (tp->tun_flags & TUN_IFHEAD) {
		mtx_unlock(&tp->tun_mtx);
		/* Pull the 4-byte AF header into the first mbuf if needed. */
		if (m->m_len < sizeof(family) &&
		    (m = m_pullup(m, sizeof(family))) == NULL)
			return (ENOBUFS);
		family = ntohl(*mtod(m, u_int32_t *));
		m_adj(m, sizeof(family));
	} else {
		mtx_unlock(&tp->tun_mtx);
		family = AF_INET;
	}

	BPF_MTAP2(ifp, &family, sizeof(family), m);

	/* Map the address family to a netisr queue. */
	switch (family) {
#ifdef INET
	case AF_INET:
		isr = NETISR_IP;
		break;
#endif
#ifdef INET6
	case AF_INET6:
		isr = NETISR_IPV6;
		break;
#endif
#ifdef IPX
	case AF_IPX:
		isr = NETISR_IPX;
		break;
#endif
#ifdef NETATALK
	case AF_APPLETALK:
		isr = NETISR_ATALK2;
		break;
#endif
	default:
		m_freem(m);
		return (EAFNOSUPPORT);
	}
	/* First chunk of an mbuf contains good junk */
	if (harvest.point_to_point)
		random_harvest(m, 16, 3, 0, RANDOM_NET);
	ifp->if_ibytes += m->m_pkthdr.len;
	ifp->if_ipackets++;
	netisr_dispatch(isr, m);
	return (0);
}
925
926/*
927 * tunpoll - the poll interface, this is only useful on reads
928 * really. The write detect always returns true, write never blocks
929 * anyway, it either accepts the packet or drops it.
930 */
931static	int
932tunpoll(struct cdev *dev, int events, struct thread *td)
933{
934	int		s;
935	struct tun_softc *tp = dev->si_drv1;
936	struct ifnet	*ifp = TUN2IFP(tp);
937	int		revents = 0;
938	struct mbuf	*m;
939
940	s = splimp();
941	TUNDEBUG(ifp, "tunpoll\n");
942
943	if (events & (POLLIN | POLLRDNORM)) {
944		IFQ_LOCK(&ifp->if_snd);
945		IFQ_POLL_NOLOCK(&ifp->if_snd, m);
946		if (m != NULL) {
947			TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
948			revents |= events & (POLLIN | POLLRDNORM);
949		} else {
950			TUNDEBUG(ifp, "tunpoll waiting\n");
951			selrecord(td, &tp->tun_rsel);
952		}
953		IFQ_UNLOCK(&ifp->if_snd);
954	}
955	if (events & (POLLOUT | POLLWRNORM))
956		revents |= events & (POLLOUT | POLLWRNORM);
957
958	splx(s);
959	return (revents);
960}
961
962/*
963 * tunkqfilter - support for the kevent() system call.
964 */
965static int
966tunkqfilter(struct cdev *dev, struct knote *kn)
967{
968	int			s;
969	struct tun_softc	*tp = dev->si_drv1;
970	struct ifnet	*ifp = TUN2IFP(tp);
971
972	s = splimp();
973	switch(kn->kn_filter) {
974	case EVFILT_READ:
975		TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
976		    ifp->if_xname, minor(dev));
977		kn->kn_fop = &tun_read_filterops;
978		break;
979
980	case EVFILT_WRITE:
981		TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
982		    ifp->if_xname, minor(dev));
983		kn->kn_fop = &tun_write_filterops;
984		break;
985
986	default:
987		TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
988		    ifp->if_xname, minor(dev));
989		splx(s);
990		return(EINVAL);
991	}
992	splx(s);
993
994	kn->kn_hook = (caddr_t) dev;
995	knlist_add(&tp->tun_rsel.si_note, kn, 0);
996
997	return (0);
998}
999
1000/*
1001 * Return true of there is data in the interface queue.
1002 */
1003static int
1004tunkqread(struct knote *kn, long hint)
1005{
1006	int			ret, s;
1007	struct cdev		*dev = (struct cdev *)(kn->kn_hook);
1008	struct tun_softc	*tp = dev->si_drv1;
1009	struct ifnet	*ifp = TUN2IFP(tp);
1010
1011	s = splimp();
1012	if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
1013		TUNDEBUG(ifp,
1014		    "%s have data in the queue.  Len = %d, minor = %#x\n",
1015		    ifp->if_xname, ifp->if_snd.ifq_len, minor(dev));
1016		ret = 1;
1017	} else {
1018		TUNDEBUG(ifp,
1019		    "%s waiting for data, minor = %#x\n", ifp->if_xname,
1020		    minor(dev));
1021		ret = 0;
1022	}
1023	splx(s);
1024
1025	return (ret);
1026}
1027
1028/*
1029 * Always can write, always return MTU in kn->data.
1030 */
1031static int
1032tunkqwrite(struct knote *kn, long hint)
1033{
1034	int			s;
1035	struct tun_softc	*tp = ((struct cdev *)kn->kn_hook)->si_drv1;
1036	struct ifnet	*ifp = TUN2IFP(tp);
1037
1038	s = splimp();
1039	kn->kn_data = ifp->if_mtu;
1040	splx(s);
1041
1042	return (1);
1043}
1044
1045static void
1046tunkqdetach(struct knote *kn)
1047{
1048	struct tun_softc	*tp = ((struct cdev *)kn->kn_hook)->si_drv1;
1049
1050	knlist_remove(&tp->tun_rsel.si_note, kn, 0);
1051}
1052