/* if_tun.c revision 160038 */
1/*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/
2
3/*-
4 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
11 * user process to have its wicked way with. This driver has it's
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 *
16 * $FreeBSD: head/sys/net/if_tun.c 160038 2006-06-29 19:22:05Z yar $
17 */
18
19#include "opt_atalk.h"
20#include "opt_inet.h"
21#include "opt_inet6.h"
22#include "opt_ipx.h"
23#include "opt_mac.h"
24
25#include <sys/param.h>
26#include <sys/proc.h>
27#include <sys/systm.h>
28#include <sys/mac.h>
29#include <sys/mbuf.h>
30#include <sys/module.h>
31#include <sys/socket.h>
32#include <sys/fcntl.h>
33#include <sys/filio.h>
34#include <sys/sockio.h>
35#include <sys/ttycom.h>
36#include <sys/poll.h>
37#include <sys/selinfo.h>
38#include <sys/signalvar.h>
39#include <sys/filedesc.h>
40#include <sys/kernel.h>
41#include <sys/sysctl.h>
42#include <sys/conf.h>
43#include <sys/uio.h>
44#include <sys/malloc.h>
45#include <sys/random.h>
46
47#include <net/if.h>
48#include <net/if_types.h>
49#include <net/netisr.h>
50#include <net/route.h>
51#ifdef INET
52#include <netinet/in.h>
53#endif
54#include <net/bpf.h>
55#include <net/if_tun.h>
56
57#include <sys/queue.h>
58
/*
 * tun_list is protected by global tunmtx.  Other mutable fields are
 * protected by tun->tun_mtx, or by their owning subsystem.  tun_dev is
 * static for the duration of a tunnel interface.
 */
struct tun_softc {
	TAILQ_ENTRY(tun_softc)	tun_list;	/* entry on global tunhead */
	struct cdev *tun_dev;			/* backing /dev/tunN node */
	u_short	tun_flags;		/* misc flags */
#define	TUN_OPEN	0x0001		/* control device is open */
#define	TUN_INITED	0x0002		/* softc fully initialized */
#define	TUN_RCOLL	0x0004		/* read select collision (defined but
					 * apparently unused in this file) */
#define	TUN_IASET	0x0008		/* an IPv4 if address is configured */
#define	TUN_DSTADDR	0x0010		/* an IPv4 dst address is configured */
#define	TUN_LMODE	0x0020		/* prepend dst sockaddr on output */
#define	TUN_IFHEAD	0x0100		/* prepend 4-byte AF on read/write */
#define	TUN_RWAIT	0x0040		/* a reader sleeps in tunread() */
#define	TUN_ASYNC	0x0080		/* post SIGIO when a packet queues */

#define TUN_READY       (TUN_OPEN | TUN_INITED)

	/*
	 * XXXRW: tun_pid is used to exclusively lock /dev/tun.  Is this
	 * actually needed?  Can we just return EBUSY if already open?
	 * Problem is that this involved inherent races when a tun device
	 * is handed off from one process to another, as opposed to just
	 * being slightly stale informationally.
	 */
	pid_t	tun_pid;		/* owning pid */
	struct	ifnet *tun_ifp;		/* the interface */
	struct  sigio *tun_sigio;	/* information for async I/O */
	struct	selinfo	tun_rsel;	/* read select */
	struct mtx	tun_mtx;	/* protect mutable softc fields */
};
/* Map a softc to its network interface. */
#define TUN2IFP(sc)	((sc)->tun_ifp)
94
/* Debug printf, gated by the debug.if_tun_debug sysctl below. */
#define TUNDEBUG	if (tundebug) if_printf
#define	TUNNAME		"tun"

/*
 * All mutable global variables in if_tun are locked using tunmtx, with
 * the exception of tundebug, which is used unlocked, and tunclones,
 * which is static after setup.
 */
static struct mtx tunmtx;
static MALLOC_DEFINE(M_TUN, TUNNAME, "Tunnel Interface");
static int tundebug = 0;
static struct clonedevs *tunclones;
static TAILQ_HEAD(,tun_softc)	tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
109
/* Internal helpers. */
static void	tunclone(void *arg, struct ucred *cred, char *name,
		    int namelen, struct cdev **dev);
static void	tuncreate(struct cdev *dev);
static int	tunifioctl(struct ifnet *, u_long, caddr_t);
static int	tuninit(struct ifnet *);
static int	tunmodevent(module_t, int, void *);
static int	tunoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
		    struct rtentry *rt);
static void	tunstart(struct ifnet *);

/* cdevsw entry points for the control device. */
static d_open_t		tunopen;
static d_close_t	tunclose;
static d_read_t		tunread;
static d_write_t	tunwrite;
static d_ioctl_t	tunioctl;
static d_poll_t		tunpoll;
126
/*
 * Character device switch for /dev/tunN.  D_NEEDGIANT: the entry
 * points still run under the Giant lock.
 */
static struct cdevsw tun_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_PSEUDO | D_NEEDGIANT,
	.d_open =	tunopen,
	.d_close =	tunclose,
	.d_read =	tunread,
	.d_write =	tunwrite,
	.d_ioctl =	tunioctl,
	.d_poll =	tunpoll,
	.d_name =	TUNNAME,
};
138
/*
 * dev_clone event handler: resolve an open of "tun" (allocate the
 * lowest free unit) or "tunN" (that specific unit) to a struct cdev,
 * creating the device node on first use.
 */
static void
tunclone(void *arg, struct ucred *cred, char *name, int namelen,
    struct cdev **dev)
{
	int u, i;

	/* Another clone handler already satisfied this lookup. */
	if (*dev != NULL)
		return;

	if (strcmp(name, TUNNAME) == 0) {
		u = -1;			/* -1 asks clone_create for any unit */
	} else if (dev_stdclone(name, NULL, TUNNAME, &u) != 1)
		return;	/* Don't recognise the name */
	if (u != -1 && u > IF_MAXUNIT)
		return;	/* Unit number too high */

	/* find any existing device, or allocate new unit number */
	i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0);
	if (i) {
		/* No preexisting struct cdev *, create one */
		*dev = make_dev(&tun_cdevsw, unit2minor(u),
		    UID_UUCP, GID_DIALER, 0600, "tun%d", u);
		if (*dev != NULL) {
			dev_ref(*dev);
			(*dev)->si_flags |= SI_CHEAPCLONE;
		}
	}
}
167
/*
 * Tear down one tunnel: detach it from BPF and the interface list,
 * free the ifnet, destroy the device node, and free the softc.
 * Caller must already have removed tp from tunhead and must not hold
 * tunmtx (see tunmodevent(), which drops the lock around this call —
 * presumably because destroy_dev()/if_detach() may sleep).
 */
static void
tun_destroy(struct tun_softc *tp)
{
	struct cdev *dev;

	/* Unlocked read. */
	KASSERT((tp->tun_flags & TUN_OPEN) == 0,
	    ("tununits is out of sync - unit %d", TUN2IFP(tp)->if_dunit));

	dev = tp->tun_dev;
	bpfdetach(TUN2IFP(tp));
	if_detach(TUN2IFP(tp));
	if_free(TUN2IFP(tp));
	destroy_dev(dev);
	mtx_destroy(&tp->tun_mtx);
	free(tp, M_TUN);
}
185
186static int
187tunmodevent(module_t mod, int type, void *data)
188{
189	static eventhandler_tag tag;
190	struct tun_softc *tp;
191
192	switch (type) {
193	case MOD_LOAD:
194		mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
195		clone_setup(&tunclones);
196		tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
197		if (tag == NULL)
198			return (ENOMEM);
199		break;
200	case MOD_UNLOAD:
201		EVENTHANDLER_DEREGISTER(dev_clone, tag);
202
203		mtx_lock(&tunmtx);
204		while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
205			TAILQ_REMOVE(&tunhead, tp, tun_list);
206			mtx_unlock(&tunmtx);
207			tun_destroy(tp);
208			mtx_lock(&tunmtx);
209		}
210		mtx_unlock(&tunmtx);
211		clone_cleanup(&tunclones);
212		mtx_destroy(&tunmtx);
213		break;
214	default:
215		return EOPNOTSUPP;
216	}
217	return 0;
218}
219
/* Module glue: register if_tun at pseudo-device initialization time. */
static moduledata_t tun_mod = {
	"if_tun",
	tunmodevent,
	0
};

DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
227
/*
 * Interface start routine.  Called by the stack when a packet has been
 * queued on if_snd; nothing is dequeued here (tunread() does that) —
 * we only wake a sleeping reader, post SIGIO if requested, and notify
 * pollers.
 */
static void
tunstart(struct ifnet *ifp)
{
	struct tun_softc *tp = ifp->if_softc;
	struct mbuf *m;

	/* With ALTQ, skip the wakeups entirely if the queue is empty. */
	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
		IFQ_LOCK(&ifp->if_snd);
		IFQ_POLL_NOLOCK(&ifp->if_snd, m);
		if (m == NULL) {
			IFQ_UNLOCK(&ifp->if_snd);
			return;
		}
		IFQ_UNLOCK(&ifp->if_snd);
	}

	mtx_lock(&tp->tun_mtx);
	if (tp->tun_flags & TUN_RWAIT) {
		tp->tun_flags &= ~TUN_RWAIT;
		wakeup(tp);	/* unblock the tsleep() in tunread() */
	}
	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
		/*
		 * NOTE(review): the mutex is released before pgsigio(),
		 * presumably to avoid holding it across signal delivery.
		 */
		mtx_unlock(&tp->tun_mtx);
		pgsigio(&tp->tun_sigio, SIGIO, 0);
	} else
		mtx_unlock(&tp->tun_mtx);
	selwakeuppri(&tp->tun_rsel, PZERO + 1);
}
256
/* XXX: should return an error code so it can fail. */
/*
 * Allocate and initialize the softc and ifnet for the unit backing
 * `dev', attach the interface to the stack and to BPF, and hang the
 * softc off dev->si_drv1.  Called on first open of a unit.
 */
static void
tuncreate(struct cdev *dev)
{
	struct tun_softc *sc;
	struct ifnet *ifp;

	/* Device is now in real use; don't let devfs reclaim it. */
	dev->si_flags &= ~SI_CHEAPCLONE;

	MALLOC(sc, struct tun_softc *, sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
	mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
	sc->tun_flags = TUN_INITED;
	sc->tun_dev = dev;
	mtx_lock(&tunmtx);
	TAILQ_INSERT_TAIL(&tunhead, sc, tun_list);
	mtx_unlock(&tunmtx);

	ifp = sc->tun_ifp = if_alloc(IFT_PPP);
	if (ifp == NULL)
		panic("%s%d: failed to if_alloc() interface.\n",
		    TUNNAME, dev2unit(dev));
	if_initname(ifp, TUNNAME, dev2unit(dev));
	ifp->if_mtu = TUNMTU;
	ifp->if_ioctl = tunifioctl;
	ifp->if_output = tunoutput;
	ifp->if_start = tunstart;
	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
	ifp->if_softc = sc;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	ifp->if_snd.ifq_drv_maxlen = 0;
	IFQ_SET_READY(&ifp->if_snd);

	if_attach(ifp);
	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
	dev->si_drv1 = sc;
}
293
/*
 * tunopen - open the control device, creating the softc/interface on
 * first open of a unit.  At most one process (tracked by pid) may hold
 * the device open; others get EBUSY.
 */
static int
tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
{
	struct ifnet	*ifp;
	struct tun_softc *tp;

	/*
	 * XXXRW: Non-atomic test and set of dev->si_drv1 requires
	 * synchronization.
	 */
	tp = dev->si_drv1;
	if (!tp) {
		tuncreate(dev);
		tp = dev->si_drv1;
	}

	/*
	 * XXXRW: This use of tun_pid is subject to error due to the
	 * fact that a reference to the tunnel can live beyond the
	 * death of the process that created it.  Can we replace this
	 * with a simple busy flag?
	 */
	mtx_lock(&tp->tun_mtx);
	if (tp->tun_pid != 0 && tp->tun_pid != td->td_proc->p_pid) {
		mtx_unlock(&tp->tun_mtx);
		return (EBUSY);
	}
	tp->tun_pid = td->td_proc->p_pid;

	tp->tun_flags |= TUN_OPEN;
	mtx_unlock(&tp->tun_mtx);
	ifp = TUN2IFP(tp);
	TUNDEBUG(ifp, "open\n");

	return (0);
}
330
/*
 * tunclose - close the device - mark i/f down & delete
 * routing info
 */
static	int
tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
{
	struct tun_softc *tp;
	struct ifnet *ifp;
	int s;

	tp = dev->si_drv1;
	ifp = TUN2IFP(tp);

	mtx_lock(&tp->tun_mtx);
	tp->tun_flags &= ~TUN_OPEN;
	tp->tun_pid = 0;	/* release the pid-based exclusive lock */

	/*
	 * junk all pending output
	 */
	s = splimp();
	IFQ_PURGE(&ifp->if_snd);
	splx(s);
	mtx_unlock(&tp->tun_mtx);

	if (ifp->if_flags & IFF_UP) {
		s = splimp();
		if_down(ifp);
		splx(s);
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		struct ifaddr *ifa;

		s = splimp();
		/* find internet addresses and delete routes */
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
			if (ifa->ifa_addr->sa_family == AF_INET)
				/* Unlocked read. */
				rtinit(ifa, (int)RTM_DELETE,
				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		splx(s);
	}

	/* Clear async-I/O ownership and wake any pollers so they see EOF. */
	funsetown(&tp->tun_sigio);
	selwakeuppri(&tp->tun_rsel, PZERO + 1);
	TUNDEBUG (ifp, "closed\n");
	return (0);
}
382
383static int
384tuninit(struct ifnet *ifp)
385{
386	struct tun_softc *tp = ifp->if_softc;
387	struct ifaddr *ifa;
388	int error = 0;
389
390	TUNDEBUG(ifp, "tuninit\n");
391
392	ifp->if_flags |= IFF_UP;
393	ifp->if_drv_flags |= IFF_DRV_RUNNING;
394	getmicrotime(&ifp->if_lastchange);
395
396#ifdef INET
397	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
398		if (ifa->ifa_addr->sa_family == AF_INET) {
399			struct sockaddr_in *si;
400
401			si = (struct sockaddr_in *)ifa->ifa_addr;
402			mtx_lock(&tp->tun_mtx);
403			if (si->sin_addr.s_addr)
404				tp->tun_flags |= TUN_IASET;
405
406			si = (struct sockaddr_in *)ifa->ifa_dstaddr;
407			if (si && si->sin_addr.s_addr)
408				tp->tun_flags |= TUN_DSTADDR;
409			mtx_unlock(&tp->tun_mtx);
410		}
411	}
412#endif
413	return (error);
414}
415
416/*
417 * Process an ioctl request.
418 */
419static int
420tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
421{
422	struct ifreq *ifr = (struct ifreq *)data;
423	struct tun_softc *tp = ifp->if_softc;
424	struct ifstat *ifs;
425	int		error = 0, s;
426
427	s = splimp();
428	switch(cmd) {
429	case SIOCGIFSTATUS:
430		ifs = (struct ifstat *)data;
431		mtx_lock(&tp->tun_mtx);
432		if (tp->tun_pid)
433			sprintf(ifs->ascii + strlen(ifs->ascii),
434			    "\tOpened by PID %d\n", tp->tun_pid);
435		mtx_unlock(&tp->tun_mtx);
436		break;
437	case SIOCSIFADDR:
438		error = tuninit(ifp);
439		TUNDEBUG(ifp, "address set, error=%d\n", error);
440		break;
441	case SIOCSIFDSTADDR:
442		error = tuninit(ifp);
443		TUNDEBUG(ifp, "destination address set, error=%d\n", error);
444		break;
445	case SIOCSIFMTU:
446		ifp->if_mtu = ifr->ifr_mtu;
447		TUNDEBUG(ifp, "mtu set\n");
448		break;
449	case SIOCSIFFLAGS:
450	case SIOCADDMULTI:
451	case SIOCDELMULTI:
452		break;
453	default:
454		error = EINVAL;
455	}
456	splx(s);
457	return (error);
458}
459
/*
 * tunoutput - queue packets from higher level ready to put out.
 * Depending on mode, prepends the destination sockaddr (TUN_LMODE) or
 * a 4-byte address family in network order (TUN_IFHEAD) before
 * enqueueing the packet on if_snd for tunread() to collect.
 */
static int
tunoutput(
	struct ifnet *ifp,
	struct mbuf *m0,
	struct sockaddr *dst,
	struct rtentry *rt)
{
	struct tun_softc *tp = ifp->if_softc;
	u_short cached_tun_flags;
	int error;
	u_int32_t af;

	TUNDEBUG (ifp, "tunoutput\n");

#ifdef MAC
	error = mac_check_ifnet_transmit(ifp, m0);
	if (error) {
		m_freem(m0);
		return (error);
	}
#endif

	/* Could be unlocked read? */
	mtx_lock(&tp->tun_mtx);
	cached_tun_flags = tp->tun_flags;
	mtx_unlock(&tp->tun_mtx);
	if ((cached_tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
		m_freem (m0);
		return (EHOSTDOWN);
	}

	if ((ifp->if_flags & IFF_UP) != IFF_UP) {
		m_freem (m0);
		return (EHOSTDOWN);
	}

	/* BPF writes need to be handled specially. */
	if (dst->sa_family == AF_UNSPEC) {
		/* The real AF is stashed in sa_data by the BPF write path. */
		bcopy(dst->sa_data, &af, sizeof(af));
		dst->sa_family = af;
	}

	if (bpf_peers_present(ifp->if_bpf)) {
		af = dst->sa_family;
		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
	}

	/* prepend sockaddr? this may abort if the mbuf allocation fails */
	if (cached_tun_flags & TUN_LMODE) {
		/* allocate space for sockaddr */
		M_PREPEND(m0, dst->sa_len, M_DONTWAIT);

		/* if allocation failed drop packet */
		if (m0 == NULL) {
			ifp->if_iqdrops++;
			ifp->if_oerrors++;
			return (ENOBUFS);
		} else {
			bcopy(dst, m0->m_data, dst->sa_len);
		}
	}

	if (cached_tun_flags & TUN_IFHEAD) {
		/* Prepend the address family */
		M_PREPEND(m0, 4, M_DONTWAIT);

		/* if allocation failed drop packet */
		if (m0 == NULL) {
			ifp->if_iqdrops++;
			ifp->if_oerrors++;
			return (ENOBUFS);
		} else
			*(u_int32_t *)m0->m_data = htonl(dst->sa_family);
	} else {
#ifdef INET
		/* Without the AF header, only IPv4 can be delivered. */
		if (dst->sa_family != AF_INET)
#endif
		{
			m_freem(m0);
			return (EAFNOSUPPORT);
		}
	}

	/* Enqueue and kick tunstart() to wake any reader. */
	IFQ_HANDOFF(ifp, m0, error);
	if (error) {
		ifp->if_collisions++;
		return (ENOBUFS);
	}
	ifp->if_opackets++;
	return (0);
}
555
/*
 * the cdevsw interface is now pretty minimal.
 * Control-device ioctls: tunnel parameters (MTU, framing mode, debug),
 * async-I/O ownership, and queue inspection.
 */
static	int
tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
{
	int		s;
	int		error;
	struct tun_softc *tp = dev->si_drv1;
	struct tuninfo *tunp;

	switch (cmd) {
	case TUNSIFINFO:
		/* Set mtu/type/baudrate; raising the MTU needs privilege. */
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < IF_MINMTU)
			return (EINVAL);
		if (TUN2IFP(tp)->if_mtu != tunp->mtu
		&& (error = suser(td)) != 0)
			return (error);
		TUN2IFP(tp)->if_mtu = tunp->mtu;
		TUN2IFP(tp)->if_type = tunp->type;
		TUN2IFP(tp)->if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = TUN2IFP(tp)->if_mtu;
		tunp->type = TUN2IFP(tp)->if_type;
		tunp->baudrate = TUN2IFP(tp)->if_baudrate;
		break;
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
	case TUNSLMODE:
		/* Link-layer mode: mutually exclusive with TUN_IFHEAD. */
		mtx_lock(&tp->tun_mtx);
		if (*(int *)data) {
			tp->tun_flags |= TUN_LMODE;
			tp->tun_flags &= ~TUN_IFHEAD;
		} else
			tp->tun_flags &= ~TUN_LMODE;
		mtx_unlock(&tp->tun_mtx);
		break;
	case TUNSIFHEAD:
		/* AF-header mode: mutually exclusive with TUN_LMODE. */
		mtx_lock(&tp->tun_mtx);
		if (*(int *)data) {
			tp->tun_flags |= TUN_IFHEAD;
			tp->tun_flags &= ~TUN_LMODE;
		} else
			tp->tun_flags &= ~TUN_IFHEAD;
		mtx_unlock(&tp->tun_mtx);
		break;
	case TUNGIFHEAD:
		/* Could be unlocked read? */
		mtx_lock(&tp->tun_mtx);
		*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
		mtx_unlock(&tp->tun_mtx);
		break;
	case TUNSIFMODE:
		/* deny this if UP */
		if (TUN2IFP(tp)->if_flags & IFF_UP)
			return(EBUSY);

		switch (*(int *)data & ~IFF_MULTICAST) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			TUN2IFP(tp)->if_flags &=
			    ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
			TUN2IFP(tp)->if_flags |= *(int *)data;
			break;
		default:
			return(EINVAL);
		}
		break;
	case TUNSIFPID:
		/* Take over pid-based ownership of this tunnel. */
		mtx_lock(&tp->tun_mtx);
		tp->tun_pid = curthread->td_proc->p_pid;
		mtx_unlock(&tp->tun_mtx);
		break;
	case FIONBIO:
		/* Non-blocking is handled via O_NONBLOCK in tunread(). */
		break;
	case FIOASYNC:
		mtx_lock(&tp->tun_mtx);
		if (*(int *)data)
			tp->tun_flags |= TUN_ASYNC;
		else
			tp->tun_flags &= ~TUN_ASYNC;
		mtx_unlock(&tp->tun_mtx);
		break;
	case FIONREAD:
		/* Report the byte length of the next queued packet. */
		s = splimp();
		if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
			struct mbuf *mb;
			IFQ_LOCK(&TUN2IFP(tp)->if_snd);
			IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
			for( *(int *)data = 0; mb != 0; mb = mb->m_next)
				*(int *)data += mb->m_len;
			IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
		} else
			*(int *)data = 0;
		splx(s);
		break;
	case FIOSETOWN:
		return (fsetown(*(int *)data, &tp->tun_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(&tp->tun_sigio);
		return (0);

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		return (fsetown(-(*(int *)data), &tp->tun_sigio));

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(&tp->tun_sigio);
		return (0);

	default:
		return (ENOTTY);
	}
	return (0);
}
680
/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 */
static	int
tunread(struct cdev *dev, struct uio *uio, int flag)
{
	struct tun_softc *tp = dev->si_drv1;
	struct ifnet	*ifp = TUN2IFP(tp);
	struct mbuf	*m;
	int		error=0, len, s;

	TUNDEBUG (ifp, "read\n");
	mtx_lock(&tp->tun_mtx);
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		mtx_unlock(&tp->tun_mtx);
		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
		return (EHOSTDOWN);
	}

	tp->tun_flags &= ~TUN_RWAIT;
	mtx_unlock(&tp->tun_mtx);

	/* Block (unless O_NONBLOCK) until a packet appears on if_snd. */
	s = splimp();
	do {
		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			if (flag & O_NONBLOCK) {
				splx(s);
				return (EWOULDBLOCK);
			}
			/* Ask tunstart() to wake us when traffic arrives. */
			mtx_lock(&tp->tun_mtx);
			tp->tun_flags |= TUN_RWAIT;
			mtx_unlock(&tp->tun_mtx);
			if((error = tsleep(tp, PCATCH | (PZERO + 1),
					"tunread", 0)) != 0) {
				splx(s);
				return (error);
			}
		}
	} while (m == NULL);
	splx(s);

	/* Copy one packet out, freeing each mbuf as it is consumed. */
	while (m && uio->uio_resid > 0 && error == 0) {
		len = min(uio->uio_resid, m->m_len);
		if (len != 0)
			error = uiomove(mtod(m, void *), len, uio);
		m = m_free(m);
	}

	/* Remainder didn't fit the caller's buffer; the tail is discarded,
	 * never requeued. */
	if (m) {
		TUNDEBUG(ifp, "Dropping mbuf\n");
		m_freem(m);
	}
	return (error);
}
737
/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 * Turns one user write into one packet and dispatches it to the
 * protocol's netisr, honouring TUN_IFHEAD address-family framing.
 */
static	int
tunwrite(struct cdev *dev, struct uio *uio, int flag)
{
	struct tun_softc *tp = dev->si_drv1;
	struct ifnet	*ifp = TUN2IFP(tp);
	struct mbuf	*m;
	int		error = 0;
	uint32_t	family;
	int 		isr;

	TUNDEBUG(ifp, "tunwrite\n");

	if ((ifp->if_flags & IFF_UP) != IFF_UP)
		/* ignore silently */
		return (0);

	if (uio->uio_resid == 0)
		return (0);

	/* A write must be one whole packet, no larger than TUNMRU. */
	if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
		TUNDEBUG(ifp, "len=%d!\n", uio->uio_resid);
		return (EIO);
	}

	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0)) == NULL) {
		ifp->if_ierrors++;
		/* NOTE(review): error is still 0 here, so allocation
		 * failure is reported to the writer as success. */
		return (error);
	}

	m->m_pkthdr.rcvif = ifp;
#ifdef MAC
	mac_create_mbuf_from_ifnet(ifp, m);
#endif

	/* Could be unlocked read? */
	mtx_lock(&tp->tun_mtx);
	if (tp->tun_flags & TUN_IFHEAD) {
		mtx_unlock(&tp->tun_mtx);
		/* First 4 bytes carry the address family, network order. */
		if (m->m_len < sizeof(family) &&
		    (m = m_pullup(m, sizeof(family))) == NULL)
			return (ENOBUFS);
		family = ntohl(*mtod(m, u_int32_t *));
		m_adj(m, sizeof(family));
	} else {
		mtx_unlock(&tp->tun_mtx);
		family = AF_INET;	/* no framing: assume IPv4 */
	}

	BPF_MTAP2(ifp, &family, sizeof(family), m);

	/* Map the address family to its netisr; reject unknown families. */
	switch (family) {
#ifdef INET
	case AF_INET:
		isr = NETISR_IP;
		break;
#endif
#ifdef INET6
	case AF_INET6:
		isr = NETISR_IPV6;
		break;
#endif
#ifdef IPX
	case AF_IPX:
		isr = NETISR_IPX;
		break;
#endif
#ifdef NETATALK
	case AF_APPLETALK:
		isr = NETISR_ATALK2;
		break;
#endif
	default:
		m_freem(m);
		return (EAFNOSUPPORT);
	}
	/* First chunk of an mbuf contains good junk */
	if (harvest.point_to_point)
		random_harvest(m, 16, 3, 0, RANDOM_NET);
	ifp->if_ibytes += m->m_pkthdr.len;
	ifp->if_ipackets++;
	netisr_dispatch(isr, m);
	return (0);
}
824
/*
 * tunpoll - the poll interface, this is only useful on reads
 * really. The write detect always returns true, write never blocks
 * anyway, it either accepts the packet or drops it.
 */
static	int
tunpoll(struct cdev *dev, int events, struct thread *td)
{
	int		s;
	struct tun_softc *tp = dev->si_drv1;
	struct ifnet	*ifp = TUN2IFP(tp);
	int		revents = 0;
	struct mbuf	*m;

	s = splimp();
	TUNDEBUG(ifp, "tunpoll\n");

	if (events & (POLLIN | POLLRDNORM)) {
		/* Readable iff a packet is queued on if_snd. */
		IFQ_LOCK(&ifp->if_snd);
		IFQ_POLL_NOLOCK(&ifp->if_snd, m);
		if (m != NULL) {
			TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			TUNDEBUG(ifp, "tunpoll waiting\n");
			/* tunstart()'s selwakeuppri() will notify us. */
			selrecord(td, &tp->tun_rsel);
		}
		IFQ_UNLOCK(&ifp->if_snd);
	}
	/* Writes never block; always report writable. */
	if (events & (POLLOUT | POLLWRNORM))
		revents |= events & (POLLOUT | POLLWRNORM);

	splx(s);
	return (revents);
}
860