if_tun.c revision 148887
1/*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/
2
3/*-
4 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
11 * user process to have its wicked way with. This driver has its
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 *
16 * $FreeBSD: head/sys/net/if_tun.c 148887 2005-08-09 10:20:02Z rwatson $
17 */
18
19#include "opt_atalk.h"
20#include "opt_inet.h"
21#include "opt_inet6.h"
22#include "opt_ipx.h"
23#include "opt_mac.h"
24
25#include <sys/param.h>
26#include <sys/proc.h>
27#include <sys/systm.h>
28#include <sys/mac.h>
29#include <sys/mbuf.h>
30#include <sys/module.h>
31#include <sys/socket.h>
32#include <sys/fcntl.h>
33#include <sys/filio.h>
34#include <sys/sockio.h>
35#include <sys/ttycom.h>
36#include <sys/poll.h>
37#include <sys/selinfo.h>
38#include <sys/signalvar.h>
39#include <sys/filedesc.h>
40#include <sys/kernel.h>
41#include <sys/sysctl.h>
42#include <sys/conf.h>
43#include <sys/uio.h>
44#include <sys/malloc.h>
45#include <sys/random.h>
46
47#include <net/if.h>
48#include <net/if_types.h>
49#include <net/netisr.h>
50#include <net/route.h>
51#ifdef INET
52#include <netinet/in.h>
53#endif
54#include <net/bpf.h>
55#include <net/if_tun.h>
56
57#include <sys/queue.h>
58
59/*
60 * tun_list is protected by global tunmtx.  Other mutable fields are
61 * protected by tun->tun_mtx, or by their owning subsystem.  tun_dev is
62 * static for the duration of a tunnel interface.
63 */
64struct tun_softc {
65	TAILQ_ENTRY(tun_softc)	tun_list;
66	struct cdev *tun_dev;
67	u_short	tun_flags;		/* misc flags */
68#define	TUN_OPEN	0x0001
69#define	TUN_INITED	0x0002
70#define	TUN_RCOLL	0x0004
71#define	TUN_IASET	0x0008
72#define	TUN_DSTADDR	0x0010
73#define	TUN_LMODE	0x0020
74#define	TUN_RWAIT	0x0040
75#define	TUN_ASYNC	0x0080
76#define	TUN_IFHEAD	0x0100
77
78#define TUN_READY       (TUN_OPEN | TUN_INITED)
79
80	/*
81	 * XXXRW: tun_pid is used to exclusively lock /dev/tun.  Is this
82	 * actually needed?  Can we just return EBUSY if already open?
83	 * Problem is that this involved inherent races when a tun device
84	 * is handed off from one process to another, as opposed to just
85	 * being slightly stale informationally.
86	 */
87	pid_t	tun_pid;		/* owning pid */
88	struct	ifnet *tun_ifp;		/* the interface */
89	struct  sigio *tun_sigio;	/* information for async I/O */
90	struct	selinfo	tun_rsel;	/* read select */
91	struct mtx	tun_mtx;	/* protect mutable softc fields */
92};
93#define TUN2IFP(sc)	((sc)->tun_ifp)
94
95#define TUNDEBUG	if (tundebug) if_printf
96#define	TUNNAME		"tun"
97
98/*
99 * All mutable global variables in if_tun are locked using tunmtx, with
100 * the exception of tundebug, which is used unlocked, and tunclones,
101 * which is static after setup.
102 */
103static struct mtx tunmtx;
104static MALLOC_DEFINE(M_TUN, TUNNAME, "Tunnel Interface");
105static int tundebug = 0;
106static struct clonedevs *tunclones;
107static TAILQ_HEAD(,tun_softc)	tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
108SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
109
110static void	tunclone(void *arg, struct ucred *cred, char *name,
111		    int namelen, struct cdev **dev);
112static void	tuncreate(struct cdev *dev);
113static int	tunifioctl(struct ifnet *, u_long, caddr_t);
114static int	tuninit(struct ifnet *);
115static int	tunmodevent(module_t, int, void *);
116static int	tunoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
117		    struct rtentry *rt);
118static void	tunstart(struct ifnet *);
119
120static d_open_t		tunopen;
121static d_close_t	tunclose;
122static d_read_t		tunread;
123static d_write_t	tunwrite;
124static d_ioctl_t	tunioctl;
125static d_poll_t		tunpoll;
126
127static struct cdevsw tun_cdevsw = {
128	.d_version =	D_VERSION,
129	.d_flags =	D_PSEUDO | D_NEEDGIANT,
130	.d_open =	tunopen,
131	.d_close =	tunclose,
132	.d_read =	tunread,
133	.d_write =	tunwrite,
134	.d_ioctl =	tunioctl,
135	.d_poll =	tunpoll,
136	.d_name =	TUNNAME,
137};
138
139static void
140tunclone(void *arg, struct ucred *cred, char *name, int namelen,
141    struct cdev **dev)
142{
143	int u, i;
144
145	if (*dev != NULL)
146		return;
147
148	if (strcmp(name, TUNNAME) == 0) {
149		u = -1;
150	} else if (dev_stdclone(name, NULL, TUNNAME, &u) != 1)
151		return;	/* Don't recognise the name */
152	if (u != -1 && u > IF_MAXUNIT)
153		return;	/* Unit number too high */
154
155	/* find any existing device, or allocate new unit number */
156	i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0);
157	if (i) {
158		/* No preexisting struct cdev *, create one */
159		*dev = make_dev(&tun_cdevsw, unit2minor(u),
160		    UID_UUCP, GID_DIALER, 0600, "tun%d", u);
161		if (*dev != NULL) {
162			dev_ref(*dev);
163			(*dev)->si_flags |= SI_CHEAPCLONE;
164		}
165	}
166}
167
168static void
169tun_destroy(struct tun_softc *tp)
170{
171	struct cdev *dev;
172
173	/* Unlocked read. */
174	KASSERT((tp->tun_flags & TUN_OPEN) == 0,
175	    ("tununits is out of sync - unit %d", TUN2IFP(tp)->if_dunit));
176
177	dev = tp->tun_dev;
178	bpfdetach(TUN2IFP(tp));
179	if_detach(TUN2IFP(tp));
180	if_free(TUN2IFP(tp));
181	destroy_dev(dev);
182	mtx_destroy(&tp->tun_mtx);
183	free(tp, M_TUN);
184}
185
186static int
187tunmodevent(module_t mod, int type, void *data)
188{
189	static eventhandler_tag tag;
190	struct tun_softc *tp;
191
192	switch (type) {
193	case MOD_LOAD:
194		mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
195		clone_setup(&tunclones);
196		tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
197		if (tag == NULL)
198			return (ENOMEM);
199		break;
200	case MOD_UNLOAD:
201		EVENTHANDLER_DEREGISTER(dev_clone, tag);
202
203		mtx_lock(&tunmtx);
204		while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
205			TAILQ_REMOVE(&tunhead, tp, tun_list);
206			mtx_unlock(&tunmtx);
207			tun_destroy(tp);
208			mtx_lock(&tunmtx);
209		}
210		mtx_unlock(&tunmtx);
211		clone_cleanup(&tunclones);
212		mtx_destroy(&tunmtx);
213		break;
214	default:
215		return EOPNOTSUPP;
216	}
217	return 0;
218}
219
220static moduledata_t tun_mod = {
221	"if_tun",
222	tunmodevent,
223	0
224};
225
226DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
227
228static void
229tunstart(struct ifnet *ifp)
230{
231	struct tun_softc *tp = ifp->if_softc;
232	struct mbuf *m;
233
234	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
235		IFQ_LOCK(&ifp->if_snd);
236		IFQ_POLL_NOLOCK(&ifp->if_snd, m);
237		if (m == NULL) {
238			IFQ_UNLOCK(&ifp->if_snd);
239			return;
240		}
241		IFQ_UNLOCK(&ifp->if_snd);
242	}
243
244	mtx_lock(&tp->tun_mtx);
245	if (tp->tun_flags & TUN_RWAIT) {
246		tp->tun_flags &= ~TUN_RWAIT;
247		wakeup(tp);
248	}
249	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
250		mtx_unlock(&tp->tun_mtx);
251		pgsigio(&tp->tun_sigio, SIGIO, 0);
252	} else
253		mtx_unlock(&tp->tun_mtx);
254	selwakeuppri(&tp->tun_rsel, PZERO + 1);
255}
256
257/* XXX: should return an error code so it can fail. */
258static void
259tuncreate(struct cdev *dev)
260{
261	struct tun_softc *sc;
262	struct ifnet *ifp;
263
264	dev->si_flags &= ~SI_CHEAPCLONE;
265
266	MALLOC(sc, struct tun_softc *, sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
267	mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
268	sc->tun_flags = TUN_INITED;
269	sc->tun_dev = dev;
270	mtx_lock(&tunmtx);
271	TAILQ_INSERT_TAIL(&tunhead, sc, tun_list);
272	mtx_unlock(&tunmtx);
273
274	ifp = sc->tun_ifp = if_alloc(IFT_PPP);
275	if (ifp == NULL)
276		panic("%s%d: failed to if_alloc() interface.\n",
277		    TUNNAME, dev2unit(dev));
278	if_initname(ifp, TUNNAME, dev2unit(dev));
279	ifp->if_mtu = TUNMTU;
280	ifp->if_ioctl = tunifioctl;
281	ifp->if_output = tunoutput;
282	ifp->if_start = tunstart;
283	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
284	ifp->if_softc = sc;
285	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
286	ifp->if_snd.ifq_drv_maxlen = 0;
287	IFQ_SET_READY(&ifp->if_snd);
288
289	if_attach(ifp);
290	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
291	dev->si_drv1 = sc;
292}
293
294static int
295tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
296{
297	struct ifnet	*ifp;
298	struct tun_softc *tp;
299
300	/*
301	 * XXXRW: Non-atomic test and set of dev->si_drv1 requires
302	 * synchronization.
303	 */
304	tp = dev->si_drv1;
305	if (!tp) {
306		tuncreate(dev);
307		tp = dev->si_drv1;
308	}
309
310	/*
311	 * XXXRW: This use of tun_pid is subject to error due to the
312	 * fact that a reference to the tunnel can live beyond the
313	 * death of the process that created it.  Can we replace this
314	 * with a simple busy flag?
315	 */
316	mtx_lock(&tp->tun_mtx);
317	if (tp->tun_pid != 0 && tp->tun_pid != td->td_proc->p_pid) {
318		mtx_unlock(&tp->tun_mtx);
319		return (EBUSY);
320	}
321	tp->tun_pid = td->td_proc->p_pid;
322
323	tp->tun_flags |= TUN_OPEN;
324	mtx_unlock(&tp->tun_mtx);
325	ifp = TUN2IFP(tp);
326	TUNDEBUG(ifp, "open\n");
327
328	return (0);
329}
330
331/*
332 * tunclose - close the device - mark i/f down & delete
333 * routing info
334 */
335static	int
336tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
337{
338	struct tun_softc *tp;
339	struct ifnet *ifp;
340	int s;
341
342	tp = dev->si_drv1;
343	ifp = TUN2IFP(tp);
344
345	mtx_lock(&tp->tun_mtx);
346	tp->tun_flags &= ~TUN_OPEN;
347	tp->tun_pid = 0;
348
349	/*
350	 * junk all pending output
351	 */
352	s = splimp();
353	IFQ_PURGE(&ifp->if_snd);
354	splx(s);
355	mtx_unlock(&tp->tun_mtx);
356
357	if (ifp->if_flags & IFF_UP) {
358		s = splimp();
359		if_down(ifp);
360		splx(s);
361	}
362
363	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
364		struct ifaddr *ifa;
365
366		s = splimp();
367		/* find internet addresses and delete routes */
368		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
369			if (ifa->ifa_addr->sa_family == AF_INET)
370				/* Unlocked read. */
371				rtinit(ifa, (int)RTM_DELETE,
372				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
373		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
374		splx(s);
375	}
376
377	funsetown(&tp->tun_sigio);
378	selwakeuppri(&tp->tun_rsel, PZERO + 1);
379	TUNDEBUG (ifp, "closed\n");
380	return (0);
381}
382
383static int
384tuninit(struct ifnet *ifp)
385{
386	struct tun_softc *tp = ifp->if_softc;
387	struct ifaddr *ifa;
388	int error = 0;
389
390	TUNDEBUG(ifp, "tuninit\n");
391
392	ifp->if_flags |= IFF_UP;
393	ifp->if_drv_flags |= IFF_DRV_RUNNING;
394	getmicrotime(&ifp->if_lastchange);
395
396	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa;
397	     ifa = TAILQ_NEXT(ifa, ifa_link)) {
398		if (ifa->ifa_addr == NULL)
399			error = EFAULT;
400			/* XXX: Should maybe return straight off? */
401		else {
402#ifdef INET
403			if (ifa->ifa_addr->sa_family == AF_INET) {
404			    struct sockaddr_in *si;
405
406			    si = (struct sockaddr_in *)ifa->ifa_addr;
407			    mtx_lock(&tp->tun_mtx);
408			    if (si->sin_addr.s_addr)
409				    tp->tun_flags |= TUN_IASET;
410
411			    si = (struct sockaddr_in *)ifa->ifa_dstaddr;
412			    if (si && si->sin_addr.s_addr)
413				    tp->tun_flags |= TUN_DSTADDR;
414			    mtx_unlock(&tp->tun_mtx);
415			}
416#endif
417		}
418	}
419	return (error);
420}
421
422/*
423 * Process an ioctl request.
424 */
425static int
426tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
427{
428	struct ifreq *ifr = (struct ifreq *)data;
429	struct tun_softc *tp = ifp->if_softc;
430	struct ifstat *ifs;
431	int		error = 0, s;
432
433	s = splimp();
434	switch(cmd) {
435	case SIOCGIFSTATUS:
436		ifs = (struct ifstat *)data;
437		mtx_lock(&tp->tun_mtx);
438		if (tp->tun_pid)
439			sprintf(ifs->ascii + strlen(ifs->ascii),
440			    "\tOpened by PID %d\n", tp->tun_pid);
441		mtx_unlock(&tp->tun_mtx);
442		break;
443	case SIOCSIFADDR:
444		error = tuninit(ifp);
445		TUNDEBUG(ifp, "address set, error=%d\n", error);
446		break;
447	case SIOCSIFDSTADDR:
448		error = tuninit(ifp);
449		TUNDEBUG(ifp, "destination address set, error=%d\n", error);
450		break;
451	case SIOCSIFMTU:
452		ifp->if_mtu = ifr->ifr_mtu;
453		TUNDEBUG(ifp, "mtu set\n");
454		break;
455	case SIOCSIFFLAGS:
456	case SIOCADDMULTI:
457	case SIOCDELMULTI:
458		break;
459	default:
460		error = EINVAL;
461	}
462	splx(s);
463	return (error);
464}
465
466/*
467 * tunoutput - queue packets from higher level ready to put out.
468 */
469static int
470tunoutput(
471	struct ifnet *ifp,
472	struct mbuf *m0,
473	struct sockaddr *dst,
474	struct rtentry *rt)
475{
476	struct tun_softc *tp = ifp->if_softc;
477	u_short cached_tun_flags;
478	int error;
479	u_int32_t af;
480
481	TUNDEBUG (ifp, "tunoutput\n");
482
483#ifdef MAC
484	error = mac_check_ifnet_transmit(ifp, m0);
485	if (error) {
486		m_freem(m0);
487		return (error);
488	}
489#endif
490
491	/* Could be unlocked read? */
492	mtx_lock(&tp->tun_mtx);
493	cached_tun_flags = tp->tun_flags;
494	mtx_unlock(&tp->tun_mtx);
495	if ((cached_tun_flags & TUN_READY) != TUN_READY) {
496		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
497		m_freem (m0);
498		return (EHOSTDOWN);
499	}
500
501	if ((ifp->if_flags & IFF_UP) != IFF_UP) {
502		m_freem (m0);
503		return (EHOSTDOWN);
504	}
505
506	/* BPF writes need to be handled specially. */
507	if (dst->sa_family == AF_UNSPEC) {
508		bcopy(dst->sa_data, &af, sizeof(af));
509		dst->sa_family = af;
510	}
511
512	if (ifp->if_bpf) {
513		af = dst->sa_family;
514		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
515	}
516
517	/* prepend sockaddr? this may abort if the mbuf allocation fails */
518	if (cached_tun_flags & TUN_LMODE) {
519		/* allocate space for sockaddr */
520		M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
521
522		/* if allocation failed drop packet */
523		if (m0 == NULL) {
524			ifp->if_iqdrops++;
525			ifp->if_oerrors++;
526			return (ENOBUFS);
527		} else {
528			bcopy(dst, m0->m_data, dst->sa_len);
529		}
530	}
531
532	if (cached_tun_flags & TUN_IFHEAD) {
533		/* Prepend the address family */
534		M_PREPEND(m0, 4, M_DONTWAIT);
535
536		/* if allocation failed drop packet */
537		if (m0 == NULL) {
538			ifp->if_iqdrops++;
539			ifp->if_oerrors++;
540			return (ENOBUFS);
541		} else
542			*(u_int32_t *)m0->m_data = htonl(dst->sa_family);
543	} else {
544#ifdef INET
545		if (dst->sa_family != AF_INET)
546#endif
547		{
548			m_freem(m0);
549			return (EAFNOSUPPORT);
550		}
551	}
552
553	IFQ_HANDOFF(ifp, m0, error);
554	if (error) {
555		ifp->if_collisions++;
556		return (ENOBUFS);
557	}
558	ifp->if_opackets++;
559	return (0);
560}
561
562/*
563 * the cdevsw interface is now pretty minimal.
564 */
565static	int
566tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
567{
568	int		s;
569	int		error;
570	struct tun_softc *tp = dev->si_drv1;
571	struct tuninfo *tunp;
572
573	switch (cmd) {
574	case TUNSIFINFO:
575		tunp = (struct tuninfo *)data;
576		if (tunp->mtu < IF_MINMTU)
577			return (EINVAL);
578		if (TUN2IFP(tp)->if_mtu != tunp->mtu
579		&& (error = suser(td)) != 0)
580			return (error);
581		TUN2IFP(tp)->if_mtu = tunp->mtu;
582		TUN2IFP(tp)->if_type = tunp->type;
583		TUN2IFP(tp)->if_baudrate = tunp->baudrate;
584		break;
585	case TUNGIFINFO:
586		tunp = (struct tuninfo *)data;
587		tunp->mtu = TUN2IFP(tp)->if_mtu;
588		tunp->type = TUN2IFP(tp)->if_type;
589		tunp->baudrate = TUN2IFP(tp)->if_baudrate;
590		break;
591	case TUNSDEBUG:
592		tundebug = *(int *)data;
593		break;
594	case TUNGDEBUG:
595		*(int *)data = tundebug;
596		break;
597	case TUNSLMODE:
598		mtx_lock(&tp->tun_mtx);
599		if (*(int *)data) {
600			tp->tun_flags |= TUN_LMODE;
601			tp->tun_flags &= ~TUN_IFHEAD;
602		} else
603			tp->tun_flags &= ~TUN_LMODE;
604		mtx_unlock(&tp->tun_mtx);
605		break;
606	case TUNSIFHEAD:
607		mtx_lock(&tp->tun_mtx);
608		if (*(int *)data) {
609			tp->tun_flags |= TUN_IFHEAD;
610			tp->tun_flags &= ~TUN_LMODE;
611		} else
612			tp->tun_flags &= ~TUN_IFHEAD;
613		mtx_unlock(&tp->tun_mtx);
614		break;
615	case TUNGIFHEAD:
616		/* Could be unlocked read? */
617		mtx_lock(&tp->tun_mtx);
618		*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
619		mtx_unlock(&tp->tun_mtx);
620		break;
621	case TUNSIFMODE:
622		/* deny this if UP */
623		if (TUN2IFP(tp)->if_flags & IFF_UP)
624			return(EBUSY);
625
626		switch (*(int *)data & ~IFF_MULTICAST) {
627		case IFF_POINTOPOINT:
628		case IFF_BROADCAST:
629			TUN2IFP(tp)->if_flags &=
630			    ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
631			TUN2IFP(tp)->if_flags |= *(int *)data;
632			break;
633		default:
634			return(EINVAL);
635		}
636		break;
637	case TUNSIFPID:
638		mtx_lock(&tp->tun_mtx);
639		tp->tun_pid = curthread->td_proc->p_pid;
640		mtx_unlock(&tp->tun_mtx);
641		break;
642	case FIONBIO:
643		break;
644	case FIOASYNC:
645		mtx_lock(&tp->tun_mtx);
646		if (*(int *)data)
647			tp->tun_flags |= TUN_ASYNC;
648		else
649			tp->tun_flags &= ~TUN_ASYNC;
650		mtx_unlock(&tp->tun_mtx);
651		break;
652	case FIONREAD:
653		s = splimp();
654		if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
655			struct mbuf *mb;
656			IFQ_LOCK(&TUN2IFP(tp)->if_snd);
657			IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
658			for( *(int *)data = 0; mb != 0; mb = mb->m_next)
659				*(int *)data += mb->m_len;
660			IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
661		} else
662			*(int *)data = 0;
663		splx(s);
664		break;
665	case FIOSETOWN:
666		return (fsetown(*(int *)data, &tp->tun_sigio));
667
668	case FIOGETOWN:
669		*(int *)data = fgetown(&tp->tun_sigio);
670		return (0);
671
672	/* This is deprecated, FIOSETOWN should be used instead. */
673	case TIOCSPGRP:
674		return (fsetown(-(*(int *)data), &tp->tun_sigio));
675
676	/* This is deprecated, FIOGETOWN should be used instead. */
677	case TIOCGPGRP:
678		*(int *)data = -fgetown(&tp->tun_sigio);
679		return (0);
680
681	default:
682		return (ENOTTY);
683	}
684	return (0);
685}
686
687/*
688 * The cdevsw read interface - reads a packet at a time, or at
689 * least as much of a packet as can be read.
690 */
691static	int
692tunread(struct cdev *dev, struct uio *uio, int flag)
693{
694	struct tun_softc *tp = dev->si_drv1;
695	struct ifnet	*ifp = TUN2IFP(tp);
696	struct mbuf	*m;
697	int		error=0, len, s;
698
699	TUNDEBUG (ifp, "read\n");
700	mtx_lock(&tp->tun_mtx);
701	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
702		mtx_unlock(&tp->tun_mtx);
703		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
704		return (EHOSTDOWN);
705	}
706
707	tp->tun_flags &= ~TUN_RWAIT;
708	mtx_unlock(&tp->tun_mtx);
709
710	s = splimp();
711	do {
712		IFQ_DEQUEUE(&ifp->if_snd, m);
713		if (m == NULL) {
714			if (flag & O_NONBLOCK) {
715				splx(s);
716				return (EWOULDBLOCK);
717			}
718			mtx_lock(&tp->tun_mtx);
719			tp->tun_flags |= TUN_RWAIT;
720			mtx_unlock(&tp->tun_mtx);
721			if((error = tsleep(tp, PCATCH | (PZERO + 1),
722					"tunread", 0)) != 0) {
723				splx(s);
724				return (error);
725			}
726		}
727	} while (m == NULL);
728	splx(s);
729
730	while (m && uio->uio_resid > 0 && error == 0) {
731		len = min(uio->uio_resid, m->m_len);
732		if (len != 0)
733			error = uiomove(mtod(m, void *), len, uio);
734		m = m_free(m);
735	}
736
737	if (m) {
738		TUNDEBUG(ifp, "Dropping mbuf\n");
739		m_freem(m);
740	}
741	return (error);
742}
743
744/*
745 * the cdevsw write interface - an atomic write is a packet - or else!
746 */
747static	int
748tunwrite(struct cdev *dev, struct uio *uio, int flag)
749{
750	struct tun_softc *tp = dev->si_drv1;
751	struct ifnet	*ifp = TUN2IFP(tp);
752	struct mbuf	*m;
753	int		error = 0;
754	uint32_t	family;
755	int 		isr;
756
757	TUNDEBUG(ifp, "tunwrite\n");
758
759	if ((ifp->if_flags & IFF_UP) != IFF_UP)
760		/* ignore silently */
761		return (0);
762
763	if (uio->uio_resid == 0)
764		return (0);
765
766	if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
767		TUNDEBUG(ifp, "len=%d!\n", uio->uio_resid);
768		return (EIO);
769	}
770
771	if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0)) == NULL) {
772		ifp->if_ierrors++;
773		return (error);
774	}
775
776	m->m_pkthdr.rcvif = ifp;
777#ifdef MAC
778	mac_create_mbuf_from_ifnet(ifp, m);
779#endif
780
781	/* Could be unlocked read? */
782	mtx_lock(&tp->tun_mtx);
783	if (tp->tun_flags & TUN_IFHEAD) {
784		mtx_unlock(&tp->tun_mtx);
785		if (m->m_len < sizeof(family) &&
786		    (m = m_pullup(m, sizeof(family))) == NULL)
787			return (ENOBUFS);
788		family = ntohl(*mtod(m, u_int32_t *));
789		m_adj(m, sizeof(family));
790	} else {
791		mtx_unlock(&tp->tun_mtx);
792		family = AF_INET;
793	}
794
795	BPF_MTAP2(ifp, &family, sizeof(family), m);
796
797	switch (family) {
798#ifdef INET
799	case AF_INET:
800		isr = NETISR_IP;
801		break;
802#endif
803#ifdef INET6
804	case AF_INET6:
805		isr = NETISR_IPV6;
806		break;
807#endif
808#ifdef IPX
809	case AF_IPX:
810		isr = NETISR_IPX;
811		break;
812#endif
813#ifdef NETATALK
814	case AF_APPLETALK:
815		isr = NETISR_ATALK2;
816		break;
817#endif
818	default:
819		m_freem(m);
820		return (EAFNOSUPPORT);
821	}
822	/* First chunk of an mbuf contains good junk */
823	if (harvest.point_to_point)
824		random_harvest(m, 16, 3, 0, RANDOM_NET);
825	ifp->if_ibytes += m->m_pkthdr.len;
826	ifp->if_ipackets++;
827	netisr_dispatch(isr, m);
828	return (0);
829}
830
831/*
832 * tunpoll - the poll interface, this is only useful on reads
833 * really. The write detect always returns true, write never blocks
834 * anyway, it either accepts the packet or drops it.
835 */
836static	int
837tunpoll(struct cdev *dev, int events, struct thread *td)
838{
839	int		s;
840	struct tun_softc *tp = dev->si_drv1;
841	struct ifnet	*ifp = TUN2IFP(tp);
842	int		revents = 0;
843	struct mbuf	*m;
844
845	s = splimp();
846	TUNDEBUG(ifp, "tunpoll\n");
847
848	if (events & (POLLIN | POLLRDNORM)) {
849		IFQ_LOCK(&ifp->if_snd);
850		IFQ_POLL_NOLOCK(&ifp->if_snd, m);
851		if (m != NULL) {
852			TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
853			revents |= events & (POLLIN | POLLRDNORM);
854		} else {
855			TUNDEBUG(ifp, "tunpoll waiting\n");
856			selrecord(td, &tp->tun_rsel);
857		}
858		IFQ_UNLOCK(&ifp->if_snd);
859	}
860	if (events & (POLLOUT | POLLWRNORM))
861		revents |= events & (POLLOUT | POLLWRNORM);
862
863	splx(s);
864	return (revents);
865}
866