if_tun.c revision 56703
1276772Smarkj/*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/
2276772Smarkj
/*
 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
 * Nottingham University 1987.
 *
 * This source may be freely distributed, however I would be interested
 * in any changes that are made.
 *
 * This driver takes packets off the IP i/f and hands them up to a
 * user process to have its wicked way with. This driver has its
 * roots in a similar driver written by Phil Cockcroft (formerly) at
 * UCL. This driver is based much more on read/write/poll mode of
 * operation though.
 *
 * $FreeBSD: head/sys/net/if_tun.c 56703 2000-01-27 23:37:39Z brian $
 */
18276772Smarkj
19276772Smarkj#include "opt_atalk.h"
20276772Smarkj#include "opt_atm.h"
21276772Smarkj#include "opt_inet.h"
22276772Smarkj#include "opt_inet6.h"
23276772Smarkj#include "opt_ipx.h"
24276772Smarkj#include "opt_natm.h"
25276772Smarkj
26276772Smarkj#include <sys/param.h>
27276772Smarkj#include <sys/proc.h>
28276772Smarkj#include <sys/systm.h>
29276772Smarkj#include <sys/mbuf.h>
30276772Smarkj#include <sys/socket.h>
31276772Smarkj#include <sys/filio.h>
32276772Smarkj#include <sys/sockio.h>
33276772Smarkj#include <sys/ttycom.h>
34276772Smarkj#include <sys/poll.h>
35276772Smarkj#include <sys/signalvar.h>
36276772Smarkj#include <sys/filedesc.h>
37276772Smarkj#include <sys/kernel.h>
38276772Smarkj#include <sys/sysctl.h>
39276772Smarkj#include <sys/conf.h>
40276772Smarkj#include <sys/uio.h>
41276772Smarkj#include <sys/vnode.h>
42276772Smarkj#include <sys/malloc.h>
43276772Smarkj
44276772Smarkj#include <net/if.h>
45276772Smarkj#include <net/netisr.h>
46276772Smarkj#include <net/route.h>
47276772Smarkj#include <net/intrq.h>
48276772Smarkj
49276772Smarkj#ifdef INET
50276772Smarkj#include <netinet/in.h>
51276772Smarkj#include <netinet/in_var.h>
52276772Smarkj#endif
53276772Smarkj
54276772Smarkj#include <net/bpf.h>
55276772Smarkj
56276772Smarkj#include <net/if_tunvar.h>
57276772Smarkj#include <net/if_tun.h>
58298076Scem
59276772Smarkjstatic MALLOC_DEFINE(M_TUN, "tun", "Tunnel Interface");
60276772Smarkj
61276772Smarkjstatic void tunattach __P((void *));
62276772SmarkjPSEUDO_SET(tunattach, if_tun);
63276772Smarkj
64276772Smarkjstatic void tuncreate __P((dev_t dev));
65276772Smarkj
66276772Smarkj#define TUNDEBUG	if (tundebug) printf
67290957Smariusstatic int tundebug = 0;
68276772SmarkjSYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
69276772Smarkj
70276772Smarkjstatic int tunoutput __P((struct ifnet *, struct mbuf *, struct sockaddr *,
71276772Smarkj	    struct rtentry *rt));
72276772Smarkjstatic int tunifioctl __P((struct ifnet *, u_long, caddr_t));
73276772Smarkjstatic int tuninit __P((struct ifnet *));
74298310Spfg
75276772Smarkjstatic	d_open_t	tunopen;
76276772Smarkjstatic	d_close_t	tunclose;
77276772Smarkjstatic	d_read_t	tunread;
78276772Smarkjstatic	d_write_t	tunwrite;
79276772Smarkjstatic	d_ioctl_t	tunioctl;
80276772Smarkjstatic	d_poll_t	tunpoll;
81290957Smarius
82276772Smarkj#define CDEV_MAJOR 52
83276772Smarkjstatic struct cdevsw tun_cdevsw = {
84276772Smarkj	/* open */	tunopen,
85276772Smarkj	/* close */	tunclose,
86276772Smarkj	/* read */	tunread,
87276772Smarkj	/* write */	tunwrite,
88276772Smarkj	/* ioctl */	tunioctl,
89276772Smarkj	/* poll */	tunpoll,
90276772Smarkj	/* mmap */	nommap,
91276772Smarkj	/* strategy */	nostrategy,
92276772Smarkj	/* name */	"tun",
93276772Smarkj	/* maj */	CDEV_MAJOR,
94276772Smarkj	/* dump */	nodump,
95276772Smarkj	/* psize */	nopsize,
96276772Smarkj	/* flags */	0,
97276772Smarkj	/* bmaj */	-1
98276772Smarkj};
99276772Smarkj
100290957Smariusstatic void
101276772Smarkjtunattach(dummy)
102276772Smarkj	void *dummy;
103276772Smarkj{
104276772Smarkj
105276772Smarkj	cdevsw_add(&tun_cdevsw);
106276772Smarkj}
107290957Smarius
108276772Smarkjstatic void
109276772Smarkjtuncreate(dev)
110290957Smarius	dev_t dev;
111276772Smarkj{
112276772Smarkj	struct tun_softc *sc;
113276772Smarkj	struct ifnet *ifp;
114276772Smarkj
115276772Smarkj	dev = make_dev(&tun_cdevsw, minor(dev),
116276772Smarkj	    UID_UUCP, GID_DIALER, 0600, "tun%d", lminor(dev));
117290957Smarius
118276772Smarkj	MALLOC(sc, struct tun_softc *, sizeof(*sc), M_TUN, M_WAITOK);
119276772Smarkj	bzero(sc, sizeof *sc);
120276772Smarkj	sc->tun_flags = TUN_INITED;
121276772Smarkj
122276772Smarkj	ifp = &sc->tun_if;
123276772Smarkj	ifp->if_unit = lminor(dev);
124276772Smarkj	ifp->if_name = "tun";
125276772Smarkj	ifp->if_mtu = TUNMTU;
126298076Scem	ifp->if_ioctl = tunifioctl;
127276772Smarkj	ifp->if_output = tunoutput;
128276772Smarkj	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
129298076Scem	ifp->if_snd.ifq_maxlen = ifqmaxlen;
130276772Smarkj	ifp->if_softc = sc;
131276772Smarkj	if_attach(ifp);
132276772Smarkj	bpfattach(ifp, DLT_NULL, sizeof(u_int));
133298076Scem	dev->si_drv1 = sc;
134298076Scem}
135298076Scem
136276772Smarkj/*
137276772Smarkj * tunnel open - must be superuser & the device must be
138298076Scem * configured in
139276772Smarkj */
140276772Smarkjstatic	int
141276772Smarkjtunopen(dev, flag, mode, p)
142276772Smarkj	dev_t	dev;
143276772Smarkj	int	flag, mode;
144276772Smarkj	struct proc *p;
145276772Smarkj{
146276772Smarkj	struct ifnet	*ifp;
147276772Smarkj	struct tun_softc *tp;
148276772Smarkj	register int	error;
149276772Smarkj
150276772Smarkj	error = suser(p);
151276772Smarkj	if (error)
152276772Smarkj		return (error);
153298076Scem
154298076Scem	tp = dev->si_drv1;
155276772Smarkj	if (!tp) {
156276772Smarkj		tuncreate(dev);
157276772Smarkj		tp = dev->si_drv1;
158276772Smarkj	}
159276772Smarkj	if (tp->tun_flags & TUN_OPEN)
160276772Smarkj		return EBUSY;
161276772Smarkj	tp->tun_pid = p->p_pid;
162276772Smarkj	ifp = &tp->tun_if;
163276772Smarkj	tp->tun_flags |= TUN_OPEN;
164276772Smarkj	TUNDEBUG("%s%d: open\n", ifp->if_name, ifp->if_unit);
165276772Smarkj	return (0);
166276772Smarkj}
167276772Smarkj
168276772Smarkj/*
169276772Smarkj * tunclose - close the device - mark i/f down & delete
170276772Smarkj * routing info
171276772Smarkj */
172276772Smarkjstatic	int
173276772Smarkjtunclose(dev, foo, bar, p)
174276772Smarkj	dev_t dev;
175298069Spfg	int foo;
176276772Smarkj	int bar;
177276772Smarkj	struct proc *p;
178276772Smarkj{
179276772Smarkj	register int	s;
180276772Smarkj	struct tun_softc *tp;
181276772Smarkj	struct ifnet	*ifp;
182276772Smarkj	struct mbuf	*m;
183276772Smarkj
184276772Smarkj	tp = dev->si_drv1;
185276772Smarkj	ifp = &tp->tun_if;
186276772Smarkj
187276772Smarkj	tp->tun_flags &= ~TUN_OPEN;
188276772Smarkj	tp->tun_pid = 0;
189276772Smarkj
190276772Smarkj	/*
191276772Smarkj	 * junk all pending output
192276772Smarkj	 */
193276772Smarkj	do {
194276772Smarkj		s = splimp();
195276772Smarkj		IF_DEQUEUE(&ifp->if_snd, m);
196276772Smarkj		splx(s);
197276772Smarkj		if (m)
198276772Smarkj			m_freem(m);
199276772Smarkj	} while (m);
200276772Smarkj
201276772Smarkj	if (ifp->if_flags & IFF_UP) {
202276772Smarkj		s = splimp();
203276772Smarkj		if_down(ifp);
204276772Smarkj		splx(s);
205276772Smarkj	}
206276772Smarkj
207276772Smarkj	if (ifp->if_flags & IFF_RUNNING) {
208276772Smarkj		register struct ifaddr *ifa;
209276772Smarkj
210276772Smarkj		s = splimp();
211276772Smarkj		/* find internet addresses and delete routes */
212276772Smarkj		for (ifa = ifp->if_addrhead.tqh_first; ifa;
213276772Smarkj		    ifa = ifa->ifa_link.tqe_next)
214276772Smarkj			if (ifa->ifa_addr->sa_family == AF_INET)
215276772Smarkj				rtinit(ifa, (int)RTM_DELETE,
216276772Smarkj				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
217276772Smarkj		ifp->if_flags &= ~IFF_RUNNING;
218276772Smarkj		splx(s);
219276772Smarkj	}
220276772Smarkj
221276772Smarkj	funsetown(tp->tun_sigio);
222276772Smarkj	selwakeup(&tp->tun_rsel);
223276772Smarkj
224276772Smarkj	TUNDEBUG ("%s%d: closed\n", ifp->if_name, ifp->if_unit);
225276772Smarkj	return (0);
226276772Smarkj}
227276772Smarkj
228276772Smarkjstatic int
229276772Smarkjtuninit(ifp)
230276772Smarkj	struct ifnet *ifp;
231276772Smarkj{
232276772Smarkj	struct tun_softc *tp = ifp->if_softc;
233276772Smarkj	register struct ifaddr *ifa;
234276772Smarkj
235276772Smarkj	TUNDEBUG("%s%d: tuninit\n", ifp->if_name, ifp->if_unit);
236276772Smarkj
237276772Smarkj	ifp->if_flags |= IFF_UP | IFF_RUNNING;
238290957Smarius	getmicrotime(&ifp->if_lastchange);
239276772Smarkj
240276772Smarkj	for (ifa = ifp->if_addrhead.tqh_first; ifa;
241276772Smarkj	     ifa = ifa->ifa_link.tqe_next) {
242276772Smarkj#ifdef INET
243276772Smarkj		if (ifa->ifa_addr->sa_family == AF_INET) {
244276772Smarkj		    struct sockaddr_in *si;
245276772Smarkj
246276772Smarkj		    si = (struct sockaddr_in *)ifa->ifa_addr;
247276772Smarkj		    if (si && si->sin_addr.s_addr)
248276772Smarkj			    tp->tun_flags |= TUN_IASET;
249276772Smarkj
250276772Smarkj		    si = (struct sockaddr_in *)ifa->ifa_dstaddr;
251276772Smarkj		    if (si && si->sin_addr.s_addr)
252276772Smarkj			    tp->tun_flags |= TUN_DSTADDR;
253276772Smarkj		}
254276772Smarkj#endif
255276772Smarkj	}
256276772Smarkj	return 0;
257276772Smarkj}
258276772Smarkj
259276772Smarkj/*
260276772Smarkj * Process an ioctl request.
261276772Smarkj */
262276772Smarkjint
263276772Smarkjtunifioctl(ifp, cmd, data)
264276772Smarkj	struct ifnet *ifp;
265276772Smarkj	u_long	cmd;
266276772Smarkj	caddr_t	data;
267276772Smarkj{
268276772Smarkj	struct ifreq *ifr = (struct ifreq *)data;
269276772Smarkj	struct tun_softc *tp = ifp->if_softc;
270276772Smarkj	struct ifstat *ifs;
271276772Smarkj	int		error = 0, s;
272276772Smarkj
273276772Smarkj	s = splimp();
274276772Smarkj	switch(cmd) {
275276772Smarkj	case SIOCGIFSTATUS:
276276772Smarkj		ifs = (struct ifstat *)data;
277276772Smarkj		if (tp->tun_pid)
278276772Smarkj			sprintf(ifs->ascii + strlen(ifs->ascii),
279276772Smarkj			    "\tOpened by PID %d\n", tp->tun_pid);
280276772Smarkj		return(0);
281276772Smarkj	case SIOCSIFADDR:
282276772Smarkj		tuninit(ifp);
283276772Smarkj		TUNDEBUG("%s%d: address set\n",
284276772Smarkj			 ifp->if_name, ifp->if_unit);
285276772Smarkj		break;
286276772Smarkj	case SIOCSIFDSTADDR:
287298076Scem		tuninit(ifp);
288276772Smarkj		TUNDEBUG("%s%d: destination address set\n",
289276772Smarkj			 ifp->if_name, ifp->if_unit);
290276772Smarkj		break;
291276772Smarkj	case SIOCSIFMTU:
292276772Smarkj		ifp->if_mtu = ifr->ifr_mtu;
293276772Smarkj		TUNDEBUG("%s%d: mtu set\n",
294276772Smarkj			 ifp->if_name, ifp->if_unit);
295276772Smarkj		break;
296276772Smarkj	case SIOCADDMULTI:
297276772Smarkj	case SIOCDELMULTI:
298276772Smarkj		break;
299276772Smarkj
300276772Smarkj
301276772Smarkj	default:
302276772Smarkj		error = EINVAL;
303276772Smarkj	}
304276772Smarkj	splx(s);
305276772Smarkj	return (error);
306276772Smarkj}
307276772Smarkj
308276772Smarkj/*
309276772Smarkj * tunoutput - queue packets from higher level ready to put out.
310276772Smarkj */
311276772Smarkjint
312276772Smarkjtunoutput(ifp, m0, dst, rt)
313276772Smarkj	struct ifnet   *ifp;
314276772Smarkj	struct mbuf    *m0;
315276772Smarkj	struct sockaddr *dst;
316276772Smarkj	struct rtentry *rt;
317276772Smarkj{
318276772Smarkj	struct tun_softc *tp = ifp->if_softc;
319276772Smarkj	int		s;
320276772Smarkj
321276772Smarkj	TUNDEBUG ("%s%d: tunoutput\n", ifp->if_name, ifp->if_unit);
322276772Smarkj
323276772Smarkj	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
324276772Smarkj		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
325298076Scem			  ifp->if_unit, tp->tun_flags);
326276772Smarkj		m_freem (m0);
327276772Smarkj		return EHOSTDOWN;
328298076Scem	}
329276772Smarkj
330276772Smarkj	/* BPF write needs to be handled specially */
331276772Smarkj	if (dst->sa_family == AF_UNSPEC) {
332276772Smarkj		dst->sa_family = *(mtod(m0, int *));
333298076Scem		m0->m_len -= sizeof(int);
334276772Smarkj		m0->m_pkthdr.len -= sizeof(int);
335276772Smarkj		m0->m_data += sizeof(int);
336276772Smarkj	}
337276772Smarkj
338276772Smarkj	if (ifp->if_bpf) {
339276772Smarkj		/*
340276772Smarkj		 * We need to prepend the address family as
341276772Smarkj		 * a four byte field.  Cons up a dummy header
342298076Scem		 * to pacify bpf.  This is safe because bpf
343276772Smarkj		 * will only read from the mbuf (i.e., it won't
344276772Smarkj		 * try to free it or keep a pointer to it).
345298076Scem		 */
346276772Smarkj		struct mbuf m;
347276772Smarkj		u_int af = dst->sa_family;
348276772Smarkj
349276772Smarkj		m.m_next = m0;
350276772Smarkj		m.m_len = 4;
351276772Smarkj		m.m_data = (char *)&af;
352276772Smarkj
353276772Smarkj		bpf_mtap(ifp, &m);
354276772Smarkj	}
355276772Smarkj
356276772Smarkj	/* prepend sockaddr? this may abort if the mbuf allocation fails */
357276772Smarkj	if (tp->tun_flags & TUN_LMODE) {
358276772Smarkj		/* allocate space for sockaddr */
359276772Smarkj		M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
360276772Smarkj
361276772Smarkj		/* if allocation failed drop packet */
362276772Smarkj		if (m0 == NULL){
363276772Smarkj			s = splimp();	/* spl on queue manipulation */
364276772Smarkj			IF_DROP(&ifp->if_snd);
365276772Smarkj			splx(s);
366276772Smarkj			ifp->if_oerrors++;
367276772Smarkj			return (ENOBUFS);
368276772Smarkj		} else {
369276772Smarkj			bcopy(dst, m0->m_data, dst->sa_len);
370276772Smarkj		}
371276772Smarkj	}
372276772Smarkj
373276772Smarkj	if (tp->tun_flags & TUN_IFHEAD) {
374276772Smarkj		/* Prepend the address family */
375276772Smarkj		M_PREPEND(m0, 4, M_DONTWAIT);
376298076Scem
377276772Smarkj		/* if allocation failed drop packet */
378276772Smarkj		if (m0 == NULL){
379276772Smarkj			s = splimp();	/* spl on queue manipulation */
380276772Smarkj			IF_DROP(&ifp->if_snd);
381276772Smarkj			splx(s);
382276772Smarkj			ifp->if_oerrors++;
383276772Smarkj			return ENOBUFS;
384276772Smarkj		} else
385276772Smarkj			*(u_int32_t *)m0->m_data = htonl(dst->sa_family);
386276772Smarkj	} else {
387276772Smarkj#ifdef INET
388276772Smarkj		if (dst->sa_family != AF_INET)
389276772Smarkj#endif
390276772Smarkj		{
391276772Smarkj			m_freem(m0);
392276772Smarkj			return EAFNOSUPPORT;
393276772Smarkj		}
394276772Smarkj	}
395276772Smarkj
396276772Smarkj	s = splimp();
397290957Smarius	if (IF_QFULL(&ifp->if_snd)) {
398		IF_DROP(&ifp->if_snd);
399		m_freem(m0);
400		splx(s);
401		ifp->if_collisions++;
402		return ENOBUFS;
403	}
404	ifp->if_obytes += m0->m_pkthdr.len;
405	IF_ENQUEUE(&ifp->if_snd, m0);
406	splx(s);
407	ifp->if_opackets++;
408
409	if (tp->tun_flags & TUN_RWAIT) {
410		tp->tun_flags &= ~TUN_RWAIT;
411		wakeup((caddr_t)tp);
412	}
413	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio)
414		pgsigio(tp->tun_sigio, SIGIO, 0);
415	selwakeup(&tp->tun_rsel);
416	return 0;
417}
418
419/*
420 * the cdevsw interface is now pretty minimal.
421 */
422static	int
423tunioctl(dev, cmd, data, flag, p)
424	dev_t		dev;
425	u_long		cmd;
426	caddr_t		data;
427	int		flag;
428	struct proc	*p;
429{
430	int		s;
431	struct tun_softc *tp = dev->si_drv1;
432 	struct tuninfo *tunp;
433
434	switch (cmd) {
435 	case TUNSIFINFO:
436 		tunp = (struct tuninfo *)data;
437		if (tunp->mtu < IF_MINMTU)
438			return (EINVAL);
439 		tp->tun_if.if_mtu = tunp->mtu;
440 		tp->tun_if.if_type = tunp->type;
441 		tp->tun_if.if_baudrate = tunp->baudrate;
442 		break;
443 	case TUNGIFINFO:
444 		tunp = (struct tuninfo *)data;
445 		tunp->mtu = tp->tun_if.if_mtu;
446 		tunp->type = tp->tun_if.if_type;
447 		tunp->baudrate = tp->tun_if.if_baudrate;
448 		break;
449	case TUNSDEBUG:
450		tundebug = *(int *)data;
451		break;
452	case TUNGDEBUG:
453		*(int *)data = tundebug;
454		break;
455	case TUNSLMODE:
456		if (*(int *)data) {
457			tp->tun_flags |= TUN_LMODE;
458			tp->tun_flags &= ~TUN_IFHEAD;
459		} else
460			tp->tun_flags &= ~TUN_LMODE;
461		break;
462	case TUNSIFHEAD:
463		if (*(int *)data) {
464			tp->tun_flags |= TUN_IFHEAD;
465			tp->tun_flags &= ~TUN_LMODE;
466		} else
467			tp->tun_flags &= ~TUN_IFHEAD;
468		break;
469	case TUNGIFHEAD:
470		*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
471		break;
472	case TUNSIFMODE:
473		/* deny this if UP */
474		if (tp->tun_if.if_flags & IFF_UP)
475			return(EBUSY);
476
477		switch (*(int *)data) {
478		case IFF_POINTOPOINT:
479			tp->tun_if.if_flags |= IFF_POINTOPOINT;
480			tp->tun_if.if_flags &= ~IFF_BROADCAST;
481			break;
482		case IFF_BROADCAST:
483			tp->tun_if.if_flags &= ~IFF_POINTOPOINT;
484			tp->tun_if.if_flags |= IFF_BROADCAST;
485			break;
486		default:
487			return(EINVAL);
488		}
489		break;
490	case TUNSIFPID:
491		tp->tun_pid = curproc->p_pid;
492		break;
493	case FIONBIO:
494		break;
495	case FIOASYNC:
496		if (*(int *)data)
497			tp->tun_flags |= TUN_ASYNC;
498		else
499			tp->tun_flags &= ~TUN_ASYNC;
500		break;
501	case FIONREAD:
502		s = splimp();
503		if (tp->tun_if.if_snd.ifq_head) {
504			struct mbuf *mb = tp->tun_if.if_snd.ifq_head;
505			for( *(int *)data = 0; mb != 0; mb = mb->m_next)
506				*(int *)data += mb->m_len;
507		} else
508			*(int *)data = 0;
509		splx(s);
510		break;
511	case FIOSETOWN:
512		return (fsetown(*(int *)data, &tp->tun_sigio));
513
514	case FIOGETOWN:
515		*(int *)data = fgetown(tp->tun_sigio);
516		return (0);
517
518	/* This is deprecated, FIOSETOWN should be used instead. */
519	case TIOCSPGRP:
520		return (fsetown(-(*(int *)data), &tp->tun_sigio));
521
522	/* This is deprecated, FIOGETOWN should be used instead. */
523	case TIOCGPGRP:
524		*(int *)data = -fgetown(tp->tun_sigio);
525		return (0);
526
527	default:
528		return (ENOTTY);
529	}
530	return (0);
531}
532
533/*
534 * The cdevsw read interface - reads a packet at a time, or at
535 * least as much of a packet as can be read.
536 */
537static	int
538tunread(dev, uio, flag)
539	dev_t dev;
540	struct uio *uio;
541	int flag;
542{
543	struct tun_softc *tp = dev->si_drv1;
544	struct ifnet	*ifp = &tp->tun_if;
545	struct mbuf	*m, *m0;
546	int		error=0, len, s;
547
548	TUNDEBUG ("%s%d: read\n", ifp->if_name, ifp->if_unit);
549	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
550		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
551			  ifp->if_unit, tp->tun_flags);
552		return EHOSTDOWN;
553	}
554
555	tp->tun_flags &= ~TUN_RWAIT;
556
557	s = splimp();
558	do {
559		IF_DEQUEUE(&ifp->if_snd, m0);
560		if (m0 == 0) {
561			if (flag & IO_NDELAY) {
562				splx(s);
563				return EWOULDBLOCK;
564			}
565			tp->tun_flags |= TUN_RWAIT;
566			if((error = tsleep((caddr_t)tp, PCATCH | (PZERO + 1),
567					"tunread", 0)) != 0) {
568				splx(s);
569				return error;
570			}
571		}
572	} while (m0 == 0);
573	splx(s);
574
575	while (m0 && uio->uio_resid > 0 && error == 0) {
576		len = min(uio->uio_resid, m0->m_len);
577		if (len == 0)
578			break;
579		error = uiomove(mtod(m0, caddr_t), len, uio);
580		MFREE(m0, m);
581		m0 = m;
582	}
583
584	if (m0) {
585		TUNDEBUG("Dropping mbuf\n");
586		m_freem(m0);
587	}
588	return error;
589}
590
591/*
592 * the cdevsw write interface - an atomic write is a packet - or else!
593 */
594static	int
595tunwrite(dev, uio, flag)
596	dev_t dev;
597	struct uio *uio;
598	int flag;
599{
600	struct tun_softc *tp = dev->si_drv1;
601	struct ifnet	*ifp = &tp->tun_if;
602	struct mbuf	*top, **mp, *m;
603	int		error=0, tlen, mlen;
604	u_int32_t	family;
605
606	TUNDEBUG("%s%d: tunwrite\n", ifp->if_name, ifp->if_unit);
607
608	if (uio->uio_resid == 0)
609		return 0;
610
611	if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
612		TUNDEBUG("%s%d: len=%d!\n", ifp->if_name, ifp->if_unit,
613		    uio->uio_resid);
614		return EIO;
615	}
616	tlen = uio->uio_resid;
617
618	/* get a header mbuf */
619	MGETHDR(m, M_DONTWAIT, MT_DATA);
620	if (m == NULL)
621		return ENOBUFS;
622	mlen = MHLEN;
623
624	top = 0;
625	mp = &top;
626	while (error == 0 && uio->uio_resid > 0) {
627		m->m_len = min(mlen, uio->uio_resid);
628		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
629		*mp = m;
630		mp = &m->m_next;
631		if (uio->uio_resid > 0) {
632			MGET (m, M_DONTWAIT, MT_DATA);
633			if (m == 0) {
634				error = ENOBUFS;
635				break;
636			}
637			mlen = MLEN;
638		}
639	}
640	if (error) {
641		if (top)
642			m_freem (top);
643		return error;
644	}
645
646	top->m_pkthdr.len = tlen;
647	top->m_pkthdr.rcvif = ifp;
648
649	if (ifp->if_bpf) {
650		if (tp->tun_flags & TUN_IFHEAD)
651			/*
652			 * Conveniently, we already have a 4-byte address
653			 * family prepended to our packet !
654			 */
655			bpf_mtap(ifp, top);
656		else {
657			/*
658			 * We need to prepend the address family as
659			 * a four byte field.  Cons up a dummy header
660			 * to pacify bpf.  This is safe because bpf
661			 * will only read from the mbuf (i.e., it won't
662			 * try to free it or keep a pointer to it).
663			 */
664			struct mbuf m;
665			u_int af = AF_INET;
666
667			m.m_next = top;
668			m.m_len = 4;
669			m.m_data = (char *)&af;
670
671			bpf_mtap(ifp, &m);
672		}
673	}
674
675	if (tp->tun_flags & TUN_IFHEAD) {
676		if (top->m_len < sizeof(family) &&
677		    (top = m_pullup(top, sizeof(family))) == NULL)
678				return ENOBUFS;
679		family = ntohl(*mtod(top, u_int32_t *));
680		m_adj(top, sizeof(family));
681	} else
682		family = AF_INET;
683
684	return family_enqueue(family, top);
685}
686
687/*
688 * tunpoll - the poll interface, this is only useful on reads
689 * really. The write detect always returns true, write never blocks
690 * anyway, it either accepts the packet or drops it.
691 */
692static	int
693tunpoll(dev, events, p)
694	dev_t dev;
695	int events;
696	struct proc *p;
697{
698	int		s;
699	struct tun_softc *tp = dev->si_drv1;
700	struct ifnet	*ifp = &tp->tun_if;
701	int		revents = 0;
702
703	s = splimp();
704	TUNDEBUG("%s%d: tunpoll\n", ifp->if_name, ifp->if_unit);
705
706	if (events & (POLLIN | POLLRDNORM)) {
707		if (ifp->if_snd.ifq_len > 0) {
708			TUNDEBUG("%s%d: tunpoll q=%d\n", ifp->if_name,
709			    ifp->if_unit, ifp->if_snd.ifq_len);
710			revents |= events & (POLLIN | POLLRDNORM);
711		} else {
712			TUNDEBUG("%s%d: tunpoll waiting\n", ifp->if_name,
713			    ifp->if_unit);
714			selrecord(p, &tp->tun_rsel);
715		}
716	}
717	if (events & (POLLOUT | POLLWRNORM))
718		revents |= events & (POLLOUT | POLLWRNORM);
719
720	splx(s);
721	return (revents);
722}
723