if_tun.c revision 110097
1/*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/
2
3/*
4 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
11 * user process to have its wicked way with. This driver has its
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 *
16 * $FreeBSD: head/sys/net/if_tun.c 110097 2003-01-30 13:13:11Z phk $
17 */
18
19#include "opt_inet.h"
20#include "opt_mac.h"
21
22#include <sys/param.h>
23#include <sys/proc.h>
24#include <sys/systm.h>
25#include <sys/mac.h>
26#include <sys/mbuf.h>
27#include <sys/module.h>
28#include <sys/socket.h>
29#include <sys/filio.h>
30#include <sys/sockio.h>
31#include <sys/ttycom.h>
32#include <sys/poll.h>
33#include <sys/signalvar.h>
34#include <sys/filedesc.h>
35#include <sys/kernel.h>
36#include <sys/sysctl.h>
37#include <sys/conf.h>
38#include <sys/uio.h>
39#include <sys/vnode.h>
40#include <sys/malloc.h>
41#include <machine/bus.h>	/* XXX Shouldn't really be required ! */
42#include <sys/rman.h>
43
44#include <net/if.h>
45#include <net/if_types.h>
46#include <net/route.h>
47#include <net/intrq.h>
48#ifdef INET
49#include <netinet/in.h>
50#endif
51#include <net/bpf.h>
52#include <net/if_tunvar.h>
53#include <net/if_tun.h>
54
55#define TUNDEBUG	if (tundebug) printf
56#define	TUNNAME		"tun"
57#define	TUN_MAXUNIT	0x7fff	/* ifp->if_unit is only 15 bits */
58
59static MALLOC_DEFINE(M_TUN, TUNNAME, "Tunnel Interface");
60static int tundebug = 0;
61static struct tun_softc *tunhead = NULL;
62static struct rman tununits;
63static udev_t tunbasedev = NOUDEV;
64SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
65
66static void	tunclone(void *arg, char *name, int namelen, dev_t *dev);
67static void	tuncreate(dev_t dev);
68static int	tunifioctl(struct ifnet *, u_long, caddr_t);
69static int	tuninit(struct ifnet *);
70static int	tunmodevent(module_t, int, void *);
71static int	tunoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
72		    struct rtentry *rt);
73static void	tunstart(struct ifnet *);
74
75static d_open_t		tunopen;
76static d_close_t	tunclose;
77static d_read_t		tunread;
78static d_write_t	tunwrite;
79static d_ioctl_t	tunioctl;
80static d_poll_t		tunpoll;
81
82#define CDEV_MAJOR 52
83static struct cdevsw tun_cdevsw = {
84	/* open */	tunopen,
85	/* close */	tunclose,
86	/* read */	tunread,
87	/* write */	tunwrite,
88	/* ioctl */	tunioctl,
89	/* poll */	tunpoll,
90	/* mmap */	nommap,
91	/* strategy */	nostrategy,
92	/* name */	TUNNAME,
93	/* maj */	CDEV_MAJOR,
94	/* dump */	nodump,
95	/* psize */	nopsize,
96	/* flags */	0,
97};
98
99static void
100tunclone(void *arg, char *name, int namelen, dev_t *dev)
101{
102	struct resource *r;
103	int err;
104	int u;
105
106	if (*dev != NODEV)
107		return;
108
109	if (strcmp(name, TUNNAME) == 0) {
110		r = rman_reserve_resource(&tununits, 0, TUN_MAXUNIT, 1,
111		    RF_ALLOCATED | RF_ACTIVE, NULL);
112		u = rman_get_start(r);
113		err = rman_release_resource(r);
114		KASSERT(err == 0, ("Unexpected failure releasing resource"));
115		*dev = makedev(CDEV_MAJOR, unit2minor(u));
116		if ((*dev)->si_flags & SI_NAMED)
117			return;	/* Already make_dev()d */
118	} else if (dev_stdclone(name, NULL, TUNNAME, &u) != 1)
119		return;	/* Don't recognise the name */
120
121	*dev = make_dev(&tun_cdevsw, unit2minor(u),
122	    UID_ROOT, GID_WHEEL, 0600, "tun%d", u);
123
124	/*
125	 * All devices depend on tunbasedev so that we can simply
126	 * destroy_dev() this device at module unload time to get
127	 * rid of all our make_dev()d resources.
128	 */
129	if (tunbasedev == NOUDEV)
130		tunbasedev = (*dev)->si_udev;
131	else {
132		(*dev)->si_flags |= SI_CHEAPCLONE;
133		dev_depends(udev2dev(tunbasedev, 0), *dev);
134	}
135}
136
137static int
138tunmodevent(module_t mod, int type, void *data)
139{
140	static eventhandler_tag tag;
141	struct tun_softc *tp;
142	dev_t dev;
143	int err;
144
145	switch (type) {
146	case MOD_LOAD:
147		tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
148		if (tag == NULL)
149			return (ENOMEM);
150		tununits.rm_type = RMAN_ARRAY;
151		tununits.rm_descr = "open if_tun units";
152		err = rman_init(&tununits);
153		if (err != 0) {
154			cdevsw_remove(&tun_cdevsw);
155			EVENTHANDLER_DEREGISTER(dev_clone, tag);
156			return (err);
157		}
158		err = rman_manage_region(&tununits, 0, TUN_MAXUNIT);
159		if (err != 0) {
160			printf("%s: tununits: rman_manage_region: Failed %d\n",
161			    TUNNAME, err);
162			rman_fini(&tununits);
163			cdevsw_remove(&tun_cdevsw);
164			EVENTHANDLER_DEREGISTER(dev_clone, tag);
165			return (err);
166		}
167		break;
168	case MOD_UNLOAD:
169		err = rman_fini(&tununits);
170		if (err != 0)
171			return (err);
172		EVENTHANDLER_DEREGISTER(dev_clone, tag);
173
174		while (tunhead != NULL) {
175			KASSERT((tunhead->tun_flags & TUN_OPEN) == 0,
176			    ("tununits is out of sync - unit %d",
177			    tunhead->tun_if.if_unit));
178			tp = tunhead;
179			dev = makedev(tun_cdevsw.d_maj,
180			    unit2minor(tp->tun_if.if_unit));
181			KASSERT(dev->si_drv1 == tp, ("Bad makedev result"));
182			tunhead = tp->next;
183			bpfdetach(&tp->tun_if);
184			if_detach(&tp->tun_if);
185			KASSERT(dev->si_flags & SI_NAMED, ("Missing make_dev"));
186			free(tp, M_TUN);
187		}
188
189		/*
190		 * Destroying tunbasedev results in all of our make_dev()s
191		 * conveniently going away.
192		 */
193		if (tunbasedev != NOUDEV)
194			destroy_dev(udev2dev(tunbasedev, 0));
195
196		break;
197	}
198	return 0;
199}
200
201static moduledata_t tun_mod = {
202	"if_tun",
203	tunmodevent,
204	0
205};
206
207DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
208
209static void
210tunstart(struct ifnet *ifp)
211{
212	struct tun_softc *tp = ifp->if_softc;
213
214	if (tp->tun_flags & TUN_RWAIT) {
215		tp->tun_flags &= ~TUN_RWAIT;
216		wakeup((caddr_t)tp);
217	}
218	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio)
219		pgsigio(&tp->tun_sigio, SIGIO, 0);
220	selwakeup(&tp->tun_rsel);
221}
222
223static void
224tuncreate(dev_t dev)
225{
226	struct tun_softc *sc;
227	struct ifnet *ifp;
228
229	if (!(dev->si_flags & SI_NAMED))
230		dev = make_dev(&tun_cdevsw, minor(dev),
231		    UID_UUCP, GID_DIALER, 0600, "tun%d", dev2unit(dev));
232
233	MALLOC(sc, struct tun_softc *, sizeof(*sc), M_TUN, M_ZERO);
234	sc->tun_flags = TUN_INITED;
235	sc->next = tunhead;
236	tunhead = sc;
237
238	ifp = &sc->tun_if;
239	ifp->if_unit = dev2unit(dev);
240	ifp->if_name = TUNNAME;
241	ifp->if_mtu = TUNMTU;
242	ifp->if_ioctl = tunifioctl;
243	ifp->if_output = tunoutput;
244	ifp->if_start = tunstart;
245	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
246	ifp->if_type = IFT_PPP;
247	ifp->if_snd.ifq_maxlen = ifqmaxlen;
248	ifp->if_softc = sc;
249	if_attach(ifp);
250	bpfattach(ifp, DLT_NULL, sizeof(u_int));
251	dev->si_drv1 = sc;
252}
253
254static int
255tunopen(dev_t dev, int flag, int mode, struct thread *td)
256{
257	struct resource *r;
258	struct ifnet	*ifp;
259	struct tun_softc *tp;
260	int unit;
261
262	unit = dev2unit(dev);
263	if (unit > TUN_MAXUNIT)
264		return (ENXIO);
265
266	r = rman_reserve_resource(&tununits, unit, unit, 1,
267	    RF_ALLOCATED | RF_ACTIVE, NULL);
268	if (r == NULL)
269		return (EBUSY);
270
271	dev->si_flags &= ~SI_CHEAPCLONE;
272
273	tp = dev->si_drv1;
274	if (!tp) {
275		tuncreate(dev);
276		tp = dev->si_drv1;
277	}
278	KASSERT(!(tp->tun_flags & TUN_OPEN), ("Resource & flags out-of-sync"));
279	tp->r_unit = r;
280	tp->tun_pid = td->td_proc->p_pid;
281	ifp = &tp->tun_if;
282	tp->tun_flags |= TUN_OPEN;
283	TUNDEBUG("%s%d: open\n", ifp->if_name, ifp->if_unit);
284
285	return (0);
286}
287
288/*
289 * tunclose - close the device - mark i/f down & delete
290 * routing info
291 */
292static	int
293tunclose(dev_t dev, int foo, int bar, struct thread *td)
294{
295	struct tun_softc *tp;
296	struct ifnet *ifp;
297	int s;
298	int err;
299
300	tp = dev->si_drv1;
301	ifp = &tp->tun_if;
302
303	KASSERT(tp->r_unit, ("Unit %d not marked open", ifp->if_unit));
304	tp->tun_flags &= ~TUN_OPEN;
305	tp->tun_pid = 0;
306
307	/*
308	 * junk all pending output
309	 */
310	IF_DRAIN(&ifp->if_snd);
311
312	if (ifp->if_flags & IFF_UP) {
313		s = splimp();
314		if_down(ifp);
315		splx(s);
316	}
317
318	if (ifp->if_flags & IFF_RUNNING) {
319		register struct ifaddr *ifa;
320
321		s = splimp();
322		/* find internet addresses and delete routes */
323		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
324			if (ifa->ifa_addr->sa_family == AF_INET)
325				rtinit(ifa, (int)RTM_DELETE,
326				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
327		ifp->if_flags &= ~IFF_RUNNING;
328		splx(s);
329	}
330
331	funsetown(&tp->tun_sigio);
332	selwakeup(&tp->tun_rsel);
333
334	TUNDEBUG ("%s%d: closed\n", ifp->if_name, ifp->if_unit);
335	err = rman_release_resource(tp->r_unit);
336	KASSERT(err == 0, ("Unit %d failed to release", ifp->if_unit));
337
338	return (0);
339}
340
341static int
342tuninit(struct ifnet *ifp)
343{
344	struct tun_softc *tp = ifp->if_softc;
345	register struct ifaddr *ifa;
346	int error = 0;
347
348	TUNDEBUG("%s%d: tuninit\n", ifp->if_name, ifp->if_unit);
349
350	ifp->if_flags |= IFF_UP | IFF_RUNNING;
351	getmicrotime(&ifp->if_lastchange);
352
353	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa;
354	     ifa = TAILQ_NEXT(ifa, ifa_link)) {
355		if (ifa->ifa_addr == NULL)
356			error = EFAULT;
357			/* XXX: Should maybe return straight off? */
358		else {
359#ifdef INET
360			if (ifa->ifa_addr->sa_family == AF_INET) {
361			    struct sockaddr_in *si;
362
363			    si = (struct sockaddr_in *)ifa->ifa_addr;
364			    if (si->sin_addr.s_addr)
365				    tp->tun_flags |= TUN_IASET;
366
367			    si = (struct sockaddr_in *)ifa->ifa_dstaddr;
368			    if (si && si->sin_addr.s_addr)
369				    tp->tun_flags |= TUN_DSTADDR;
370			}
371#endif
372		}
373	}
374	return (error);
375}
376
377/*
378 * Process an ioctl request.
379 */
380static int
381tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
382{
383	struct ifreq *ifr = (struct ifreq *)data;
384	struct tun_softc *tp = ifp->if_softc;
385	struct ifstat *ifs;
386	int		error = 0, s;
387
388	s = splimp();
389	switch(cmd) {
390	case SIOCGIFSTATUS:
391		ifs = (struct ifstat *)data;
392		if (tp->tun_pid)
393			sprintf(ifs->ascii + strlen(ifs->ascii),
394			    "\tOpened by PID %d\n", tp->tun_pid);
395		break;
396	case SIOCSIFADDR:
397		error = tuninit(ifp);
398		TUNDEBUG("%s%d: address set, error=%d\n",
399			 ifp->if_name, ifp->if_unit, error);
400		break;
401	case SIOCSIFDSTADDR:
402		error = tuninit(ifp);
403		TUNDEBUG("%s%d: destination address set, error=%d\n",
404			 ifp->if_name, ifp->if_unit, error);
405		break;
406	case SIOCSIFMTU:
407		ifp->if_mtu = ifr->ifr_mtu;
408		TUNDEBUG("%s%d: mtu set\n", ifp->if_name, ifp->if_unit);
409		break;
410	case SIOCSIFFLAGS:
411	case SIOCADDMULTI:
412	case SIOCDELMULTI:
413		break;
414	default:
415		error = EINVAL;
416	}
417	splx(s);
418	return (error);
419}
420
421/*
422 * tunoutput - queue packets from higher level ready to put out.
423 */
424static int
425tunoutput(
426	struct ifnet *ifp,
427	struct mbuf *m0,
428	struct sockaddr *dst,
429	struct rtentry *rt)
430{
431	struct tun_softc *tp = ifp->if_softc;
432#ifdef MAC
433	int error;
434#endif
435
436	TUNDEBUG ("%s%d: tunoutput\n", ifp->if_name, ifp->if_unit);
437
438#ifdef MAC
439	error = mac_check_ifnet_transmit(ifp, m0);
440	if (error) {
441		m_freem(m0);
442		return (error);
443	}
444#endif
445
446	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
447		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
448			  ifp->if_unit, tp->tun_flags);
449		m_freem (m0);
450		return (EHOSTDOWN);
451	}
452
453	if ((ifp->if_flags & IFF_UP) != IFF_UP) {
454		m_freem (m0);
455		return (EHOSTDOWN);
456	}
457
458	/* BPF write needs to be handled specially */
459	if (dst->sa_family == AF_UNSPEC) {
460		dst->sa_family = *(mtod(m0, int *));
461		m0->m_len -= sizeof(int);
462		m0->m_pkthdr.len -= sizeof(int);
463		m0->m_data += sizeof(int);
464	}
465
466	if (ifp->if_bpf) {
467		/*
468		 * We need to prepend the address family as
469		 * a four byte field.  Cons up a dummy header
470		 * to pacify bpf.  This is safe because bpf
471		 * will only read from the mbuf (i.e., it won't
472		 * try to free it or keep a pointer to it).
473		 */
474		struct mbuf m;
475		uint32_t af = dst->sa_family;
476
477		m.m_next = m0;
478		m.m_len = 4;
479		m.m_data = (char *)&af;
480
481		BPF_MTAP(ifp, &m);
482	}
483
484	/* prepend sockaddr? this may abort if the mbuf allocation fails */
485	if (tp->tun_flags & TUN_LMODE) {
486		/* allocate space for sockaddr */
487		M_PREPEND(m0, dst->sa_len, M_NOWAIT);
488
489		/* if allocation failed drop packet */
490		if (m0 == NULL) {
491			ifp->if_iqdrops++;
492			ifp->if_oerrors++;
493			return (ENOBUFS);
494		} else {
495			bcopy(dst, m0->m_data, dst->sa_len);
496		}
497	}
498
499	if (tp->tun_flags & TUN_IFHEAD) {
500		/* Prepend the address family */
501		M_PREPEND(m0, 4, M_NOWAIT);
502
503		/* if allocation failed drop packet */
504		if (m0 == NULL) {
505			ifp->if_iqdrops++;
506			ifp->if_oerrors++;
507			return (ENOBUFS);
508		} else
509			*(u_int32_t *)m0->m_data = htonl(dst->sa_family);
510	} else {
511#ifdef INET
512		if (dst->sa_family != AF_INET)
513#endif
514		{
515			m_freem(m0);
516			return (EAFNOSUPPORT);
517		}
518	}
519
520	if (! IF_HANDOFF(&ifp->if_snd, m0, ifp)) {
521		ifp->if_collisions++;
522		return (ENOBUFS);
523	}
524	ifp->if_opackets++;
525	return (0);
526}
527
528/*
529 * the cdevsw interface is now pretty minimal.
530 */
531static	int
532tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
533{
534	int		s;
535	int		error;
536	struct tun_softc *tp = dev->si_drv1;
537 	struct tuninfo *tunp;
538
539	switch (cmd) {
540 	case TUNSIFINFO:
541 		tunp = (struct tuninfo *)data;
542		if (tunp->mtu < IF_MINMTU)
543			return (EINVAL);
544 		if (tp->tun_if.if_mtu != tunp->mtu
545		&& (error = suser(td)) != 0)
546			return (error);
547 		tp->tun_if.if_mtu = tunp->mtu;
548 		tp->tun_if.if_type = tunp->type;
549 		tp->tun_if.if_baudrate = tunp->baudrate;
550 		break;
551 	case TUNGIFINFO:
552 		tunp = (struct tuninfo *)data;
553 		tunp->mtu = tp->tun_if.if_mtu;
554 		tunp->type = tp->tun_if.if_type;
555 		tunp->baudrate = tp->tun_if.if_baudrate;
556 		break;
557	case TUNSDEBUG:
558		tundebug = *(int *)data;
559		break;
560	case TUNGDEBUG:
561		*(int *)data = tundebug;
562		break;
563	case TUNSLMODE:
564		if (*(int *)data) {
565			tp->tun_flags |= TUN_LMODE;
566			tp->tun_flags &= ~TUN_IFHEAD;
567		} else
568			tp->tun_flags &= ~TUN_LMODE;
569		break;
570	case TUNSIFHEAD:
571		if (*(int *)data) {
572			tp->tun_flags |= TUN_IFHEAD;
573			tp->tun_flags &= ~TUN_LMODE;
574		} else
575			tp->tun_flags &= ~TUN_IFHEAD;
576		break;
577	case TUNGIFHEAD:
578		*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
579		break;
580	case TUNSIFMODE:
581		/* deny this if UP */
582		if (tp->tun_if.if_flags & IFF_UP)
583			return(EBUSY);
584
585		switch (*(int *)data & ~IFF_MULTICAST) {
586		case IFF_POINTOPOINT:
587		case IFF_BROADCAST:
588			tp->tun_if.if_flags &=
589			    ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
590			tp->tun_if.if_flags |= *(int *)data;
591			break;
592		default:
593			return(EINVAL);
594		}
595		break;
596	case TUNSIFPID:
597		tp->tun_pid = curthread->td_proc->p_pid;
598		break;
599	case FIONBIO:
600		break;
601	case FIOASYNC:
602		if (*(int *)data)
603			tp->tun_flags |= TUN_ASYNC;
604		else
605			tp->tun_flags &= ~TUN_ASYNC;
606		break;
607	case FIONREAD:
608		s = splimp();
609		if (tp->tun_if.if_snd.ifq_head) {
610			struct mbuf *mb = tp->tun_if.if_snd.ifq_head;
611			for( *(int *)data = 0; mb != 0; mb = mb->m_next)
612				*(int *)data += mb->m_len;
613		} else
614			*(int *)data = 0;
615		splx(s);
616		break;
617	case FIOSETOWN:
618		return (fsetown(*(int *)data, &tp->tun_sigio));
619
620	case FIOGETOWN:
621		*(int *)data = fgetown(&tp->tun_sigio);
622		return (0);
623
624	/* This is deprecated, FIOSETOWN should be used instead. */
625	case TIOCSPGRP:
626		return (fsetown(-(*(int *)data), &tp->tun_sigio));
627
628	/* This is deprecated, FIOGETOWN should be used instead. */
629	case TIOCGPGRP:
630		*(int *)data = -fgetown(&tp->tun_sigio);
631		return (0);
632
633	default:
634		return (ENOTTY);
635	}
636	return (0);
637}
638
639/*
640 * The cdevsw read interface - reads a packet at a time, or at
641 * least as much of a packet as can be read.
642 */
643static	int
644tunread(dev_t dev, struct uio *uio, int flag)
645{
646	struct tun_softc *tp = dev->si_drv1;
647	struct ifnet	*ifp = &tp->tun_if;
648	struct mbuf	*m;
649	int		error=0, len, s;
650
651	TUNDEBUG ("%s%d: read\n", ifp->if_name, ifp->if_unit);
652	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
653		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
654			  ifp->if_unit, tp->tun_flags);
655		return (EHOSTDOWN);
656	}
657
658	tp->tun_flags &= ~TUN_RWAIT;
659
660	s = splimp();
661	do {
662		IF_DEQUEUE(&ifp->if_snd, m);
663		if (m == NULL) {
664			if (flag & IO_NDELAY) {
665				splx(s);
666				return (EWOULDBLOCK);
667			}
668			tp->tun_flags |= TUN_RWAIT;
669			if((error = tsleep((caddr_t)tp, PCATCH | (PZERO + 1),
670					"tunread", 0)) != 0) {
671				splx(s);
672				return (error);
673			}
674		}
675	} while (m == NULL);
676	splx(s);
677
678	while (m && uio->uio_resid > 0 && error == 0) {
679		len = min(uio->uio_resid, m->m_len);
680		if (len != 0)
681			error = uiomove(mtod(m, caddr_t), len, uio);
682		m = m_free(m);
683	}
684
685	if (m) {
686		TUNDEBUG("%s%d: Dropping mbuf\n", ifp->if_name, ifp->if_unit);
687		m_freem(m);
688	}
689	return (error);
690}
691
692/*
693 * the cdevsw write interface - an atomic write is a packet - or else!
694 */
695static	int
696tunwrite(dev_t dev, struct uio *uio, int flag)
697{
698	struct tun_softc *tp = dev->si_drv1;
699	struct ifnet	*ifp = &tp->tun_if;
700	struct mbuf	*top, **mp, *m;
701	int		error=0, tlen, mlen;
702	uint32_t	family;
703
704	TUNDEBUG("%s%d: tunwrite\n", ifp->if_name, ifp->if_unit);
705
706	if ((ifp->if_flags & IFF_UP) != IFF_UP)
707		/* ignore silently */
708		return (0);
709
710	if (uio->uio_resid == 0)
711		return (0);
712
713	if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
714		TUNDEBUG("%s%d: len=%d!\n", ifp->if_name, ifp->if_unit,
715		    uio->uio_resid);
716		return (EIO);
717	}
718	tlen = uio->uio_resid;
719
720	/* get a header mbuf */
721	MGETHDR(m, M_NOWAIT, MT_DATA);
722	if (m == NULL)
723		return (ENOBUFS);
724	mlen = MHLEN;
725
726	top = 0;
727	mp = &top;
728	while (error == 0 && uio->uio_resid > 0) {
729		m->m_len = min(mlen, uio->uio_resid);
730		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
731		*mp = m;
732		mp = &m->m_next;
733		if (uio->uio_resid > 0) {
734			MGET (m, M_NOWAIT, MT_DATA);
735			if (m == 0) {
736				error = ENOBUFS;
737				break;
738			}
739			mlen = MLEN;
740		}
741	}
742	if (error) {
743		if (top)
744			m_freem (top);
745		ifp->if_ierrors++;
746		return (error);
747	}
748
749	top->m_pkthdr.len = tlen;
750	top->m_pkthdr.rcvif = ifp;
751#ifdef MAC
752	mac_create_mbuf_from_ifnet(ifp, top);
753#endif
754
755	if (ifp->if_bpf) {
756		if (tp->tun_flags & TUN_IFHEAD) {
757			/*
758			 * Conveniently, we already have a 4-byte address
759			 * family prepended to our packet !
760			 * Inconveniently, it's in the wrong byte order !
761			 */
762			if ((top = m_pullup(top, sizeof(family))) == NULL)
763				return (ENOBUFS);
764			*mtod(top, u_int32_t *) =
765			    ntohl(*mtod(top, u_int32_t *));
766			BPF_MTAP(ifp, top);
767			*mtod(top, u_int32_t *) =
768			    htonl(*mtod(top, u_int32_t *));
769		} else {
770			/*
771			 * We need to prepend the address family as
772			 * a four byte field.  Cons up a dummy header
773			 * to pacify bpf.  This is safe because bpf
774			 * will only read from the mbuf (i.e., it won't
775			 * try to free it or keep a pointer to it).
776			 */
777			struct mbuf m;
778			uint32_t af = AF_INET;
779
780			m.m_next = top;
781			m.m_len = 4;
782			m.m_data = (char *)&af;
783
784			BPF_MTAP(ifp, &m);
785		}
786	}
787
788	if (tp->tun_flags & TUN_IFHEAD) {
789		if (top->m_len < sizeof(family) &&
790		    (top = m_pullup(top, sizeof(family))) == NULL)
791			return (ENOBUFS);
792		family = ntohl(*mtod(top, u_int32_t *));
793		m_adj(top, sizeof(family));
794	} else
795		family = AF_INET;
796
797	ifp->if_ibytes += top->m_pkthdr.len;
798	ifp->if_ipackets++;
799
800	return (family_enqueue(family, top));
801}
802
803/*
804 * tunpoll - the poll interface, this is only useful on reads
805 * really. The write detect always returns true, write never blocks
806 * anyway, it either accepts the packet or drops it.
807 */
808static	int
809tunpoll(dev_t dev, int events, struct thread *td)
810{
811	int		s;
812	struct tun_softc *tp = dev->si_drv1;
813	struct ifnet	*ifp = &tp->tun_if;
814	int		revents = 0;
815
816	s = splimp();
817	TUNDEBUG("%s%d: tunpoll\n", ifp->if_name, ifp->if_unit);
818
819	if (events & (POLLIN | POLLRDNORM)) {
820		if (ifp->if_snd.ifq_len > 0) {
821			TUNDEBUG("%s%d: tunpoll q=%d\n", ifp->if_name,
822			    ifp->if_unit, ifp->if_snd.ifq_len);
823			revents |= events & (POLLIN | POLLRDNORM);
824		} else {
825			TUNDEBUG("%s%d: tunpoll waiting\n", ifp->if_name,
826			    ifp->if_unit);
827			selrecord(td, &tp->tun_rsel);
828		}
829	}
830	if (events & (POLLOUT | POLLWRNORM))
831		revents |= events & (POLLOUT | POLLWRNORM);
832
833	splx(s);
834	return (revents);
835}
836