if_tun.c revision 109623
1/*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/
2
3/*
4 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
11 * user process to have its wicked way with. This driver has its
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 *
16 * $FreeBSD: head/sys/net/if_tun.c 109623 2003-01-21 08:56:16Z alfred $
17 */
18
19#include "opt_inet.h"
20#include "opt_mac.h"
21
22#include <sys/param.h>
23#include <sys/proc.h>
24#include <sys/systm.h>
25#include <sys/mac.h>
26#include <sys/mbuf.h>
27#include <sys/module.h>
28#include <sys/socket.h>
29#include <sys/filio.h>
30#include <sys/sockio.h>
31#include <sys/ttycom.h>
32#include <sys/poll.h>
33#include <sys/signalvar.h>
34#include <sys/filedesc.h>
35#include <sys/kernel.h>
36#include <sys/sysctl.h>
37#include <sys/conf.h>
38#include <sys/uio.h>
39#include <sys/vnode.h>
40#include <sys/malloc.h>
41#include <machine/bus.h>	/* XXX Shouldn't really be required ! */
42#include <sys/rman.h>
43
44#include <net/if.h>
45#include <net/if_types.h>
46#include <net/route.h>
47#include <net/intrq.h>
48#ifdef INET
49#include <netinet/in.h>
50#endif
51#include <net/bpf.h>
52#include <net/if_tunvar.h>
53#include <net/if_tun.h>
54
55#define TUNDEBUG	if (tundebug) printf
56#define	TUNNAME		"tun"
57#define	TUN_MAXUNIT	0x7fff	/* ifp->if_unit is only 15 bits */
58
59static MALLOC_DEFINE(M_TUN, TUNNAME, "Tunnel Interface");
60static int tundebug = 0;
61static struct tun_softc *tunhead = NULL;
62static struct rman tununits;
63static udev_t tunbasedev = NOUDEV;
64SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
65
66static void	tunclone(void *arg, char *name, int namelen, dev_t *dev);
67static void	tuncreate(dev_t dev);
68static int	tunifioctl(struct ifnet *, u_long, caddr_t);
69static int	tuninit(struct ifnet *);
70static int	tunmodevent(module_t, int, void *);
71static int	tunoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
72		    struct rtentry *rt);
73static void	tunstart(struct ifnet *);
74
75static d_open_t		tunopen;
76static d_close_t	tunclose;
77static d_read_t		tunread;
78static d_write_t	tunwrite;
79static d_ioctl_t	tunioctl;
80static d_poll_t		tunpoll;
81
82#define CDEV_MAJOR 52
83static struct cdevsw tun_cdevsw = {
84	/* open */	tunopen,
85	/* close */	tunclose,
86	/* read */	tunread,
87	/* write */	tunwrite,
88	/* ioctl */	tunioctl,
89	/* poll */	tunpoll,
90	/* mmap */	nommap,
91	/* strategy */	nostrategy,
92	/* name */	TUNNAME,
93	/* maj */	CDEV_MAJOR,
94	/* dump */	nodump,
95	/* psize */	nopsize,
96	/* flags */	0,
97};
98
99static void
100tunclone(void *arg, char *name, int namelen, dev_t *dev)
101{
102	struct resource *r;
103	int err;
104	int u;
105
106	if (*dev != NODEV)
107		return;
108
109	if (strcmp(name, TUNNAME) == 0) {
110		r = rman_reserve_resource(&tununits, 0, TUN_MAXUNIT, 1,
111		    RF_ALLOCATED | RF_ACTIVE, NULL);
112		u = rman_get_start(r);
113		err = rman_release_resource(r);
114		KASSERT(err == 0, ("Unexpected failure releasing resource"));
115		*dev = makedev(CDEV_MAJOR, unit2minor(u));
116		if ((*dev)->si_flags & SI_NAMED)
117			return;	/* Already make_dev()d */
118	} else if (dev_stdclone(name, NULL, TUNNAME, &u) != 1)
119		return;	/* Don't recognise the name */
120
121	*dev = make_dev(&tun_cdevsw, unit2minor(u),
122	    UID_ROOT, GID_WHEEL, 0600, "tun%d", u);
123
124	/*
125	 * All devices depend on tunbasedev so that we can simply
126	 * destroy_dev() this device at module unload time to get
127	 * rid of all our make_dev()d resources.
128	 */
129	if (tunbasedev == NOUDEV)
130		tunbasedev = (*dev)->si_udev;
131	else {
132		(*dev)->si_flags |= SI_CHEAPCLONE;
133		dev_depends(udev2dev(tunbasedev, 0), *dev);
134	}
135}
136
137static int
138tunmodevent(module_t mod, int type, void *data)
139{
140	static eventhandler_tag tag;
141	struct tun_softc *tp;
142	dev_t dev;
143	int err;
144
145	switch (type) {
146	case MOD_LOAD:
147		tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
148		if (tag == NULL)
149			return (ENOMEM);
150#ifdef NODEVFS
151		err = cdevsw_add(&tun_cdevsw);
152		if (err != 0) {
153			EVENTHANDLER_DEREGISTER(dev_clone, tag);
154			return (err);
155		}
156#endif
157		tununits.rm_type = RMAN_ARRAY;
158		tununits.rm_descr = "open if_tun units";
159		err = rman_init(&tununits);
160		if (err != 0) {
161			cdevsw_remove(&tun_cdevsw);
162			EVENTHANDLER_DEREGISTER(dev_clone, tag);
163			return (err);
164		}
165		err = rman_manage_region(&tununits, 0, TUN_MAXUNIT);
166		if (err != 0) {
167			printf("%s: tununits: rman_manage_region: Failed %d\n",
168			    TUNNAME, err);
169			rman_fini(&tununits);
170			cdevsw_remove(&tun_cdevsw);
171			EVENTHANDLER_DEREGISTER(dev_clone, tag);
172			return (err);
173		}
174		break;
175	case MOD_UNLOAD:
176		err = rman_fini(&tununits);
177		if (err != 0)
178			return (err);
179		EVENTHANDLER_DEREGISTER(dev_clone, tag);
180
181		while (tunhead != NULL) {
182			KASSERT((tunhead->tun_flags & TUN_OPEN) == 0,
183			    ("tununits is out of sync - unit %d",
184			    tunhead->tun_if.if_unit));
185			tp = tunhead;
186			dev = makedev(tun_cdevsw.d_maj,
187			    unit2minor(tp->tun_if.if_unit));
188			KASSERT(dev->si_drv1 == tp, ("Bad makedev result"));
189			tunhead = tp->next;
190			bpfdetach(&tp->tun_if);
191			if_detach(&tp->tun_if);
192			KASSERT(dev->si_flags & SI_NAMED, ("Missing make_dev"));
193			free(tp, M_TUN);
194		}
195
196		/*
197		 * Destroying tunbasedev results in all of our make_dev()s
198		 * conveniently going away.
199		 */
200		if (tunbasedev != NOUDEV)
201			destroy_dev(udev2dev(tunbasedev, 0));
202
203#ifdef NODEVFS
204		cdevsw_remove(&tun_cdevsw);
205#endif
206		break;
207	}
208	return 0;
209}
210
211static moduledata_t tun_mod = {
212	"if_tun",
213	tunmodevent,
214	0
215};
216
217DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
218
219static void
220tunstart(struct ifnet *ifp)
221{
222	struct tun_softc *tp = ifp->if_softc;
223
224	if (tp->tun_flags & TUN_RWAIT) {
225		tp->tun_flags &= ~TUN_RWAIT;
226		wakeup((caddr_t)tp);
227	}
228	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio)
229		pgsigio(&tp->tun_sigio, SIGIO, 0);
230	selwakeup(&tp->tun_rsel);
231}
232
233static void
234tuncreate(dev_t dev)
235{
236	struct tun_softc *sc;
237	struct ifnet *ifp;
238
239	if (!(dev->si_flags & SI_NAMED))
240		dev = make_dev(&tun_cdevsw, minor(dev),
241		    UID_UUCP, GID_DIALER, 0600, "tun%d", dev2unit(dev));
242
243	MALLOC(sc, struct tun_softc *, sizeof(*sc), M_TUN, M_ZERO);
244	sc->tun_flags = TUN_INITED;
245	sc->next = tunhead;
246	tunhead = sc;
247
248	ifp = &sc->tun_if;
249	ifp->if_unit = dev2unit(dev);
250	ifp->if_name = TUNNAME;
251	ifp->if_mtu = TUNMTU;
252	ifp->if_ioctl = tunifioctl;
253	ifp->if_output = tunoutput;
254	ifp->if_start = tunstart;
255	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
256	ifp->if_type = IFT_PPP;
257	ifp->if_snd.ifq_maxlen = ifqmaxlen;
258	ifp->if_softc = sc;
259	if_attach(ifp);
260	bpfattach(ifp, DLT_NULL, sizeof(u_int));
261	dev->si_drv1 = sc;
262}
263
264static int
265tunopen(dev_t dev, int flag, int mode, struct thread *td)
266{
267	struct resource *r;
268	struct ifnet	*ifp;
269	struct tun_softc *tp;
270	int unit;
271
272	unit = dev2unit(dev);
273	if (unit > TUN_MAXUNIT)
274		return (ENXIO);
275
276	r = rman_reserve_resource(&tununits, unit, unit, 1,
277	    RF_ALLOCATED | RF_ACTIVE, NULL);
278	if (r == NULL)
279		return (EBUSY);
280
281	dev->si_flags &= ~SI_CHEAPCLONE;
282
283	tp = dev->si_drv1;
284	if (!tp) {
285		tuncreate(dev);
286		tp = dev->si_drv1;
287	}
288	KASSERT(!(tp->tun_flags & TUN_OPEN), ("Resource & flags out-of-sync"));
289	tp->r_unit = r;
290	tp->tun_pid = td->td_proc->p_pid;
291	ifp = &tp->tun_if;
292	tp->tun_flags |= TUN_OPEN;
293	TUNDEBUG("%s%d: open\n", ifp->if_name, ifp->if_unit);
294
295	return (0);
296}
297
298/*
299 * tunclose - close the device - mark i/f down & delete
300 * routing info
301 */
302static	int
303tunclose(dev_t dev, int foo, int bar, struct thread *td)
304{
305	struct tun_softc *tp;
306	struct ifnet *ifp;
307	int s;
308	int err;
309
310	tp = dev->si_drv1;
311	ifp = &tp->tun_if;
312
313	KASSERT(tp->r_unit, ("Unit %d not marked open", ifp->if_unit));
314	tp->tun_flags &= ~TUN_OPEN;
315	tp->tun_pid = 0;
316
317	/*
318	 * junk all pending output
319	 */
320	IF_DRAIN(&ifp->if_snd);
321
322	if (ifp->if_flags & IFF_UP) {
323		s = splimp();
324		if_down(ifp);
325		splx(s);
326	}
327
328	if (ifp->if_flags & IFF_RUNNING) {
329		register struct ifaddr *ifa;
330
331		s = splimp();
332		/* find internet addresses and delete routes */
333		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
334			if (ifa->ifa_addr->sa_family == AF_INET)
335				rtinit(ifa, (int)RTM_DELETE,
336				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
337		ifp->if_flags &= ~IFF_RUNNING;
338		splx(s);
339	}
340
341	funsetown(&tp->tun_sigio);
342	selwakeup(&tp->tun_rsel);
343
344	TUNDEBUG ("%s%d: closed\n", ifp->if_name, ifp->if_unit);
345	err = rman_release_resource(tp->r_unit);
346	KASSERT(err == 0, ("Unit %d failed to release", ifp->if_unit));
347
348	return (0);
349}
350
351static int
352tuninit(struct ifnet *ifp)
353{
354	struct tun_softc *tp = ifp->if_softc;
355	register struct ifaddr *ifa;
356	int error = 0;
357
358	TUNDEBUG("%s%d: tuninit\n", ifp->if_name, ifp->if_unit);
359
360	ifp->if_flags |= IFF_UP | IFF_RUNNING;
361	getmicrotime(&ifp->if_lastchange);
362
363	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa;
364	     ifa = TAILQ_NEXT(ifa, ifa_link)) {
365		if (ifa->ifa_addr == NULL)
366			error = EFAULT;
367			/* XXX: Should maybe return straight off? */
368		else {
369#ifdef INET
370			if (ifa->ifa_addr->sa_family == AF_INET) {
371			    struct sockaddr_in *si;
372
373			    si = (struct sockaddr_in *)ifa->ifa_addr;
374			    if (si->sin_addr.s_addr)
375				    tp->tun_flags |= TUN_IASET;
376
377			    si = (struct sockaddr_in *)ifa->ifa_dstaddr;
378			    if (si && si->sin_addr.s_addr)
379				    tp->tun_flags |= TUN_DSTADDR;
380			}
381#endif
382		}
383	}
384	return (error);
385}
386
387/*
388 * Process an ioctl request.
389 */
390static int
391tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
392{
393	struct ifreq *ifr = (struct ifreq *)data;
394	struct tun_softc *tp = ifp->if_softc;
395	struct ifstat *ifs;
396	int		error = 0, s;
397
398	s = splimp();
399	switch(cmd) {
400	case SIOCGIFSTATUS:
401		ifs = (struct ifstat *)data;
402		if (tp->tun_pid)
403			sprintf(ifs->ascii + strlen(ifs->ascii),
404			    "\tOpened by PID %d\n", tp->tun_pid);
405		break;
406	case SIOCSIFADDR:
407		error = tuninit(ifp);
408		TUNDEBUG("%s%d: address set, error=%d\n",
409			 ifp->if_name, ifp->if_unit, error);
410		break;
411	case SIOCSIFDSTADDR:
412		error = tuninit(ifp);
413		TUNDEBUG("%s%d: destination address set, error=%d\n",
414			 ifp->if_name, ifp->if_unit, error);
415		break;
416	case SIOCSIFMTU:
417		ifp->if_mtu = ifr->ifr_mtu;
418		TUNDEBUG("%s%d: mtu set\n", ifp->if_name, ifp->if_unit);
419		break;
420	case SIOCSIFFLAGS:
421	case SIOCADDMULTI:
422	case SIOCDELMULTI:
423		break;
424	default:
425		error = EINVAL;
426	}
427	splx(s);
428	return (error);
429}
430
431/*
432 * tunoutput - queue packets from higher level ready to put out.
433 */
434static int
435tunoutput(
436	struct ifnet *ifp,
437	struct mbuf *m0,
438	struct sockaddr *dst,
439	struct rtentry *rt)
440{
441	struct tun_softc *tp = ifp->if_softc;
442#ifdef MAC
443	int error;
444#endif
445
446	TUNDEBUG ("%s%d: tunoutput\n", ifp->if_name, ifp->if_unit);
447
448#ifdef MAC
449	error = mac_check_ifnet_transmit(ifp, m0);
450	if (error) {
451		m_freem(m0);
452		return (error);
453	}
454#endif
455
456	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
457		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
458			  ifp->if_unit, tp->tun_flags);
459		m_freem (m0);
460		return (EHOSTDOWN);
461	}
462
463	if ((ifp->if_flags & IFF_UP) != IFF_UP) {
464		m_freem (m0);
465		return (EHOSTDOWN);
466	}
467
468	/* BPF write needs to be handled specially */
469	if (dst->sa_family == AF_UNSPEC) {
470		dst->sa_family = *(mtod(m0, int *));
471		m0->m_len -= sizeof(int);
472		m0->m_pkthdr.len -= sizeof(int);
473		m0->m_data += sizeof(int);
474	}
475
476	if (ifp->if_bpf) {
477		/*
478		 * We need to prepend the address family as
479		 * a four byte field.  Cons up a dummy header
480		 * to pacify bpf.  This is safe because bpf
481		 * will only read from the mbuf (i.e., it won't
482		 * try to free it or keep a pointer to it).
483		 */
484		struct mbuf m;
485		uint32_t af = dst->sa_family;
486
487		m.m_next = m0;
488		m.m_len = 4;
489		m.m_data = (char *)&af;
490
491		BPF_MTAP(ifp, &m);
492	}
493
494	/* prepend sockaddr? this may abort if the mbuf allocation fails */
495	if (tp->tun_flags & TUN_LMODE) {
496		/* allocate space for sockaddr */
497		M_PREPEND(m0, dst->sa_len, M_NOWAIT);
498
499		/* if allocation failed drop packet */
500		if (m0 == NULL) {
501			ifp->if_iqdrops++;
502			ifp->if_oerrors++;
503			return (ENOBUFS);
504		} else {
505			bcopy(dst, m0->m_data, dst->sa_len);
506		}
507	}
508
509	if (tp->tun_flags & TUN_IFHEAD) {
510		/* Prepend the address family */
511		M_PREPEND(m0, 4, M_NOWAIT);
512
513		/* if allocation failed drop packet */
514		if (m0 == NULL) {
515			ifp->if_iqdrops++;
516			ifp->if_oerrors++;
517			return (ENOBUFS);
518		} else
519			*(u_int32_t *)m0->m_data = htonl(dst->sa_family);
520	} else {
521#ifdef INET
522		if (dst->sa_family != AF_INET)
523#endif
524		{
525			m_freem(m0);
526			return (EAFNOSUPPORT);
527		}
528	}
529
530	if (! IF_HANDOFF(&ifp->if_snd, m0, ifp)) {
531		ifp->if_collisions++;
532		return (ENOBUFS);
533	}
534	ifp->if_opackets++;
535	return (0);
536}
537
538/*
539 * the cdevsw interface is now pretty minimal.
540 */
541static	int
542tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
543{
544	int		s;
545	int		error;
546	struct tun_softc *tp = dev->si_drv1;
547 	struct tuninfo *tunp;
548
549	switch (cmd) {
550 	case TUNSIFINFO:
551 		tunp = (struct tuninfo *)data;
552		if (tunp->mtu < IF_MINMTU)
553			return (EINVAL);
554 		if (tp->tun_if.if_mtu != tunp->mtu
555		&& (error = suser(td)) != 0)
556			return (error);
557 		tp->tun_if.if_mtu = tunp->mtu;
558 		tp->tun_if.if_type = tunp->type;
559 		tp->tun_if.if_baudrate = tunp->baudrate;
560 		break;
561 	case TUNGIFINFO:
562 		tunp = (struct tuninfo *)data;
563 		tunp->mtu = tp->tun_if.if_mtu;
564 		tunp->type = tp->tun_if.if_type;
565 		tunp->baudrate = tp->tun_if.if_baudrate;
566 		break;
567	case TUNSDEBUG:
568		tundebug = *(int *)data;
569		break;
570	case TUNGDEBUG:
571		*(int *)data = tundebug;
572		break;
573	case TUNSLMODE:
574		if (*(int *)data) {
575			tp->tun_flags |= TUN_LMODE;
576			tp->tun_flags &= ~TUN_IFHEAD;
577		} else
578			tp->tun_flags &= ~TUN_LMODE;
579		break;
580	case TUNSIFHEAD:
581		if (*(int *)data) {
582			tp->tun_flags |= TUN_IFHEAD;
583			tp->tun_flags &= ~TUN_LMODE;
584		} else
585			tp->tun_flags &= ~TUN_IFHEAD;
586		break;
587	case TUNGIFHEAD:
588		*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
589		break;
590	case TUNSIFMODE:
591		/* deny this if UP */
592		if (tp->tun_if.if_flags & IFF_UP)
593			return(EBUSY);
594
595		switch (*(int *)data & ~IFF_MULTICAST) {
596		case IFF_POINTOPOINT:
597		case IFF_BROADCAST:
598			tp->tun_if.if_flags &=
599			    ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
600			tp->tun_if.if_flags |= *(int *)data;
601			break;
602		default:
603			return(EINVAL);
604		}
605		break;
606	case TUNSIFPID:
607		tp->tun_pid = curthread->td_proc->p_pid;
608		break;
609	case FIONBIO:
610		break;
611	case FIOASYNC:
612		if (*(int *)data)
613			tp->tun_flags |= TUN_ASYNC;
614		else
615			tp->tun_flags &= ~TUN_ASYNC;
616		break;
617	case FIONREAD:
618		s = splimp();
619		if (tp->tun_if.if_snd.ifq_head) {
620			struct mbuf *mb = tp->tun_if.if_snd.ifq_head;
621			for( *(int *)data = 0; mb != 0; mb = mb->m_next)
622				*(int *)data += mb->m_len;
623		} else
624			*(int *)data = 0;
625		splx(s);
626		break;
627	case FIOSETOWN:
628		return (fsetown(*(int *)data, &tp->tun_sigio));
629
630	case FIOGETOWN:
631		*(int *)data = fgetown(&tp->tun_sigio);
632		return (0);
633
634	/* This is deprecated, FIOSETOWN should be used instead. */
635	case TIOCSPGRP:
636		return (fsetown(-(*(int *)data), &tp->tun_sigio));
637
638	/* This is deprecated, FIOGETOWN should be used instead. */
639	case TIOCGPGRP:
640		*(int *)data = -fgetown(&tp->tun_sigio);
641		return (0);
642
643	default:
644		return (ENOTTY);
645	}
646	return (0);
647}
648
649/*
650 * The cdevsw read interface - reads a packet at a time, or at
651 * least as much of a packet as can be read.
652 */
653static	int
654tunread(dev_t dev, struct uio *uio, int flag)
655{
656	struct tun_softc *tp = dev->si_drv1;
657	struct ifnet	*ifp = &tp->tun_if;
658	struct mbuf	*m;
659	int		error=0, len, s;
660
661	TUNDEBUG ("%s%d: read\n", ifp->if_name, ifp->if_unit);
662	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
663		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
664			  ifp->if_unit, tp->tun_flags);
665		return (EHOSTDOWN);
666	}
667
668	tp->tun_flags &= ~TUN_RWAIT;
669
670	s = splimp();
671	do {
672		IF_DEQUEUE(&ifp->if_snd, m);
673		if (m == NULL) {
674			if (flag & IO_NDELAY) {
675				splx(s);
676				return (EWOULDBLOCK);
677			}
678			tp->tun_flags |= TUN_RWAIT;
679			if((error = tsleep((caddr_t)tp, PCATCH | (PZERO + 1),
680					"tunread", 0)) != 0) {
681				splx(s);
682				return (error);
683			}
684		}
685	} while (m == NULL);
686	splx(s);
687
688	while (m && uio->uio_resid > 0 && error == 0) {
689		len = min(uio->uio_resid, m->m_len);
690		if (len != 0)
691			error = uiomove(mtod(m, caddr_t), len, uio);
692		m = m_free(m);
693	}
694
695	if (m) {
696		TUNDEBUG("%s%d: Dropping mbuf\n", ifp->if_name, ifp->if_unit);
697		m_freem(m);
698	}
699	return (error);
700}
701
702/*
703 * the cdevsw write interface - an atomic write is a packet - or else!
704 */
705static	int
706tunwrite(dev_t dev, struct uio *uio, int flag)
707{
708	struct tun_softc *tp = dev->si_drv1;
709	struct ifnet	*ifp = &tp->tun_if;
710	struct mbuf	*top, **mp, *m;
711	int		error=0, tlen, mlen;
712	uint32_t	family;
713
714	TUNDEBUG("%s%d: tunwrite\n", ifp->if_name, ifp->if_unit);
715
716	if ((ifp->if_flags & IFF_UP) != IFF_UP)
717		/* ignore silently */
718		return (0);
719
720	if (uio->uio_resid == 0)
721		return (0);
722
723	if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
724		TUNDEBUG("%s%d: len=%d!\n", ifp->if_name, ifp->if_unit,
725		    uio->uio_resid);
726		return (EIO);
727	}
728	tlen = uio->uio_resid;
729
730	/* get a header mbuf */
731	MGETHDR(m, M_NOWAIT, MT_DATA);
732	if (m == NULL)
733		return (ENOBUFS);
734	mlen = MHLEN;
735
736	top = 0;
737	mp = &top;
738	while (error == 0 && uio->uio_resid > 0) {
739		m->m_len = min(mlen, uio->uio_resid);
740		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
741		*mp = m;
742		mp = &m->m_next;
743		if (uio->uio_resid > 0) {
744			MGET (m, M_NOWAIT, MT_DATA);
745			if (m == 0) {
746				error = ENOBUFS;
747				break;
748			}
749			mlen = MLEN;
750		}
751	}
752	if (error) {
753		if (top)
754			m_freem (top);
755		ifp->if_ierrors++;
756		return (error);
757	}
758
759	top->m_pkthdr.len = tlen;
760	top->m_pkthdr.rcvif = ifp;
761#ifdef MAC
762	mac_create_mbuf_from_ifnet(ifp, top);
763#endif
764
765	if (ifp->if_bpf) {
766		if (tp->tun_flags & TUN_IFHEAD) {
767			/*
768			 * Conveniently, we already have a 4-byte address
769			 * family prepended to our packet !
770			 * Inconveniently, it's in the wrong byte order !
771			 */
772			if ((top = m_pullup(top, sizeof(family))) == NULL)
773				return (ENOBUFS);
774			*mtod(top, u_int32_t *) =
775			    ntohl(*mtod(top, u_int32_t *));
776			BPF_MTAP(ifp, top);
777			*mtod(top, u_int32_t *) =
778			    htonl(*mtod(top, u_int32_t *));
779		} else {
780			/*
781			 * We need to prepend the address family as
782			 * a four byte field.  Cons up a dummy header
783			 * to pacify bpf.  This is safe because bpf
784			 * will only read from the mbuf (i.e., it won't
785			 * try to free it or keep a pointer to it).
786			 */
787			struct mbuf m;
788			uint32_t af = AF_INET;
789
790			m.m_next = top;
791			m.m_len = 4;
792			m.m_data = (char *)&af;
793
794			BPF_MTAP(ifp, &m);
795		}
796	}
797
798	if (tp->tun_flags & TUN_IFHEAD) {
799		if (top->m_len < sizeof(family) &&
800		    (top = m_pullup(top, sizeof(family))) == NULL)
801			return (ENOBUFS);
802		family = ntohl(*mtod(top, u_int32_t *));
803		m_adj(top, sizeof(family));
804	} else
805		family = AF_INET;
806
807	ifp->if_ibytes += top->m_pkthdr.len;
808	ifp->if_ipackets++;
809
810	return (family_enqueue(family, top));
811}
812
813/*
814 * tunpoll - the poll interface, this is only useful on reads
815 * really. The write detect always returns true, write never blocks
816 * anyway, it either accepts the packet or drops it.
817 */
818static	int
819tunpoll(dev_t dev, int events, struct thread *td)
820{
821	int		s;
822	struct tun_softc *tp = dev->si_drv1;
823	struct ifnet	*ifp = &tp->tun_if;
824	int		revents = 0;
825
826	s = splimp();
827	TUNDEBUG("%s%d: tunpoll\n", ifp->if_name, ifp->if_unit);
828
829	if (events & (POLLIN | POLLRDNORM)) {
830		if (ifp->if_snd.ifq_len > 0) {
831			TUNDEBUG("%s%d: tunpoll q=%d\n", ifp->if_name,
832			    ifp->if_unit, ifp->if_snd.ifq_len);
833			revents |= events & (POLLIN | POLLRDNORM);
834		} else {
835			TUNDEBUG("%s%d: tunpoll waiting\n", ifp->if_name,
836			    ifp->if_unit);
837			selrecord(td, &tp->tun_rsel);
838		}
839	}
840	if (events & (POLLOUT | POLLWRNORM))
841		revents |= events & (POLLOUT | POLLWRNORM);
842
843	splx(s);
844	return (revents);
845}
846