if_tun.c revision 64880
1/*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/
2
3/*
4 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
11 * user process to have its wicked way with. This driver has its
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 *
16 * $FreeBSD: head/sys/net/if_tun.c 64880 2000-08-20 21:34:39Z phk $
17 */
18
19#include "opt_inet.h"
20#include "opt_devfs.h"
21
22#include <sys/param.h>
23#include <sys/proc.h>
24#include <sys/systm.h>
25#include <sys/mbuf.h>
26#include <sys/socket.h>
27#include <sys/filio.h>
28#include <sys/sockio.h>
29#include <sys/ttycom.h>
30#include <sys/poll.h>
31#include <sys/signalvar.h>
32#include <sys/filedesc.h>
33#include <sys/kernel.h>
34#include <sys/sysctl.h>
35#include <sys/conf.h>
36#include <sys/uio.h>
37#include <sys/vnode.h>
38#include <sys/malloc.h>
39
40#include <net/if.h>
41#include <net/if_types.h>
42#include <net/route.h>
43#include <net/intrq.h>
44
45#ifdef INET
46#include <netinet/in.h>
47#endif
48
49#include <net/bpf.h>
50
51#include <net/if_tunvar.h>
52#include <net/if_tun.h>
53
54#ifdef DEVFS
55#include <sys/eventhandler.h>
56#include <fs/devfs/devfs.h>
57#endif
58
/* Malloc type used for the tun_softc allocation in tuncreate(). */
static MALLOC_DEFINE(M_TUN, "tun", "Tunnel Interface");

/* Driver attach routine; registered through PSEUDO_SET just below. */
static void tunattach __P((void *));
PSEUDO_SET(tunattach, if_tun);

/* Builds the softc and network interface for one tun device. */
static void tuncreate __P((dev_t dev));

/*
 * Debug printf, active only while tundebug is non-zero.
 * NOTE(review): this expands to a bare "if (tundebug) printf", so using
 * it as the unbraced body of an if that has an else would make the else
 * bind to the macro's hidden if.
 */
#define TUNDEBUG	if (tundebug) printf
static int tundebug = 0;
/* Exposes tundebug as the read/write sysctl debug.if_tun_debug. */
SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");

/* Network-interface side entry points. */
static int tunoutput __P((struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *rt));
static int tunifioctl __P((struct ifnet *, u_long, caddr_t));
static int tuninit __P((struct ifnet *));

/* Character-device entry points; all of them are wired into tun_cdevsw. */
static	d_open_t	tunopen;
static	d_close_t	tunclose;
static	d_read_t	tunread;
static	d_write_t	tunwrite;
static	d_ioctl_t	tunioctl;
static	d_poll_t	tunpoll;
81
/* Major device number placed in tun_cdevsw. */
#define CDEV_MAJOR 52
/*
 * Character-device switch for the "tun" devices.  The initializer is
 * positional: the slot comments name each field.  Slots this file does
 * not implement are filled with nommap, nostrategy, nodump and nopsize.
 */
static struct cdevsw tun_cdevsw = {
	/* open */	tunopen,
	/* close */	tunclose,
	/* read */	tunread,
	/* write */	tunwrite,
	/* ioctl */	tunioctl,
	/* poll */	tunpoll,
	/* mmap */	nommap,
	/* strategy */	nostrategy,
	/* name */	"tun",
	/* maj */	CDEV_MAJOR,
	/* dump */	nodump,
	/* psize */	nopsize,
	/* flags */	0,
	/* bmaj */	-1
};
99
#ifdef DEVFS
static void tun_clone __P((void *arg, char *name, int namelen, dev_t *dev));

/*
 * devfs_clone event handler (registered in tunattach).
 *
 * If no device has been supplied yet (*dev is still NODEV) and
 * devfs_stdclone() reports a match of name against "tun" (return value
 * 1, unit number stored in the local), create the matching "tun<unit>"
 * device node and hand it back through dev.  arg and namelen are not
 * used.
 */
static void
tun_clone(arg, name, namelen, dev)
	void *arg;
	char *name;
	int namelen;
	dev_t *dev;
{
	int unit;

	/* Short-circuit keeps devfs_stdclone() from running once *dev is set. */
	if (*dev == NODEV && devfs_stdclone(name, NULL, "tun", &unit) == 1) {
		/* XXX: minor encoding if unit > 255 */
		*dev = make_dev(&tun_cdevsw, unit,
		    UID_UUCP, GID_DIALER, 0600, "tun%d", unit);
	}
}
#endif
122
123
124static void
125tunattach(dummy)
126	void *dummy;
127{
128
129#ifdef DEVFS
130	EVENTHANDLER_REGISTER(devfs_clone, tun_clone, 0, 1000);
131#else
132	cdevsw_add(&tun_cdevsw);
133#endif
134}
135
136static void
137tuncreate(dev)
138	dev_t dev;
139{
140	struct tun_softc *sc;
141	struct ifnet *ifp;
142
143	dev = make_dev(&tun_cdevsw, minor(dev),
144	    UID_UUCP, GID_DIALER, 0600, "tun%d", lminor(dev));
145
146	MALLOC(sc, struct tun_softc *, sizeof(*sc), M_TUN, M_WAITOK);
147	bzero(sc, sizeof *sc);
148	sc->tun_flags = TUN_INITED;
149
150	ifp = &sc->tun_if;
151	ifp->if_unit = lminor(dev);
152	ifp->if_name = "tun";
153	ifp->if_mtu = TUNMTU;
154	ifp->if_ioctl = tunifioctl;
155	ifp->if_output = tunoutput;
156	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
157	ifp->if_type = IFT_PPP;
158	ifp->if_snd.ifq_maxlen = ifqmaxlen;
159	ifp->if_softc = sc;
160	if_attach(ifp);
161	bpfattach(ifp, DLT_NULL, sizeof(u_int));
162	dev->si_drv1 = sc;
163}
164
165/*
166 * tunnel open - must be superuser & the device must be
167 * configured in
168 */
169static	int
170tunopen(dev, flag, mode, p)
171	dev_t	dev;
172	int	flag, mode;
173	struct proc *p;
174{
175	struct ifnet	*ifp;
176	struct tun_softc *tp;
177	register int	error;
178
179	error = suser(p);
180	if (error)
181		return (error);
182
183	tp = dev->si_drv1;
184	if (!tp) {
185		tuncreate(dev);
186		tp = dev->si_drv1;
187	}
188	if (tp->tun_flags & TUN_OPEN)
189		return EBUSY;
190	tp->tun_pid = p->p_pid;
191	ifp = &tp->tun_if;
192	tp->tun_flags |= TUN_OPEN;
193	TUNDEBUG("%s%d: open\n", ifp->if_name, ifp->if_unit);
194	return (0);
195}
196
197/*
198 * tunclose - close the device - mark i/f down & delete
199 * routing info
200 */
201static	int
202tunclose(dev, foo, bar, p)
203	dev_t dev;
204	int foo;
205	int bar;
206	struct proc *p;
207{
208	register int	s;
209	struct tun_softc *tp;
210	struct ifnet	*ifp;
211	struct mbuf	*m;
212
213	tp = dev->si_drv1;
214	ifp = &tp->tun_if;
215
216	tp->tun_flags &= ~TUN_OPEN;
217	tp->tun_pid = 0;
218
219	/*
220	 * junk all pending output
221	 */
222	do {
223		s = splimp();
224		IF_DEQUEUE(&ifp->if_snd, m);
225		splx(s);
226		if (m)
227			m_freem(m);
228	} while (m);
229
230	if (ifp->if_flags & IFF_UP) {
231		s = splimp();
232		if_down(ifp);
233		splx(s);
234	}
235
236	if (ifp->if_flags & IFF_RUNNING) {
237		register struct ifaddr *ifa;
238
239		s = splimp();
240		/* find internet addresses and delete routes */
241		for (ifa = ifp->if_addrhead.tqh_first; ifa;
242		    ifa = ifa->ifa_link.tqe_next)
243			if (ifa->ifa_addr->sa_family == AF_INET)
244				rtinit(ifa, (int)RTM_DELETE,
245				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
246		ifp->if_flags &= ~IFF_RUNNING;
247		splx(s);
248	}
249
250	funsetown(tp->tun_sigio);
251	selwakeup(&tp->tun_rsel);
252
253	TUNDEBUG ("%s%d: closed\n", ifp->if_name, ifp->if_unit);
254	return (0);
255}
256
257static int
258tuninit(ifp)
259	struct ifnet *ifp;
260{
261	struct tun_softc *tp = ifp->if_softc;
262	register struct ifaddr *ifa;
263
264	TUNDEBUG("%s%d: tuninit\n", ifp->if_name, ifp->if_unit);
265
266	ifp->if_flags |= IFF_UP | IFF_RUNNING;
267	getmicrotime(&ifp->if_lastchange);
268
269	for (ifa = ifp->if_addrhead.tqh_first; ifa;
270	     ifa = ifa->ifa_link.tqe_next) {
271#ifdef INET
272		if (ifa->ifa_addr->sa_family == AF_INET) {
273		    struct sockaddr_in *si;
274
275		    si = (struct sockaddr_in *)ifa->ifa_addr;
276		    if (si && si->sin_addr.s_addr)
277			    tp->tun_flags |= TUN_IASET;
278
279		    si = (struct sockaddr_in *)ifa->ifa_dstaddr;
280		    if (si && si->sin_addr.s_addr)
281			    tp->tun_flags |= TUN_DSTADDR;
282		}
283#endif
284	}
285	return 0;
286}
287
288/*
289 * Process an ioctl request.
290 */
291int
292tunifioctl(ifp, cmd, data)
293	struct ifnet *ifp;
294	u_long	cmd;
295	caddr_t	data;
296{
297	struct ifreq *ifr = (struct ifreq *)data;
298	struct tun_softc *tp = ifp->if_softc;
299	struct ifstat *ifs;
300	int		error = 0, s;
301
302	s = splimp();
303	switch(cmd) {
304	case SIOCGIFSTATUS:
305		ifs = (struct ifstat *)data;
306		if (tp->tun_pid)
307			sprintf(ifs->ascii + strlen(ifs->ascii),
308			    "\tOpened by PID %d\n", tp->tun_pid);
309		return(0);
310	case SIOCSIFADDR:
311		tuninit(ifp);
312		TUNDEBUG("%s%d: address set\n",
313			 ifp->if_name, ifp->if_unit);
314		break;
315	case SIOCSIFDSTADDR:
316		tuninit(ifp);
317		TUNDEBUG("%s%d: destination address set\n",
318			 ifp->if_name, ifp->if_unit);
319		break;
320	case SIOCSIFMTU:
321		ifp->if_mtu = ifr->ifr_mtu;
322		TUNDEBUG("%s%d: mtu set\n",
323			 ifp->if_name, ifp->if_unit);
324		break;
325	case SIOCADDMULTI:
326	case SIOCDELMULTI:
327		break;
328
329
330	default:
331		error = EINVAL;
332	}
333	splx(s);
334	return (error);
335}
336
337/*
338 * tunoutput - queue packets from higher level ready to put out.
339 */
340int
341tunoutput(ifp, m0, dst, rt)
342	struct ifnet   *ifp;
343	struct mbuf    *m0;
344	struct sockaddr *dst;
345	struct rtentry *rt;
346{
347	struct tun_softc *tp = ifp->if_softc;
348	int		s;
349
350	TUNDEBUG ("%s%d: tunoutput\n", ifp->if_name, ifp->if_unit);
351
352	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
353		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
354			  ifp->if_unit, tp->tun_flags);
355		m_freem (m0);
356		return EHOSTDOWN;
357	}
358
359	/* BPF write needs to be handled specially */
360	if (dst->sa_family == AF_UNSPEC) {
361		dst->sa_family = *(mtod(m0, int *));
362		m0->m_len -= sizeof(int);
363		m0->m_pkthdr.len -= sizeof(int);
364		m0->m_data += sizeof(int);
365	}
366
367	if (ifp->if_bpf) {
368		/*
369		 * We need to prepend the address family as
370		 * a four byte field.  Cons up a dummy header
371		 * to pacify bpf.  This is safe because bpf
372		 * will only read from the mbuf (i.e., it won't
373		 * try to free it or keep a pointer to it).
374		 */
375		struct mbuf m;
376		u_int af = dst->sa_family;
377
378		m.m_next = m0;
379		m.m_len = 4;
380		m.m_data = (char *)&af;
381
382		bpf_mtap(ifp, &m);
383	}
384
385	/* prepend sockaddr? this may abort if the mbuf allocation fails */
386	if (tp->tun_flags & TUN_LMODE) {
387		/* allocate space for sockaddr */
388		M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
389
390		/* if allocation failed drop packet */
391		if (m0 == NULL){
392			s = splimp();	/* spl on queue manipulation */
393			IF_DROP(&ifp->if_snd);
394			splx(s);
395			ifp->if_oerrors++;
396			return (ENOBUFS);
397		} else {
398			bcopy(dst, m0->m_data, dst->sa_len);
399		}
400	}
401
402	if (tp->tun_flags & TUN_IFHEAD) {
403		/* Prepend the address family */
404		M_PREPEND(m0, 4, M_DONTWAIT);
405
406		/* if allocation failed drop packet */
407		if (m0 == NULL){
408			s = splimp();	/* spl on queue manipulation */
409			IF_DROP(&ifp->if_snd);
410			splx(s);
411			ifp->if_oerrors++;
412			return ENOBUFS;
413		} else
414			*(u_int32_t *)m0->m_data = htonl(dst->sa_family);
415	} else {
416#ifdef INET
417		if (dst->sa_family != AF_INET)
418#endif
419		{
420			m_freem(m0);
421			return EAFNOSUPPORT;
422		}
423	}
424
425	s = splimp();
426	if (IF_QFULL(&ifp->if_snd)) {
427		IF_DROP(&ifp->if_snd);
428		m_freem(m0);
429		splx(s);
430		ifp->if_collisions++;
431		return ENOBUFS;
432	}
433	ifp->if_obytes += m0->m_pkthdr.len;
434	IF_ENQUEUE(&ifp->if_snd, m0);
435	splx(s);
436	ifp->if_opackets++;
437
438	if (tp->tun_flags & TUN_RWAIT) {
439		tp->tun_flags &= ~TUN_RWAIT;
440		wakeup((caddr_t)tp);
441	}
442	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio)
443		pgsigio(tp->tun_sigio, SIGIO, 0);
444	selwakeup(&tp->tun_rsel);
445	return 0;
446}
447
448/*
449 * the cdevsw interface is now pretty minimal.
450 */
451static	int
452tunioctl(dev, cmd, data, flag, p)
453	dev_t		dev;
454	u_long		cmd;
455	caddr_t		data;
456	int		flag;
457	struct proc	*p;
458{
459	int		s;
460	struct tun_softc *tp = dev->si_drv1;
461 	struct tuninfo *tunp;
462
463	switch (cmd) {
464 	case TUNSIFINFO:
465 		tunp = (struct tuninfo *)data;
466		if (tunp->mtu < IF_MINMTU)
467			return (EINVAL);
468 		tp->tun_if.if_mtu = tunp->mtu;
469 		tp->tun_if.if_type = tunp->type;
470 		tp->tun_if.if_baudrate = tunp->baudrate;
471 		break;
472 	case TUNGIFINFO:
473 		tunp = (struct tuninfo *)data;
474 		tunp->mtu = tp->tun_if.if_mtu;
475 		tunp->type = tp->tun_if.if_type;
476 		tunp->baudrate = tp->tun_if.if_baudrate;
477 		break;
478	case TUNSDEBUG:
479		tundebug = *(int *)data;
480		break;
481	case TUNGDEBUG:
482		*(int *)data = tundebug;
483		break;
484	case TUNSLMODE:
485		if (*(int *)data) {
486			tp->tun_flags |= TUN_LMODE;
487			tp->tun_flags &= ~TUN_IFHEAD;
488		} else
489			tp->tun_flags &= ~TUN_LMODE;
490		break;
491	case TUNSIFHEAD:
492		if (*(int *)data) {
493			tp->tun_flags |= TUN_IFHEAD;
494			tp->tun_flags &= ~TUN_LMODE;
495		} else
496			tp->tun_flags &= ~TUN_IFHEAD;
497		break;
498	case TUNGIFHEAD:
499		*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
500		break;
501	case TUNSIFMODE:
502		/* deny this if UP */
503		if (tp->tun_if.if_flags & IFF_UP)
504			return(EBUSY);
505
506		switch (*(int *)data) {
507		case IFF_POINTOPOINT:
508			tp->tun_if.if_flags |= IFF_POINTOPOINT;
509			tp->tun_if.if_flags &= ~IFF_BROADCAST;
510			break;
511		case IFF_BROADCAST:
512			tp->tun_if.if_flags &= ~IFF_POINTOPOINT;
513			tp->tun_if.if_flags |= IFF_BROADCAST;
514			break;
515		default:
516			return(EINVAL);
517		}
518		break;
519	case TUNSIFPID:
520		tp->tun_pid = curproc->p_pid;
521		break;
522	case FIONBIO:
523		break;
524	case FIOASYNC:
525		if (*(int *)data)
526			tp->tun_flags |= TUN_ASYNC;
527		else
528			tp->tun_flags &= ~TUN_ASYNC;
529		break;
530	case FIONREAD:
531		s = splimp();
532		if (tp->tun_if.if_snd.ifq_head) {
533			struct mbuf *mb = tp->tun_if.if_snd.ifq_head;
534			for( *(int *)data = 0; mb != 0; mb = mb->m_next)
535				*(int *)data += mb->m_len;
536		} else
537			*(int *)data = 0;
538		splx(s);
539		break;
540	case FIOSETOWN:
541		return (fsetown(*(int *)data, &tp->tun_sigio));
542
543	case FIOGETOWN:
544		*(int *)data = fgetown(tp->tun_sigio);
545		return (0);
546
547	/* This is deprecated, FIOSETOWN should be used instead. */
548	case TIOCSPGRP:
549		return (fsetown(-(*(int *)data), &tp->tun_sigio));
550
551	/* This is deprecated, FIOGETOWN should be used instead. */
552	case TIOCGPGRP:
553		*(int *)data = -fgetown(tp->tun_sigio);
554		return (0);
555
556	default:
557		return (ENOTTY);
558	}
559	return (0);
560}
561
562/*
563 * The cdevsw read interface - reads a packet at a time, or at
564 * least as much of a packet as can be read.
565 */
566static	int
567tunread(dev, uio, flag)
568	dev_t dev;
569	struct uio *uio;
570	int flag;
571{
572	struct tun_softc *tp = dev->si_drv1;
573	struct ifnet	*ifp = &tp->tun_if;
574	struct mbuf	*m, *m0;
575	int		error=0, len, s;
576
577	TUNDEBUG ("%s%d: read\n", ifp->if_name, ifp->if_unit);
578	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
579		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
580			  ifp->if_unit, tp->tun_flags);
581		return EHOSTDOWN;
582	}
583
584	tp->tun_flags &= ~TUN_RWAIT;
585
586	s = splimp();
587	do {
588		IF_DEQUEUE(&ifp->if_snd, m0);
589		if (m0 == 0) {
590			if (flag & IO_NDELAY) {
591				splx(s);
592				return EWOULDBLOCK;
593			}
594			tp->tun_flags |= TUN_RWAIT;
595			if((error = tsleep((caddr_t)tp, PCATCH | (PZERO + 1),
596					"tunread", 0)) != 0) {
597				splx(s);
598				return error;
599			}
600		}
601	} while (m0 == 0);
602	splx(s);
603
604	while (m0 && uio->uio_resid > 0 && error == 0) {
605		len = min(uio->uio_resid, m0->m_len);
606		if (len == 0)
607			break;
608		error = uiomove(mtod(m0, caddr_t), len, uio);
609		MFREE(m0, m);
610		m0 = m;
611	}
612
613	if (m0) {
614		TUNDEBUG("Dropping mbuf\n");
615		m_freem(m0);
616	}
617	return error;
618}
619
620/*
621 * the cdevsw write interface - an atomic write is a packet - or else!
622 */
623static	int
624tunwrite(dev, uio, flag)
625	dev_t dev;
626	struct uio *uio;
627	int flag;
628{
629	struct tun_softc *tp = dev->si_drv1;
630	struct ifnet	*ifp = &tp->tun_if;
631	struct mbuf	*top, **mp, *m;
632	int		error=0, tlen, mlen;
633	u_int32_t	family;
634
635	TUNDEBUG("%s%d: tunwrite\n", ifp->if_name, ifp->if_unit);
636
637	if (uio->uio_resid == 0)
638		return 0;
639
640	if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
641		TUNDEBUG("%s%d: len=%d!\n", ifp->if_name, ifp->if_unit,
642		    uio->uio_resid);
643		return EIO;
644	}
645	tlen = uio->uio_resid;
646
647	/* get a header mbuf */
648	MGETHDR(m, M_DONTWAIT, MT_DATA);
649	if (m == NULL)
650		return ENOBUFS;
651	mlen = MHLEN;
652
653	top = 0;
654	mp = &top;
655	while (error == 0 && uio->uio_resid > 0) {
656		m->m_len = min(mlen, uio->uio_resid);
657		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
658		*mp = m;
659		mp = &m->m_next;
660		if (uio->uio_resid > 0) {
661			MGET (m, M_DONTWAIT, MT_DATA);
662			if (m == 0) {
663				error = ENOBUFS;
664				break;
665			}
666			mlen = MLEN;
667		}
668	}
669	if (error) {
670		if (top)
671			m_freem (top);
672		ifp->if_ierrors++;
673		return error;
674	}
675
676	top->m_pkthdr.len = tlen;
677	top->m_pkthdr.rcvif = ifp;
678
679	if (ifp->if_bpf) {
680		if (tp->tun_flags & TUN_IFHEAD)
681			/*
682			 * Conveniently, we already have a 4-byte address
683			 * family prepended to our packet !
684			 */
685			bpf_mtap(ifp, top);
686		else {
687			/*
688			 * We need to prepend the address family as
689			 * a four byte field.  Cons up a dummy header
690			 * to pacify bpf.  This is safe because bpf
691			 * will only read from the mbuf (i.e., it won't
692			 * try to free it or keep a pointer to it).
693			 */
694			struct mbuf m;
695			u_int af = AF_INET;
696
697			m.m_next = top;
698			m.m_len = 4;
699			m.m_data = (char *)&af;
700
701			bpf_mtap(ifp, &m);
702		}
703	}
704
705	if (tp->tun_flags & TUN_IFHEAD) {
706		if (top->m_len < sizeof(family) &&
707		    (top = m_pullup(top, sizeof(family))) == NULL)
708				return ENOBUFS;
709		family = ntohl(*mtod(top, u_int32_t *));
710		m_adj(top, sizeof(family));
711	} else
712		family = AF_INET;
713
714	ifp->if_ibytes += top->m_pkthdr.len;
715	ifp->if_ipackets++;
716
717	return family_enqueue(family, top);
718}
719
720/*
721 * tunpoll - the poll interface, this is only useful on reads
722 * really. The write detect always returns true, write never blocks
723 * anyway, it either accepts the packet or drops it.
724 */
725static	int
726tunpoll(dev, events, p)
727	dev_t dev;
728	int events;
729	struct proc *p;
730{
731	int		s;
732	struct tun_softc *tp = dev->si_drv1;
733	struct ifnet	*ifp = &tp->tun_if;
734	int		revents = 0;
735
736	s = splimp();
737	TUNDEBUG("%s%d: tunpoll\n", ifp->if_name, ifp->if_unit);
738
739	if (events & (POLLIN | POLLRDNORM)) {
740		if (ifp->if_snd.ifq_len > 0) {
741			TUNDEBUG("%s%d: tunpoll q=%d\n", ifp->if_name,
742			    ifp->if_unit, ifp->if_snd.ifq_len);
743			revents |= events & (POLLIN | POLLRDNORM);
744		} else {
745			TUNDEBUG("%s%d: tunpoll waiting\n", ifp->if_name,
746			    ifp->if_unit);
747			selrecord(p, &tp->tun_rsel);
748		}
749	}
750	if (events & (POLLOUT | POLLWRNORM))
751		revents |= events & (POLLOUT | POLLWRNORM);
752
753	splx(s);
754	return (revents);
755}
756