if_tun.c revision 66067
/*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/

/*
 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
 * Nottingham University 1987.
 *
 * This source may be freely distributed, however I would be interested
 * in any changes that are made.
 *
 * This driver takes packets off the IP i/f and hands them up to a
 * user process to have its wicked way with. This driver has its
 * roots in a similar driver written by Phil Cockcroft (formerly) at
 * UCL. This driver is based much more on read/write/poll mode of
 * operation though.
 *
 * $FreeBSD: head/sys/net/if_tun.c 66067 2000-09-19 10:28:44Z phk $
 */
18
19#include "opt_inet.h"
20
21#include <sys/param.h>
22#include <sys/proc.h>
23#include <sys/systm.h>
24#include <sys/mbuf.h>
25#include <sys/socket.h>
26#include <sys/filio.h>
27#include <sys/sockio.h>
28#include <sys/ttycom.h>
29#include <sys/poll.h>
30#include <sys/signalvar.h>
31#include <sys/filedesc.h>
32#include <sys/kernel.h>
33#include <sys/sysctl.h>
34#include <sys/conf.h>
35#include <sys/uio.h>
36#include <sys/vnode.h>
37#include <sys/malloc.h>
38
39#include <net/if.h>
40#include <net/if_types.h>
41#include <net/route.h>
42#include <net/intrq.h>
43
44#ifdef INET
45#include <netinet/in.h>
46#endif
47
48#include <net/bpf.h>
49
50#include <net/if_tunvar.h>
51#include <net/if_tun.h>
52
53static MALLOC_DEFINE(M_TUN, "tun", "Tunnel Interface");
54
55static void tunattach __P((void *));
56PSEUDO_SET(tunattach, if_tun);
57
58static void tuncreate __P((dev_t dev));
59
60#define TUNDEBUG	if (tundebug) printf
61static int tundebug = 0;
62SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
63
64static int tunoutput __P((struct ifnet *, struct mbuf *, struct sockaddr *,
65	    struct rtentry *rt));
66static int tunifioctl __P((struct ifnet *, u_long, caddr_t));
67static int tuninit __P((struct ifnet *));
68
69static	d_open_t	tunopen;
70static	d_close_t	tunclose;
71static	d_read_t	tunread;
72static	d_write_t	tunwrite;
73static	d_ioctl_t	tunioctl;
74static	d_poll_t	tunpoll;
75
76#define CDEV_MAJOR 52
77static struct cdevsw tun_cdevsw = {
78	/* open */	tunopen,
79	/* close */	tunclose,
80	/* read */	tunread,
81	/* write */	tunwrite,
82	/* ioctl */	tunioctl,
83	/* poll */	tunpoll,
84	/* mmap */	nommap,
85	/* strategy */	nostrategy,
86	/* name */	"tun",
87	/* maj */	CDEV_MAJOR,
88	/* dump */	nodump,
89	/* psize */	nopsize,
90	/* flags */	0,
91	/* bmaj */	-1
92};
93
94static void tun_clone __P((void *arg, char *name, int namelen, dev_t *dev));
95
96static void
97tun_clone(arg, name, namelen, dev)
98	void *arg;
99	char *name;
100	int namelen;
101	dev_t *dev;
102{
103	int u;
104
105	if (*dev != NODEV)
106		return;
107	if (dev_stdclone(name, NULL, "tun", &u) != 1)
108		return;
109	/* XXX: minor encoding if u > 255 */
110	*dev = make_dev(&tun_cdevsw, u,
111	    UID_UUCP, GID_DIALER, 0600, "tun%d", u);
112
113}
114
115static void
116tunattach(dummy)
117	void *dummy;
118{
119
120	EVENTHANDLER_REGISTER(dev_clone, tun_clone, 0, 1000);
121	cdevsw_add(&tun_cdevsw);
122}
123
124static void
125tuncreate(dev)
126	dev_t dev;
127{
128	struct tun_softc *sc;
129	struct ifnet *ifp;
130
131	dev = make_dev(&tun_cdevsw, minor(dev),
132	    UID_UUCP, GID_DIALER, 0600, "tun%d", dev2unit(dev));
133
134	MALLOC(sc, struct tun_softc *, sizeof(*sc), M_TUN, M_WAITOK);
135	bzero(sc, sizeof *sc);
136	sc->tun_flags = TUN_INITED;
137
138	ifp = &sc->tun_if;
139	ifp->if_unit = dev2unit(dev);
140	ifp->if_name = "tun";
141	ifp->if_mtu = TUNMTU;
142	ifp->if_ioctl = tunifioctl;
143	ifp->if_output = tunoutput;
144	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
145	ifp->if_type = IFT_PPP;
146	ifp->if_snd.ifq_maxlen = ifqmaxlen;
147	ifp->if_softc = sc;
148	if_attach(ifp);
149	bpfattach(ifp, DLT_NULL, sizeof(u_int));
150	dev->si_drv1 = sc;
151}
152
153/*
154 * tunnel open - must be superuser & the device must be
155 * configured in
156 */
157static	int
158tunopen(dev, flag, mode, p)
159	dev_t	dev;
160	int	flag, mode;
161	struct proc *p;
162{
163	struct ifnet	*ifp;
164	struct tun_softc *tp;
165	register int	error;
166
167	error = suser(p);
168	if (error)
169		return (error);
170
171	tp = dev->si_drv1;
172	if (!tp) {
173		tuncreate(dev);
174		tp = dev->si_drv1;
175	}
176	if (tp->tun_flags & TUN_OPEN)
177		return EBUSY;
178	tp->tun_pid = p->p_pid;
179	ifp = &tp->tun_if;
180	tp->tun_flags |= TUN_OPEN;
181	TUNDEBUG("%s%d: open\n", ifp->if_name, ifp->if_unit);
182	return (0);
183}
184
185/*
186 * tunclose - close the device - mark i/f down & delete
187 * routing info
188 */
189static	int
190tunclose(dev, foo, bar, p)
191	dev_t dev;
192	int foo;
193	int bar;
194	struct proc *p;
195{
196	register int	s;
197	struct tun_softc *tp;
198	struct ifnet	*ifp;
199	struct mbuf	*m;
200
201	tp = dev->si_drv1;
202	ifp = &tp->tun_if;
203
204	tp->tun_flags &= ~TUN_OPEN;
205	tp->tun_pid = 0;
206
207	/*
208	 * junk all pending output
209	 */
210	do {
211		s = splimp();
212		IF_DEQUEUE(&ifp->if_snd, m);
213		splx(s);
214		if (m)
215			m_freem(m);
216	} while (m);
217
218	if (ifp->if_flags & IFF_UP) {
219		s = splimp();
220		if_down(ifp);
221		splx(s);
222	}
223
224	if (ifp->if_flags & IFF_RUNNING) {
225		register struct ifaddr *ifa;
226
227		s = splimp();
228		/* find internet addresses and delete routes */
229		for (ifa = ifp->if_addrhead.tqh_first; ifa;
230		    ifa = ifa->ifa_link.tqe_next)
231			if (ifa->ifa_addr->sa_family == AF_INET)
232				rtinit(ifa, (int)RTM_DELETE,
233				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
234		ifp->if_flags &= ~IFF_RUNNING;
235		splx(s);
236	}
237
238	funsetown(tp->tun_sigio);
239	selwakeup(&tp->tun_rsel);
240
241	TUNDEBUG ("%s%d: closed\n", ifp->if_name, ifp->if_unit);
242	return (0);
243}
244
245static int
246tuninit(ifp)
247	struct ifnet *ifp;
248{
249	struct tun_softc *tp = ifp->if_softc;
250	register struct ifaddr *ifa;
251
252	TUNDEBUG("%s%d: tuninit\n", ifp->if_name, ifp->if_unit);
253
254	ifp->if_flags |= IFF_UP | IFF_RUNNING;
255	getmicrotime(&ifp->if_lastchange);
256
257	for (ifa = ifp->if_addrhead.tqh_first; ifa;
258	     ifa = ifa->ifa_link.tqe_next) {
259#ifdef INET
260		if (ifa->ifa_addr->sa_family == AF_INET) {
261		    struct sockaddr_in *si;
262
263		    si = (struct sockaddr_in *)ifa->ifa_addr;
264		    if (si && si->sin_addr.s_addr)
265			    tp->tun_flags |= TUN_IASET;
266
267		    si = (struct sockaddr_in *)ifa->ifa_dstaddr;
268		    if (si && si->sin_addr.s_addr)
269			    tp->tun_flags |= TUN_DSTADDR;
270		}
271#endif
272	}
273	return 0;
274}
275
276/*
277 * Process an ioctl request.
278 */
279int
280tunifioctl(ifp, cmd, data)
281	struct ifnet *ifp;
282	u_long	cmd;
283	caddr_t	data;
284{
285	struct ifreq *ifr = (struct ifreq *)data;
286	struct tun_softc *tp = ifp->if_softc;
287	struct ifstat *ifs;
288	int		error = 0, s;
289
290	s = splimp();
291	switch(cmd) {
292	case SIOCGIFSTATUS:
293		ifs = (struct ifstat *)data;
294		if (tp->tun_pid)
295			sprintf(ifs->ascii + strlen(ifs->ascii),
296			    "\tOpened by PID %d\n", tp->tun_pid);
297		return(0);
298	case SIOCSIFADDR:
299		tuninit(ifp);
300		TUNDEBUG("%s%d: address set\n",
301			 ifp->if_name, ifp->if_unit);
302		break;
303	case SIOCSIFDSTADDR:
304		tuninit(ifp);
305		TUNDEBUG("%s%d: destination address set\n",
306			 ifp->if_name, ifp->if_unit);
307		break;
308	case SIOCSIFMTU:
309		ifp->if_mtu = ifr->ifr_mtu;
310		TUNDEBUG("%s%d: mtu set\n",
311			 ifp->if_name, ifp->if_unit);
312		break;
313	case SIOCADDMULTI:
314	case SIOCDELMULTI:
315		break;
316
317
318	default:
319		error = EINVAL;
320	}
321	splx(s);
322	return (error);
323}
324
325/*
326 * tunoutput - queue packets from higher level ready to put out.
327 */
328int
329tunoutput(ifp, m0, dst, rt)
330	struct ifnet   *ifp;
331	struct mbuf    *m0;
332	struct sockaddr *dst;
333	struct rtentry *rt;
334{
335	struct tun_softc *tp = ifp->if_softc;
336	int		s;
337
338	TUNDEBUG ("%s%d: tunoutput\n", ifp->if_name, ifp->if_unit);
339
340	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
341		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
342			  ifp->if_unit, tp->tun_flags);
343		m_freem (m0);
344		return EHOSTDOWN;
345	}
346
347	/* BPF write needs to be handled specially */
348	if (dst->sa_family == AF_UNSPEC) {
349		dst->sa_family = *(mtod(m0, int *));
350		m0->m_len -= sizeof(int);
351		m0->m_pkthdr.len -= sizeof(int);
352		m0->m_data += sizeof(int);
353	}
354
355	if (ifp->if_bpf) {
356		/*
357		 * We need to prepend the address family as
358		 * a four byte field.  Cons up a dummy header
359		 * to pacify bpf.  This is safe because bpf
360		 * will only read from the mbuf (i.e., it won't
361		 * try to free it or keep a pointer to it).
362		 */
363		struct mbuf m;
364		u_int af = dst->sa_family;
365
366		m.m_next = m0;
367		m.m_len = 4;
368		m.m_data = (char *)&af;
369
370		bpf_mtap(ifp, &m);
371	}
372
373	/* prepend sockaddr? this may abort if the mbuf allocation fails */
374	if (tp->tun_flags & TUN_LMODE) {
375		/* allocate space for sockaddr */
376		M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
377
378		/* if allocation failed drop packet */
379		if (m0 == NULL){
380			s = splimp();	/* spl on queue manipulation */
381			IF_DROP(&ifp->if_snd);
382			splx(s);
383			ifp->if_oerrors++;
384			return (ENOBUFS);
385		} else {
386			bcopy(dst, m0->m_data, dst->sa_len);
387		}
388	}
389
390	if (tp->tun_flags & TUN_IFHEAD) {
391		/* Prepend the address family */
392		M_PREPEND(m0, 4, M_DONTWAIT);
393
394		/* if allocation failed drop packet */
395		if (m0 == NULL){
396			s = splimp();	/* spl on queue manipulation */
397			IF_DROP(&ifp->if_snd);
398			splx(s);
399			ifp->if_oerrors++;
400			return ENOBUFS;
401		} else
402			*(u_int32_t *)m0->m_data = htonl(dst->sa_family);
403	} else {
404#ifdef INET
405		if (dst->sa_family != AF_INET)
406#endif
407		{
408			m_freem(m0);
409			return EAFNOSUPPORT;
410		}
411	}
412
413	s = splimp();
414	if (IF_QFULL(&ifp->if_snd)) {
415		IF_DROP(&ifp->if_snd);
416		m_freem(m0);
417		splx(s);
418		ifp->if_collisions++;
419		return ENOBUFS;
420	}
421	ifp->if_obytes += m0->m_pkthdr.len;
422	IF_ENQUEUE(&ifp->if_snd, m0);
423	splx(s);
424	ifp->if_opackets++;
425
426	if (tp->tun_flags & TUN_RWAIT) {
427		tp->tun_flags &= ~TUN_RWAIT;
428		wakeup((caddr_t)tp);
429	}
430	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio)
431		pgsigio(tp->tun_sigio, SIGIO, 0);
432	selwakeup(&tp->tun_rsel);
433	return 0;
434}
435
436/*
437 * the cdevsw interface is now pretty minimal.
438 */
439static	int
440tunioctl(dev, cmd, data, flag, p)
441	dev_t		dev;
442	u_long		cmd;
443	caddr_t		data;
444	int		flag;
445	struct proc	*p;
446{
447	int		s;
448	struct tun_softc *tp = dev->si_drv1;
449 	struct tuninfo *tunp;
450
451	switch (cmd) {
452 	case TUNSIFINFO:
453 		tunp = (struct tuninfo *)data;
454		if (tunp->mtu < IF_MINMTU)
455			return (EINVAL);
456 		tp->tun_if.if_mtu = tunp->mtu;
457 		tp->tun_if.if_type = tunp->type;
458 		tp->tun_if.if_baudrate = tunp->baudrate;
459 		break;
460 	case TUNGIFINFO:
461 		tunp = (struct tuninfo *)data;
462 		tunp->mtu = tp->tun_if.if_mtu;
463 		tunp->type = tp->tun_if.if_type;
464 		tunp->baudrate = tp->tun_if.if_baudrate;
465 		break;
466	case TUNSDEBUG:
467		tundebug = *(int *)data;
468		break;
469	case TUNGDEBUG:
470		*(int *)data = tundebug;
471		break;
472	case TUNSLMODE:
473		if (*(int *)data) {
474			tp->tun_flags |= TUN_LMODE;
475			tp->tun_flags &= ~TUN_IFHEAD;
476		} else
477			tp->tun_flags &= ~TUN_LMODE;
478		break;
479	case TUNSIFHEAD:
480		if (*(int *)data) {
481			tp->tun_flags |= TUN_IFHEAD;
482			tp->tun_flags &= ~TUN_LMODE;
483		} else
484			tp->tun_flags &= ~TUN_IFHEAD;
485		break;
486	case TUNGIFHEAD:
487		*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
488		break;
489	case TUNSIFMODE:
490		/* deny this if UP */
491		if (tp->tun_if.if_flags & IFF_UP)
492			return(EBUSY);
493
494		switch (*(int *)data) {
495		case IFF_POINTOPOINT:
496			tp->tun_if.if_flags |= IFF_POINTOPOINT;
497			tp->tun_if.if_flags &= ~IFF_BROADCAST;
498			break;
499		case IFF_BROADCAST:
500			tp->tun_if.if_flags &= ~IFF_POINTOPOINT;
501			tp->tun_if.if_flags |= IFF_BROADCAST;
502			break;
503		default:
504			return(EINVAL);
505		}
506		break;
507	case TUNSIFPID:
508		tp->tun_pid = curproc->p_pid;
509		break;
510	case FIONBIO:
511		break;
512	case FIOASYNC:
513		if (*(int *)data)
514			tp->tun_flags |= TUN_ASYNC;
515		else
516			tp->tun_flags &= ~TUN_ASYNC;
517		break;
518	case FIONREAD:
519		s = splimp();
520		if (tp->tun_if.if_snd.ifq_head) {
521			struct mbuf *mb = tp->tun_if.if_snd.ifq_head;
522			for( *(int *)data = 0; mb != 0; mb = mb->m_next)
523				*(int *)data += mb->m_len;
524		} else
525			*(int *)data = 0;
526		splx(s);
527		break;
528	case FIOSETOWN:
529		return (fsetown(*(int *)data, &tp->tun_sigio));
530
531	case FIOGETOWN:
532		*(int *)data = fgetown(tp->tun_sigio);
533		return (0);
534
535	/* This is deprecated, FIOSETOWN should be used instead. */
536	case TIOCSPGRP:
537		return (fsetown(-(*(int *)data), &tp->tun_sigio));
538
539	/* This is deprecated, FIOGETOWN should be used instead. */
540	case TIOCGPGRP:
541		*(int *)data = -fgetown(tp->tun_sigio);
542		return (0);
543
544	default:
545		return (ENOTTY);
546	}
547	return (0);
548}
549
550/*
551 * The cdevsw read interface - reads a packet at a time, or at
552 * least as much of a packet as can be read.
553 */
554static	int
555tunread(dev, uio, flag)
556	dev_t dev;
557	struct uio *uio;
558	int flag;
559{
560	struct tun_softc *tp = dev->si_drv1;
561	struct ifnet	*ifp = &tp->tun_if;
562	struct mbuf	*m, *m0;
563	int		error=0, len, s;
564
565	TUNDEBUG ("%s%d: read\n", ifp->if_name, ifp->if_unit);
566	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
567		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
568			  ifp->if_unit, tp->tun_flags);
569		return EHOSTDOWN;
570	}
571
572	tp->tun_flags &= ~TUN_RWAIT;
573
574	s = splimp();
575	do {
576		IF_DEQUEUE(&ifp->if_snd, m0);
577		if (m0 == 0) {
578			if (flag & IO_NDELAY) {
579				splx(s);
580				return EWOULDBLOCK;
581			}
582			tp->tun_flags |= TUN_RWAIT;
583			if((error = tsleep((caddr_t)tp, PCATCH | (PZERO + 1),
584					"tunread", 0)) != 0) {
585				splx(s);
586				return error;
587			}
588		}
589	} while (m0 == 0);
590	splx(s);
591
592	while (m0 && uio->uio_resid > 0 && error == 0) {
593		len = min(uio->uio_resid, m0->m_len);
594		if (len == 0)
595			break;
596		error = uiomove(mtod(m0, caddr_t), len, uio);
597		MFREE(m0, m);
598		m0 = m;
599	}
600
601	if (m0) {
602		TUNDEBUG("Dropping mbuf\n");
603		m_freem(m0);
604	}
605	return error;
606}
607
608/*
609 * the cdevsw write interface - an atomic write is a packet - or else!
610 */
611static	int
612tunwrite(dev, uio, flag)
613	dev_t dev;
614	struct uio *uio;
615	int flag;
616{
617	struct tun_softc *tp = dev->si_drv1;
618	struct ifnet	*ifp = &tp->tun_if;
619	struct mbuf	*top, **mp, *m;
620	int		error=0, tlen, mlen;
621	u_int32_t	family;
622
623	TUNDEBUG("%s%d: tunwrite\n", ifp->if_name, ifp->if_unit);
624
625	if (uio->uio_resid == 0)
626		return 0;
627
628	if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
629		TUNDEBUG("%s%d: len=%d!\n", ifp->if_name, ifp->if_unit,
630		    uio->uio_resid);
631		return EIO;
632	}
633	tlen = uio->uio_resid;
634
635	/* get a header mbuf */
636	MGETHDR(m, M_DONTWAIT, MT_DATA);
637	if (m == NULL)
638		return ENOBUFS;
639	mlen = MHLEN;
640
641	top = 0;
642	mp = &top;
643	while (error == 0 && uio->uio_resid > 0) {
644		m->m_len = min(mlen, uio->uio_resid);
645		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
646		*mp = m;
647		mp = &m->m_next;
648		if (uio->uio_resid > 0) {
649			MGET (m, M_DONTWAIT, MT_DATA);
650			if (m == 0) {
651				error = ENOBUFS;
652				break;
653			}
654			mlen = MLEN;
655		}
656	}
657	if (error) {
658		if (top)
659			m_freem (top);
660		ifp->if_ierrors++;
661		return error;
662	}
663
664	top->m_pkthdr.len = tlen;
665	top->m_pkthdr.rcvif = ifp;
666
667	if (ifp->if_bpf) {
668		if (tp->tun_flags & TUN_IFHEAD)
669			/*
670			 * Conveniently, we already have a 4-byte address
671			 * family prepended to our packet !
672			 */
673			bpf_mtap(ifp, top);
674		else {
675			/*
676			 * We need to prepend the address family as
677			 * a four byte field.  Cons up a dummy header
678			 * to pacify bpf.  This is safe because bpf
679			 * will only read from the mbuf (i.e., it won't
680			 * try to free it or keep a pointer to it).
681			 */
682			struct mbuf m;
683			u_int af = AF_INET;
684
685			m.m_next = top;
686			m.m_len = 4;
687			m.m_data = (char *)&af;
688
689			bpf_mtap(ifp, &m);
690		}
691	}
692
693	if (tp->tun_flags & TUN_IFHEAD) {
694		if (top->m_len < sizeof(family) &&
695		    (top = m_pullup(top, sizeof(family))) == NULL)
696				return ENOBUFS;
697		family = ntohl(*mtod(top, u_int32_t *));
698		m_adj(top, sizeof(family));
699	} else
700		family = AF_INET;
701
702	ifp->if_ibytes += top->m_pkthdr.len;
703	ifp->if_ipackets++;
704
705	return family_enqueue(family, top);
706}
707
708/*
709 * tunpoll - the poll interface, this is only useful on reads
710 * really. The write detect always returns true, write never blocks
711 * anyway, it either accepts the packet or drops it.
712 */
713static	int
714tunpoll(dev, events, p)
715	dev_t dev;
716	int events;
717	struct proc *p;
718{
719	int		s;
720	struct tun_softc *tp = dev->si_drv1;
721	struct ifnet	*ifp = &tp->tun_if;
722	int		revents = 0;
723
724	s = splimp();
725	TUNDEBUG("%s%d: tunpoll\n", ifp->if_name, ifp->if_unit);
726
727	if (events & (POLLIN | POLLRDNORM)) {
728		if (ifp->if_snd.ifq_len > 0) {
729			TUNDEBUG("%s%d: tunpoll q=%d\n", ifp->if_name,
730			    ifp->if_unit, ifp->if_snd.ifq_len);
731			revents |= events & (POLLIN | POLLRDNORM);
732		} else {
733			TUNDEBUG("%s%d: tunpoll waiting\n", ifp->if_name,
734			    ifp->if_unit);
735			selrecord(p, &tp->tun_rsel);
736		}
737	}
738	if (events & (POLLOUT | POLLWRNORM))
739		revents |= events & (POLLOUT | POLLWRNORM);
740
741	splx(s);
742	return (revents);
743}
744