if_tun.c revision 56703
1/*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/
2
3/*
4 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
11 * user process to have its wicked way with. This driver has its
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 *
16 * $FreeBSD: head/sys/net/if_tun.c 56703 2000-01-27 23:37:39Z brian $
17 */
18
19#include "opt_atalk.h"
20#include "opt_atm.h"
21#include "opt_inet.h"
22#include "opt_inet6.h"
23#include "opt_ipx.h"
24#include "opt_natm.h"
25
26#include <sys/param.h>
27#include <sys/proc.h>
28#include <sys/systm.h>
29#include <sys/mbuf.h>
30#include <sys/socket.h>
31#include <sys/filio.h>
32#include <sys/sockio.h>
33#include <sys/ttycom.h>
34#include <sys/poll.h>
35#include <sys/signalvar.h>
36#include <sys/filedesc.h>
37#include <sys/kernel.h>
38#include <sys/sysctl.h>
39#include <sys/conf.h>
40#include <sys/uio.h>
41#include <sys/vnode.h>
42#include <sys/malloc.h>
43
44#include <net/if.h>
45#include <net/netisr.h>
46#include <net/route.h>
47#include <net/intrq.h>
48
49#ifdef INET
50#include <netinet/in.h>
51#include <netinet/in_var.h>
52#endif
53
54#include <net/bpf.h>
55
56#include <net/if_tunvar.h>
57#include <net/if_tun.h>
58
59static MALLOC_DEFINE(M_TUN, "tun", "Tunnel Interface");
60
61static void tunattach __P((void *));
62PSEUDO_SET(tunattach, if_tun);
63
64static void tuncreate __P((dev_t dev));
65
66#define TUNDEBUG	if (tundebug) printf
67static int tundebug = 0;
68SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
69
70static int tunoutput __P((struct ifnet *, struct mbuf *, struct sockaddr *,
71	    struct rtentry *rt));
72static int tunifioctl __P((struct ifnet *, u_long, caddr_t));
73static int tuninit __P((struct ifnet *));
74
75static	d_open_t	tunopen;
76static	d_close_t	tunclose;
77static	d_read_t	tunread;
78static	d_write_t	tunwrite;
79static	d_ioctl_t	tunioctl;
80static	d_poll_t	tunpoll;
81
82#define CDEV_MAJOR 52
83static struct cdevsw tun_cdevsw = {
84	/* open */	tunopen,
85	/* close */	tunclose,
86	/* read */	tunread,
87	/* write */	tunwrite,
88	/* ioctl */	tunioctl,
89	/* poll */	tunpoll,
90	/* mmap */	nommap,
91	/* strategy */	nostrategy,
92	/* name */	"tun",
93	/* maj */	CDEV_MAJOR,
94	/* dump */	nodump,
95	/* psize */	nopsize,
96	/* flags */	0,
97	/* bmaj */	-1
98};
99
100static void
101tunattach(dummy)
102	void *dummy;
103{
104
105	cdevsw_add(&tun_cdevsw);
106}
107
108static void
109tuncreate(dev)
110	dev_t dev;
111{
112	struct tun_softc *sc;
113	struct ifnet *ifp;
114
115	dev = make_dev(&tun_cdevsw, minor(dev),
116	    UID_UUCP, GID_DIALER, 0600, "tun%d", lminor(dev));
117
118	MALLOC(sc, struct tun_softc *, sizeof(*sc), M_TUN, M_WAITOK);
119	bzero(sc, sizeof *sc);
120	sc->tun_flags = TUN_INITED;
121
122	ifp = &sc->tun_if;
123	ifp->if_unit = lminor(dev);
124	ifp->if_name = "tun";
125	ifp->if_mtu = TUNMTU;
126	ifp->if_ioctl = tunifioctl;
127	ifp->if_output = tunoutput;
128	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
129	ifp->if_snd.ifq_maxlen = ifqmaxlen;
130	ifp->if_softc = sc;
131	if_attach(ifp);
132	bpfattach(ifp, DLT_NULL, sizeof(u_int));
133	dev->si_drv1 = sc;
134}
135
136/*
137 * tunnel open - must be superuser & the device must be
138 * configured in
139 */
140static	int
141tunopen(dev, flag, mode, p)
142	dev_t	dev;
143	int	flag, mode;
144	struct proc *p;
145{
146	struct ifnet	*ifp;
147	struct tun_softc *tp;
148	register int	error;
149
150	error = suser(p);
151	if (error)
152		return (error);
153
154	tp = dev->si_drv1;
155	if (!tp) {
156		tuncreate(dev);
157		tp = dev->si_drv1;
158	}
159	if (tp->tun_flags & TUN_OPEN)
160		return EBUSY;
161	tp->tun_pid = p->p_pid;
162	ifp = &tp->tun_if;
163	tp->tun_flags |= TUN_OPEN;
164	TUNDEBUG("%s%d: open\n", ifp->if_name, ifp->if_unit);
165	return (0);
166}
167
168/*
169 * tunclose - close the device - mark i/f down & delete
170 * routing info
171 */
172static	int
173tunclose(dev, foo, bar, p)
174	dev_t dev;
175	int foo;
176	int bar;
177	struct proc *p;
178{
179	register int	s;
180	struct tun_softc *tp;
181	struct ifnet	*ifp;
182	struct mbuf	*m;
183
184	tp = dev->si_drv1;
185	ifp = &tp->tun_if;
186
187	tp->tun_flags &= ~TUN_OPEN;
188	tp->tun_pid = 0;
189
190	/*
191	 * junk all pending output
192	 */
193	do {
194		s = splimp();
195		IF_DEQUEUE(&ifp->if_snd, m);
196		splx(s);
197		if (m)
198			m_freem(m);
199	} while (m);
200
201	if (ifp->if_flags & IFF_UP) {
202		s = splimp();
203		if_down(ifp);
204		splx(s);
205	}
206
207	if (ifp->if_flags & IFF_RUNNING) {
208		register struct ifaddr *ifa;
209
210		s = splimp();
211		/* find internet addresses and delete routes */
212		for (ifa = ifp->if_addrhead.tqh_first; ifa;
213		    ifa = ifa->ifa_link.tqe_next)
214			if (ifa->ifa_addr->sa_family == AF_INET)
215				rtinit(ifa, (int)RTM_DELETE,
216				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
217		ifp->if_flags &= ~IFF_RUNNING;
218		splx(s);
219	}
220
221	funsetown(tp->tun_sigio);
222	selwakeup(&tp->tun_rsel);
223
224	TUNDEBUG ("%s%d: closed\n", ifp->if_name, ifp->if_unit);
225	return (0);
226}
227
228static int
229tuninit(ifp)
230	struct ifnet *ifp;
231{
232	struct tun_softc *tp = ifp->if_softc;
233	register struct ifaddr *ifa;
234
235	TUNDEBUG("%s%d: tuninit\n", ifp->if_name, ifp->if_unit);
236
237	ifp->if_flags |= IFF_UP | IFF_RUNNING;
238	getmicrotime(&ifp->if_lastchange);
239
240	for (ifa = ifp->if_addrhead.tqh_first; ifa;
241	     ifa = ifa->ifa_link.tqe_next) {
242#ifdef INET
243		if (ifa->ifa_addr->sa_family == AF_INET) {
244		    struct sockaddr_in *si;
245
246		    si = (struct sockaddr_in *)ifa->ifa_addr;
247		    if (si && si->sin_addr.s_addr)
248			    tp->tun_flags |= TUN_IASET;
249
250		    si = (struct sockaddr_in *)ifa->ifa_dstaddr;
251		    if (si && si->sin_addr.s_addr)
252			    tp->tun_flags |= TUN_DSTADDR;
253		}
254#endif
255	}
256	return 0;
257}
258
259/*
260 * Process an ioctl request.
261 */
262int
263tunifioctl(ifp, cmd, data)
264	struct ifnet *ifp;
265	u_long	cmd;
266	caddr_t	data;
267{
268	struct ifreq *ifr = (struct ifreq *)data;
269	struct tun_softc *tp = ifp->if_softc;
270	struct ifstat *ifs;
271	int		error = 0, s;
272
273	s = splimp();
274	switch(cmd) {
275	case SIOCGIFSTATUS:
276		ifs = (struct ifstat *)data;
277		if (tp->tun_pid)
278			sprintf(ifs->ascii + strlen(ifs->ascii),
279			    "\tOpened by PID %d\n", tp->tun_pid);
280		return(0);
281	case SIOCSIFADDR:
282		tuninit(ifp);
283		TUNDEBUG("%s%d: address set\n",
284			 ifp->if_name, ifp->if_unit);
285		break;
286	case SIOCSIFDSTADDR:
287		tuninit(ifp);
288		TUNDEBUG("%s%d: destination address set\n",
289			 ifp->if_name, ifp->if_unit);
290		break;
291	case SIOCSIFMTU:
292		ifp->if_mtu = ifr->ifr_mtu;
293		TUNDEBUG("%s%d: mtu set\n",
294			 ifp->if_name, ifp->if_unit);
295		break;
296	case SIOCADDMULTI:
297	case SIOCDELMULTI:
298		break;
299
300
301	default:
302		error = EINVAL;
303	}
304	splx(s);
305	return (error);
306}
307
308/*
309 * tunoutput - queue packets from higher level ready to put out.
310 */
311int
312tunoutput(ifp, m0, dst, rt)
313	struct ifnet   *ifp;
314	struct mbuf    *m0;
315	struct sockaddr *dst;
316	struct rtentry *rt;
317{
318	struct tun_softc *tp = ifp->if_softc;
319	int		s;
320
321	TUNDEBUG ("%s%d: tunoutput\n", ifp->if_name, ifp->if_unit);
322
323	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
324		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
325			  ifp->if_unit, tp->tun_flags);
326		m_freem (m0);
327		return EHOSTDOWN;
328	}
329
330	/* BPF write needs to be handled specially */
331	if (dst->sa_family == AF_UNSPEC) {
332		dst->sa_family = *(mtod(m0, int *));
333		m0->m_len -= sizeof(int);
334		m0->m_pkthdr.len -= sizeof(int);
335		m0->m_data += sizeof(int);
336	}
337
338	if (ifp->if_bpf) {
339		/*
340		 * We need to prepend the address family as
341		 * a four byte field.  Cons up a dummy header
342		 * to pacify bpf.  This is safe because bpf
343		 * will only read from the mbuf (i.e., it won't
344		 * try to free it or keep a pointer to it).
345		 */
346		struct mbuf m;
347		u_int af = dst->sa_family;
348
349		m.m_next = m0;
350		m.m_len = 4;
351		m.m_data = (char *)&af;
352
353		bpf_mtap(ifp, &m);
354	}
355
356	/* prepend sockaddr? this may abort if the mbuf allocation fails */
357	if (tp->tun_flags & TUN_LMODE) {
358		/* allocate space for sockaddr */
359		M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
360
361		/* if allocation failed drop packet */
362		if (m0 == NULL){
363			s = splimp();	/* spl on queue manipulation */
364			IF_DROP(&ifp->if_snd);
365			splx(s);
366			ifp->if_oerrors++;
367			return (ENOBUFS);
368		} else {
369			bcopy(dst, m0->m_data, dst->sa_len);
370		}
371	}
372
373	if (tp->tun_flags & TUN_IFHEAD) {
374		/* Prepend the address family */
375		M_PREPEND(m0, 4, M_DONTWAIT);
376
377		/* if allocation failed drop packet */
378		if (m0 == NULL){
379			s = splimp();	/* spl on queue manipulation */
380			IF_DROP(&ifp->if_snd);
381			splx(s);
382			ifp->if_oerrors++;
383			return ENOBUFS;
384		} else
385			*(u_int32_t *)m0->m_data = htonl(dst->sa_family);
386	} else {
387#ifdef INET
388		if (dst->sa_family != AF_INET)
389#endif
390		{
391			m_freem(m0);
392			return EAFNOSUPPORT;
393		}
394	}
395
396	s = splimp();
397	if (IF_QFULL(&ifp->if_snd)) {
398		IF_DROP(&ifp->if_snd);
399		m_freem(m0);
400		splx(s);
401		ifp->if_collisions++;
402		return ENOBUFS;
403	}
404	ifp->if_obytes += m0->m_pkthdr.len;
405	IF_ENQUEUE(&ifp->if_snd, m0);
406	splx(s);
407	ifp->if_opackets++;
408
409	if (tp->tun_flags & TUN_RWAIT) {
410		tp->tun_flags &= ~TUN_RWAIT;
411		wakeup((caddr_t)tp);
412	}
413	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio)
414		pgsigio(tp->tun_sigio, SIGIO, 0);
415	selwakeup(&tp->tun_rsel);
416	return 0;
417}
418
419/*
420 * the cdevsw interface is now pretty minimal.
421 */
422static	int
423tunioctl(dev, cmd, data, flag, p)
424	dev_t		dev;
425	u_long		cmd;
426	caddr_t		data;
427	int		flag;
428	struct proc	*p;
429{
430	int		s;
431	struct tun_softc *tp = dev->si_drv1;
432 	struct tuninfo *tunp;
433
434	switch (cmd) {
435 	case TUNSIFINFO:
436 		tunp = (struct tuninfo *)data;
437		if (tunp->mtu < IF_MINMTU)
438			return (EINVAL);
439 		tp->tun_if.if_mtu = tunp->mtu;
440 		tp->tun_if.if_type = tunp->type;
441 		tp->tun_if.if_baudrate = tunp->baudrate;
442 		break;
443 	case TUNGIFINFO:
444 		tunp = (struct tuninfo *)data;
445 		tunp->mtu = tp->tun_if.if_mtu;
446 		tunp->type = tp->tun_if.if_type;
447 		tunp->baudrate = tp->tun_if.if_baudrate;
448 		break;
449	case TUNSDEBUG:
450		tundebug = *(int *)data;
451		break;
452	case TUNGDEBUG:
453		*(int *)data = tundebug;
454		break;
455	case TUNSLMODE:
456		if (*(int *)data) {
457			tp->tun_flags |= TUN_LMODE;
458			tp->tun_flags &= ~TUN_IFHEAD;
459		} else
460			tp->tun_flags &= ~TUN_LMODE;
461		break;
462	case TUNSIFHEAD:
463		if (*(int *)data) {
464			tp->tun_flags |= TUN_IFHEAD;
465			tp->tun_flags &= ~TUN_LMODE;
466		} else
467			tp->tun_flags &= ~TUN_IFHEAD;
468		break;
469	case TUNGIFHEAD:
470		*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
471		break;
472	case TUNSIFMODE:
473		/* deny this if UP */
474		if (tp->tun_if.if_flags & IFF_UP)
475			return(EBUSY);
476
477		switch (*(int *)data) {
478		case IFF_POINTOPOINT:
479			tp->tun_if.if_flags |= IFF_POINTOPOINT;
480			tp->tun_if.if_flags &= ~IFF_BROADCAST;
481			break;
482		case IFF_BROADCAST:
483			tp->tun_if.if_flags &= ~IFF_POINTOPOINT;
484			tp->tun_if.if_flags |= IFF_BROADCAST;
485			break;
486		default:
487			return(EINVAL);
488		}
489		break;
490	case TUNSIFPID:
491		tp->tun_pid = curproc->p_pid;
492		break;
493	case FIONBIO:
494		break;
495	case FIOASYNC:
496		if (*(int *)data)
497			tp->tun_flags |= TUN_ASYNC;
498		else
499			tp->tun_flags &= ~TUN_ASYNC;
500		break;
501	case FIONREAD:
502		s = splimp();
503		if (tp->tun_if.if_snd.ifq_head) {
504			struct mbuf *mb = tp->tun_if.if_snd.ifq_head;
505			for( *(int *)data = 0; mb != 0; mb = mb->m_next)
506				*(int *)data += mb->m_len;
507		} else
508			*(int *)data = 0;
509		splx(s);
510		break;
511	case FIOSETOWN:
512		return (fsetown(*(int *)data, &tp->tun_sigio));
513
514	case FIOGETOWN:
515		*(int *)data = fgetown(tp->tun_sigio);
516		return (0);
517
518	/* This is deprecated, FIOSETOWN should be used instead. */
519	case TIOCSPGRP:
520		return (fsetown(-(*(int *)data), &tp->tun_sigio));
521
522	/* This is deprecated, FIOGETOWN should be used instead. */
523	case TIOCGPGRP:
524		*(int *)data = -fgetown(tp->tun_sigio);
525		return (0);
526
527	default:
528		return (ENOTTY);
529	}
530	return (0);
531}
532
533/*
534 * The cdevsw read interface - reads a packet at a time, or at
535 * least as much of a packet as can be read.
536 */
537static	int
538tunread(dev, uio, flag)
539	dev_t dev;
540	struct uio *uio;
541	int flag;
542{
543	struct tun_softc *tp = dev->si_drv1;
544	struct ifnet	*ifp = &tp->tun_if;
545	struct mbuf	*m, *m0;
546	int		error=0, len, s;
547
548	TUNDEBUG ("%s%d: read\n", ifp->if_name, ifp->if_unit);
549	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
550		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
551			  ifp->if_unit, tp->tun_flags);
552		return EHOSTDOWN;
553	}
554
555	tp->tun_flags &= ~TUN_RWAIT;
556
557	s = splimp();
558	do {
559		IF_DEQUEUE(&ifp->if_snd, m0);
560		if (m0 == 0) {
561			if (flag & IO_NDELAY) {
562				splx(s);
563				return EWOULDBLOCK;
564			}
565			tp->tun_flags |= TUN_RWAIT;
566			if((error = tsleep((caddr_t)tp, PCATCH | (PZERO + 1),
567					"tunread", 0)) != 0) {
568				splx(s);
569				return error;
570			}
571		}
572	} while (m0 == 0);
573	splx(s);
574
575	while (m0 && uio->uio_resid > 0 && error == 0) {
576		len = min(uio->uio_resid, m0->m_len);
577		if (len == 0)
578			break;
579		error = uiomove(mtod(m0, caddr_t), len, uio);
580		MFREE(m0, m);
581		m0 = m;
582	}
583
584	if (m0) {
585		TUNDEBUG("Dropping mbuf\n");
586		m_freem(m0);
587	}
588	return error;
589}
590
591/*
592 * the cdevsw write interface - an atomic write is a packet - or else!
593 */
594static	int
595tunwrite(dev, uio, flag)
596	dev_t dev;
597	struct uio *uio;
598	int flag;
599{
600	struct tun_softc *tp = dev->si_drv1;
601	struct ifnet	*ifp = &tp->tun_if;
602	struct mbuf	*top, **mp, *m;
603	int		error=0, tlen, mlen;
604	u_int32_t	family;
605
606	TUNDEBUG("%s%d: tunwrite\n", ifp->if_name, ifp->if_unit);
607
608	if (uio->uio_resid == 0)
609		return 0;
610
611	if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
612		TUNDEBUG("%s%d: len=%d!\n", ifp->if_name, ifp->if_unit,
613		    uio->uio_resid);
614		return EIO;
615	}
616	tlen = uio->uio_resid;
617
618	/* get a header mbuf */
619	MGETHDR(m, M_DONTWAIT, MT_DATA);
620	if (m == NULL)
621		return ENOBUFS;
622	mlen = MHLEN;
623
624	top = 0;
625	mp = &top;
626	while (error == 0 && uio->uio_resid > 0) {
627		m->m_len = min(mlen, uio->uio_resid);
628		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
629		*mp = m;
630		mp = &m->m_next;
631		if (uio->uio_resid > 0) {
632			MGET (m, M_DONTWAIT, MT_DATA);
633			if (m == 0) {
634				error = ENOBUFS;
635				break;
636			}
637			mlen = MLEN;
638		}
639	}
640	if (error) {
641		if (top)
642			m_freem (top);
643		return error;
644	}
645
646	top->m_pkthdr.len = tlen;
647	top->m_pkthdr.rcvif = ifp;
648
649	if (ifp->if_bpf) {
650		if (tp->tun_flags & TUN_IFHEAD)
651			/*
652			 * Conveniently, we already have a 4-byte address
653			 * family prepended to our packet !
654			 */
655			bpf_mtap(ifp, top);
656		else {
657			/*
658			 * We need to prepend the address family as
659			 * a four byte field.  Cons up a dummy header
660			 * to pacify bpf.  This is safe because bpf
661			 * will only read from the mbuf (i.e., it won't
662			 * try to free it or keep a pointer to it).
663			 */
664			struct mbuf m;
665			u_int af = AF_INET;
666
667			m.m_next = top;
668			m.m_len = 4;
669			m.m_data = (char *)&af;
670
671			bpf_mtap(ifp, &m);
672		}
673	}
674
675	if (tp->tun_flags & TUN_IFHEAD) {
676		if (top->m_len < sizeof(family) &&
677		    (top = m_pullup(top, sizeof(family))) == NULL)
678				return ENOBUFS;
679		family = ntohl(*mtod(top, u_int32_t *));
680		m_adj(top, sizeof(family));
681	} else
682		family = AF_INET;
683
684	return family_enqueue(family, top);
685}
686
687/*
688 * tunpoll - the poll interface, this is only useful on reads
689 * really. The write detect always returns true, write never blocks
690 * anyway, it either accepts the packet or drops it.
691 */
692static	int
693tunpoll(dev, events, p)
694	dev_t dev;
695	int events;
696	struct proc *p;
697{
698	int		s;
699	struct tun_softc *tp = dev->si_drv1;
700	struct ifnet	*ifp = &tp->tun_if;
701	int		revents = 0;
702
703	s = splimp();
704	TUNDEBUG("%s%d: tunpoll\n", ifp->if_name, ifp->if_unit);
705
706	if (events & (POLLIN | POLLRDNORM)) {
707		if (ifp->if_snd.ifq_len > 0) {
708			TUNDEBUG("%s%d: tunpoll q=%d\n", ifp->if_name,
709			    ifp->if_unit, ifp->if_snd.ifq_len);
710			revents |= events & (POLLIN | POLLRDNORM);
711		} else {
712			TUNDEBUG("%s%d: tunpoll waiting\n", ifp->if_name,
713			    ifp->if_unit);
714			selrecord(p, &tp->tun_rsel);
715		}
716	}
717	if (events & (POLLOUT | POLLWRNORM))
718		revents |= events & (POLLOUT | POLLWRNORM);
719
720	splx(s);
721	return (revents);
722}
723