if_tun.c revision 51646
1/*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/
2
3/*
4 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
11 * user process to have its wicked way with. This driver has its
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 *
16 * $FreeBSD: head/sys/net/if_tun.c 51646 1999-09-25 12:06:01Z phk $
17 */
18
19#include "tun.h"
20
21#include "opt_inet.h"
22
23#include <sys/param.h>
24#include <sys/proc.h>
25#include <sys/systm.h>
26#include <sys/mbuf.h>
27#include <sys/socket.h>
28#include <sys/filio.h>
29#include <sys/sockio.h>
30#include <sys/ttycom.h>
31#include <sys/poll.h>
32#include <sys/signalvar.h>
33#include <sys/filedesc.h>
34#include <sys/kernel.h>
35#include <sys/sysctl.h>
36#include <sys/conf.h>
37#include <sys/uio.h>
38#include <sys/vnode.h>
39#include <sys/malloc.h>
40
41#include <net/if.h>
42#include <net/netisr.h>
43#include <net/route.h>
44
45#ifdef INET
46#include <netinet/in.h>
47#include <netinet/in_var.h>
48#endif
49
50#ifdef NS
51#include <netns/ns.h>
52#include <netns/ns_if.h>
53#endif
54
55#include <net/bpf.h>
56
57#include <net/if_tunvar.h>
58#include <net/if_tun.h>
59
60static MALLOC_DEFINE(M_TUN, "tun", "Tunnel Interface");
61
62static void tunattach __P((void *));
63PSEUDO_SET(tunattach, if_tun);
64
65static void tuncreate __P((dev_t dev));
66
67#define TUNDEBUG	if (tundebug) printf
68static int tundebug = 0;
69SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
70
71static int tunoutput __P((struct ifnet *, struct mbuf *, struct sockaddr *,
72	    struct rtentry *rt));
73static int tunifioctl __P((struct ifnet *, u_long, caddr_t));
74static int tuninit __P((struct ifnet *));
75
76static	d_open_t	tunopen;
77static	d_close_t	tunclose;
78static	d_read_t	tunread;
79static	d_write_t	tunwrite;
80static	d_ioctl_t	tunioctl;
81static	d_poll_t	tunpoll;
82
83#define CDEV_MAJOR 52
84static struct cdevsw tun_cdevsw = {
85	/* open */	tunopen,
86	/* close */	tunclose,
87	/* read */	tunread,
88	/* write */	tunwrite,
89	/* ioctl */	tunioctl,
90	/* stop */	nostop,
91	/* reset */	noreset,
92	/* devtotty */	nodevtotty,
93	/* poll */	tunpoll,
94	/* mmap */	nommap,
95	/* strategy */	nostrategy,
96	/* name */	"tun",
97	/* parms */	noparms,
98	/* maj */	CDEV_MAJOR,
99	/* dump */	nodump,
100	/* psize */	nopsize,
101	/* flags */	0,
102	/* maxio */	0,
103	/* bmaj */	-1
104};
105
106static void
107tunattach(dummy)
108	void *dummy;
109{
110
111	cdevsw_add(&tun_cdevsw);
112}
113
114static void
115tuncreate(dev)
116	dev_t dev;
117{
118	struct tun_softc *sc;
119	struct ifnet *ifp;
120
121	dev = make_dev(&tun_cdevsw, minor(dev),
122	    UID_UUCP, GID_DIALER, 0600, "tun%d", lminor(dev));
123
124	MALLOC(sc, struct tun_softc *, sizeof(*sc), M_TUN, M_WAITOK);
125	bzero(sc, sizeof *sc);
126	sc->tun_flags = TUN_INITED;
127
128	ifp = &sc->tun_if;
129	ifp->if_unit = lminor(dev);
130	ifp->if_name = "tun";
131	ifp->if_mtu = TUNMTU;
132	ifp->if_ioctl = tunifioctl;
133	ifp->if_output = tunoutput;
134	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
135	ifp->if_snd.ifq_maxlen = ifqmaxlen;
136	ifp->if_softc = sc;
137	if_attach(ifp);
138	bpfattach(ifp, DLT_NULL, sizeof(u_int));
139	dev->si_drv1 = sc;
140}
141
142/*
143 * tunnel open - must be superuser & the device must be
144 * configured in
145 */
146static	int
147tunopen(dev, flag, mode, p)
148	dev_t	dev;
149	int	flag, mode;
150	struct proc *p;
151{
152	struct ifnet	*ifp;
153	struct tun_softc *tp;
154	register int	error;
155
156	error = suser(p);
157	if (error)
158		return (error);
159
160	tp = dev->si_drv1;
161	if (!tp) {
162		tuncreate(dev);
163		tp = dev->si_drv1;
164	}
165	if (tp->tun_flags & TUN_OPEN)
166		return EBUSY;
167	tp->tun_pid = p->p_pid;
168	ifp = &tp->tun_if;
169	tp->tun_flags |= TUN_OPEN;
170	TUNDEBUG("%s%d: open\n", ifp->if_name, ifp->if_unit);
171	return (0);
172}
173
174/*
175 * tunclose - close the device - mark i/f down & delete
176 * routing info
177 */
178static	int
179tunclose(dev, foo, bar, p)
180	dev_t dev;
181	int foo;
182	int bar;
183	struct proc *p;
184{
185	register int	s;
186	struct tun_softc *tp;
187	struct ifnet	*ifp;
188	struct mbuf	*m;
189
190	tp = dev->si_drv1;
191	ifp = &tp->tun_if;
192
193	tp->tun_flags &= ~TUN_OPEN;
194	tp->tun_pid = 0;
195
196	/*
197	 * junk all pending output
198	 */
199	do {
200		s = splimp();
201		IF_DEQUEUE(&ifp->if_snd, m);
202		splx(s);
203		if (m)
204			m_freem(m);
205	} while (m);
206
207	if (ifp->if_flags & IFF_UP) {
208		s = splimp();
209		if_down(ifp);
210		splx(s);
211	}
212
213	if (ifp->if_flags & IFF_RUNNING) {
214		register struct ifaddr *ifa;
215
216		s = splimp();
217		/* find internet addresses and delete routes */
218		for (ifa = ifp->if_addrhead.tqh_first; ifa;
219		    ifa = ifa->ifa_link.tqe_next)
220			if (ifa->ifa_addr->sa_family == AF_INET)
221				rtinit(ifa, (int)RTM_DELETE,
222				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
223		ifp->if_flags &= ~IFF_RUNNING;
224		splx(s);
225	}
226
227	funsetown(tp->tun_sigio);
228	selwakeup(&tp->tun_rsel);
229
230	TUNDEBUG ("%s%d: closed\n", ifp->if_name, ifp->if_unit);
231	return (0);
232}
233
234static int
235tuninit(ifp)
236	struct ifnet *ifp;
237{
238	struct tun_softc *tp = ifp->if_softc;
239	register struct ifaddr *ifa;
240
241	TUNDEBUG("%s%d: tuninit\n", ifp->if_name, ifp->if_unit);
242
243	ifp->if_flags |= IFF_UP | IFF_RUNNING;
244	getmicrotime(&ifp->if_lastchange);
245
246	for (ifa = ifp->if_addrhead.tqh_first; ifa;
247	     ifa = ifa->ifa_link.tqe_next) {
248#ifdef INET
249		if (ifa->ifa_addr->sa_family == AF_INET) {
250		    struct sockaddr_in *si;
251
252		    si = (struct sockaddr_in *)ifa->ifa_addr;
253		    if (si && si->sin_addr.s_addr)
254			    tp->tun_flags |= TUN_IASET;
255
256		    si = (struct sockaddr_in *)ifa->ifa_dstaddr;
257		    if (si && si->sin_addr.s_addr)
258			    tp->tun_flags |= TUN_DSTADDR;
259		}
260#endif
261	}
262	return 0;
263}
264
265/*
266 * Process an ioctl request.
267 */
268int
269tunifioctl(ifp, cmd, data)
270	struct ifnet *ifp;
271	u_long	cmd;
272	caddr_t	data;
273{
274	struct ifreq *ifr = (struct ifreq *)data;
275	struct tun_softc *tp = ifp->if_softc;
276	struct ifstat *ifs;
277	int		error = 0, s;
278
279	s = splimp();
280	switch(cmd) {
281	case SIOCGIFSTATUS:
282		ifs = (struct ifstat *)data;
283		if (tp->tun_pid)
284			sprintf(ifs->ascii + strlen(ifs->ascii),
285			    "\tOpened by PID %d\n", tp->tun_pid);
286		return(0);
287	case SIOCSIFADDR:
288		tuninit(ifp);
289		TUNDEBUG("%s%d: address set\n",
290			 ifp->if_name, ifp->if_unit);
291		break;
292	case SIOCSIFDSTADDR:
293		tuninit(ifp);
294		TUNDEBUG("%s%d: destination address set\n",
295			 ifp->if_name, ifp->if_unit);
296		break;
297	case SIOCSIFMTU:
298		ifp->if_mtu = ifr->ifr_mtu;
299		TUNDEBUG("%s%d: mtu set\n",
300			 ifp->if_name, ifp->if_unit);
301		break;
302	case SIOCADDMULTI:
303	case SIOCDELMULTI:
304		break;
305
306
307	default:
308		error = EINVAL;
309	}
310	splx(s);
311	return (error);
312}
313
314/*
315 * tunoutput - queue packets from higher level ready to put out.
316 */
317int
318tunoutput(ifp, m0, dst, rt)
319	struct ifnet   *ifp;
320	struct mbuf    *m0;
321	struct sockaddr *dst;
322	struct rtentry *rt;
323{
324	struct tun_softc *tp = ifp->if_softc;
325	int		s;
326
327	TUNDEBUG ("%s%d: tunoutput\n", ifp->if_name, ifp->if_unit);
328
329	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
330		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
331			  ifp->if_unit, tp->tun_flags);
332		m_freem (m0);
333		return EHOSTDOWN;
334	}
335
336	/* BPF write needs to be handled specially */
337	if (dst->sa_family == AF_UNSPEC) {
338		dst->sa_family = *(mtod(m0, int *));
339		m0->m_len -= sizeof(int);
340		m0->m_pkthdr.len -= sizeof(int);
341		m0->m_data += sizeof(int);
342	}
343
344	if (ifp->if_bpf) {
345		/*
346		 * We need to prepend the address family as
347		 * a four byte field.  Cons up a dummy header
348		 * to pacify bpf.  This is safe because bpf
349		 * will only read from the mbuf (i.e., it won't
350		 * try to free it or keep a pointer to it).
351		 */
352		struct mbuf m;
353		u_int af = dst->sa_family;
354
355		m.m_next = m0;
356		m.m_len = 4;
357		m.m_data = (char *)&af;
358
359		bpf_mtap(ifp, &m);
360	}
361
362	/* prepend sockaddr? this may abort if the mbuf allocation fails */
363	if (tp->tun_flags & TUN_LMODE) {
364		/* allocate space for sockaddr */
365		M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
366
367		/* if allocation failed drop packet */
368		if (m0 == NULL){
369			s = splimp();	/* spl on queue manipulation */
370			IF_DROP(&ifp->if_snd);
371			splx(s);
372			ifp->if_oerrors++;
373			return (ENOBUFS);
374		} else {
375			bcopy(dst, m0->m_data, dst->sa_len);
376		}
377	}
378
379	switch(dst->sa_family) {
380#ifdef INET
381	case AF_INET:
382		s = splimp();
383		if (IF_QFULL(&ifp->if_snd)) {
384			IF_DROP(&ifp->if_snd);
385			m_freem(m0);
386			splx(s);
387			ifp->if_collisions++;
388			return (ENOBUFS);
389		}
390		ifp->if_obytes += m0->m_pkthdr.len;
391		IF_ENQUEUE(&ifp->if_snd, m0);
392		splx(s);
393		ifp->if_opackets++;
394		break;
395#endif
396	default:
397		m_freem(m0);
398		return EAFNOSUPPORT;
399	}
400
401	if (tp->tun_flags & TUN_RWAIT) {
402		tp->tun_flags &= ~TUN_RWAIT;
403		wakeup((caddr_t)tp);
404	}
405	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio)
406		pgsigio(tp->tun_sigio, SIGIO, 0);
407	selwakeup(&tp->tun_rsel);
408	return 0;
409}
410
411/*
412 * the cdevsw interface is now pretty minimal.
413 */
414static	int
415tunioctl(dev, cmd, data, flag, p)
416	dev_t		dev;
417	u_long		cmd;
418	caddr_t		data;
419	int		flag;
420	struct proc	*p;
421{
422	int		s;
423	struct tun_softc *tp = dev->si_drv1;
424 	struct tuninfo *tunp;
425
426	switch (cmd) {
427 	case TUNSIFINFO:
428 	        tunp = (struct tuninfo *)data;
429		if (tunp->mtu < IF_MINMTU)
430			return (EINVAL);
431 		tp->tun_if.if_mtu = tunp->mtu;
432 		tp->tun_if.if_type = tunp->type;
433 		tp->tun_if.if_baudrate = tunp->baudrate;
434 		break;
435 	case TUNGIFINFO:
436 		tunp = (struct tuninfo *)data;
437 		tunp->mtu = tp->tun_if.if_mtu;
438 		tunp->type = tp->tun_if.if_type;
439 		tunp->baudrate = tp->tun_if.if_baudrate;
440 		break;
441	case TUNSDEBUG:
442		tundebug = *(int *)data;
443		break;
444	case TUNGDEBUG:
445		*(int *)data = tundebug;
446		break;
447	case TUNSLMODE:
448		if (*(int *)data)
449			tp->tun_flags |= TUN_LMODE;
450		else
451			tp->tun_flags &= ~TUN_LMODE;
452		break;
453	case TUNSIFMODE:
454		/* deny this if UP */
455		if (tp->tun_if.if_flags & IFF_UP)
456			return(EBUSY);
457
458		switch (*(int *)data) {
459		case IFF_POINTOPOINT:
460			tp->tun_if.if_flags |= IFF_POINTOPOINT;
461			tp->tun_if.if_flags &= ~IFF_BROADCAST;
462			break;
463		case IFF_BROADCAST:
464			tp->tun_if.if_flags &= ~IFF_POINTOPOINT;
465			tp->tun_if.if_flags |= IFF_BROADCAST;
466			break;
467		default:
468			return(EINVAL);
469		}
470		break;
471	case FIONBIO:
472		break;
473	case FIOASYNC:
474		if (*(int *)data)
475			tp->tun_flags |= TUN_ASYNC;
476		else
477			tp->tun_flags &= ~TUN_ASYNC;
478		break;
479	case FIONREAD:
480		s = splimp();
481		if (tp->tun_if.if_snd.ifq_head) {
482			struct mbuf *mb = tp->tun_if.if_snd.ifq_head;
483			for( *(int *)data = 0; mb != 0; mb = mb->m_next)
484				*(int *)data += mb->m_len;
485		} else
486			*(int *)data = 0;
487		splx(s);
488		break;
489	case FIOSETOWN:
490		return (fsetown(*(int *)data, &tp->tun_sigio));
491
492	case FIOGETOWN:
493		*(int *)data = fgetown(tp->tun_sigio);
494		return (0);
495
496	/* This is deprecated, FIOSETOWN should be used instead. */
497	case TIOCSPGRP:
498		return (fsetown(-(*(int *)data), &tp->tun_sigio));
499
500	/* This is deprecated, FIOGETOWN should be used instead. */
501	case TIOCGPGRP:
502		*(int *)data = -fgetown(tp->tun_sigio);
503		return (0);
504
505	default:
506		return (ENOTTY);
507	}
508	return (0);
509}
510
511/*
512 * The cdevsw read interface - reads a packet at a time, or at
513 * least as much of a packet as can be read.
514 */
515static	int
516tunread(dev, uio, flag)
517	dev_t dev;
518	struct uio *uio;
519	int flag;
520{
521	struct tun_softc *tp = dev->si_drv1;
522	struct ifnet	*ifp = &tp->tun_if;
523	struct mbuf	*m, *m0;
524	int		error=0, len, s;
525
526	TUNDEBUG ("%s%d: read\n", ifp->if_name, ifp->if_unit);
527	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
528		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
529			  ifp->if_unit, tp->tun_flags);
530		return EHOSTDOWN;
531	}
532
533	tp->tun_flags &= ~TUN_RWAIT;
534
535	s = splimp();
536	do {
537		IF_DEQUEUE(&ifp->if_snd, m0);
538		if (m0 == 0) {
539			if (flag & IO_NDELAY) {
540				splx(s);
541				return EWOULDBLOCK;
542			}
543			tp->tun_flags |= TUN_RWAIT;
544			if((error = tsleep((caddr_t)tp, PCATCH | (PZERO + 1),
545					"tunread", 0)) != 0) {
546				splx(s);
547				return error;
548			}
549		}
550	} while (m0 == 0);
551	splx(s);
552
553	while (m0 && uio->uio_resid > 0 && error == 0) {
554		len = min(uio->uio_resid, m0->m_len);
555		if (len == 0)
556			break;
557		error = uiomove(mtod(m0, caddr_t), len, uio);
558		MFREE(m0, m);
559		m0 = m;
560	}
561
562	if (m0) {
563		TUNDEBUG("Dropping mbuf\n");
564		m_freem(m0);
565	}
566	return error;
567}
568
569/*
570 * the cdevsw write interface - an atomic write is a packet - or else!
571 */
572static	int
573tunwrite(dev, uio, flag)
574	dev_t dev;
575	struct uio *uio;
576	int flag;
577{
578	struct tun_softc *tp = dev->si_drv1;
579	struct ifnet	*ifp = &tp->tun_if;
580	struct mbuf	*top, **mp, *m;
581	int		error=0, s, tlen, mlen;
582
583	TUNDEBUG("%s%d: tunwrite\n", ifp->if_name, ifp->if_unit);
584
585	if (uio->uio_resid == 0)
586		return 0;
587
588	if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
589		TUNDEBUG("%s%d: len=%d!\n", ifp->if_name, ifp->if_unit,
590		    uio->uio_resid);
591		return EIO;
592	}
593	tlen = uio->uio_resid;
594
595	/* get a header mbuf */
596	MGETHDR(m, M_DONTWAIT, MT_DATA);
597	if (m == NULL)
598		return ENOBUFS;
599	mlen = MHLEN;
600
601	top = 0;
602	mp = &top;
603	while (error == 0 && uio->uio_resid > 0) {
604		m->m_len = min(mlen, uio->uio_resid);
605		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
606		*mp = m;
607		mp = &m->m_next;
608		if (uio->uio_resid > 0) {
609			MGET (m, M_DONTWAIT, MT_DATA);
610			if (m == 0) {
611				error = ENOBUFS;
612				break;
613			}
614			mlen = MLEN;
615		}
616	}
617	if (error) {
618		if (top)
619			m_freem (top);
620		return error;
621	}
622
623	top->m_pkthdr.len = tlen;
624	top->m_pkthdr.rcvif = ifp;
625
626	if (ifp->if_bpf) {
627		/*
628		 * We need to prepend the address family as
629		 * a four byte field.  Cons up a dummy header
630		 * to pacify bpf.  This is safe because bpf
631		 * will only read from the mbuf (i.e., it won't
632		 * try to free it or keep a pointer to it).
633		 */
634		struct mbuf m;
635		u_int af = AF_INET;
636
637		m.m_next = top;
638		m.m_len = 4;
639		m.m_data = (char *)&af;
640
641		bpf_mtap(ifp, &m);
642	}
643
644#ifdef INET
645	s = splimp();
646	if (IF_QFULL (&ipintrq)) {
647		IF_DROP(&ipintrq);
648		splx(s);
649		ifp->if_collisions++;
650		m_freem(top);
651		return ENOBUFS;
652	}
653	IF_ENQUEUE(&ipintrq, top);
654	splx(s);
655	ifp->if_ibytes += tlen;
656	ifp->if_ipackets++;
657	schednetisr(NETISR_IP);
658#endif
659	return error;
660}
661
662/*
663 * tunpoll - the poll interface, this is only useful on reads
664 * really. The write detect always returns true, write never blocks
665 * anyway, it either accepts the packet or drops it.
666 */
667static	int
668tunpoll(dev, events, p)
669	dev_t dev;
670	int events;
671	struct proc *p;
672{
673	int		s;
674	struct tun_softc *tp = dev->si_drv1;
675	struct ifnet	*ifp = &tp->tun_if;
676	int		revents = 0;
677
678	s = splimp();
679	TUNDEBUG("%s%d: tunpoll\n", ifp->if_name, ifp->if_unit);
680
681	if (events & (POLLIN | POLLRDNORM)) {
682		if (ifp->if_snd.ifq_len > 0) {
683			TUNDEBUG("%s%d: tunpoll q=%d\n", ifp->if_name,
684			    ifp->if_unit, ifp->if_snd.ifq_len);
685			revents |= events & (POLLIN | POLLRDNORM);
686		} else {
687			TUNDEBUG("%s%d: tunpoll waiting\n", ifp->if_name,
688			    ifp->if_unit);
689			selrecord(p, &tp->tun_rsel);
690		}
691	}
692	if (events & (POLLOUT | POLLWRNORM))
693		revents |= events & (POLLOUT | POLLWRNORM);
694
695	splx(s);
696	return (revents);
697}
698