if_tun.c revision 51658
1/*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/
2
3/*
4 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
11 * user process to have its wicked way with. This driver has its
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 *
16 * $FreeBSD: head/sys/net/if_tun.c 51658 1999-09-25 18:24:47Z phk $
17 */
18
19#include "tun.h"
20
21#include "opt_inet.h"
22
23#include <sys/param.h>
24#include <sys/proc.h>
25#include <sys/systm.h>
26#include <sys/mbuf.h>
27#include <sys/socket.h>
28#include <sys/filio.h>
29#include <sys/sockio.h>
30#include <sys/ttycom.h>
31#include <sys/poll.h>
32#include <sys/signalvar.h>
33#include <sys/filedesc.h>
34#include <sys/kernel.h>
35#include <sys/sysctl.h>
36#include <sys/conf.h>
37#include <sys/uio.h>
38#include <sys/vnode.h>
39#include <sys/malloc.h>
40
41#include <net/if.h>
42#include <net/netisr.h>
43#include <net/route.h>
44
45#ifdef INET
46#include <netinet/in.h>
47#include <netinet/in_var.h>
48#endif
49
50#ifdef NS
51#include <netns/ns.h>
52#include <netns/ns_if.h>
53#endif
54
55#include <net/bpf.h>
56
57#include <net/if_tunvar.h>
58#include <net/if_tun.h>
59
60static MALLOC_DEFINE(M_TUN, "tun", "Tunnel Interface");
61
62static void tunattach __P((void *));
63PSEUDO_SET(tunattach, if_tun);
64
65static void tuncreate __P((dev_t dev));
66
67#define TUNDEBUG	if (tundebug) printf
68static int tundebug = 0;
69SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
70
71static int tunoutput __P((struct ifnet *, struct mbuf *, struct sockaddr *,
72	    struct rtentry *rt));
73static int tunifioctl __P((struct ifnet *, u_long, caddr_t));
74static int tuninit __P((struct ifnet *));
75
76static	d_open_t	tunopen;
77static	d_close_t	tunclose;
78static	d_read_t	tunread;
79static	d_write_t	tunwrite;
80static	d_ioctl_t	tunioctl;
81static	d_poll_t	tunpoll;
82
83#define CDEV_MAJOR 52
84static struct cdevsw tun_cdevsw = {
85	/* open */	tunopen,
86	/* close */	tunclose,
87	/* read */	tunread,
88	/* write */	tunwrite,
89	/* ioctl */	tunioctl,
90	/* poll */	tunpoll,
91	/* mmap */	nommap,
92	/* strategy */	nostrategy,
93	/* name */	"tun",
94	/* maj */	CDEV_MAJOR,
95	/* dump */	nodump,
96	/* psize */	nopsize,
97	/* flags */	0,
98	/* bmaj */	-1
99};
100
101static void
102tunattach(dummy)
103	void *dummy;
104{
105
106	cdevsw_add(&tun_cdevsw);
107}
108
109static void
110tuncreate(dev)
111	dev_t dev;
112{
113	struct tun_softc *sc;
114	struct ifnet *ifp;
115
116	dev = make_dev(&tun_cdevsw, minor(dev),
117	    UID_UUCP, GID_DIALER, 0600, "tun%d", lminor(dev));
118
119	MALLOC(sc, struct tun_softc *, sizeof(*sc), M_TUN, M_WAITOK);
120	bzero(sc, sizeof *sc);
121	sc->tun_flags = TUN_INITED;
122
123	ifp = &sc->tun_if;
124	ifp->if_unit = lminor(dev);
125	ifp->if_name = "tun";
126	ifp->if_mtu = TUNMTU;
127	ifp->if_ioctl = tunifioctl;
128	ifp->if_output = tunoutput;
129	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
130	ifp->if_snd.ifq_maxlen = ifqmaxlen;
131	ifp->if_softc = sc;
132	if_attach(ifp);
133	bpfattach(ifp, DLT_NULL, sizeof(u_int));
134	dev->si_drv1 = sc;
135}
136
137/*
138 * tunnel open - must be superuser & the device must be
139 * configured in
140 */
141static	int
142tunopen(dev, flag, mode, p)
143	dev_t	dev;
144	int	flag, mode;
145	struct proc *p;
146{
147	struct ifnet	*ifp;
148	struct tun_softc *tp;
149	register int	error;
150
151	error = suser(p);
152	if (error)
153		return (error);
154
155	tp = dev->si_drv1;
156	if (!tp) {
157		tuncreate(dev);
158		tp = dev->si_drv1;
159	}
160	if (tp->tun_flags & TUN_OPEN)
161		return EBUSY;
162	tp->tun_pid = p->p_pid;
163	ifp = &tp->tun_if;
164	tp->tun_flags |= TUN_OPEN;
165	TUNDEBUG("%s%d: open\n", ifp->if_name, ifp->if_unit);
166	return (0);
167}
168
169/*
170 * tunclose - close the device - mark i/f down & delete
171 * routing info
172 */
173static	int
174tunclose(dev, foo, bar, p)
175	dev_t dev;
176	int foo;
177	int bar;
178	struct proc *p;
179{
180	register int	s;
181	struct tun_softc *tp;
182	struct ifnet	*ifp;
183	struct mbuf	*m;
184
185	tp = dev->si_drv1;
186	ifp = &tp->tun_if;
187
188	tp->tun_flags &= ~TUN_OPEN;
189	tp->tun_pid = 0;
190
191	/*
192	 * junk all pending output
193	 */
194	do {
195		s = splimp();
196		IF_DEQUEUE(&ifp->if_snd, m);
197		splx(s);
198		if (m)
199			m_freem(m);
200	} while (m);
201
202	if (ifp->if_flags & IFF_UP) {
203		s = splimp();
204		if_down(ifp);
205		splx(s);
206	}
207
208	if (ifp->if_flags & IFF_RUNNING) {
209		register struct ifaddr *ifa;
210
211		s = splimp();
212		/* find internet addresses and delete routes */
213		for (ifa = ifp->if_addrhead.tqh_first; ifa;
214		    ifa = ifa->ifa_link.tqe_next)
215			if (ifa->ifa_addr->sa_family == AF_INET)
216				rtinit(ifa, (int)RTM_DELETE,
217				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
218		ifp->if_flags &= ~IFF_RUNNING;
219		splx(s);
220	}
221
222	funsetown(tp->tun_sigio);
223	selwakeup(&tp->tun_rsel);
224
225	TUNDEBUG ("%s%d: closed\n", ifp->if_name, ifp->if_unit);
226	return (0);
227}
228
229static int
230tuninit(ifp)
231	struct ifnet *ifp;
232{
233	struct tun_softc *tp = ifp->if_softc;
234	register struct ifaddr *ifa;
235
236	TUNDEBUG("%s%d: tuninit\n", ifp->if_name, ifp->if_unit);
237
238	ifp->if_flags |= IFF_UP | IFF_RUNNING;
239	getmicrotime(&ifp->if_lastchange);
240
241	for (ifa = ifp->if_addrhead.tqh_first; ifa;
242	     ifa = ifa->ifa_link.tqe_next) {
243#ifdef INET
244		if (ifa->ifa_addr->sa_family == AF_INET) {
245		    struct sockaddr_in *si;
246
247		    si = (struct sockaddr_in *)ifa->ifa_addr;
248		    if (si && si->sin_addr.s_addr)
249			    tp->tun_flags |= TUN_IASET;
250
251		    si = (struct sockaddr_in *)ifa->ifa_dstaddr;
252		    if (si && si->sin_addr.s_addr)
253			    tp->tun_flags |= TUN_DSTADDR;
254		}
255#endif
256	}
257	return 0;
258}
259
260/*
261 * Process an ioctl request.
262 */
263int
264tunifioctl(ifp, cmd, data)
265	struct ifnet *ifp;
266	u_long	cmd;
267	caddr_t	data;
268{
269	struct ifreq *ifr = (struct ifreq *)data;
270	struct tun_softc *tp = ifp->if_softc;
271	struct ifstat *ifs;
272	int		error = 0, s;
273
274	s = splimp();
275	switch(cmd) {
276	case SIOCGIFSTATUS:
277		ifs = (struct ifstat *)data;
278		if (tp->tun_pid)
279			sprintf(ifs->ascii + strlen(ifs->ascii),
280			    "\tOpened by PID %d\n", tp->tun_pid);
281		return(0);
282	case SIOCSIFADDR:
283		tuninit(ifp);
284		TUNDEBUG("%s%d: address set\n",
285			 ifp->if_name, ifp->if_unit);
286		break;
287	case SIOCSIFDSTADDR:
288		tuninit(ifp);
289		TUNDEBUG("%s%d: destination address set\n",
290			 ifp->if_name, ifp->if_unit);
291		break;
292	case SIOCSIFMTU:
293		ifp->if_mtu = ifr->ifr_mtu;
294		TUNDEBUG("%s%d: mtu set\n",
295			 ifp->if_name, ifp->if_unit);
296		break;
297	case SIOCADDMULTI:
298	case SIOCDELMULTI:
299		break;
300
301
302	default:
303		error = EINVAL;
304	}
305	splx(s);
306	return (error);
307}
308
309/*
310 * tunoutput - queue packets from higher level ready to put out.
311 */
312int
313tunoutput(ifp, m0, dst, rt)
314	struct ifnet   *ifp;
315	struct mbuf    *m0;
316	struct sockaddr *dst;
317	struct rtentry *rt;
318{
319	struct tun_softc *tp = ifp->if_softc;
320	int		s;
321
322	TUNDEBUG ("%s%d: tunoutput\n", ifp->if_name, ifp->if_unit);
323
324	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
325		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
326			  ifp->if_unit, tp->tun_flags);
327		m_freem (m0);
328		return EHOSTDOWN;
329	}
330
331	/* BPF write needs to be handled specially */
332	if (dst->sa_family == AF_UNSPEC) {
333		dst->sa_family = *(mtod(m0, int *));
334		m0->m_len -= sizeof(int);
335		m0->m_pkthdr.len -= sizeof(int);
336		m0->m_data += sizeof(int);
337	}
338
339	if (ifp->if_bpf) {
340		/*
341		 * We need to prepend the address family as
342		 * a four byte field.  Cons up a dummy header
343		 * to pacify bpf.  This is safe because bpf
344		 * will only read from the mbuf (i.e., it won't
345		 * try to free it or keep a pointer to it).
346		 */
347		struct mbuf m;
348		u_int af = dst->sa_family;
349
350		m.m_next = m0;
351		m.m_len = 4;
352		m.m_data = (char *)&af;
353
354		bpf_mtap(ifp, &m);
355	}
356
357	/* prepend sockaddr? this may abort if the mbuf allocation fails */
358	if (tp->tun_flags & TUN_LMODE) {
359		/* allocate space for sockaddr */
360		M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
361
362		/* if allocation failed drop packet */
363		if (m0 == NULL){
364			s = splimp();	/* spl on queue manipulation */
365			IF_DROP(&ifp->if_snd);
366			splx(s);
367			ifp->if_oerrors++;
368			return (ENOBUFS);
369		} else {
370			bcopy(dst, m0->m_data, dst->sa_len);
371		}
372	}
373
374	switch(dst->sa_family) {
375#ifdef INET
376	case AF_INET:
377		s = splimp();
378		if (IF_QFULL(&ifp->if_snd)) {
379			IF_DROP(&ifp->if_snd);
380			m_freem(m0);
381			splx(s);
382			ifp->if_collisions++;
383			return (ENOBUFS);
384		}
385		ifp->if_obytes += m0->m_pkthdr.len;
386		IF_ENQUEUE(&ifp->if_snd, m0);
387		splx(s);
388		ifp->if_opackets++;
389		break;
390#endif
391	default:
392		m_freem(m0);
393		return EAFNOSUPPORT;
394	}
395
396	if (tp->tun_flags & TUN_RWAIT) {
397		tp->tun_flags &= ~TUN_RWAIT;
398		wakeup((caddr_t)tp);
399	}
400	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio)
401		pgsigio(tp->tun_sigio, SIGIO, 0);
402	selwakeup(&tp->tun_rsel);
403	return 0;
404}
405
406/*
407 * the cdevsw interface is now pretty minimal.
408 */
409static	int
410tunioctl(dev, cmd, data, flag, p)
411	dev_t		dev;
412	u_long		cmd;
413	caddr_t		data;
414	int		flag;
415	struct proc	*p;
416{
417	int		s;
418	struct tun_softc *tp = dev->si_drv1;
419 	struct tuninfo *tunp;
420
421	switch (cmd) {
422 	case TUNSIFINFO:
423 	        tunp = (struct tuninfo *)data;
424		if (tunp->mtu < IF_MINMTU)
425			return (EINVAL);
426 		tp->tun_if.if_mtu = tunp->mtu;
427 		tp->tun_if.if_type = tunp->type;
428 		tp->tun_if.if_baudrate = tunp->baudrate;
429 		break;
430 	case TUNGIFINFO:
431 		tunp = (struct tuninfo *)data;
432 		tunp->mtu = tp->tun_if.if_mtu;
433 		tunp->type = tp->tun_if.if_type;
434 		tunp->baudrate = tp->tun_if.if_baudrate;
435 		break;
436	case TUNSDEBUG:
437		tundebug = *(int *)data;
438		break;
439	case TUNGDEBUG:
440		*(int *)data = tundebug;
441		break;
442	case TUNSLMODE:
443		if (*(int *)data)
444			tp->tun_flags |= TUN_LMODE;
445		else
446			tp->tun_flags &= ~TUN_LMODE;
447		break;
448	case TUNSIFMODE:
449		/* deny this if UP */
450		if (tp->tun_if.if_flags & IFF_UP)
451			return(EBUSY);
452
453		switch (*(int *)data) {
454		case IFF_POINTOPOINT:
455			tp->tun_if.if_flags |= IFF_POINTOPOINT;
456			tp->tun_if.if_flags &= ~IFF_BROADCAST;
457			break;
458		case IFF_BROADCAST:
459			tp->tun_if.if_flags &= ~IFF_POINTOPOINT;
460			tp->tun_if.if_flags |= IFF_BROADCAST;
461			break;
462		default:
463			return(EINVAL);
464		}
465		break;
466	case FIONBIO:
467		break;
468	case FIOASYNC:
469		if (*(int *)data)
470			tp->tun_flags |= TUN_ASYNC;
471		else
472			tp->tun_flags &= ~TUN_ASYNC;
473		break;
474	case FIONREAD:
475		s = splimp();
476		if (tp->tun_if.if_snd.ifq_head) {
477			struct mbuf *mb = tp->tun_if.if_snd.ifq_head;
478			for( *(int *)data = 0; mb != 0; mb = mb->m_next)
479				*(int *)data += mb->m_len;
480		} else
481			*(int *)data = 0;
482		splx(s);
483		break;
484	case FIOSETOWN:
485		return (fsetown(*(int *)data, &tp->tun_sigio));
486
487	case FIOGETOWN:
488		*(int *)data = fgetown(tp->tun_sigio);
489		return (0);
490
491	/* This is deprecated, FIOSETOWN should be used instead. */
492	case TIOCSPGRP:
493		return (fsetown(-(*(int *)data), &tp->tun_sigio));
494
495	/* This is deprecated, FIOGETOWN should be used instead. */
496	case TIOCGPGRP:
497		*(int *)data = -fgetown(tp->tun_sigio);
498		return (0);
499
500	default:
501		return (ENOTTY);
502	}
503	return (0);
504}
505
506/*
507 * The cdevsw read interface - reads a packet at a time, or at
508 * least as much of a packet as can be read.
509 */
510static	int
511tunread(dev, uio, flag)
512	dev_t dev;
513	struct uio *uio;
514	int flag;
515{
516	struct tun_softc *tp = dev->si_drv1;
517	struct ifnet	*ifp = &tp->tun_if;
518	struct mbuf	*m, *m0;
519	int		error=0, len, s;
520
521	TUNDEBUG ("%s%d: read\n", ifp->if_name, ifp->if_unit);
522	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
523		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
524			  ifp->if_unit, tp->tun_flags);
525		return EHOSTDOWN;
526	}
527
528	tp->tun_flags &= ~TUN_RWAIT;
529
530	s = splimp();
531	do {
532		IF_DEQUEUE(&ifp->if_snd, m0);
533		if (m0 == 0) {
534			if (flag & IO_NDELAY) {
535				splx(s);
536				return EWOULDBLOCK;
537			}
538			tp->tun_flags |= TUN_RWAIT;
539			if((error = tsleep((caddr_t)tp, PCATCH | (PZERO + 1),
540					"tunread", 0)) != 0) {
541				splx(s);
542				return error;
543			}
544		}
545	} while (m0 == 0);
546	splx(s);
547
548	while (m0 && uio->uio_resid > 0 && error == 0) {
549		len = min(uio->uio_resid, m0->m_len);
550		if (len == 0)
551			break;
552		error = uiomove(mtod(m0, caddr_t), len, uio);
553		MFREE(m0, m);
554		m0 = m;
555	}
556
557	if (m0) {
558		TUNDEBUG("Dropping mbuf\n");
559		m_freem(m0);
560	}
561	return error;
562}
563
564/*
565 * the cdevsw write interface - an atomic write is a packet - or else!
566 */
567static	int
568tunwrite(dev, uio, flag)
569	dev_t dev;
570	struct uio *uio;
571	int flag;
572{
573	struct tun_softc *tp = dev->si_drv1;
574	struct ifnet	*ifp = &tp->tun_if;
575	struct mbuf	*top, **mp, *m;
576	int		error=0, s, tlen, mlen;
577
578	TUNDEBUG("%s%d: tunwrite\n", ifp->if_name, ifp->if_unit);
579
580	if (uio->uio_resid == 0)
581		return 0;
582
583	if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
584		TUNDEBUG("%s%d: len=%d!\n", ifp->if_name, ifp->if_unit,
585		    uio->uio_resid);
586		return EIO;
587	}
588	tlen = uio->uio_resid;
589
590	/* get a header mbuf */
591	MGETHDR(m, M_DONTWAIT, MT_DATA);
592	if (m == NULL)
593		return ENOBUFS;
594	mlen = MHLEN;
595
596	top = 0;
597	mp = &top;
598	while (error == 0 && uio->uio_resid > 0) {
599		m->m_len = min(mlen, uio->uio_resid);
600		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
601		*mp = m;
602		mp = &m->m_next;
603		if (uio->uio_resid > 0) {
604			MGET (m, M_DONTWAIT, MT_DATA);
605			if (m == 0) {
606				error = ENOBUFS;
607				break;
608			}
609			mlen = MLEN;
610		}
611	}
612	if (error) {
613		if (top)
614			m_freem (top);
615		return error;
616	}
617
618	top->m_pkthdr.len = tlen;
619	top->m_pkthdr.rcvif = ifp;
620
621	if (ifp->if_bpf) {
622		/*
623		 * We need to prepend the address family as
624		 * a four byte field.  Cons up a dummy header
625		 * to pacify bpf.  This is safe because bpf
626		 * will only read from the mbuf (i.e., it won't
627		 * try to free it or keep a pointer to it).
628		 */
629		struct mbuf m;
630		u_int af = AF_INET;
631
632		m.m_next = top;
633		m.m_len = 4;
634		m.m_data = (char *)&af;
635
636		bpf_mtap(ifp, &m);
637	}
638
639#ifdef INET
640	s = splimp();
641	if (IF_QFULL (&ipintrq)) {
642		IF_DROP(&ipintrq);
643		splx(s);
644		ifp->if_collisions++;
645		m_freem(top);
646		return ENOBUFS;
647	}
648	IF_ENQUEUE(&ipintrq, top);
649	splx(s);
650	ifp->if_ibytes += tlen;
651	ifp->if_ipackets++;
652	schednetisr(NETISR_IP);
653#endif
654	return error;
655}
656
657/*
658 * tunpoll - the poll interface, this is only useful on reads
659 * really. The write detect always returns true, write never blocks
660 * anyway, it either accepts the packet or drops it.
661 */
662static	int
663tunpoll(dev, events, p)
664	dev_t dev;
665	int events;
666	struct proc *p;
667{
668	int		s;
669	struct tun_softc *tp = dev->si_drv1;
670	struct ifnet	*ifp = &tp->tun_if;
671	int		revents = 0;
672
673	s = splimp();
674	TUNDEBUG("%s%d: tunpoll\n", ifp->if_name, ifp->if_unit);
675
676	if (events & (POLLIN | POLLRDNORM)) {
677		if (ifp->if_snd.ifq_len > 0) {
678			TUNDEBUG("%s%d: tunpoll q=%d\n", ifp->if_name,
679			    ifp->if_unit, ifp->if_snd.ifq_len);
680			revents |= events & (POLLIN | POLLRDNORM);
681		} else {
682			TUNDEBUG("%s%d: tunpoll waiting\n", ifp->if_name,
683			    ifp->if_unit);
684			selrecord(p, &tp->tun_rsel);
685		}
686	}
687	if (events & (POLLOUT | POLLWRNORM))
688		revents |= events & (POLLOUT | POLLWRNORM);
689
690	splx(s);
691	return (revents);
692}
693