if_tun.c revision 63358
1/*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/
2
3/*
4 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
 * user process to have its wicked way with. This driver has its
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 *
16 * $FreeBSD: head/sys/net/if_tun.c 63358 2000-07-17 23:21:42Z brian $
17 */
18
19#include "opt_inet.h"
20
21#include <sys/param.h>
22#include <sys/proc.h>
23#include <sys/systm.h>
24#include <sys/mbuf.h>
25#include <sys/socket.h>
26#include <sys/filio.h>
27#include <sys/sockio.h>
28#include <sys/ttycom.h>
29#include <sys/poll.h>
30#include <sys/signalvar.h>
31#include <sys/filedesc.h>
32#include <sys/kernel.h>
33#include <sys/sysctl.h>
34#include <sys/conf.h>
35#include <sys/uio.h>
36#include <sys/vnode.h>
37#include <sys/malloc.h>
38
39#include <net/if.h>
40#include <net/if_types.h>
41#include <net/route.h>
42#include <net/intrq.h>
43
44#ifdef INET
45#include <netinet/in.h>
46#endif
47
48#include <net/bpf.h>
49
50#include <net/if_tunvar.h>
51#include <net/if_tun.h>
52
53static MALLOC_DEFINE(M_TUN, "tun", "Tunnel Interface");
54
55static void tunattach __P((void *));
56PSEUDO_SET(tunattach, if_tun);
57
58static void tuncreate __P((dev_t dev));
59
60#define TUNDEBUG	if (tundebug) printf
61static int tundebug = 0;
62SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
63
64static int tunoutput __P((struct ifnet *, struct mbuf *, struct sockaddr *,
65	    struct rtentry *rt));
66static int tunifioctl __P((struct ifnet *, u_long, caddr_t));
67static int tuninit __P((struct ifnet *));
68
69static	d_open_t	tunopen;
70static	d_close_t	tunclose;
71static	d_read_t	tunread;
72static	d_write_t	tunwrite;
73static	d_ioctl_t	tunioctl;
74static	d_poll_t	tunpoll;
75
76#define CDEV_MAJOR 52
77static struct cdevsw tun_cdevsw = {
78	/* open */	tunopen,
79	/* close */	tunclose,
80	/* read */	tunread,
81	/* write */	tunwrite,
82	/* ioctl */	tunioctl,
83	/* poll */	tunpoll,
84	/* mmap */	nommap,
85	/* strategy */	nostrategy,
86	/* name */	"tun",
87	/* maj */	CDEV_MAJOR,
88	/* dump */	nodump,
89	/* psize */	nopsize,
90	/* flags */	0,
91	/* bmaj */	-1
92};
93
94static void
95tunattach(dummy)
96	void *dummy;
97{
98
99	cdevsw_add(&tun_cdevsw);
100}
101
102static void
103tuncreate(dev)
104	dev_t dev;
105{
106	struct tun_softc *sc;
107	struct ifnet *ifp;
108
109	dev = make_dev(&tun_cdevsw, minor(dev),
110	    UID_UUCP, GID_DIALER, 0600, "tun%d", lminor(dev));
111
112	MALLOC(sc, struct tun_softc *, sizeof(*sc), M_TUN, M_WAITOK);
113	bzero(sc, sizeof *sc);
114	sc->tun_flags = TUN_INITED;
115
116	ifp = &sc->tun_if;
117	ifp->if_unit = lminor(dev);
118	ifp->if_name = "tun";
119	ifp->if_mtu = TUNMTU;
120	ifp->if_ioctl = tunifioctl;
121	ifp->if_output = tunoutput;
122	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
123	ifp->if_type = IFT_PPP;
124	ifp->if_snd.ifq_maxlen = ifqmaxlen;
125	ifp->if_softc = sc;
126	if_attach(ifp);
127	bpfattach(ifp, DLT_NULL, sizeof(u_int));
128	dev->si_drv1 = sc;
129}
130
131/*
132 * tunnel open - must be superuser & the device must be
133 * configured in
134 */
135static	int
136tunopen(dev, flag, mode, p)
137	dev_t	dev;
138	int	flag, mode;
139	struct proc *p;
140{
141	struct ifnet	*ifp;
142	struct tun_softc *tp;
143	register int	error;
144
145	error = suser(p);
146	if (error)
147		return (error);
148
149	tp = dev->si_drv1;
150	if (!tp) {
151		tuncreate(dev);
152		tp = dev->si_drv1;
153	}
154	if (tp->tun_flags & TUN_OPEN)
155		return EBUSY;
156	tp->tun_pid = p->p_pid;
157	ifp = &tp->tun_if;
158	tp->tun_flags |= TUN_OPEN;
159	TUNDEBUG("%s%d: open\n", ifp->if_name, ifp->if_unit);
160	return (0);
161}
162
163/*
164 * tunclose - close the device - mark i/f down & delete
165 * routing info
166 */
167static	int
168tunclose(dev, foo, bar, p)
169	dev_t dev;
170	int foo;
171	int bar;
172	struct proc *p;
173{
174	register int	s;
175	struct tun_softc *tp;
176	struct ifnet	*ifp;
177	struct mbuf	*m;
178
179	tp = dev->si_drv1;
180	ifp = &tp->tun_if;
181
182	tp->tun_flags &= ~TUN_OPEN;
183	tp->tun_pid = 0;
184
185	/*
186	 * junk all pending output
187	 */
188	do {
189		s = splimp();
190		IF_DEQUEUE(&ifp->if_snd, m);
191		splx(s);
192		if (m)
193			m_freem(m);
194	} while (m);
195
196	if (ifp->if_flags & IFF_UP) {
197		s = splimp();
198		if_down(ifp);
199		splx(s);
200	}
201
202	if (ifp->if_flags & IFF_RUNNING) {
203		register struct ifaddr *ifa;
204
205		s = splimp();
206		/* find internet addresses and delete routes */
207		for (ifa = ifp->if_addrhead.tqh_first; ifa;
208		    ifa = ifa->ifa_link.tqe_next)
209			if (ifa->ifa_addr->sa_family == AF_INET)
210				rtinit(ifa, (int)RTM_DELETE,
211				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
212		ifp->if_flags &= ~IFF_RUNNING;
213		splx(s);
214	}
215
216	funsetown(tp->tun_sigio);
217	selwakeup(&tp->tun_rsel);
218
219	TUNDEBUG ("%s%d: closed\n", ifp->if_name, ifp->if_unit);
220	return (0);
221}
222
223static int
224tuninit(ifp)
225	struct ifnet *ifp;
226{
227	struct tun_softc *tp = ifp->if_softc;
228	register struct ifaddr *ifa;
229
230	TUNDEBUG("%s%d: tuninit\n", ifp->if_name, ifp->if_unit);
231
232	ifp->if_flags |= IFF_UP | IFF_RUNNING;
233	getmicrotime(&ifp->if_lastchange);
234
235	for (ifa = ifp->if_addrhead.tqh_first; ifa;
236	     ifa = ifa->ifa_link.tqe_next) {
237#ifdef INET
238		if (ifa->ifa_addr->sa_family == AF_INET) {
239		    struct sockaddr_in *si;
240
241		    si = (struct sockaddr_in *)ifa->ifa_addr;
242		    if (si && si->sin_addr.s_addr)
243			    tp->tun_flags |= TUN_IASET;
244
245		    si = (struct sockaddr_in *)ifa->ifa_dstaddr;
246		    if (si && si->sin_addr.s_addr)
247			    tp->tun_flags |= TUN_DSTADDR;
248		}
249#endif
250	}
251	return 0;
252}
253
254/*
255 * Process an ioctl request.
256 */
257int
258tunifioctl(ifp, cmd, data)
259	struct ifnet *ifp;
260	u_long	cmd;
261	caddr_t	data;
262{
263	struct ifreq *ifr = (struct ifreq *)data;
264	struct tun_softc *tp = ifp->if_softc;
265	struct ifstat *ifs;
266	int		error = 0, s;
267
268	s = splimp();
269	switch(cmd) {
270	case SIOCGIFSTATUS:
271		ifs = (struct ifstat *)data;
272		if (tp->tun_pid)
273			sprintf(ifs->ascii + strlen(ifs->ascii),
274			    "\tOpened by PID %d\n", tp->tun_pid);
275		return(0);
276	case SIOCSIFADDR:
277		tuninit(ifp);
278		TUNDEBUG("%s%d: address set\n",
279			 ifp->if_name, ifp->if_unit);
280		break;
281	case SIOCSIFDSTADDR:
282		tuninit(ifp);
283		TUNDEBUG("%s%d: destination address set\n",
284			 ifp->if_name, ifp->if_unit);
285		break;
286	case SIOCSIFMTU:
287		ifp->if_mtu = ifr->ifr_mtu;
288		TUNDEBUG("%s%d: mtu set\n",
289			 ifp->if_name, ifp->if_unit);
290		break;
291	case SIOCADDMULTI:
292	case SIOCDELMULTI:
293		break;
294
295
296	default:
297		error = EINVAL;
298	}
299	splx(s);
300	return (error);
301}
302
303/*
304 * tunoutput - queue packets from higher level ready to put out.
305 */
306int
307tunoutput(ifp, m0, dst, rt)
308	struct ifnet   *ifp;
309	struct mbuf    *m0;
310	struct sockaddr *dst;
311	struct rtentry *rt;
312{
313	struct tun_softc *tp = ifp->if_softc;
314	int		s;
315
316	TUNDEBUG ("%s%d: tunoutput\n", ifp->if_name, ifp->if_unit);
317
318	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
319		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
320			  ifp->if_unit, tp->tun_flags);
321		m_freem (m0);
322		return EHOSTDOWN;
323	}
324
325	/* BPF write needs to be handled specially */
326	if (dst->sa_family == AF_UNSPEC) {
327		dst->sa_family = *(mtod(m0, int *));
328		m0->m_len -= sizeof(int);
329		m0->m_pkthdr.len -= sizeof(int);
330		m0->m_data += sizeof(int);
331	}
332
333	if (ifp->if_bpf) {
334		/*
335		 * We need to prepend the address family as
336		 * a four byte field.  Cons up a dummy header
337		 * to pacify bpf.  This is safe because bpf
338		 * will only read from the mbuf (i.e., it won't
339		 * try to free it or keep a pointer to it).
340		 */
341		struct mbuf m;
342		u_int af = dst->sa_family;
343
344		m.m_next = m0;
345		m.m_len = 4;
346		m.m_data = (char *)&af;
347
348		bpf_mtap(ifp, &m);
349	}
350
351	/* prepend sockaddr? this may abort if the mbuf allocation fails */
352	if (tp->tun_flags & TUN_LMODE) {
353		/* allocate space for sockaddr */
354		M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
355
356		/* if allocation failed drop packet */
357		if (m0 == NULL){
358			s = splimp();	/* spl on queue manipulation */
359			IF_DROP(&ifp->if_snd);
360			splx(s);
361			ifp->if_oerrors++;
362			return (ENOBUFS);
363		} else {
364			bcopy(dst, m0->m_data, dst->sa_len);
365		}
366	}
367
368	if (tp->tun_flags & TUN_IFHEAD) {
369		/* Prepend the address family */
370		M_PREPEND(m0, 4, M_DONTWAIT);
371
372		/* if allocation failed drop packet */
373		if (m0 == NULL){
374			s = splimp();	/* spl on queue manipulation */
375			IF_DROP(&ifp->if_snd);
376			splx(s);
377			ifp->if_oerrors++;
378			return ENOBUFS;
379		} else
380			*(u_int32_t *)m0->m_data = htonl(dst->sa_family);
381	} else {
382#ifdef INET
383		if (dst->sa_family != AF_INET)
384#endif
385		{
386			m_freem(m0);
387			return EAFNOSUPPORT;
388		}
389	}
390
391	s = splimp();
392	if (IF_QFULL(&ifp->if_snd)) {
393		IF_DROP(&ifp->if_snd);
394		m_freem(m0);
395		splx(s);
396		ifp->if_collisions++;
397		return ENOBUFS;
398	}
399	ifp->if_obytes += m0->m_pkthdr.len;
400	IF_ENQUEUE(&ifp->if_snd, m0);
401	splx(s);
402	ifp->if_opackets++;
403
404	if (tp->tun_flags & TUN_RWAIT) {
405		tp->tun_flags &= ~TUN_RWAIT;
406		wakeup((caddr_t)tp);
407	}
408	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio)
409		pgsigio(tp->tun_sigio, SIGIO, 0);
410	selwakeup(&tp->tun_rsel);
411	return 0;
412}
413
414/*
415 * the cdevsw interface is now pretty minimal.
416 */
417static	int
418tunioctl(dev, cmd, data, flag, p)
419	dev_t		dev;
420	u_long		cmd;
421	caddr_t		data;
422	int		flag;
423	struct proc	*p;
424{
425	int		s;
426	struct tun_softc *tp = dev->si_drv1;
427 	struct tuninfo *tunp;
428
429	switch (cmd) {
430 	case TUNSIFINFO:
431 		tunp = (struct tuninfo *)data;
432		if (tunp->mtu < IF_MINMTU)
433			return (EINVAL);
434 		tp->tun_if.if_mtu = tunp->mtu;
435 		tp->tun_if.if_type = tunp->type;
436 		tp->tun_if.if_baudrate = tunp->baudrate;
437 		break;
438 	case TUNGIFINFO:
439 		tunp = (struct tuninfo *)data;
440 		tunp->mtu = tp->tun_if.if_mtu;
441 		tunp->type = tp->tun_if.if_type;
442 		tunp->baudrate = tp->tun_if.if_baudrate;
443 		break;
444	case TUNSDEBUG:
445		tundebug = *(int *)data;
446		break;
447	case TUNGDEBUG:
448		*(int *)data = tundebug;
449		break;
450	case TUNSLMODE:
451		if (*(int *)data) {
452			tp->tun_flags |= TUN_LMODE;
453			tp->tun_flags &= ~TUN_IFHEAD;
454		} else
455			tp->tun_flags &= ~TUN_LMODE;
456		break;
457	case TUNSIFHEAD:
458		if (*(int *)data) {
459			tp->tun_flags |= TUN_IFHEAD;
460			tp->tun_flags &= ~TUN_LMODE;
461		} else
462			tp->tun_flags &= ~TUN_IFHEAD;
463		break;
464	case TUNGIFHEAD:
465		*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
466		break;
467	case TUNSIFMODE:
468		/* deny this if UP */
469		if (tp->tun_if.if_flags & IFF_UP)
470			return(EBUSY);
471
472		switch (*(int *)data) {
473		case IFF_POINTOPOINT:
474			tp->tun_if.if_flags |= IFF_POINTOPOINT;
475			tp->tun_if.if_flags &= ~IFF_BROADCAST;
476			break;
477		case IFF_BROADCAST:
478			tp->tun_if.if_flags &= ~IFF_POINTOPOINT;
479			tp->tun_if.if_flags |= IFF_BROADCAST;
480			break;
481		default:
482			return(EINVAL);
483		}
484		break;
485	case TUNSIFPID:
486		tp->tun_pid = curproc->p_pid;
487		break;
488	case FIONBIO:
489		break;
490	case FIOASYNC:
491		if (*(int *)data)
492			tp->tun_flags |= TUN_ASYNC;
493		else
494			tp->tun_flags &= ~TUN_ASYNC;
495		break;
496	case FIONREAD:
497		s = splimp();
498		if (tp->tun_if.if_snd.ifq_head) {
499			struct mbuf *mb = tp->tun_if.if_snd.ifq_head;
500			for( *(int *)data = 0; mb != 0; mb = mb->m_next)
501				*(int *)data += mb->m_len;
502		} else
503			*(int *)data = 0;
504		splx(s);
505		break;
506	case FIOSETOWN:
507		return (fsetown(*(int *)data, &tp->tun_sigio));
508
509	case FIOGETOWN:
510		*(int *)data = fgetown(tp->tun_sigio);
511		return (0);
512
513	/* This is deprecated, FIOSETOWN should be used instead. */
514	case TIOCSPGRP:
515		return (fsetown(-(*(int *)data), &tp->tun_sigio));
516
517	/* This is deprecated, FIOGETOWN should be used instead. */
518	case TIOCGPGRP:
519		*(int *)data = -fgetown(tp->tun_sigio);
520		return (0);
521
522	default:
523		return (ENOTTY);
524	}
525	return (0);
526}
527
528/*
529 * The cdevsw read interface - reads a packet at a time, or at
530 * least as much of a packet as can be read.
531 */
532static	int
533tunread(dev, uio, flag)
534	dev_t dev;
535	struct uio *uio;
536	int flag;
537{
538	struct tun_softc *tp = dev->si_drv1;
539	struct ifnet	*ifp = &tp->tun_if;
540	struct mbuf	*m, *m0;
541	int		error=0, len, s;
542
543	TUNDEBUG ("%s%d: read\n", ifp->if_name, ifp->if_unit);
544	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
545		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
546			  ifp->if_unit, tp->tun_flags);
547		return EHOSTDOWN;
548	}
549
550	tp->tun_flags &= ~TUN_RWAIT;
551
552	s = splimp();
553	do {
554		IF_DEQUEUE(&ifp->if_snd, m0);
555		if (m0 == 0) {
556			if (flag & IO_NDELAY) {
557				splx(s);
558				return EWOULDBLOCK;
559			}
560			tp->tun_flags |= TUN_RWAIT;
561			if((error = tsleep((caddr_t)tp, PCATCH | (PZERO + 1),
562					"tunread", 0)) != 0) {
563				splx(s);
564				return error;
565			}
566		}
567	} while (m0 == 0);
568	splx(s);
569
570	while (m0 && uio->uio_resid > 0 && error == 0) {
571		len = min(uio->uio_resid, m0->m_len);
572		if (len == 0)
573			break;
574		error = uiomove(mtod(m0, caddr_t), len, uio);
575		MFREE(m0, m);
576		m0 = m;
577	}
578
579	if (m0) {
580		TUNDEBUG("Dropping mbuf\n");
581		m_freem(m0);
582	}
583	return error;
584}
585
586/*
587 * the cdevsw write interface - an atomic write is a packet - or else!
588 */
589static	int
590tunwrite(dev, uio, flag)
591	dev_t dev;
592	struct uio *uio;
593	int flag;
594{
595	struct tun_softc *tp = dev->si_drv1;
596	struct ifnet	*ifp = &tp->tun_if;
597	struct mbuf	*top, **mp, *m;
598	int		error=0, tlen, mlen;
599	u_int32_t	family;
600
601	TUNDEBUG("%s%d: tunwrite\n", ifp->if_name, ifp->if_unit);
602
603	if (uio->uio_resid == 0)
604		return 0;
605
606	if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
607		TUNDEBUG("%s%d: len=%d!\n", ifp->if_name, ifp->if_unit,
608		    uio->uio_resid);
609		return EIO;
610	}
611	tlen = uio->uio_resid;
612
613	/* get a header mbuf */
614	MGETHDR(m, M_DONTWAIT, MT_DATA);
615	if (m == NULL)
616		return ENOBUFS;
617	mlen = MHLEN;
618
619	top = 0;
620	mp = &top;
621	while (error == 0 && uio->uio_resid > 0) {
622		m->m_len = min(mlen, uio->uio_resid);
623		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
624		*mp = m;
625		mp = &m->m_next;
626		if (uio->uio_resid > 0) {
627			MGET (m, M_DONTWAIT, MT_DATA);
628			if (m == 0) {
629				error = ENOBUFS;
630				break;
631			}
632			mlen = MLEN;
633		}
634	}
635	if (error) {
636		if (top)
637			m_freem (top);
638		ifp->if_ierrors++;
639		return error;
640	}
641
642	top->m_pkthdr.len = tlen;
643	top->m_pkthdr.rcvif = ifp;
644
645	if (ifp->if_bpf) {
646		if (tp->tun_flags & TUN_IFHEAD)
647			/*
648			 * Conveniently, we already have a 4-byte address
649			 * family prepended to our packet !
650			 */
651			bpf_mtap(ifp, top);
652		else {
653			/*
654			 * We need to prepend the address family as
655			 * a four byte field.  Cons up a dummy header
656			 * to pacify bpf.  This is safe because bpf
657			 * will only read from the mbuf (i.e., it won't
658			 * try to free it or keep a pointer to it).
659			 */
660			struct mbuf m;
661			u_int af = AF_INET;
662
663			m.m_next = top;
664			m.m_len = 4;
665			m.m_data = (char *)&af;
666
667			bpf_mtap(ifp, &m);
668		}
669	}
670
671	if (tp->tun_flags & TUN_IFHEAD) {
672		if (top->m_len < sizeof(family) &&
673		    (top = m_pullup(top, sizeof(family))) == NULL)
674				return ENOBUFS;
675		family = ntohl(*mtod(top, u_int32_t *));
676		m_adj(top, sizeof(family));
677	} else
678		family = AF_INET;
679
680	ifp->if_ibytes += top->m_pkthdr.len;
681	ifp->if_ipackets++;
682
683	return family_enqueue(family, top);
684}
685
686/*
687 * tunpoll - the poll interface, this is only useful on reads
688 * really. The write detect always returns true, write never blocks
689 * anyway, it either accepts the packet or drops it.
690 */
691static	int
692tunpoll(dev, events, p)
693	dev_t dev;
694	int events;
695	struct proc *p;
696{
697	int		s;
698	struct tun_softc *tp = dev->si_drv1;
699	struct ifnet	*ifp = &tp->tun_if;
700	int		revents = 0;
701
702	s = splimp();
703	TUNDEBUG("%s%d: tunpoll\n", ifp->if_name, ifp->if_unit);
704
705	if (events & (POLLIN | POLLRDNORM)) {
706		if (ifp->if_snd.ifq_len > 0) {
707			TUNDEBUG("%s%d: tunpoll q=%d\n", ifp->if_name,
708			    ifp->if_unit, ifp->if_snd.ifq_len);
709			revents |= events & (POLLIN | POLLRDNORM);
710		} else {
711			TUNDEBUG("%s%d: tunpoll waiting\n", ifp->if_name,
712			    ifp->if_unit);
713			selrecord(p, &tp->tun_rsel);
714		}
715	}
716	if (events & (POLLOUT | POLLWRNORM))
717		revents |= events & (POLLOUT | POLLWRNORM);
718
719	splx(s);
720	return (revents);
721}
722