if_tun.c revision 69152
1/*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/
2
3/*
4 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
11 * user process to have its wicked way with. This driver has its
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 *
16 * $FreeBSD: head/sys/net/if_tun.c 69152 2000-11-25 07:35:38Z jlemon $
17 */
18
19#include "opt_inet.h"
20
21#include <sys/param.h>
22#include <sys/proc.h>
23#include <sys/systm.h>
24#include <sys/mbuf.h>
25#include <sys/socket.h>
26#include <sys/filio.h>
27#include <sys/sockio.h>
28#include <sys/ttycom.h>
29#include <sys/poll.h>
30#include <sys/signalvar.h>
31#include <sys/filedesc.h>
32#include <sys/kernel.h>
33#include <sys/sysctl.h>
34#include <sys/conf.h>
35#include <sys/uio.h>
36#include <sys/vnode.h>
37#include <sys/malloc.h>
38
39#include <net/if.h>
40#include <net/if_types.h>
41#include <net/route.h>
42#include <net/intrq.h>
43
44#ifdef INET
45#include <netinet/in.h>
46#endif
47
48#include <net/bpf.h>
49
50#include <net/if_tunvar.h>
51#include <net/if_tun.h>
52
53static MALLOC_DEFINE(M_TUN, "tun", "Tunnel Interface");
54
55static void tunattach __P((void *));
56PSEUDO_SET(tunattach, if_tun);
57
58static void tuncreate __P((dev_t dev));
59
60#define TUNDEBUG	if (tundebug) printf
61static int tundebug = 0;
62SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
63
64static int tunoutput __P((struct ifnet *, struct mbuf *, struct sockaddr *,
65	    struct rtentry *rt));
66static int tunifioctl __P((struct ifnet *, u_long, caddr_t));
67static int tuninit __P((struct ifnet *));
68
69static	d_open_t	tunopen;
70static	d_close_t	tunclose;
71static	d_read_t	tunread;
72static	d_write_t	tunwrite;
73static	d_ioctl_t	tunioctl;
74static	d_poll_t	tunpoll;
75
76#define CDEV_MAJOR 52
77static struct cdevsw tun_cdevsw = {
78	/* open */	tunopen,
79	/* close */	tunclose,
80	/* read */	tunread,
81	/* write */	tunwrite,
82	/* ioctl */	tunioctl,
83	/* poll */	tunpoll,
84	/* mmap */	nommap,
85	/* strategy */	nostrategy,
86	/* name */	"tun",
87	/* maj */	CDEV_MAJOR,
88	/* dump */	nodump,
89	/* psize */	nopsize,
90	/* flags */	0,
91	/* bmaj */	-1
92};
93
94static void tun_clone __P((void *arg, char *name, int namelen, dev_t *dev));
95
96static void
97tun_clone(arg, name, namelen, dev)
98	void *arg;
99	char *name;
100	int namelen;
101	dev_t *dev;
102{
103	int u;
104
105	if (*dev != NODEV)
106		return;
107	if (dev_stdclone(name, NULL, "tun", &u) != 1)
108		return;
109	/* XXX: minor encoding if u > 255 */
110	*dev = make_dev(&tun_cdevsw, u,
111	    UID_UUCP, GID_DIALER, 0600, "tun%d", u);
112
113}
114
115static void
116tunattach(dummy)
117	void *dummy;
118{
119
120	EVENTHANDLER_REGISTER(dev_clone, tun_clone, 0, 1000);
121	cdevsw_add(&tun_cdevsw);
122}
123
124static void
125tuncreate(dev)
126	dev_t dev;
127{
128	struct tun_softc *sc;
129	struct ifnet *ifp;
130
131	dev = make_dev(&tun_cdevsw, minor(dev),
132	    UID_UUCP, GID_DIALER, 0600, "tun%d", dev2unit(dev));
133
134	MALLOC(sc, struct tun_softc *, sizeof(*sc), M_TUN, M_WAITOK);
135	bzero(sc, sizeof *sc);
136	sc->tun_flags = TUN_INITED;
137
138	ifp = &sc->tun_if;
139	ifp->if_unit = dev2unit(dev);
140	ifp->if_name = "tun";
141	ifp->if_mtu = TUNMTU;
142	ifp->if_ioctl = tunifioctl;
143	ifp->if_output = tunoutput;
144	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
145	ifp->if_type = IFT_PPP;
146	ifp->if_snd.ifq_maxlen = ifqmaxlen;
147	ifp->if_softc = sc;
148	if_attach(ifp);
149	bpfattach(ifp, DLT_NULL, sizeof(u_int));
150	dev->si_drv1 = sc;
151}
152
153/*
154 * tunnel open - must be superuser & the device must be
155 * configured in
156 */
157static	int
158tunopen(dev, flag, mode, p)
159	dev_t	dev;
160	int	flag, mode;
161	struct proc *p;
162{
163	struct ifnet	*ifp;
164	struct tun_softc *tp;
165	register int	error;
166
167	error = suser(p);
168	if (error)
169		return (error);
170
171	tp = dev->si_drv1;
172	if (!tp) {
173		tuncreate(dev);
174		tp = dev->si_drv1;
175	}
176	if (tp->tun_flags & TUN_OPEN)
177		return EBUSY;
178	tp->tun_pid = p->p_pid;
179	ifp = &tp->tun_if;
180	tp->tun_flags |= TUN_OPEN;
181	TUNDEBUG("%s%d: open\n", ifp->if_name, ifp->if_unit);
182	return (0);
183}
184
185/*
186 * tunclose - close the device - mark i/f down & delete
187 * routing info
188 */
189static	int
190tunclose(dev, foo, bar, p)
191	dev_t dev;
192	int foo;
193	int bar;
194	struct proc *p;
195{
196	register int	s;
197	struct tun_softc *tp;
198	struct ifnet	*ifp;
199
200	tp = dev->si_drv1;
201	ifp = &tp->tun_if;
202
203	tp->tun_flags &= ~TUN_OPEN;
204	tp->tun_pid = 0;
205
206	/*
207	 * junk all pending output
208	 */
209	IF_DRAIN(&ifp->if_snd);
210
211	if (ifp->if_flags & IFF_UP) {
212		s = splimp();
213		if_down(ifp);
214		splx(s);
215	}
216
217	if (ifp->if_flags & IFF_RUNNING) {
218		register struct ifaddr *ifa;
219
220		s = splimp();
221		/* find internet addresses and delete routes */
222		for (ifa = ifp->if_addrhead.tqh_first; ifa;
223		    ifa = ifa->ifa_link.tqe_next)
224			if (ifa->ifa_addr->sa_family == AF_INET)
225				rtinit(ifa, (int)RTM_DELETE,
226				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
227		ifp->if_flags &= ~IFF_RUNNING;
228		splx(s);
229	}
230
231	funsetown(tp->tun_sigio);
232	selwakeup(&tp->tun_rsel);
233
234	TUNDEBUG ("%s%d: closed\n", ifp->if_name, ifp->if_unit);
235	return (0);
236}
237
238static int
239tuninit(ifp)
240	struct ifnet *ifp;
241{
242	struct tun_softc *tp = ifp->if_softc;
243	register struct ifaddr *ifa;
244	int error = 0;
245
246	TUNDEBUG("%s%d: tuninit\n", ifp->if_name, ifp->if_unit);
247
248	ifp->if_flags |= IFF_UP | IFF_RUNNING;
249	getmicrotime(&ifp->if_lastchange);
250
251	for (ifa = ifp->if_addrhead.tqh_first; ifa;
252	     ifa = ifa->ifa_link.tqe_next) {
253		if (ifa->ifa_addr == NULL)
254			error = EFAULT;
255			/* XXX: Should maybe return straight off? */
256		else {
257#ifdef INET
258			if (ifa->ifa_addr->sa_family == AF_INET) {
259			    struct sockaddr_in *si;
260
261			    si = (struct sockaddr_in *)ifa->ifa_addr;
262			    if (si->sin_addr.s_addr)
263				    tp->tun_flags |= TUN_IASET;
264
265			    si = (struct sockaddr_in *)ifa->ifa_dstaddr;
266			    if (si && si->sin_addr.s_addr)
267				    tp->tun_flags |= TUN_DSTADDR;
268			}
269#endif
270		}
271	}
272	return (error);
273}
274
275/*
276 * Process an ioctl request.
277 */
278int
279tunifioctl(ifp, cmd, data)
280	struct ifnet *ifp;
281	u_long	cmd;
282	caddr_t	data;
283{
284	struct ifreq *ifr = (struct ifreq *)data;
285	struct tun_softc *tp = ifp->if_softc;
286	struct ifstat *ifs;
287	int		error = 0, s;
288
289	s = splimp();
290	switch(cmd) {
291	case SIOCGIFSTATUS:
292		ifs = (struct ifstat *)data;
293		if (tp->tun_pid)
294			sprintf(ifs->ascii + strlen(ifs->ascii),
295			    "\tOpened by PID %d\n", tp->tun_pid);
296		break;
297	case SIOCSIFADDR:
298		error = tuninit(ifp);
299		TUNDEBUG("%s%d: address set, error=%d\n",
300			 ifp->if_name, ifp->if_unit, error);
301		break;
302	case SIOCSIFDSTADDR:
303		error = tuninit(ifp);
304		TUNDEBUG("%s%d: destination address set, error=%d\n",
305			 ifp->if_name, ifp->if_unit, error);
306		break;
307	case SIOCSIFMTU:
308		ifp->if_mtu = ifr->ifr_mtu;
309		TUNDEBUG("%s%d: mtu set\n",
310			 ifp->if_name, ifp->if_unit);
311		break;
312	case SIOCADDMULTI:
313	case SIOCDELMULTI:
314		break;
315	default:
316		error = EINVAL;
317	}
318	splx(s);
319	return (error);
320}
321
322/*
323 * tunoutput - queue packets from higher level ready to put out.
324 */
325int
326tunoutput(ifp, m0, dst, rt)
327	struct ifnet   *ifp;
328	struct mbuf    *m0;
329	struct sockaddr *dst;
330	struct rtentry *rt;
331{
332	struct tun_softc *tp = ifp->if_softc;
333
334	TUNDEBUG ("%s%d: tunoutput\n", ifp->if_name, ifp->if_unit);
335
336	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
337		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
338			  ifp->if_unit, tp->tun_flags);
339		m_freem (m0);
340		return EHOSTDOWN;
341	}
342
343	/* BPF write needs to be handled specially */
344	if (dst->sa_family == AF_UNSPEC) {
345		dst->sa_family = *(mtod(m0, int *));
346		m0->m_len -= sizeof(int);
347		m0->m_pkthdr.len -= sizeof(int);
348		m0->m_data += sizeof(int);
349	}
350
351	if (ifp->if_bpf) {
352		/*
353		 * We need to prepend the address family as
354		 * a four byte field.  Cons up a dummy header
355		 * to pacify bpf.  This is safe because bpf
356		 * will only read from the mbuf (i.e., it won't
357		 * try to free it or keep a pointer to it).
358		 */
359		struct mbuf m;
360		uint32_t af = dst->sa_family;
361
362		m.m_next = m0;
363		m.m_len = 4;
364		m.m_data = (char *)&af;
365
366		bpf_mtap(ifp, &m);
367	}
368
369	/* prepend sockaddr? this may abort if the mbuf allocation fails */
370	if (tp->tun_flags & TUN_LMODE) {
371		/* allocate space for sockaddr */
372		M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
373
374		/* if allocation failed drop packet */
375		if (m0 == NULL) {
376			ifp->if_iqdrops++;
377			ifp->if_oerrors++;
378			return (ENOBUFS);
379		} else {
380			bcopy(dst, m0->m_data, dst->sa_len);
381		}
382	}
383
384	if (tp->tun_flags & TUN_IFHEAD) {
385		/* Prepend the address family */
386		M_PREPEND(m0, 4, M_DONTWAIT);
387
388		/* if allocation failed drop packet */
389		if (m0 == NULL) {
390			ifp->if_iqdrops++;
391			ifp->if_oerrors++;
392			return ENOBUFS;
393		} else
394			*(u_int32_t *)m0->m_data = htonl(dst->sa_family);
395	} else {
396#ifdef INET
397		if (dst->sa_family != AF_INET)
398#endif
399		{
400			m_freem(m0);
401			return EAFNOSUPPORT;
402		}
403	}
404
405	if (! IF_HANDOFF(&ifp->if_snd, m0, NULL)) {
406		ifp->if_collisions++;
407		return ENOBUFS;
408	}
409	ifp->if_opackets++;
410
411	if (tp->tun_flags & TUN_RWAIT) {
412		tp->tun_flags &= ~TUN_RWAIT;
413		wakeup((caddr_t)tp);
414	}
415	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio)
416		pgsigio(tp->tun_sigio, SIGIO, 0);
417	selwakeup(&tp->tun_rsel);
418	return 0;
419}
420
421/*
422 * the cdevsw interface is now pretty minimal.
423 */
424static	int
425tunioctl(dev, cmd, data, flag, p)
426	dev_t		dev;
427	u_long		cmd;
428	caddr_t		data;
429	int		flag;
430	struct proc	*p;
431{
432	int		s;
433	struct tun_softc *tp = dev->si_drv1;
434 	struct tuninfo *tunp;
435
436	switch (cmd) {
437 	case TUNSIFINFO:
438 		tunp = (struct tuninfo *)data;
439		if (tunp->mtu < IF_MINMTU)
440			return (EINVAL);
441 		tp->tun_if.if_mtu = tunp->mtu;
442 		tp->tun_if.if_type = tunp->type;
443 		tp->tun_if.if_baudrate = tunp->baudrate;
444 		break;
445 	case TUNGIFINFO:
446 		tunp = (struct tuninfo *)data;
447 		tunp->mtu = tp->tun_if.if_mtu;
448 		tunp->type = tp->tun_if.if_type;
449 		tunp->baudrate = tp->tun_if.if_baudrate;
450 		break;
451	case TUNSDEBUG:
452		tundebug = *(int *)data;
453		break;
454	case TUNGDEBUG:
455		*(int *)data = tundebug;
456		break;
457	case TUNSLMODE:
458		if (*(int *)data) {
459			tp->tun_flags |= TUN_LMODE;
460			tp->tun_flags &= ~TUN_IFHEAD;
461		} else
462			tp->tun_flags &= ~TUN_LMODE;
463		break;
464	case TUNSIFHEAD:
465		if (*(int *)data) {
466			tp->tun_flags |= TUN_IFHEAD;
467			tp->tun_flags &= ~TUN_LMODE;
468		} else
469			tp->tun_flags &= ~TUN_IFHEAD;
470		break;
471	case TUNGIFHEAD:
472		*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
473		break;
474	case TUNSIFMODE:
475		/* deny this if UP */
476		if (tp->tun_if.if_flags & IFF_UP)
477			return(EBUSY);
478
479		switch (*(int *)data) {
480		case IFF_POINTOPOINT:
481			tp->tun_if.if_flags |= IFF_POINTOPOINT;
482			tp->tun_if.if_flags &= ~IFF_BROADCAST;
483			break;
484		case IFF_BROADCAST:
485			tp->tun_if.if_flags &= ~IFF_POINTOPOINT;
486			tp->tun_if.if_flags |= IFF_BROADCAST;
487			break;
488		default:
489			return(EINVAL);
490		}
491		break;
492	case TUNSIFPID:
493		tp->tun_pid = curproc->p_pid;
494		break;
495	case FIONBIO:
496		break;
497	case FIOASYNC:
498		if (*(int *)data)
499			tp->tun_flags |= TUN_ASYNC;
500		else
501			tp->tun_flags &= ~TUN_ASYNC;
502		break;
503	case FIONREAD:
504		s = splimp();
505		if (tp->tun_if.if_snd.ifq_head) {
506			struct mbuf *mb = tp->tun_if.if_snd.ifq_head;
507			for( *(int *)data = 0; mb != 0; mb = mb->m_next)
508				*(int *)data += mb->m_len;
509		} else
510			*(int *)data = 0;
511		splx(s);
512		break;
513	case FIOSETOWN:
514		return (fsetown(*(int *)data, &tp->tun_sigio));
515
516	case FIOGETOWN:
517		*(int *)data = fgetown(tp->tun_sigio);
518		return (0);
519
520	/* This is deprecated, FIOSETOWN should be used instead. */
521	case TIOCSPGRP:
522		return (fsetown(-(*(int *)data), &tp->tun_sigio));
523
524	/* This is deprecated, FIOGETOWN should be used instead. */
525	case TIOCGPGRP:
526		*(int *)data = -fgetown(tp->tun_sigio);
527		return (0);
528
529	default:
530		return (ENOTTY);
531	}
532	return (0);
533}
534
535/*
536 * The cdevsw read interface - reads a packet at a time, or at
537 * least as much of a packet as can be read.
538 */
539static	int
540tunread(dev, uio, flag)
541	dev_t dev;
542	struct uio *uio;
543	int flag;
544{
545	struct tun_softc *tp = dev->si_drv1;
546	struct ifnet	*ifp = &tp->tun_if;
547	struct mbuf	*m, *m0;
548	int		error=0, len, s;
549
550	TUNDEBUG ("%s%d: read\n", ifp->if_name, ifp->if_unit);
551	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
552		TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name,
553			  ifp->if_unit, tp->tun_flags);
554		return EHOSTDOWN;
555	}
556
557	tp->tun_flags &= ~TUN_RWAIT;
558
559	s = splimp();
560	do {
561		IF_DEQUEUE(&ifp->if_snd, m0);
562		if (m0 == 0) {
563			if (flag & IO_NDELAY) {
564				splx(s);
565				return EWOULDBLOCK;
566			}
567			tp->tun_flags |= TUN_RWAIT;
568			if((error = tsleep((caddr_t)tp, PCATCH | (PZERO + 1),
569					"tunread", 0)) != 0) {
570				splx(s);
571				return error;
572			}
573		}
574	} while (m0 == 0);
575	splx(s);
576
577	while (m0 && uio->uio_resid > 0 && error == 0) {
578		len = min(uio->uio_resid, m0->m_len);
579		if (len == 0)
580			break;
581		error = uiomove(mtod(m0, caddr_t), len, uio);
582		MFREE(m0, m);
583		m0 = m;
584	}
585
586	if (m0) {
587		TUNDEBUG("Dropping mbuf\n");
588		m_freem(m0);
589	}
590	return error;
591}
592
593/*
594 * the cdevsw write interface - an atomic write is a packet - or else!
595 */
596static	int
597tunwrite(dev, uio, flag)
598	dev_t dev;
599	struct uio *uio;
600	int flag;
601{
602	struct tun_softc *tp = dev->si_drv1;
603	struct ifnet	*ifp = &tp->tun_if;
604	struct mbuf	*top, **mp, *m;
605	int		error=0, tlen, mlen;
606	uint32_t	family;
607
608	TUNDEBUG("%s%d: tunwrite\n", ifp->if_name, ifp->if_unit);
609
610	if (uio->uio_resid == 0)
611		return 0;
612
613	if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
614		TUNDEBUG("%s%d: len=%d!\n", ifp->if_name, ifp->if_unit,
615		    uio->uio_resid);
616		return EIO;
617	}
618	tlen = uio->uio_resid;
619
620	/* get a header mbuf */
621	MGETHDR(m, M_DONTWAIT, MT_DATA);
622	if (m == NULL)
623		return ENOBUFS;
624	mlen = MHLEN;
625
626	top = 0;
627	mp = &top;
628	while (error == 0 && uio->uio_resid > 0) {
629		m->m_len = min(mlen, uio->uio_resid);
630		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
631		*mp = m;
632		mp = &m->m_next;
633		if (uio->uio_resid > 0) {
634			MGET (m, M_DONTWAIT, MT_DATA);
635			if (m == 0) {
636				error = ENOBUFS;
637				break;
638			}
639			mlen = MLEN;
640		}
641	}
642	if (error) {
643		if (top)
644			m_freem (top);
645		ifp->if_ierrors++;
646		return error;
647	}
648
649	top->m_pkthdr.len = tlen;
650	top->m_pkthdr.rcvif = ifp;
651
652	if (ifp->if_bpf) {
653		if (tp->tun_flags & TUN_IFHEAD) {
654			/*
655			 * Conveniently, we already have a 4-byte address
656			 * family prepended to our packet !
657			 * Inconveniently, it's in the wrong byte order !
658			 */
659			if ((top = m_pullup(top, sizeof(family))) == NULL)
660				return ENOBUFS;
661			*mtod(top, u_int32_t *) =
662			    ntohl(*mtod(top, u_int32_t *));
663			bpf_mtap(ifp, top);
664			*mtod(top, u_int32_t *) =
665			    htonl(*mtod(top, u_int32_t *));
666		} else {
667			/*
668			 * We need to prepend the address family as
669			 * a four byte field.  Cons up a dummy header
670			 * to pacify bpf.  This is safe because bpf
671			 * will only read from the mbuf (i.e., it won't
672			 * try to free it or keep a pointer to it).
673			 */
674			struct mbuf m;
675			uint32_t af = AF_INET;
676
677			m.m_next = top;
678			m.m_len = 4;
679			m.m_data = (char *)&af;
680
681			bpf_mtap(ifp, &m);
682		}
683	}
684
685	if (tp->tun_flags & TUN_IFHEAD) {
686		if (top->m_len < sizeof(family) &&
687		    (top = m_pullup(top, sizeof(family))) == NULL)
688				return ENOBUFS;
689		family = ntohl(*mtod(top, u_int32_t *));
690		m_adj(top, sizeof(family));
691	} else
692		family = AF_INET;
693
694	ifp->if_ibytes += top->m_pkthdr.len;
695	ifp->if_ipackets++;
696
697	return family_enqueue(family, top);
698}
699
700/*
701 * tunpoll - the poll interface, this is only useful on reads
702 * really. The write detect always returns true, write never blocks
703 * anyway, it either accepts the packet or drops it.
704 */
705static	int
706tunpoll(dev, events, p)
707	dev_t dev;
708	int events;
709	struct proc *p;
710{
711	int		s;
712	struct tun_softc *tp = dev->si_drv1;
713	struct ifnet	*ifp = &tp->tun_if;
714	int		revents = 0;
715
716	s = splimp();
717	TUNDEBUG("%s%d: tunpoll\n", ifp->if_name, ifp->if_unit);
718
719	if (events & (POLLIN | POLLRDNORM)) {
720		if (ifp->if_snd.ifq_len > 0) {
721			TUNDEBUG("%s%d: tunpoll q=%d\n", ifp->if_name,
722			    ifp->if_unit, ifp->if_snd.ifq_len);
723			revents |= events & (POLLIN | POLLRDNORM);
724		} else {
725			TUNDEBUG("%s%d: tunpoll waiting\n", ifp->if_name,
726			    ifp->if_unit);
727			selrecord(p, &tp->tun_rsel);
728		}
729	}
730	if (events & (POLLOUT | POLLWRNORM))
731		revents |= events & (POLLOUT | POLLWRNORM);
732
733	splx(s);
734	return (revents);
735}
736