if_gif.c revision 241610
1/*	$FreeBSD: head/sys/net/if_gif.c 241610 2012-10-16 13:37:54Z glebius $	*/
2/*	$KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $	*/
3
4/*-
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include "opt_inet.h"
34#include "opt_inet6.h"
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/jail.h>
39#include <sys/kernel.h>
40#include <sys/malloc.h>
41#include <sys/mbuf.h>
42#include <sys/module.h>
43#include <sys/socket.h>
44#include <sys/sockio.h>
45#include <sys/errno.h>
46#include <sys/time.h>
47#include <sys/sysctl.h>
48#include <sys/syslog.h>
49#include <sys/priv.h>
50#include <sys/proc.h>
51#include <sys/protosw.h>
52#include <sys/conf.h>
53#include <machine/cpu.h>
54
55#include <net/if.h>
56#include <net/if_clone.h>
57#include <net/if_types.h>
58#include <net/netisr.h>
59#include <net/route.h>
60#include <net/bpf.h>
61#include <net/vnet.h>
62
63#include <netinet/in.h>
64#include <netinet/in_systm.h>
65#include <netinet/ip.h>
66#ifdef	INET
67#include <netinet/in_var.h>
68#include <netinet/in_gif.h>
69#include <netinet/ip_var.h>
70#endif	/* INET */
71
72#ifdef INET6
73#ifndef INET
74#include <netinet/in.h>
75#endif
76#include <netinet6/in6_var.h>
77#include <netinet/ip6.h>
78#include <netinet6/ip6_var.h>
79#include <netinet6/scope6_var.h>
80#include <netinet6/in6_gif.h>
81#include <netinet6/ip6protosw.h>
82#endif /* INET6 */
83
84#include <netinet/ip_encap.h>
85#include <net/ethernet.h>
86#include <net/if_bridgevar.h>
87#include <net/if_gif.h>
88
89#include <security/mac/mac_framework.h>
90
/* Driver name; passed to if_initname() and if_clone_simple() below. */
static const char gifname[] = "gif";

/*
 * gif_mtx protects the global gif_softc_list.
 */
static struct mtx gif_mtx;
static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
/* Per-vnet list of all gif softcs; entries are linked through gif_list. */
static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
#define	V_gif_softc_list	VNET(gif_softc_list)

/*
 * Optional hook pointers.  Within this file each one is only ever called
 * when it is non-NULL; nothing here assigns them.
 * NOTE(review): presumably installed by the netgraph gif node -- confirm.
 */
void	(*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
void	(*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
void	(*ng_gif_attach_p)(struct ifnet *ifp);
void	(*ng_gif_detach_p)(struct ifnet *ifp);

static void	gif_start(struct ifnet *);
static int	gif_clone_create(struct if_clone *, int, caddr_t);
static void	gif_clone_destroy(struct ifnet *);
/* Cloner handle returned by if_clone_simple() at MOD_LOAD. */
static struct if_clone *gif_cloner;

static int gifmodevent(module_t, int, void *);

SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
    "Generic Tunnel Interface");
#ifndef MAX_GIF_NEST
/*
 * This macro controls the default upper limitation on nesting of gif tunnels.
 * Since, setting a large value to this macro with a careless configuration
 * may introduce system crash, we don't allow any nestings by default.
 * If you need to configure nested gif tunnels, you can define this macro
 * in your kernel configuration file.  However, if you do so, please be
 * careful to configure the tunnels so that it won't make a loop.
 */
#define MAX_GIF_NEST 1
#endif
/* Run-time nesting limit; gif_output() drops packets that exceed it. */
static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST;
#define	V_max_gif_nesting	VNET(max_gif_nesting)
SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_RW,
    &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");

/*
 * By default, we disallow creation of multiple tunnels between the same
 * pair of addresses.  Some applications require this functionality so
 * we allow control over this check here.
 */
#ifdef XBONEHACK
static VNET_DEFINE(int, parallel_tunnels) = 1;
#else
static VNET_DEFINE(int, parallel_tunnels) = 0;
#endif
#define	V_parallel_tunnels	VNET(parallel_tunnels)
SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, parallel_tunnels, CTLFLAG_RW,
    &VNET_NAME(parallel_tunnels), 0, "Allow parallel tunnels?");

/* copy from src/sys/net/if_ethersubr.c */
static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
/* Fallback definition, used only if no header already provides the macro. */
#ifndef ETHER_IS_BROADCAST
#define ETHER_IS_BROADCAST(addr) \
	(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
#endif
153
154static int
155gif_clone_create(ifc, unit, params)
156	struct if_clone *ifc;
157	int unit;
158	caddr_t params;
159{
160	struct gif_softc *sc;
161
162	sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
163	sc->gif_fibnum = curthread->td_proc->p_fibnum;
164	GIF2IFP(sc) = if_alloc(IFT_GIF);
165	if (GIF2IFP(sc) == NULL) {
166		free(sc, M_GIF);
167		return (ENOSPC);
168	}
169
170	GIF_LOCK_INIT(sc);
171
172	GIF2IFP(sc)->if_softc = sc;
173	if_initname(GIF2IFP(sc), gifname, unit);
174
175	sc->encap_cookie4 = sc->encap_cookie6 = NULL;
176	sc->gif_options = GIF_ACCEPT_REVETHIP;
177
178	GIF2IFP(sc)->if_addrlen = 0;
179	GIF2IFP(sc)->if_mtu    = GIF_MTU;
180	GIF2IFP(sc)->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
181#if 0
182	/* turn off ingress filter */
183	GIF2IFP(sc)->if_flags  |= IFF_LINK2;
184#endif
185	GIF2IFP(sc)->if_ioctl  = gif_ioctl;
186	GIF2IFP(sc)->if_start  = gif_start;
187	GIF2IFP(sc)->if_output = gif_output;
188	GIF2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
189	if_attach(GIF2IFP(sc));
190	bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
191	if (ng_gif_attach_p != NULL)
192		(*ng_gif_attach_p)(GIF2IFP(sc));
193
194	mtx_lock(&gif_mtx);
195	LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
196	mtx_unlock(&gif_mtx);
197
198	return (0);
199}
200
201static void
202gif_clone_destroy(ifp)
203	struct ifnet *ifp;
204{
205#if defined(INET) || defined(INET6)
206	int err;
207#endif
208	struct gif_softc *sc = ifp->if_softc;
209
210	mtx_lock(&gif_mtx);
211	LIST_REMOVE(sc, gif_list);
212	mtx_unlock(&gif_mtx);
213
214	gif_delete_tunnel(ifp);
215#ifdef INET6
216	if (sc->encap_cookie6 != NULL) {
217		err = encap_detach(sc->encap_cookie6);
218		KASSERT(err == 0, ("Unexpected error detaching encap_cookie6"));
219	}
220#endif
221#ifdef INET
222	if (sc->encap_cookie4 != NULL) {
223		err = encap_detach(sc->encap_cookie4);
224		KASSERT(err == 0, ("Unexpected error detaching encap_cookie4"));
225	}
226#endif
227
228	if (ng_gif_detach_p != NULL)
229		(*ng_gif_detach_p)(ifp);
230	bpfdetach(ifp);
231	if_detach(ifp);
232	if_free(ifp);
233
234	GIF_LOCK_DESTROY(sc);
235
236	free(sc, M_GIF);
237}
238
239static void
240vnet_gif_init(const void *unused __unused)
241{
242
243	LIST_INIT(&V_gif_softc_list);
244}
245VNET_SYSINIT(vnet_gif_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, vnet_gif_init,
246    NULL);
247
248static int
249gifmodevent(mod, type, data)
250	module_t mod;
251	int type;
252	void *data;
253{
254
255	switch (type) {
256	case MOD_LOAD:
257		mtx_init(&gif_mtx, "gif_mtx", NULL, MTX_DEF);
258		gif_cloner = if_clone_simple(gifname, gif_clone_create,
259		    gif_clone_destroy, 0);
260		break;
261
262	case MOD_UNLOAD:
263		if_clone_detach(gif_cloner);
264		mtx_destroy(&gif_mtx);
265		break;
266	default:
267		return EOPNOTSUPP;
268	}
269	return 0;
270}
271
272static moduledata_t gif_mod = {
273	"if_gif",
274	gifmodevent,
275	0
276};
277
278DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
279MODULE_VERSION(if_gif, 1);
280
281int
282gif_encapcheck(m, off, proto, arg)
283	const struct mbuf *m;
284	int off;
285	int proto;
286	void *arg;
287{
288	struct ip ip;
289	struct gif_softc *sc;
290
291	sc = (struct gif_softc *)arg;
292	if (sc == NULL)
293		return 0;
294
295	if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
296		return 0;
297
298	/* no physical address */
299	if (!sc->gif_psrc || !sc->gif_pdst)
300		return 0;
301
302	switch (proto) {
303#ifdef INET
304	case IPPROTO_IPV4:
305		break;
306#endif
307#ifdef INET6
308	case IPPROTO_IPV6:
309		break;
310#endif
311	case IPPROTO_ETHERIP:
312		break;
313
314	default:
315		return 0;
316	}
317
318	/* Bail on short packets */
319	if (m->m_pkthdr.len < sizeof(ip))
320		return 0;
321
322	m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
323
324	switch (ip.ip_v) {
325#ifdef INET
326	case 4:
327		if (sc->gif_psrc->sa_family != AF_INET ||
328		    sc->gif_pdst->sa_family != AF_INET)
329			return 0;
330		return gif_encapcheck4(m, off, proto, arg);
331#endif
332#ifdef INET6
333	case 6:
334		if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
335			return 0;
336		if (sc->gif_psrc->sa_family != AF_INET6 ||
337		    sc->gif_pdst->sa_family != AF_INET6)
338			return 0;
339		return gif_encapcheck6(m, off, proto, arg);
340#endif
341	default:
342		return 0;
343	}
344}
345#ifdef INET
346#define GIF_HDR_LEN (ETHER_HDR_LEN + sizeof (struct ip))
347#endif
348#ifdef INET6
349#define GIF_HDR_LEN6 (ETHER_HDR_LEN + sizeof (struct ip6_hdr))
350#endif
351
352static void
353gif_start(struct ifnet *ifp)
354{
355	struct gif_softc *sc;
356	struct mbuf *m;
357	uint32_t af;
358	int error = 0;
359
360	sc = ifp->if_softc;
361	GIF_LOCK(sc);
362	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
363	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
364
365		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
366		if (m == 0)
367			break;
368
369#ifdef ALTQ
370		/* Take out those altq bytes we add in gif_output  */
371#ifdef INET
372		if (sc->gif_psrc->sa_family == AF_INET)
373			m->m_pkthdr.len -= GIF_HDR_LEN;
374#endif
375#ifdef INET6
376		if (sc->gif_psrc->sa_family == AF_INET6)
377		    m->m_pkthdr.len -= GIF_HDR_LEN6;
378#endif
379#endif
380		/*
381		 * Now pull back the af that we
382		 * stashed in the csum_data.
383		 */
384		af = m->m_pkthdr.csum_data;
385
386		if (ifp->if_bridge)
387			af = AF_LINK;
388
389		BPF_MTAP2(ifp, &af, sizeof(af), m);
390		ifp->if_opackets++;
391
392/*              Done by IFQ_HANDOFF */
393/* 		ifp->if_obytes += m->m_pkthdr.len;*/
394		/* override to IPPROTO_ETHERIP for bridged traffic */
395
396		M_SETFIB(m, sc->gif_fibnum);
397		/* inner AF-specific encapsulation */
398		/* XXX should we check if our outer source is legal? */
399		/* dispatch to output logic based on outer AF */
400		switch (sc->gif_psrc->sa_family) {
401#ifdef INET
402		case AF_INET:
403			error = in_gif_output(ifp, af, m);
404			break;
405#endif
406#ifdef INET6
407		case AF_INET6:
408			error = in6_gif_output(ifp, af, m);
409			break;
410#endif
411		default:
412			m_freem(m);
413			error = ENETDOWN;
414		}
415		if (error)
416			ifp->if_oerrors++;
417
418	}
419	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
420	GIF_UNLOCK(sc);
421	return;
422}
423
424int
425gif_output(ifp, m, dst, ro)
426	struct ifnet *ifp;
427	struct mbuf *m;
428	struct sockaddr *dst;
429	struct route *ro;
430{
431	struct gif_softc *sc = ifp->if_softc;
432	struct m_tag *mtag;
433	int error = 0;
434	int gif_called;
435	uint32_t af;
436#ifdef MAC
437	error = mac_ifnet_check_transmit(ifp, m);
438	if (error) {
439		m_freem(m);
440		goto end;
441	}
442#endif
443
444	/*
445	 * gif may cause infinite recursion calls when misconfigured.
446	 * We'll prevent this by detecting loops.
447	 *
448	 * High nesting level may cause stack exhaustion.
449	 * We'll prevent this by introducing upper limit.
450	 */
451	gif_called = 1;
452	mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL);
453	while (mtag != NULL) {
454		if (*(struct ifnet **)(mtag + 1) == ifp) {
455			log(LOG_NOTICE,
456			    "gif_output: loop detected on %s\n",
457			    (*(struct ifnet **)(mtag + 1))->if_xname);
458			m_freem(m);
459			error = EIO;	/* is there better errno? */
460			goto end;
461		}
462		mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag);
463		gif_called++;
464	}
465	if (gif_called > V_max_gif_nesting) {
466		log(LOG_NOTICE,
467		    "gif_output: recursively called too many times(%d)\n",
468		    gif_called);
469		m_freem(m);
470		error = EIO;	/* is there better errno? */
471		goto end;
472	}
473	mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *),
474	    M_NOWAIT);
475	if (mtag == NULL) {
476		m_freem(m);
477		error = ENOMEM;
478		goto end;
479	}
480	*(struct ifnet **)(mtag + 1) = ifp;
481	m_tag_prepend(m, mtag);
482
483	m->m_flags &= ~(M_BCAST|M_MCAST);
484	/* BPF writes need to be handled specially. */
485	if (dst->sa_family == AF_UNSPEC) {
486		bcopy(dst->sa_data, &af, sizeof(af));
487		dst->sa_family = af;
488	}
489	af = dst->sa_family;
490	/*
491	 * Now save the af in the inbound pkt csum
492	 * data, this is a cheat since we are using
493	 * the inbound csum_data field to carry the
494	 * af over to the gif_start() routine, avoiding
495	 * using yet another mtag.
496	 */
497	m->m_pkthdr.csum_data = af;
498	if (!(ifp->if_flags & IFF_UP) ||
499	    sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
500		m_freem(m);
501		error = ENETDOWN;
502		goto end;
503	}
504#ifdef ALTQ
505	/*
506	 * Make altq aware of the bytes we will add
507	 * when we actually send it.
508	 */
509#ifdef INET
510	if (sc->gif_psrc->sa_family == AF_INET)
511		m->m_pkthdr.len += GIF_HDR_LEN;
512#endif
513#ifdef INET6
514	if (sc->gif_psrc->sa_family == AF_INET6)
515		m->m_pkthdr.len += GIF_HDR_LEN6;
516#endif
517#endif
518	/*
519	 * Queue message on interface, update output statistics if
520	 * successful, and start output if interface not yet active.
521	 */
522	IFQ_HANDOFF(ifp, m, error);
523  end:
524	if (error)
525		ifp->if_oerrors++;
526	return (error);
527}
528
529void
530gif_input(m, af, ifp)
531	struct mbuf *m;
532	int af;
533	struct ifnet *ifp;
534{
535	int isr, n;
536	struct gif_softc *sc;
537	struct etherip_header *eip;
538	struct ether_header *eh;
539	struct ifnet *oldifp;
540
541	if (ifp == NULL) {
542		/* just in case */
543		m_freem(m);
544		return;
545	}
546	sc = ifp->if_softc;
547	m->m_pkthdr.rcvif = ifp;
548
549#ifdef MAC
550	mac_ifnet_create_mbuf(ifp, m);
551#endif
552
553	if (bpf_peers_present(ifp->if_bpf)) {
554		u_int32_t af1 = af;
555		bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
556	}
557
558	if (ng_gif_input_p != NULL) {
559		(*ng_gif_input_p)(ifp, &m, af);
560		if (m == NULL)
561			return;
562	}
563
564	/*
565	 * Put the packet to the network layer input queue according to the
566	 * specified address family.
567	 * Note: older versions of gif_input directly called network layer
568	 * input functions, e.g. ip6_input, here.  We changed the policy to
569	 * prevent too many recursive calls of such input functions, which
570	 * might cause kernel panic.  But the change may introduce another
571	 * problem; if the input queue is full, packets are discarded.
572	 * The kernel stack overflow really happened, and we believed
573	 * queue-full rarely occurs, so we changed the policy.
574	 */
575	switch (af) {
576#ifdef INET
577	case AF_INET:
578		isr = NETISR_IP;
579		break;
580#endif
581#ifdef INET6
582	case AF_INET6:
583		isr = NETISR_IPV6;
584		break;
585#endif
586	case AF_LINK:
587		n = sizeof(struct etherip_header) + sizeof(struct ether_header);
588		if (n > m->m_len) {
589			m = m_pullup(m, n);
590			if (m == NULL) {
591				ifp->if_ierrors++;
592				return;
593			}
594		}
595
596		eip = mtod(m, struct etherip_header *);
597		/*
598		 * GIF_ACCEPT_REVETHIP (enabled by default) intentionally
599		 * accepts an EtherIP packet with revered version field in
600		 * the header.  This is a knob for backward compatibility
601		 * with FreeBSD 7.2R or prior.
602		 */
603		if (sc->gif_options & GIF_ACCEPT_REVETHIP) {
604			if (eip->eip_resvl != ETHERIP_VERSION
605			    && eip->eip_ver != ETHERIP_VERSION) {
606				/* discard unknown versions */
607				m_freem(m);
608				return;
609			}
610		} else {
611			if (eip->eip_ver != ETHERIP_VERSION) {
612				/* discard unknown versions */
613				m_freem(m);
614				return;
615			}
616		}
617		m_adj(m, sizeof(struct etherip_header));
618
619		m->m_flags &= ~(M_BCAST|M_MCAST);
620		m->m_pkthdr.rcvif = ifp;
621
622		if (ifp->if_bridge) {
623			oldifp = ifp;
624			eh = mtod(m, struct ether_header *);
625			if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
626				if (ETHER_IS_BROADCAST(eh->ether_dhost))
627					m->m_flags |= M_BCAST;
628				else
629					m->m_flags |= M_MCAST;
630				ifp->if_imcasts++;
631			}
632			BRIDGE_INPUT(ifp, m);
633
634			if (m != NULL && ifp != oldifp) {
635				/*
636				 * The bridge gave us back itself or one of the
637				 * members for which the frame is addressed.
638				 */
639				ether_demux(ifp, m);
640				return;
641			}
642		}
643		if (m != NULL)
644			m_freem(m);
645		return;
646
647	default:
648		if (ng_gif_input_orphan_p != NULL)
649			(*ng_gif_input_orphan_p)(ifp, m, af);
650		else
651			m_freem(m);
652		return;
653	}
654
655	ifp->if_ipackets++;
656	ifp->if_ibytes += m->m_pkthdr.len;
657	M_SETFIB(m, ifp->if_fib);
658	netisr_dispatch(isr, m);
659}
660
/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
/*
 * if_ioctl method for gif interfaces.
 *
 * Handles address/flag/MTU requests, setting, deleting and reading the
 * tunnel's physical (outer) endpoints, and the GIFGOPTS/GIFSOPTS option
 * requests.  `data' is interpreted according to `cmd' (struct ifreq,
 * in_aliasreq, in6_aliasreq, in6_ifreq or if_laddrreq).
 *
 * Returns 0 on success or an errno value; unknown commands get EINVAL.
 * Note that error exits are a mix of direct `return' and `goto bad';
 * both simply return the error, no cleanup is performed at `bad'.
 */
int
gif_ioctl(ifp, cmd, data)
	struct ifnet *ifp;
	u_long cmd;
	caddr_t data;
{
	struct gif_softc *sc  = ifp->if_softc;
	struct ifreq     *ifr = (struct ifreq*)data;
	int error = 0, size;
	u_int	options;
	struct sockaddr *dst, *src;
#ifdef	SIOCSIFMTU /* xxx */
	u_long mtu;
#endif

	switch (cmd) {
	case SIOCSIFADDR:
		/* Assigning an address also brings the interface up. */
		ifp->if_flags |= IFF_UP;
		break;

	case SIOCSIFDSTADDR:
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

#ifdef	SIOCSIFMTU /* xxx */
	case SIOCGIFMTU:
		break;

	case SIOCSIFMTU:
		/* Accept only MTUs within [GIF_MTU_MIN, GIF_MTU_MAX]. */
		mtu = ifr->ifr_mtu;
		if (mtu < GIF_MTU_MIN || mtu > GIF_MTU_MAX)
			return (EINVAL);
		ifp->if_mtu = mtu;
		break;
#endif /* SIOCSIFMTU */

	/*
	 * Set the tunnel endpoints.  The three request flavours differ
	 * only in the structure that carries the source/destination pair.
	 */
#ifdef INET
	case SIOCSIFPHYADDR:
#endif
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif /* INET6 */
	case SIOCSLIFPHYADDR:
		/* Locate src/dst inside the request structure. */
		switch (cmd) {
#ifdef INET
		case SIOCSIFPHYADDR:
			src = (struct sockaddr *)
				&(((struct in_aliasreq *)data)->ifra_addr);
			dst = (struct sockaddr *)
				&(((struct in_aliasreq *)data)->ifra_dstaddr);
			break;
#endif
#ifdef INET6
		case SIOCSIFPHYADDR_IN6:
			src = (struct sockaddr *)
				&(((struct in6_aliasreq *)data)->ifra_addr);
			dst = (struct sockaddr *)
				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
			break;
#endif
		case SIOCSLIFPHYADDR:
			src = (struct sockaddr *)
				&(((struct if_laddrreq *)data)->addr);
			dst = (struct sockaddr *)
				&(((struct if_laddrreq *)data)->dstaddr);
			break;
		default:
			return EINVAL;
		}

		/* sa_family must be equal */
		if (src->sa_family != dst->sa_family)
			return EINVAL;

		/* validate sa_len */
		switch (src->sa_family) {
#ifdef INET
		case AF_INET:
			if (src->sa_len != sizeof(struct sockaddr_in))
				return EINVAL;
			break;
#endif
#ifdef INET6
		case AF_INET6:
			if (src->sa_len != sizeof(struct sockaddr_in6))
				return EINVAL;
			break;
#endif
		default:
			return EAFNOSUPPORT;
		}
		switch (dst->sa_family) {
#ifdef INET
		case AF_INET:
			if (dst->sa_len != sizeof(struct sockaddr_in))
				return EINVAL;
			break;
#endif
#ifdef INET6
		case AF_INET6:
			if (dst->sa_len != sizeof(struct sockaddr_in6))
				return EINVAL;
			break;
#endif
		default:
			return EAFNOSUPPORT;
		}

		/* check sa_family looks sane for the cmd */
		switch (cmd) {
		case SIOCSIFPHYADDR:
			if (src->sa_family == AF_INET)
				break;
			return EAFNOSUPPORT;
#ifdef INET6
		case SIOCSIFPHYADDR_IN6:
			if (src->sa_family == AF_INET6)
				break;
			return EAFNOSUPPORT;
#endif /* INET6 */
		case SIOCSLIFPHYADDR:
			/* checks done in the above */
			break;
		}

		error = gif_set_tunnel(GIF2IFP(sc), src, dst);
		break;

#ifdef SIOCDIFPHYADDR
	case SIOCDIFPHYADDR:
		gif_delete_tunnel(GIF2IFP(sc));
		break;
#endif

	/* Read back the tunnel source address. */
	case SIOCGIFPSRCADDR:
#ifdef INET6
	case SIOCGIFPSRCADDR_IN6:
#endif /* INET6 */
		if (sc->gif_psrc == NULL) {
			error = EADDRNOTAVAIL;
			goto bad;
		}
		src = sc->gif_psrc;
		/* Pick the output buffer (and its size) for this request. */
		switch (cmd) {
#ifdef INET
		case SIOCGIFPSRCADDR:
			dst = &ifr->ifr_addr;
			size = sizeof(ifr->ifr_addr);
			break;
#endif /* INET */
#ifdef INET6
		case SIOCGIFPSRCADDR_IN6:
			dst = (struct sockaddr *)
				&(((struct in6_ifreq *)data)->ifr_addr);
			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
			break;
#endif /* INET6 */
		default:
			error = EADDRNOTAVAIL;
			goto bad;
		}
		/* Refuse to overflow the caller's buffer. */
		if (src->sa_len > size)
			return EINVAL;
		/*
		 * NOTE(review): unlike SIOCGIFPDSTADDR below, no prison_if()
		 * check is made before the address is copied out -- confirm
		 * whether that asymmetry is intended.
		 */
		bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
#ifdef INET6
		/* Convert the stored scoped address back for the caller. */
		if (dst->sa_family == AF_INET6) {
			error = sa6_recoverscope((struct sockaddr_in6 *)dst);
			if (error != 0)
				return (error);
		}
#endif
		break;

	/* Read back the tunnel destination address. */
	case SIOCGIFPDSTADDR:
#ifdef INET6
	case SIOCGIFPDSTADDR_IN6:
#endif /* INET6 */
		if (sc->gif_pdst == NULL) {
			error = EADDRNOTAVAIL;
			goto bad;
		}
		src = sc->gif_pdst;
		/* Pick the output buffer (and its size) for this request. */
		switch (cmd) {
#ifdef INET
		case SIOCGIFPDSTADDR:
			dst = &ifr->ifr_addr;
			size = sizeof(ifr->ifr_addr);
			break;
#endif /* INET */
#ifdef INET6
		case SIOCGIFPDSTADDR_IN6:
			dst = (struct sockaddr *)
				&(((struct in6_ifreq *)data)->ifr_addr);
			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
			break;
#endif /* INET6 */
		default:
			error = EADDRNOTAVAIL;
			goto bad;
		}
		if (src->sa_len > size)
			return EINVAL;
		error = prison_if(curthread->td_ucred, src);
		if (error != 0)
			return (error);
		/*
		 * NOTE(review): dst is the caller's output buffer and has
		 * not been filled in yet at this point -- confirm that
		 * checking it with prison_if() is intended.
		 */
		error = prison_if(curthread->td_ucred, dst);
		if (error != 0)
			return (error);
		bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
#ifdef INET6
		if (dst->sa_family == AF_INET6) {
			error = sa6_recoverscope((struct sockaddr_in6 *)dst);
			if (error != 0)
				return (error);
		}
#endif
		break;

	/* Read back both endpoints in one if_laddrreq. */
	case SIOCGLIFPHYADDR:
		if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
			error = EADDRNOTAVAIL;
			goto bad;
		}

		/* copy src */
		src = sc->gif_psrc;
		dst = (struct sockaddr *)
			&(((struct if_laddrreq *)data)->addr);
		size = sizeof(((struct if_laddrreq *)data)->addr);
		if (src->sa_len > size)
			return EINVAL;
		bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);

		/* copy dst */
		src = sc->gif_pdst;
		dst = (struct sockaddr *)
			&(((struct if_laddrreq *)data)->dstaddr);
		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
		if (src->sa_len > size)
			return EINVAL;
		bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
		break;

	case SIOCSIFFLAGS:
		/* if_ioctl() takes care of it */
		break;

	case GIFGOPTS:
		/* Copy the current option word out to user space. */
		options = sc->gif_options;
		error = copyout(&options, ifr->ifr_data,
				sizeof(options));
		break;

	case GIFSOPTS:
		/* Privileged: replace the option word after validating it. */
		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
			break;
		error = copyin(ifr->ifr_data, &options, sizeof(options));
		if (error)
			break;
		/* Reject any bit outside GIF_OPTMASK. */
		if (options & ~GIF_OPTMASK)
			error = EINVAL;
		else
			sc->gif_options = options;
		break;

	default:
		error = EINVAL;
		break;
	}
 bad:
	return error;
}
937
938/*
939 * XXXRW: There's a general event-ordering issue here: the code to check
940 * if a given tunnel is already present happens before we perform a
941 * potentially blocking setup of the tunnel.  This code needs to be
942 * re-ordered so that the check and replacement can be atomic using
943 * a mutex.
944 */
945int
946gif_set_tunnel(ifp, src, dst)
947	struct ifnet *ifp;
948	struct sockaddr *src;
949	struct sockaddr *dst;
950{
951	struct gif_softc *sc = ifp->if_softc;
952	struct gif_softc *sc2;
953	struct sockaddr *osrc, *odst, *sa;
954	int error = 0;
955
956	mtx_lock(&gif_mtx);
957	LIST_FOREACH(sc2, &V_gif_softc_list, gif_list) {
958		if (sc2 == sc)
959			continue;
960		if (!sc2->gif_pdst || !sc2->gif_psrc)
961			continue;
962		if (sc2->gif_pdst->sa_family != dst->sa_family ||
963		    sc2->gif_pdst->sa_len != dst->sa_len ||
964		    sc2->gif_psrc->sa_family != src->sa_family ||
965		    sc2->gif_psrc->sa_len != src->sa_len)
966			continue;
967
968		/*
969		 * Disallow parallel tunnels unless instructed
970		 * otherwise.
971		 */
972		if (!V_parallel_tunnels &&
973		    bcmp(sc2->gif_pdst, dst, dst->sa_len) == 0 &&
974		    bcmp(sc2->gif_psrc, src, src->sa_len) == 0) {
975			error = EADDRNOTAVAIL;
976			mtx_unlock(&gif_mtx);
977			goto bad;
978		}
979
980		/* XXX both end must be valid? (I mean, not 0.0.0.0) */
981	}
982	mtx_unlock(&gif_mtx);
983
984	/* XXX we can detach from both, but be polite just in case */
985	if (sc->gif_psrc)
986		switch (sc->gif_psrc->sa_family) {
987#ifdef INET
988		case AF_INET:
989			(void)in_gif_detach(sc);
990			break;
991#endif
992#ifdef INET6
993		case AF_INET6:
994			(void)in6_gif_detach(sc);
995			break;
996#endif
997		}
998
999	osrc = sc->gif_psrc;
1000	sa = (struct sockaddr *)malloc(src->sa_len, M_IFADDR, M_WAITOK);
1001	bcopy((caddr_t)src, (caddr_t)sa, src->sa_len);
1002	sc->gif_psrc = sa;
1003
1004	odst = sc->gif_pdst;
1005	sa = (struct sockaddr *)malloc(dst->sa_len, M_IFADDR, M_WAITOK);
1006	bcopy((caddr_t)dst, (caddr_t)sa, dst->sa_len);
1007	sc->gif_pdst = sa;
1008
1009	switch (sc->gif_psrc->sa_family) {
1010#ifdef INET
1011	case AF_INET:
1012		error = in_gif_attach(sc);
1013		break;
1014#endif
1015#ifdef INET6
1016	case AF_INET6:
1017		/*
1018		 * Check validity of the scope zone ID of the addresses, and
1019		 * convert it into the kernel internal form if necessary.
1020		 */
1021		error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_psrc, 0);
1022		if (error != 0)
1023			break;
1024		error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_pdst, 0);
1025		if (error != 0)
1026			break;
1027		error = in6_gif_attach(sc);
1028		break;
1029#endif
1030	}
1031	if (error) {
1032		/* rollback */
1033		free((caddr_t)sc->gif_psrc, M_IFADDR);
1034		free((caddr_t)sc->gif_pdst, M_IFADDR);
1035		sc->gif_psrc = osrc;
1036		sc->gif_pdst = odst;
1037		goto bad;
1038	}
1039
1040	if (osrc)
1041		free((caddr_t)osrc, M_IFADDR);
1042	if (odst)
1043		free((caddr_t)odst, M_IFADDR);
1044
1045 bad:
1046	if (sc->gif_psrc && sc->gif_pdst)
1047		ifp->if_drv_flags |= IFF_DRV_RUNNING;
1048	else
1049		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1050
1051	return error;
1052}
1053
1054void
1055gif_delete_tunnel(ifp)
1056	struct ifnet *ifp;
1057{
1058	struct gif_softc *sc = ifp->if_softc;
1059
1060	if (sc->gif_psrc) {
1061		free((caddr_t)sc->gif_psrc, M_IFADDR);
1062		sc->gif_psrc = NULL;
1063	}
1064	if (sc->gif_pdst) {
1065		free((caddr_t)sc->gif_pdst, M_IFADDR);
1066		sc->gif_pdst = NULL;
1067	}
1068	/* it is safe to detach from both */
1069#ifdef INET
1070	(void)in_gif_detach(sc);
1071#endif
1072#ifdef INET6
1073	(void)in6_gif_detach(sc);
1074#endif
1075	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1076}
1077