1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	$KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: releng/11.0/sys/net/if_gif.c 297793 2016-04-10 23:07:00Z pfg $");
34
35#include "opt_inet.h"
36#include "opt_inet6.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/jail.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/malloc.h>
44#include <sys/mbuf.h>
45#include <sys/module.h>
46#include <sys/rmlock.h>
47#include <sys/socket.h>
48#include <sys/sockio.h>
49#include <sys/sx.h>
50#include <sys/errno.h>
51#include <sys/time.h>
52#include <sys/sysctl.h>
53#include <sys/syslog.h>
54#include <sys/priv.h>
55#include <sys/proc.h>
56#include <sys/protosw.h>
57#include <sys/conf.h>
58#include <machine/cpu.h>
59
60#include <net/if.h>
61#include <net/if_var.h>
62#include <net/if_clone.h>
63#include <net/if_types.h>
64#include <net/netisr.h>
65#include <net/route.h>
66#include <net/bpf.h>
67#include <net/vnet.h>
68
69#include <netinet/in.h>
70#include <netinet/in_systm.h>
71#include <netinet/ip.h>
72#include <netinet/ip_ecn.h>
73#ifdef	INET
74#include <netinet/in_var.h>
75#include <netinet/ip_var.h>
76#endif	/* INET */
77
78#ifdef INET6
79#ifndef INET
80#include <netinet/in.h>
81#endif
82#include <netinet6/in6_var.h>
83#include <netinet/ip6.h>
84#include <netinet6/ip6_ecn.h>
85#include <netinet6/ip6_var.h>
86#include <netinet6/scope6_var.h>
87#include <netinet6/ip6protosw.h>
88#endif /* INET6 */
89
90#include <netinet/ip_encap.h>
91#include <net/ethernet.h>
92#include <net/if_bridgevar.h>
93#include <net/if_gif.h>
94
95#include <security/mac/mac_framework.h>
96
97static const char gifname[] = "gif";
98
99/*
100 * gif_mtx protects a per-vnet gif_softc_list.
101 */
102static VNET_DEFINE(struct mtx, gif_mtx);
103#define	V_gif_mtx		VNET(gif_mtx)
104static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
105static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
106#define	V_gif_softc_list	VNET(gif_softc_list)
107static struct sx gif_ioctl_sx;
108SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");
109
110#define	GIF_LIST_LOCK_INIT(x)		mtx_init(&V_gif_mtx, "gif_mtx", \
111					    NULL, MTX_DEF)
112#define	GIF_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_gif_mtx)
113#define	GIF_LIST_LOCK(x)		mtx_lock(&V_gif_mtx)
114#define	GIF_LIST_UNLOCK(x)		mtx_unlock(&V_gif_mtx)
115
116void	(*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
117void	(*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
118void	(*ng_gif_attach_p)(struct ifnet *ifp);
119void	(*ng_gif_detach_p)(struct ifnet *ifp);
120
121static int	gif_check_nesting(struct ifnet *, struct mbuf *);
122static int	gif_set_tunnel(struct ifnet *, struct sockaddr *,
123    struct sockaddr *);
124static void	gif_delete_tunnel(struct ifnet *);
125static int	gif_ioctl(struct ifnet *, u_long, caddr_t);
126static int	gif_transmit(struct ifnet *, struct mbuf *);
127static void	gif_qflush(struct ifnet *);
128static int	gif_clone_create(struct if_clone *, int, caddr_t);
129static void	gif_clone_destroy(struct ifnet *);
130static VNET_DEFINE(struct if_clone *, gif_cloner);
131#define	V_gif_cloner	VNET(gif_cloner)
132
133static int gifmodevent(module_t, int, void *);
134
135SYSCTL_DECL(_net_link);
136static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
137    "Generic Tunnel Interface");
138#ifndef MAX_GIF_NEST
139/*
140 * This macro controls the default upper limitation on nesting of gif tunnels.
141 * Since, setting a large value to this macro with a careless configuration
142 * may introduce system crash, we don't allow any nestings by default.
143 * If you need to configure nested gif tunnels, you can define this macro
144 * in your kernel configuration file.  However, if you do so, please be
145 * careful to configure the tunnels so that it won't make a loop.
146 */
147#define MAX_GIF_NEST 1
148#endif
149static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST;
150#define	V_max_gif_nesting	VNET(max_gif_nesting)
151SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW,
152    &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");
153
154/*
155 * By default, we disallow creation of multiple tunnels between the same
156 * pair of addresses.  Some applications require this functionality so
157 * we allow control over this check here.
158 */
159#ifdef XBONEHACK
160static VNET_DEFINE(int, parallel_tunnels) = 1;
161#else
162static VNET_DEFINE(int, parallel_tunnels) = 0;
163#endif
164#define	V_parallel_tunnels	VNET(parallel_tunnels)
165SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels,
166    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0,
167    "Allow parallel tunnels?");
168
169/* copy from src/sys/net/if_ethersubr.c */
170static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
171			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
172#ifndef ETHER_IS_BROADCAST
173#define ETHER_IS_BROADCAST(addr) \
174	(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
175#endif
176
177static int
178gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
179{
180	struct gif_softc *sc;
181
182	sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
183	sc->gif_fibnum = curthread->td_proc->p_fibnum;
184	GIF2IFP(sc) = if_alloc(IFT_GIF);
185	GIF_LOCK_INIT(sc);
186	GIF2IFP(sc)->if_softc = sc;
187	if_initname(GIF2IFP(sc), gifname, unit);
188
189	GIF2IFP(sc)->if_addrlen = 0;
190	GIF2IFP(sc)->if_mtu    = GIF_MTU;
191	GIF2IFP(sc)->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
192#if 0
193	/* turn off ingress filter */
194	GIF2IFP(sc)->if_flags  |= IFF_LINK2;
195#endif
196	GIF2IFP(sc)->if_ioctl  = gif_ioctl;
197	GIF2IFP(sc)->if_transmit  = gif_transmit;
198	GIF2IFP(sc)->if_qflush  = gif_qflush;
199	GIF2IFP(sc)->if_output = gif_output;
200	GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
201	GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
202	if_attach(GIF2IFP(sc));
203	bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
204	if (ng_gif_attach_p != NULL)
205		(*ng_gif_attach_p)(GIF2IFP(sc));
206
207	GIF_LIST_LOCK();
208	LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
209	GIF_LIST_UNLOCK();
210	return (0);
211}
212
213static void
214gif_clone_destroy(struct ifnet *ifp)
215{
216	struct gif_softc *sc;
217
218	sx_xlock(&gif_ioctl_sx);
219	sc = ifp->if_softc;
220	gif_delete_tunnel(ifp);
221	GIF_LIST_LOCK();
222	LIST_REMOVE(sc, gif_list);
223	GIF_LIST_UNLOCK();
224	if (ng_gif_detach_p != NULL)
225		(*ng_gif_detach_p)(ifp);
226	bpfdetach(ifp);
227	if_detach(ifp);
228	ifp->if_softc = NULL;
229	sx_xunlock(&gif_ioctl_sx);
230
231	if_free(ifp);
232	GIF_LOCK_DESTROY(sc);
233	free(sc, M_GIF);
234}
235
236static void
237vnet_gif_init(const void *unused __unused)
238{
239
240	LIST_INIT(&V_gif_softc_list);
241	GIF_LIST_LOCK_INIT();
242	V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
243	    gif_clone_destroy, 0);
244}
245VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
246    vnet_gif_init, NULL);
247
248static void
249vnet_gif_uninit(const void *unused __unused)
250{
251
252	if_clone_detach(V_gif_cloner);
253	GIF_LIST_LOCK_DESTROY();
254}
255VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
256    vnet_gif_uninit, NULL);
257
258static int
259gifmodevent(module_t mod, int type, void *data)
260{
261
262	switch (type) {
263	case MOD_LOAD:
264	case MOD_UNLOAD:
265		break;
266	default:
267		return (EOPNOTSUPP);
268	}
269	return (0);
270}
271
272static moduledata_t gif_mod = {
273	"if_gif",
274	gifmodevent,
275	0
276};
277
278DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
279MODULE_VERSION(if_gif, 1);
280
281int
282gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
283{
284	GIF_RLOCK_TRACKER;
285	const struct ip *ip;
286	struct gif_softc *sc;
287	int ret;
288
289	sc = (struct gif_softc *)arg;
290	if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
291		return (0);
292
293	ret = 0;
294	GIF_RLOCK(sc);
295
296	/* no physical address */
297	if (sc->gif_family == 0)
298		goto done;
299
300	switch (proto) {
301#ifdef INET
302	case IPPROTO_IPV4:
303#endif
304#ifdef INET6
305	case IPPROTO_IPV6:
306#endif
307	case IPPROTO_ETHERIP:
308		break;
309	default:
310		goto done;
311	}
312
313	/* Bail on short packets */
314	M_ASSERTPKTHDR(m);
315	if (m->m_pkthdr.len < sizeof(struct ip))
316		goto done;
317
318	ip = mtod(m, const struct ip *);
319	switch (ip->ip_v) {
320#ifdef INET
321	case 4:
322		if (sc->gif_family != AF_INET)
323			goto done;
324		ret = in_gif_encapcheck(m, off, proto, arg);
325		break;
326#endif
327#ifdef INET6
328	case 6:
329		if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
330			goto done;
331		if (sc->gif_family != AF_INET6)
332			goto done;
333		ret = in6_gif_encapcheck(m, off, proto, arg);
334		break;
335#endif
336	}
337done:
338	GIF_RUNLOCK(sc);
339	return (ret);
340}
341
342static int
343gif_transmit(struct ifnet *ifp, struct mbuf *m)
344{
345	struct gif_softc *sc;
346	struct etherip_header *eth;
347#ifdef INET
348	struct ip *ip;
349#endif
350#ifdef INET6
351	struct ip6_hdr *ip6;
352	uint32_t t;
353#endif
354	uint32_t af;
355	uint8_t proto, ecn;
356	int error;
357
358#ifdef MAC
359	error = mac_ifnet_check_transmit(ifp, m);
360	if (error) {
361		m_freem(m);
362		goto err;
363	}
364#endif
365	error = ENETDOWN;
366	sc = ifp->if_softc;
367	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
368	    (ifp->if_flags & IFF_UP) == 0 ||
369	    sc->gif_family == 0 ||
370	    (error = gif_check_nesting(ifp, m)) != 0) {
371		m_freem(m);
372		goto err;
373	}
374	/* Now pull back the af that we stashed in the csum_data. */
375	if (ifp->if_bridge)
376		af = AF_LINK;
377	else
378		af = m->m_pkthdr.csum_data;
379	m->m_flags &= ~(M_BCAST|M_MCAST);
380	M_SETFIB(m, sc->gif_fibnum);
381	BPF_MTAP2(ifp, &af, sizeof(af), m);
382	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
383	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
384	/* inner AF-specific encapsulation */
385	ecn = 0;
386	switch (af) {
387#ifdef INET
388	case AF_INET:
389		proto = IPPROTO_IPV4;
390		if (m->m_len < sizeof(struct ip))
391			m = m_pullup(m, sizeof(struct ip));
392		if (m == NULL) {
393			error = ENOBUFS;
394			goto err;
395		}
396		ip = mtod(m, struct ip *);
397		ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
398		    ECN_NOCARE, &ecn, &ip->ip_tos);
399		break;
400#endif
401#ifdef INET6
402	case AF_INET6:
403		proto = IPPROTO_IPV6;
404		if (m->m_len < sizeof(struct ip6_hdr))
405			m = m_pullup(m, sizeof(struct ip6_hdr));
406		if (m == NULL) {
407			error = ENOBUFS;
408			goto err;
409		}
410		t = 0;
411		ip6 = mtod(m, struct ip6_hdr *);
412		ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
413		    ECN_NOCARE, &t, &ip6->ip6_flow);
414		ecn = (ntohl(t) >> 20) & 0xff;
415		break;
416#endif
417	case AF_LINK:
418		proto = IPPROTO_ETHERIP;
419		M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT);
420		if (m == NULL) {
421			error = ENOBUFS;
422			goto err;
423		}
424		eth = mtod(m, struct etherip_header *);
425		eth->eip_resvh = 0;
426		eth->eip_ver = ETHERIP_VERSION;
427		eth->eip_resvl = 0;
428		break;
429	default:
430		error = EAFNOSUPPORT;
431		m_freem(m);
432		goto err;
433	}
434	/* XXX should we check if our outer source is legal? */
435	/* dispatch to output logic based on outer AF */
436	switch (sc->gif_family) {
437#ifdef INET
438	case AF_INET:
439		error = in_gif_output(ifp, m, proto, ecn);
440		break;
441#endif
442#ifdef INET6
443	case AF_INET6:
444		error = in6_gif_output(ifp, m, proto, ecn);
445		break;
446#endif
447	default:
448		m_freem(m);
449	}
450err:
451	if (error)
452		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
453	return (error);
454}
455
456static void
457gif_qflush(struct ifnet *ifp __unused)
458{
459
460}
461
462#define	MTAG_GIF	1080679712
463static int
464gif_check_nesting(struct ifnet *ifp, struct mbuf *m)
465{
466	struct m_tag *mtag;
467	int count;
468
469	/*
470	 * gif may cause infinite recursion calls when misconfigured.
471	 * We'll prevent this by detecting loops.
472	 *
473	 * High nesting level may cause stack exhaustion.
474	 * We'll prevent this by introducing upper limit.
475	 */
476	count = 1;
477	mtag = NULL;
478	while ((mtag = m_tag_locate(m, MTAG_GIF, 0, mtag)) != NULL) {
479		if (*(struct ifnet **)(mtag + 1) == ifp) {
480			log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp));
481			return (EIO);
482		}
483		count++;
484	}
485	if (count > V_max_gif_nesting) {
486		log(LOG_NOTICE,
487		    "%s: if_output recursively called too many times(%d)\n",
488		    if_name(ifp), count);
489		return (EIO);
490	}
491	mtag = m_tag_alloc(MTAG_GIF, 0, sizeof(struct ifnet *), M_NOWAIT);
492	if (mtag == NULL)
493		return (ENOMEM);
494	*(struct ifnet **)(mtag + 1) = ifp;
495	m_tag_prepend(m, mtag);
496	return (0);
497}
498
499int
500gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
501	struct route *ro)
502{
503	uint32_t af;
504
505	if (dst->sa_family == AF_UNSPEC)
506		bcopy(dst->sa_data, &af, sizeof(af));
507	else
508		af = dst->sa_family;
509	/*
510	 * Now save the af in the inbound pkt csum data, this is a cheat since
511	 * we are using the inbound csum_data field to carry the af over to
512	 * the gif_transmit() routine, avoiding using yet another mtag.
513	 */
514	m->m_pkthdr.csum_data = af;
515	return (ifp->if_transmit(ifp, m));
516}
517
518void
519gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
520{
521	struct etherip_header *eip;
522#ifdef INET
523	struct ip *ip;
524#endif
525#ifdef INET6
526	struct ip6_hdr *ip6;
527	uint32_t t;
528#endif
529	struct gif_softc *sc;
530	struct ether_header *eh;
531	struct ifnet *oldifp;
532	int isr, n, af;
533
534	if (ifp == NULL) {
535		/* just in case */
536		m_freem(m);
537		return;
538	}
539	sc = ifp->if_softc;
540	m->m_pkthdr.rcvif = ifp;
541	m_clrprotoflags(m);
542	switch (proto) {
543#ifdef INET
544	case IPPROTO_IPV4:
545		af = AF_INET;
546		if (m->m_len < sizeof(struct ip))
547			m = m_pullup(m, sizeof(struct ip));
548		if (m == NULL)
549			goto drop;
550		ip = mtod(m, struct ip *);
551		if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
552		    ECN_NOCARE, &ecn, &ip->ip_tos) == 0) {
553			m_freem(m);
554			goto drop;
555		}
556		break;
557#endif
558#ifdef INET6
559	case IPPROTO_IPV6:
560		af = AF_INET6;
561		if (m->m_len < sizeof(struct ip6_hdr))
562			m = m_pullup(m, sizeof(struct ip6_hdr));
563		if (m == NULL)
564			goto drop;
565		t = htonl((uint32_t)ecn << 20);
566		ip6 = mtod(m, struct ip6_hdr *);
567		if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
568		    ECN_NOCARE, &t, &ip6->ip6_flow) == 0) {
569			m_freem(m);
570			goto drop;
571		}
572		break;
573#endif
574	case IPPROTO_ETHERIP:
575		af = AF_LINK;
576		break;
577	default:
578		m_freem(m);
579		goto drop;
580	}
581
582#ifdef MAC
583	mac_ifnet_create_mbuf(ifp, m);
584#endif
585
586	if (bpf_peers_present(ifp->if_bpf)) {
587		uint32_t af1 = af;
588		bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
589	}
590
591	if ((ifp->if_flags & IFF_MONITOR) != 0) {
592		if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
593		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
594		m_freem(m);
595		return;
596	}
597
598	if (ng_gif_input_p != NULL) {
599		(*ng_gif_input_p)(ifp, &m, af);
600		if (m == NULL)
601			goto drop;
602	}
603
604	/*
605	 * Put the packet to the network layer input queue according to the
606	 * specified address family.
607	 * Note: older versions of gif_input directly called network layer
608	 * input functions, e.g. ip6_input, here.  We changed the policy to
609	 * prevent too many recursive calls of such input functions, which
610	 * might cause kernel panic.  But the change may introduce another
611	 * problem; if the input queue is full, packets are discarded.
612	 * The kernel stack overflow really happened, and we believed
613	 * queue-full rarely occurs, so we changed the policy.
614	 */
615	switch (af) {
616#ifdef INET
617	case AF_INET:
618		isr = NETISR_IP;
619		break;
620#endif
621#ifdef INET6
622	case AF_INET6:
623		isr = NETISR_IPV6;
624		break;
625#endif
626	case AF_LINK:
627		n = sizeof(struct etherip_header) + sizeof(struct ether_header);
628		if (n > m->m_len)
629			m = m_pullup(m, n);
630		if (m == NULL)
631			goto drop;
632		eip = mtod(m, struct etherip_header *);
633		if (eip->eip_ver != ETHERIP_VERSION) {
634			/* discard unknown versions */
635			m_freem(m);
636			goto drop;
637		}
638		m_adj(m, sizeof(struct etherip_header));
639
640		m->m_flags &= ~(M_BCAST|M_MCAST);
641		m->m_pkthdr.rcvif = ifp;
642
643		if (ifp->if_bridge) {
644			oldifp = ifp;
645			eh = mtod(m, struct ether_header *);
646			if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
647				if (ETHER_IS_BROADCAST(eh->ether_dhost))
648					m->m_flags |= M_BCAST;
649				else
650					m->m_flags |= M_MCAST;
651				if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
652			}
653			BRIDGE_INPUT(ifp, m);
654
655			if (m != NULL && ifp != oldifp) {
656				/*
657				 * The bridge gave us back itself or one of the
658				 * members for which the frame is addressed.
659				 */
660				ether_demux(ifp, m);
661				return;
662			}
663		}
664		if (m != NULL)
665			m_freem(m);
666		return;
667
668	default:
669		if (ng_gif_input_orphan_p != NULL)
670			(*ng_gif_input_orphan_p)(ifp, m, af);
671		else
672			m_freem(m);
673		return;
674	}
675
676	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
677	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
678	M_SETFIB(m, ifp->if_fib);
679	netisr_dispatch(isr, m);
680	return;
681drop:
682	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
683}
684
685/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
686int
687gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
688{
689	GIF_RLOCK_TRACKER;
690	struct ifreq *ifr = (struct ifreq*)data;
691	struct sockaddr *dst, *src;
692	struct gif_softc *sc;
693#ifdef INET
694	struct sockaddr_in *sin = NULL;
695#endif
696#ifdef INET6
697	struct sockaddr_in6 *sin6 = NULL;
698#endif
699	u_int options;
700	int error;
701
702	switch (cmd) {
703	case SIOCSIFADDR:
704		ifp->if_flags |= IFF_UP;
705	case SIOCADDMULTI:
706	case SIOCDELMULTI:
707	case SIOCGIFMTU:
708	case SIOCSIFFLAGS:
709		return (0);
710	case SIOCSIFMTU:
711		if (ifr->ifr_mtu < GIF_MTU_MIN ||
712		    ifr->ifr_mtu > GIF_MTU_MAX)
713			return (EINVAL);
714		else
715			ifp->if_mtu = ifr->ifr_mtu;
716		return (0);
717	}
718	sx_xlock(&gif_ioctl_sx);
719	sc = ifp->if_softc;
720	if (sc == NULL) {
721		error = ENXIO;
722		goto bad;
723	}
724	error = 0;
725	switch (cmd) {
726	case SIOCSIFPHYADDR:
727#ifdef INET6
728	case SIOCSIFPHYADDR_IN6:
729#endif
730		error = EINVAL;
731		switch (cmd) {
732#ifdef INET
733		case SIOCSIFPHYADDR:
734			src = (struct sockaddr *)
735				&(((struct in_aliasreq *)data)->ifra_addr);
736			dst = (struct sockaddr *)
737				&(((struct in_aliasreq *)data)->ifra_dstaddr);
738			break;
739#endif
740#ifdef INET6
741		case SIOCSIFPHYADDR_IN6:
742			src = (struct sockaddr *)
743				&(((struct in6_aliasreq *)data)->ifra_addr);
744			dst = (struct sockaddr *)
745				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
746			break;
747#endif
748		default:
749			goto bad;
750		}
751		/* sa_family must be equal */
752		if (src->sa_family != dst->sa_family ||
753		    src->sa_len != dst->sa_len)
754			goto bad;
755
756		/* validate sa_len */
757		/* check sa_family looks sane for the cmd */
758		switch (src->sa_family) {
759#ifdef INET
760		case AF_INET:
761			if (src->sa_len != sizeof(struct sockaddr_in))
762				goto bad;
763			if (cmd != SIOCSIFPHYADDR) {
764				error = EAFNOSUPPORT;
765				goto bad;
766			}
767			if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
768			    satosin(dst)->sin_addr.s_addr == INADDR_ANY) {
769				error = EADDRNOTAVAIL;
770				goto bad;
771			}
772			break;
773#endif
774#ifdef INET6
775		case AF_INET6:
776			if (src->sa_len != sizeof(struct sockaddr_in6))
777				goto bad;
778			if (cmd != SIOCSIFPHYADDR_IN6) {
779				error = EAFNOSUPPORT;
780				goto bad;
781			}
782			error = EADDRNOTAVAIL;
783			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
784			    ||
785			    IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
786				goto bad;
787			/*
788			 * Check validity of the scope zone ID of the
789			 * addresses, and convert it into the kernel
790			 * internal form if necessary.
791			 */
792			error = sa6_embedscope(satosin6(src), 0);
793			if (error != 0)
794				goto bad;
795			error = sa6_embedscope(satosin6(dst), 0);
796			if (error != 0)
797				goto bad;
798			break;
799#endif
800		default:
801			error = EAFNOSUPPORT;
802			goto bad;
803		}
804		error = gif_set_tunnel(ifp, src, dst);
805		break;
806	case SIOCDIFPHYADDR:
807		gif_delete_tunnel(ifp);
808		break;
809	case SIOCGIFPSRCADDR:
810	case SIOCGIFPDSTADDR:
811#ifdef INET6
812	case SIOCGIFPSRCADDR_IN6:
813	case SIOCGIFPDSTADDR_IN6:
814#endif
815		if (sc->gif_family == 0) {
816			error = EADDRNOTAVAIL;
817			break;
818		}
819		GIF_RLOCK(sc);
820		switch (cmd) {
821#ifdef INET
822		case SIOCGIFPSRCADDR:
823		case SIOCGIFPDSTADDR:
824			if (sc->gif_family != AF_INET) {
825				error = EADDRNOTAVAIL;
826				break;
827			}
828			sin = (struct sockaddr_in *)&ifr->ifr_addr;
829			memset(sin, 0, sizeof(*sin));
830			sin->sin_family = AF_INET;
831			sin->sin_len = sizeof(*sin);
832			break;
833#endif
834#ifdef INET6
835		case SIOCGIFPSRCADDR_IN6:
836		case SIOCGIFPDSTADDR_IN6:
837			if (sc->gif_family != AF_INET6) {
838				error = EADDRNOTAVAIL;
839				break;
840			}
841			sin6 = (struct sockaddr_in6 *)
842				&(((struct in6_ifreq *)data)->ifr_addr);
843			memset(sin6, 0, sizeof(*sin6));
844			sin6->sin6_family = AF_INET6;
845			sin6->sin6_len = sizeof(*sin6);
846			break;
847#endif
848		default:
849			error = EAFNOSUPPORT;
850		}
851		if (error == 0) {
852			switch (cmd) {
853#ifdef INET
854			case SIOCGIFPSRCADDR:
855				sin->sin_addr = sc->gif_iphdr->ip_src;
856				break;
857			case SIOCGIFPDSTADDR:
858				sin->sin_addr = sc->gif_iphdr->ip_dst;
859				break;
860#endif
861#ifdef INET6
862			case SIOCGIFPSRCADDR_IN6:
863				sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
864				break;
865			case SIOCGIFPDSTADDR_IN6:
866				sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
867				break;
868#endif
869			}
870		}
871		GIF_RUNLOCK(sc);
872		if (error != 0)
873			break;
874		switch (cmd) {
875#ifdef INET
876		case SIOCGIFPSRCADDR:
877		case SIOCGIFPDSTADDR:
878			error = prison_if(curthread->td_ucred,
879			    (struct sockaddr *)sin);
880			if (error != 0)
881				memset(sin, 0, sizeof(*sin));
882			break;
883#endif
884#ifdef INET6
885		case SIOCGIFPSRCADDR_IN6:
886		case SIOCGIFPDSTADDR_IN6:
887			error = prison_if(curthread->td_ucred,
888			    (struct sockaddr *)sin6);
889			if (error == 0)
890				error = sa6_recoverscope(sin6);
891			if (error != 0)
892				memset(sin6, 0, sizeof(*sin6));
893#endif
894		}
895		break;
896	case SIOCGTUNFIB:
897		ifr->ifr_fib = sc->gif_fibnum;
898		break;
899	case SIOCSTUNFIB:
900		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
901			break;
902		if (ifr->ifr_fib >= rt_numfibs)
903			error = EINVAL;
904		else
905			sc->gif_fibnum = ifr->ifr_fib;
906		break;
907	case GIFGOPTS:
908		options = sc->gif_options;
909		error = copyout(&options, ifr->ifr_data, sizeof(options));
910		break;
911	case GIFSOPTS:
912		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
913			break;
914		error = copyin(ifr->ifr_data, &options, sizeof(options));
915		if (error)
916			break;
917		if (options & ~GIF_OPTMASK)
918			error = EINVAL;
919		else
920			sc->gif_options = options;
921		break;
922	default:
923		error = EINVAL;
924		break;
925	}
926bad:
927	sx_xunlock(&gif_ioctl_sx);
928	return (error);
929}
930
931static void
932gif_detach(struct gif_softc *sc)
933{
934
935	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
936	if (sc->gif_ecookie != NULL)
937		encap_detach(sc->gif_ecookie);
938	sc->gif_ecookie = NULL;
939}
940
941static int
942gif_attach(struct gif_softc *sc, int af)
943{
944
945	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
946	switch (af) {
947#ifdef INET
948	case AF_INET:
949		return (in_gif_attach(sc));
950#endif
951#ifdef INET6
952	case AF_INET6:
953		return (in6_gif_attach(sc));
954#endif
955	}
956	return (EAFNOSUPPORT);
957}
958
959static int
960gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
961{
962	struct gif_softc *sc = ifp->if_softc;
963	struct gif_softc *tsc;
964#ifdef INET
965	struct ip *ip;
966#endif
967#ifdef INET6
968	struct ip6_hdr *ip6;
969#endif
970	void *hdr;
971	int error = 0;
972
973	if (sc == NULL)
974		return (ENXIO);
975	/* Disallow parallel tunnels unless instructed otherwise. */
976	if (V_parallel_tunnels == 0) {
977		GIF_LIST_LOCK();
978		LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
979			if (tsc == sc || tsc->gif_family != src->sa_family)
980				continue;
981#ifdef INET
982			if (tsc->gif_family == AF_INET &&
983			    tsc->gif_iphdr->ip_src.s_addr ==
984			    satosin(src)->sin_addr.s_addr &&
985			    tsc->gif_iphdr->ip_dst.s_addr ==
986			    satosin(dst)->sin_addr.s_addr) {
987				error = EADDRNOTAVAIL;
988				GIF_LIST_UNLOCK();
989				goto bad;
990			}
991#endif
992#ifdef INET6
993			if (tsc->gif_family == AF_INET6 &&
994			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
995			    &satosin6(src)->sin6_addr) &&
996			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
997			    &satosin6(dst)->sin6_addr)) {
998				error = EADDRNOTAVAIL;
999				GIF_LIST_UNLOCK();
1000				goto bad;
1001			}
1002#endif
1003		}
1004		GIF_LIST_UNLOCK();
1005	}
1006	switch (src->sa_family) {
1007#ifdef INET
1008	case AF_INET:
1009		hdr = ip = malloc(sizeof(struct ip), M_GIF,
1010		    M_WAITOK | M_ZERO);
1011		ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
1012		ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
1013		break;
1014#endif
1015#ifdef INET6
1016	case AF_INET6:
1017		hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
1018		    M_WAITOK | M_ZERO);
1019		ip6->ip6_src = satosin6(src)->sin6_addr;
1020		ip6->ip6_dst = satosin6(dst)->sin6_addr;
1021		ip6->ip6_vfc = IPV6_VERSION;
1022		break;
1023#endif
1024	default:
1025		return (EAFNOSUPPORT);
1026	}
1027
1028	if (sc->gif_family != src->sa_family)
1029		gif_detach(sc);
1030	if (sc->gif_family == 0 ||
1031	    sc->gif_family != src->sa_family)
1032		error = gif_attach(sc, src->sa_family);
1033
1034	GIF_WLOCK(sc);
1035	if (sc->gif_family != 0)
1036		free(sc->gif_hdr, M_GIF);
1037	sc->gif_family = src->sa_family;
1038	sc->gif_hdr = hdr;
1039	GIF_WUNLOCK(sc);
1040#if defined(INET) || defined(INET6)
1041bad:
1042#endif
1043	if (error == 0 && sc->gif_family != 0) {
1044		ifp->if_drv_flags |= IFF_DRV_RUNNING;
1045		if_link_state_change(ifp, LINK_STATE_UP);
1046	} else {
1047		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1048		if_link_state_change(ifp, LINK_STATE_DOWN);
1049	}
1050	return (error);
1051}
1052
1053static void
1054gif_delete_tunnel(struct ifnet *ifp)
1055{
1056	struct gif_softc *sc = ifp->if_softc;
1057	int family;
1058
1059	if (sc == NULL)
1060		return;
1061
1062	GIF_WLOCK(sc);
1063	family = sc->gif_family;
1064	sc->gif_family = 0;
1065	GIF_WUNLOCK(sc);
1066	if (family != 0) {
1067		gif_detach(sc);
1068		free(sc->gif_hdr, M_GIF);
1069	}
1070	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1071	if_link_state_change(ifp, LINK_STATE_DOWN);
1072}
1073