1/*	$OpenBSD: if_gif.c,v 1.138 2024/05/13 01:15:53 jsg Exp $	*/
2/*	$KAME: if_gif.c,v 1.43 2001/02/20 08:51:07 itojun Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/mbuf.h>
36#include <sys/socket.h>
37#include <sys/sockio.h>
38#include <sys/syslog.h>
39#include <sys/queue.h>
40
41#include <net/if.h>
42#include <net/if_var.h>
43#include <net/if_types.h>
44#include <net/route.h>
45
46#include <netinet/in.h>
47#include <netinet/in_var.h>
48#include <netinet/ip.h>
49#include <netinet/ip_var.h>
50#include <netinet/ip_ipip.h>
51#include <netinet/ip_ecn.h>
52
53#ifdef INET6
54#include <netinet6/in6_var.h>
55#include <netinet/ip6.h>
56#include <netinet6/ip6_var.h>
57#endif /* INET6 */
58
59#include <net/if_gif.h>
60
61#include "bpfilter.h"
62#if NBPFILTER > 0
63#include <net/bpf.h>
64#endif
65
66#ifdef MPLS
67#include <netmpls/mpls.h>
68#endif
69
70#include "pf.h"
71#if NPF > 0
72#include <net/pfvar.h>
73#endif
74
/* Interface MTU limits, in bytes. */
#define GIF_MTU		1280	/* MTU given to a newly created interface */
#define GIF_MTU_MIN	1280	/* smallest MTU accepted by SIOCSIFMTU */
#define GIF_MTU_MAX	8192	/* largest MTU accepted by SIOCSIFMTU */
78
/*
 * Outer endpoint address of a tunnel.  The member that is meaningful is
 * selected by the address family stored alongside it (see gif_ip_cmp()).
 */
union gif_addr {
	struct in6_addr		in6;	/* used when the family is AF_INET6 */
	struct in_addr		in4;	/* used when the family is AF_INET */
};
83
/*
 * Outer (encapsulating) endpoints of a tunnel.  One of these sits at the
 * start of every struct gif_softc; the input path also fills one in on the
 * stack and uses it as a lookup key.
 */
struct gif_tunnel {
	TAILQ_ENTRY(gif_tunnel)	t_entry;	/* linkage on gif_list */

	union gif_addr		t_src;		/* source of outgoing outer headers */
#define t_src4		t_src.in4
#define t_src6		t_src.in6
	union gif_addr		t_dst;		/* destination of outgoing outer headers */
#define t_dst4		t_dst.in4
#define t_dst6		t_dst.in6
	u_int			t_rtableid;	/* routing table used for the outer packets */

	sa_family_t		t_af;		/* outer family; AF_UNSPEC once the tunnel is deleted */
};

/* Head type for the list of tunnels. */
TAILQ_HEAD(gif_list, gif_tunnel);
99
/* Three-way comparison of two tunnel keys; returns 0 when they match. */
static inline int	gif_cmp(const struct gif_tunnel *,
			    const struct gif_tunnel *);
102
/* Per-interface state of one gif instance. */
struct gif_softc {
	struct gif_tunnel	sc_tunnel; /* must be first: gif_find() casts a tunnel pointer to a softc pointer */
	struct ifnet		sc_if;
	uint16_t		sc_df;		/* htons(IP_DF) or 0, copied into the outer ip_off */
	int			sc_ttl;		/* outer TTL; -1 means copy it from the payload */
	int			sc_txhprio;	/* IF_HDRPRIO_* or fixed priority for the outer TOS */
	int			sc_rxhprio;	/* IF_HDRPRIO_* or fixed priority for received packets */
	int			sc_ecn;		/* ECN_ALLOWED or ECN_FORBIDDEN */
};
112
/* Every created gif tunnel; insertions and removals happen under NET_LOCK(). */
struct gif_list gif_list = TAILQ_HEAD_INITIALIZER(gif_list);
114
/* pseudo-device attach and interface cloner callbacks */
void	gifattach(int);
int	gif_clone_create(struct if_clone *, int);
int	gif_clone_destroy(struct ifnet *);

/* ifnet handlers and the encapsulation routine they feed */
void	gif_start(struct ifnet *);
int	gif_ioctl(struct ifnet *, u_long, caddr_t);
int	gif_output(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	gif_send(struct gif_softc *, struct mbuf *, uint8_t, uint8_t, uint8_t);

/* run state, tunnel endpoint configuration and decapsulation */
int	gif_up(struct gif_softc *);
int	gif_down(struct gif_softc *);
int	gif_set_tunnel(struct gif_softc *, struct if_laddrreq *);
int	gif_get_tunnel(struct gif_softc *, struct if_laddrreq *);
int	gif_del_tunnel(struct gif_softc *);
int	gif_input(struct gif_tunnel *, struct mbuf **, int *, int, int,
	    uint8_t);
132
/*
 * gif global variable definitions
 *
 * The cloner ties the "gif" interface name to the create and destroy
 * callbacks; it is registered by gifattach().
 */
struct if_clone gif_cloner =
    IF_CLONE_INITIALIZER("gif", gif_clone_create, gif_clone_destroy);
138
/*
 * Pseudo-device attach hook: register the gif interface cloner.
 * The count argument is not used.
 */
void
gifattach(int count)
{
	if_clone_attach(&gif_cloner);
}
144
145int
146gif_clone_create(struct if_clone *ifc, int unit)
147{
148	struct gif_softc *sc;
149	struct ifnet *ifp;
150
151	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
152	ifp = &sc->sc_if;
153
154	sc->sc_df = htons(0);
155	sc->sc_ttl = ip_defttl;
156	sc->sc_txhprio = IF_HDRPRIO_PAYLOAD;
157	sc->sc_rxhprio = IF_HDRPRIO_PAYLOAD;
158	sc->sc_ecn = ECN_ALLOWED;
159
160	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
161	    "%s%d", ifc->ifc_name, unit);
162
163	ifp->if_mtu    = GIF_MTU;
164	ifp->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
165	ifp->if_xflags = IFXF_CLONED;
166	ifp->if_ioctl  = gif_ioctl;
167	ifp->if_bpf_mtap = p2p_bpf_mtap;
168	ifp->if_input  = p2p_input;
169	ifp->if_start  = gif_start;
170	ifp->if_output = gif_output;
171	ifp->if_rtrequest = p2p_rtrequest;
172	ifp->if_type   = IFT_GIF;
173	ifp->if_softc = sc;
174
175	if_counters_alloc(ifp);
176	if_attach(ifp);
177	if_alloc_sadl(ifp);
178
179#if NBPFILTER > 0
180	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
181#endif
182
183	NET_LOCK();
184	TAILQ_INSERT_TAIL(&gif_list, &sc->sc_tunnel, t_entry);
185	NET_UNLOCK();
186
187	return (0);
188}
189
190int
191gif_clone_destroy(struct ifnet *ifp)
192{
193	struct gif_softc *sc = ifp->if_softc;
194
195	NET_LOCK();
196	if (ISSET(ifp->if_flags, IFF_RUNNING))
197		gif_down(sc);
198
199	TAILQ_REMOVE(&gif_list, &sc->sc_tunnel, t_entry);
200	NET_UNLOCK();
201
202	if_detach(ifp);
203
204	free(sc, M_DEVBUF, sizeof(*sc));
205
206	return (0);
207}
208
209void
210gif_start(struct ifnet *ifp)
211{
212	struct gif_softc *sc = ifp->if_softc;
213	struct mbuf *m;
214#if NBPFILTER > 0
215	caddr_t if_bpf;
216#endif
217	uint8_t proto, ttl, tos;
218	int ttloff, tttl;
219
220	tttl = sc->sc_ttl;
221
222	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
223#if NBPFILTER > 0
224		if_bpf = ifp->if_bpf;
225		if (if_bpf) {
226			bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m,
227			    BPF_DIRECTION_OUT);
228		}
229#endif
230
231		switch (m->m_pkthdr.ph_family) {
232		case AF_INET: {
233			struct ip *ip;
234
235			m = m_pullup(m, sizeof(*ip));
236			if (m == NULL)
237				continue;
238
239			ip = mtod(m, struct ip *);
240			tos = ip->ip_tos;
241
242			ttloff = offsetof(struct ip, ip_ttl);
243			proto = IPPROTO_IPV4;
244			break;
245		}
246#ifdef INET6
247		case AF_INET6: {
248			struct ip6_hdr *ip6;
249
250			m = m_pullup(m, sizeof(*ip6));
251			if (m == NULL)
252				continue;
253
254			ip6 = mtod(m, struct ip6_hdr *);
255			tos = ntohl(ip6->ip6_flow >> 20);
256
257			ttloff = offsetof(struct ip6_hdr, ip6_hlim);
258			proto = IPPROTO_IPV6;
259			break;
260		}
261#endif
262#ifdef MPLS
263		case AF_MPLS: {
264			uint32_t shim;
265
266			m = m_pullup(m, sizeof(shim));
267			if (m == NULL)
268				continue;
269
270			shim = *mtod(m, uint32_t *) & MPLS_EXP_MASK;
271			tos = (ntohl(shim) >> MPLS_EXP_OFFSET) << 5;
272
273			ttloff = 3;
274
275			proto = IPPROTO_MPLS;
276			break;
277		}
278#endif
279		default:
280			unhandled_af(m->m_pkthdr.ph_family);
281		}
282
283		if (tttl == -1) {
284			KASSERT(m->m_len > ttloff);
285
286			ttl = *(m->m_data + ttloff);
287		} else
288			ttl = tttl;
289
290		switch (sc->sc_txhprio) {
291		case IF_HDRPRIO_PAYLOAD:
292			/* tos is already set */
293			break;
294		case IF_HDRPRIO_PACKET:
295			tos = IFQ_PRIO2TOS(m->m_pkthdr.pf.prio);
296			break;
297		default:
298			tos = IFQ_PRIO2TOS(sc->sc_txhprio);
299			break;
300		}
301
302		gif_send(sc, m, proto, ttl, tos);
303	}
304}
305
306int
307gif_send(struct gif_softc *sc, struct mbuf *m,
308    uint8_t proto, uint8_t ttl, uint8_t itos)
309{
310	uint8_t otos;
311
312	m->m_flags &= ~(M_BCAST|M_MCAST);
313	m->m_pkthdr.ph_rtableid = sc->sc_tunnel.t_rtableid;
314
315#if NPF > 0
316	pf_pkt_addr_changed(m);
317#endif
318
319	ip_ecn_ingress(sc->sc_ecn, &otos, &itos);
320
321	switch (sc->sc_tunnel.t_af) {
322	case AF_INET: {
323		struct ip *ip;
324
325		if (in_nullhost(sc->sc_tunnel.t_dst4))
326			goto drop;
327
328		m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
329		if (m == NULL)
330			return (-1);
331
332		ip = mtod(m, struct ip *);
333		ip->ip_off = sc->sc_df;
334		ip->ip_tos = otos;
335		ip->ip_len = htons(m->m_pkthdr.len);
336		ip->ip_ttl = ttl;
337		ip->ip_p = proto;
338		ip->ip_src = sc->sc_tunnel.t_src4;
339		ip->ip_dst = sc->sc_tunnel.t_dst4;
340
341		ip_send(m);
342		break;
343	}
344#ifdef INET6
345	case AF_INET6: {
346		struct ip6_hdr *ip6;
347		int len = m->m_pkthdr.len;
348		uint32_t flow;
349
350		if (IN6_IS_ADDR_UNSPECIFIED(&sc->sc_tunnel.t_dst6))
351			goto drop;
352
353		m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
354		if (m == NULL)
355			return (-1);
356
357		flow = otos << 20;
358		if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
359			flow |= m->m_pkthdr.ph_flowid;
360
361		ip6 = mtod(m, struct ip6_hdr *);
362		ip6->ip6_flow = htonl(flow);
363		ip6->ip6_vfc |= IPV6_VERSION;
364		ip6->ip6_plen = htons(len);
365		ip6->ip6_nxt = proto;
366		ip6->ip6_hlim = ttl;
367		ip6->ip6_src = sc->sc_tunnel.t_src6;
368		ip6->ip6_dst = sc->sc_tunnel.t_dst6;
369
370		if (sc->sc_df)
371			SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
372
373		ip6_send(m);
374		break;
375	}
376#endif
377	default:
378		m_freem(m);
379		break;
380	}
381
382	return (0);
383
384drop:
385	m_freem(m);
386	return (0);
387}
388
389int
390gif_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
391    struct rtentry *rt)
392{
393	struct m_tag *mtag;
394	int error = 0;
395
396	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
397		error = ENETDOWN;
398		goto drop;
399	}
400
401	switch (dst->sa_family) {
402	case AF_INET:
403#ifdef INET6
404	case AF_INET6:
405#endif
406#ifdef MPLS
407	case AF_MPLS:
408#endif
409		break;
410	default:
411		error = EAFNOSUPPORT;
412		goto drop;
413	}
414
415	/* Try to limit infinite recursion through misconfiguration. */
416	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
417	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
418		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
419		    sizeof(ifp->if_index)) == 0) {
420			error = EIO;
421			goto drop;
422		}
423	}
424
425	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
426	if (mtag == NULL) {
427		error = ENOBUFS;
428		goto drop;
429	}
430	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
431	m_tag_prepend(m, mtag);
432
433	m->m_pkthdr.ph_family = dst->sa_family;
434
435	error = if_enqueue(ifp, m);
436
437	if (error)
438		ifp->if_oerrors++;
439	return (error);
440
441drop:
442	m_freem(m);
443	return (error);
444}
445
446int
447gif_up(struct gif_softc *sc)
448{
449	NET_ASSERT_LOCKED();
450
451	SET(sc->sc_if.if_flags, IFF_RUNNING);
452
453	return (0);
454}
455
456int
457gif_down(struct gif_softc *sc)
458{
459	NET_ASSERT_LOCKED();
460
461	CLR(sc->sc_if.if_flags, IFF_RUNNING);
462
463	/* barrier? */
464
465	return (0);
466}
467
468int
469gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
470{
471	struct gif_softc *sc = ifp->if_softc;
472	struct ifreq *ifr = (struct ifreq *)data;
473	int error = 0;
474
475	switch (cmd) {
476	case SIOCSIFADDR:
477		SET(ifp->if_flags, IFF_UP);
478		/* FALLTHROUGH */
479	case SIOCSIFFLAGS:
480		if (ISSET(ifp->if_flags, IFF_UP)) {
481			if (!ISSET(ifp->if_flags, IFF_RUNNING))
482				error = gif_up(sc);
483			else
484				error = 0;
485		} else {
486			if (ISSET(ifp->if_flags, IFF_RUNNING))
487				error = gif_down(sc);
488		}
489		break;
490
491	case SIOCADDMULTI:
492	case SIOCDELMULTI:
493		break;
494
495	case SIOCSLIFPHYADDR:
496		error = gif_set_tunnel(sc, (struct if_laddrreq *)data);
497		break;
498	case SIOCGLIFPHYADDR:
499		error = gif_get_tunnel(sc, (struct if_laddrreq *)data);
500		break;
501	case SIOCDIFPHYADDR:
502		error = gif_del_tunnel(sc);
503		break;
504
505	case SIOCSIFMTU:
506		if (ifr->ifr_mtu < GIF_MTU_MIN || ifr->ifr_mtu > GIF_MTU_MAX) {
507			error = EINVAL;
508			break;
509		}
510
511		ifp->if_mtu = ifr->ifr_mtu;
512		break;
513
514	case SIOCSLIFPHYRTABLE:
515		if (ifr->ifr_rdomainid < 0 ||
516		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
517		    !rtable_exists(ifr->ifr_rdomainid)) {
518			error = EINVAL;
519			break;
520		}
521		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
522		break;
523	case SIOCGLIFPHYRTABLE:
524		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
525		break;
526
527	case SIOCSLIFPHYTTL:
528		if (ifr->ifr_ttl != -1 &&
529		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
530			error = EINVAL;
531			break;
532		}
533
534		/* commit */
535		sc->sc_ttl = ifr->ifr_ttl;
536		break;
537	case SIOCGLIFPHYTTL:
538		ifr->ifr_ttl = sc->sc_ttl;
539		break;
540
541	case SIOCSLIFPHYDF:
542		/* commit */
543		sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
544		break;
545	case SIOCGLIFPHYDF:
546		ifr->ifr_df = sc->sc_df ? 1 : 0;
547		break;
548
549	case SIOCSLIFPHYECN:
550		sc->sc_ecn = ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
551		break;
552	case SIOCGLIFPHYECN:
553		ifr->ifr_metric = (sc->sc_ecn == ECN_ALLOWED);
554		break;
555
556	case SIOCSTXHPRIO:
557		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
558		if (error != 0)
559			break;
560
561		sc->sc_txhprio = ifr->ifr_hdrprio;
562		break;
563	case SIOCGTXHPRIO:
564		ifr->ifr_hdrprio = sc->sc_txhprio;
565		break;
566
567	case SIOCSRXHPRIO:
568		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
569		if (error != 0)
570			break;
571
572		sc->sc_rxhprio = ifr->ifr_hdrprio;
573		break;
574	case SIOCGRXHPRIO:
575		ifr->ifr_hdrprio = sc->sc_rxhprio;
576		break;
577
578	default:
579		error = ENOTTY;
580		break;
581	}
582
583	return (error);
584}
585
586int
587gif_get_tunnel(struct gif_softc *sc, struct if_laddrreq *req)
588{
589	struct gif_tunnel *tunnel = &sc->sc_tunnel;
590	struct sockaddr *src = (struct sockaddr *)&req->addr;
591	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
592	struct sockaddr_in *sin;
593#ifdef INET6 /* ifconfig already embeds the scopeid */
594	struct sockaddr_in6 *sin6;
595#endif
596
597	switch (tunnel->t_af) {
598	case AF_UNSPEC:
599		return (EADDRNOTAVAIL);
600	case AF_INET:
601		sin = (struct sockaddr_in *)src;
602		memset(sin, 0, sizeof(*sin));
603		sin->sin_family = AF_INET;
604		sin->sin_len = sizeof(*sin);
605		sin->sin_addr = tunnel->t_src4;
606
607		sin = (struct sockaddr_in *)dst;
608		memset(sin, 0, sizeof(*sin));
609		sin->sin_family = AF_INET;
610		sin->sin_len = sizeof(*sin);
611		sin->sin_addr = tunnel->t_dst4;
612
613		break;
614
615#ifdef INET6
616	case AF_INET6:
617		sin6 = (struct sockaddr_in6 *)src;
618		memset(sin6, 0, sizeof(*sin6));
619		sin6->sin6_family = AF_INET6;
620		sin6->sin6_len = sizeof(*sin6);
621		in6_recoverscope(sin6, &tunnel->t_src6);
622
623		sin6 = (struct sockaddr_in6 *)dst;
624		memset(sin6, 0, sizeof(*sin6));
625		sin6->sin6_family = AF_INET6;
626		sin6->sin6_len = sizeof(*sin6);
627		in6_recoverscope(sin6, &tunnel->t_dst6);
628
629		break;
630#endif
631	default:
632		return (EAFNOSUPPORT);
633	}
634
635	return (0);
636}
637
638int
639gif_set_tunnel(struct gif_softc *sc, struct if_laddrreq *req)
640{
641	struct gif_tunnel *tunnel = &sc->sc_tunnel;
642	struct sockaddr *src = (struct sockaddr *)&req->addr;
643	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
644	struct sockaddr_in *src4, *dst4;
645#ifdef INET6
646	struct sockaddr_in6 *src6, *dst6;
647	int error;
648#endif
649
650	/* sa_family and sa_len must be equal */
651	if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
652		return (EINVAL);
653
654	/* validate */
655	switch (dst->sa_family) {
656	case AF_INET:
657		if (dst->sa_len != sizeof(*dst4))
658			return (EINVAL);
659
660		src4 = (struct sockaddr_in *)src;
661		if (in_nullhost(src4->sin_addr) ||
662		    IN_MULTICAST(src4->sin_addr.s_addr))
663			return (EINVAL);
664
665		dst4 = (struct sockaddr_in *)dst;
666		/* dst4 can be 0.0.0.0 */
667		if (IN_MULTICAST(dst4->sin_addr.s_addr))
668			return (EINVAL);
669
670		tunnel->t_src4 = src4->sin_addr;
671		tunnel->t_dst4 = dst4->sin_addr;
672
673		break;
674#ifdef INET6
675	case AF_INET6:
676		if (dst->sa_len != sizeof(*dst6))
677			return (EINVAL);
678
679		src6 = (struct sockaddr_in6 *)src;
680		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
681		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
682			return (EINVAL);
683
684		dst6 = (struct sockaddr_in6 *)dst;
685		if (IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr))
686			return (EINVAL);
687
688		error = in6_embedscope(&tunnel->t_src6, src6, NULL, NULL);
689		if (error != 0)
690			return (error);
691
692		error = in6_embedscope(&tunnel->t_dst6, dst6, NULL, NULL);
693		if (error != 0)
694			return (error);
695
696		break;
697#endif
698	default:
699		return (EAFNOSUPPORT);
700	}
701
702	/* commit */
703	tunnel->t_af = dst->sa_family;
704
705	return (0);
706}
707
708int
709gif_del_tunnel(struct gif_softc *sc)
710{
711	/* commit */
712	sc->sc_tunnel.t_af = AF_UNSPEC;
713
714	return (0);
715}
716
717int
718in_gif_input(struct mbuf **mp, int *offp, int proto, int af)
719{
720	struct mbuf *m = *mp;
721	struct gif_tunnel key;
722	struct ip *ip;
723	int rv;
724
725	ip = mtod(m, struct ip *);
726
727	key.t_af = AF_INET;
728	key.t_src4 = ip->ip_dst;
729	key.t_dst4 = ip->ip_src;
730
731	rv = gif_input(&key, mp, offp, proto, af, ip->ip_tos);
732	if (rv == -1)
733		rv = ipip_input(mp, offp, proto, af);
734
735	return (rv);
736}
737
#ifdef INET6
/*
 * IPv6 input entry point.  Builds a lookup key from the outer header
 * with the addresses swapped, extracts the outer traffic class from the
 * flow word and offers the packet to gif_input().  When gif_input()
 * returns -1 the packet is handed to ipip_input() instead.
 */
int
in6_gif_input(struct mbuf **mp, int *offp, int proto, int af)
{
	struct gif_tunnel key;
	struct ip6_hdr *oip6 = mtod(*mp, struct ip6_hdr *);
	uint8_t otos;
	int rv;

	key.t_af = AF_INET6;
	key.t_src6 = oip6->ip6_dst;
	key.t_dst6 = oip6->ip6_src;

	/* host order first, then shift; the uint8_t drops the version */
	otos = ntohl(oip6->ip6_flow) >> 20;

	rv = gif_input(&key, mp, offp, proto, af, otos);
	if (rv != -1)
		return (rv);

	return (ipip_input(mp, offp, proto, af));
}
#endif /* INET6 */
763
764struct gif_softc *
765gif_find(const struct gif_tunnel *key)
766{
767	struct gif_tunnel *t;
768	struct gif_softc *sc;
769
770	TAILQ_FOREACH(t, &gif_list, t_entry) {
771		if (gif_cmp(key, t) != 0)
772			continue;
773
774		sc = (struct gif_softc *)t;
775		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
776			continue;
777
778		return (sc);
779	}
780
781	return (NULL);
782}
783
784int
785gif_input(struct gif_tunnel *key, struct mbuf **mp, int *offp, int proto,
786    int af, uint8_t otos)
787{
788	struct mbuf *m = *mp;
789	struct gif_softc *sc;
790	struct ifnet *ifp;
791	uint8_t itos;
792	int rxhprio;
793
794	/* IP-in-IP header is caused by tunnel mode, so skip gif lookup */
795	if (m->m_flags & M_TUNNEL) {
796		m->m_flags &= ~M_TUNNEL;
797		return (-1);
798	}
799
800	key->t_rtableid = m->m_pkthdr.ph_rtableid;
801
802	sc = gif_find(key);
803	if (sc == NULL) {
804		memset(&key->t_dst, 0, sizeof(key->t_dst));
805		sc = gif_find(key);
806		if (sc == NULL)
807			return (-1);
808	}
809
810	m_adj(m, *offp); /* this is ours now */
811
812	ifp = &sc->sc_if;
813	rxhprio = sc->sc_rxhprio;
814
815	switch (proto) {
816	case IPPROTO_IPV4: {
817		struct ip *ip;
818
819		m = *mp = m_pullup(m, sizeof(*ip));
820		if (m == NULL)
821			return (IPPROTO_DONE);
822
823		ip = mtod(m, struct ip *);
824
825		itos = ip->ip_tos;
826		if (ip_ecn_egress(sc->sc_ecn, &otos, &itos) == 0)
827			goto drop;
828
829		if (itos != ip->ip_tos)
830			ip_tos_patch(ip, itos);
831
832		m->m_pkthdr.ph_family = AF_INET;
833		break;
834	}
835#ifdef INET6
836	case IPPROTO_IPV6: {
837		struct ip6_hdr *ip6;
838
839		m = *mp = m_pullup(m, sizeof(*ip6));
840		if (m == NULL)
841			return (IPPROTO_DONE);
842
843		ip6 = mtod(m, struct ip6_hdr *);
844
845		itos = ntohl(ip6->ip6_flow) >> 20;
846		if (!ip_ecn_egress(sc->sc_ecn, &otos, &itos))
847			goto drop;
848
849		CLR(ip6->ip6_flow, htonl(0xff << 20));
850		SET(ip6->ip6_flow, htonl(itos << 20));
851
852		m->m_pkthdr.ph_family = AF_INET6;
853		break;
854	}
855#endif /* INET6 */
856#ifdef MPLS
857	case IPPROTO_MPLS: {
858		uint32_t shim;
859		m = *mp = m_pullup(m, sizeof(shim));
860		if (m == NULL)
861			return (IPPROTO_DONE);
862
863		shim = *mtod(m, uint32_t *) & MPLS_EXP_MASK;
864		itos = (ntohl(shim) >> MPLS_EXP_OFFSET) << 5;
865
866		m->m_pkthdr.ph_family = AF_MPLS;
867		break;
868	}
869#endif /* MPLS */
870	default:
871		return (-1);
872	}
873
874	m->m_flags &= ~(M_MCAST|M_BCAST);
875
876	switch (rxhprio) {
877	case IF_HDRPRIO_PACKET:
878		/* nop */
879		break;
880	case IF_HDRPRIO_PAYLOAD:
881		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos);
882		break;
883	case IF_HDRPRIO_OUTER:
884		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
885		break;
886	default:
887		m->m_pkthdr.pf.prio = rxhprio;
888		break;
889	}
890
891	*mp = NULL;
892	if_vinput(ifp, m);
893	return (IPPROTO_DONE);
894
895 drop:
896	m_freemp(mp);
897	return (IPPROTO_DONE);
898}
899
900static inline int
901gif_ip_cmp(int af, const union gif_addr *a, const union gif_addr *b)
902{
903	switch (af) {
904#ifdef INET6
905	case AF_INET6:
906		return (memcmp(&a->in6, &b->in6, sizeof(a->in6)));
907#endif /* INET6 */
908	case AF_INET:
909		return (memcmp(&a->in4, &b->in4, sizeof(a->in4)));
910	default:
911		panic("%s: unsupported af %d", __func__, af);
912	}
913
914	return (0);
915}
916
917
918static inline int
919gif_cmp(const struct gif_tunnel *a, const struct gif_tunnel *b)
920{
921	int rv;
922
923	/* sort by routing table */
924	if (a->t_rtableid > b->t_rtableid)
925		return (1);
926	if (a->t_rtableid < b->t_rtableid)
927		return (-1);
928
929	/* sort by address */
930	if (a->t_af > b->t_af)
931		return (1);
932	if (a->t_af < b->t_af)
933		return (-1);
934
935	rv = gif_ip_cmp(a->t_af, &a->t_dst, &b->t_dst);
936	if (rv != 0)
937		return (rv);
938
939	rv = gif_ip_cmp(a->t_af, &a->t_src, &b->t_src);
940	if (rv != 0)
941		return (rv);
942
943	return (0);
944}
945