1/*	$NetBSD: ipsecif.c,v 1.22 2023/09/01 11:23:39 andvar Exp $  */
2
3/*
4 * Copyright (c) 2017 Internet Initiative Japan Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(0, "$NetBSD: ipsecif.c,v 1.22 2023/09/01 11:23:39 andvar Exp $");
31
32#ifdef _KERNEL_OPT
33#include "opt_inet.h"
34#include "opt_ipsec.h"
35#endif
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/socket.h>
40#include <sys/sockio.h>
41#include <sys/mbuf.h>
42#include <sys/errno.h>
43#include <sys/ioctl.h>
44#include <sys/syslog.h>
45#include <sys/kernel.h>
46
47#include <net/if.h>
48#include <net/route.h>
49
50#include <netinet/in.h>
51#include <netinet/in_systm.h>
52#include <netinet/ip.h>
53#include <netinet/ip_var.h>
54#include <netinet/in_var.h>
55#include <netinet/ip_encap.h>
56#include <netinet/ip_ecn.h>
57#include <netinet/ip_private.h>
58#include <netinet/udp.h>
59
60#ifdef INET6
61#include <netinet/ip6.h>
62#include <netinet6/ip6_var.h>
63#include <netinet6/ip6_private.h>
64#include <netinet6/in6_var.h>
65#include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
66#include <netinet/ip_ecn.h>
67#endif
68
69#include <netipsec/key.h>
70#include <netipsec/ipsecif.h>
71
72#include <net/if_ipsec.h>
73
74static int ipsecif_set_natt_ports(struct ipsec_variant *, struct mbuf *);
75static void ipsecif4_input(struct mbuf *, int, int, void *);
76static int ipsecif4_output(struct ipsec_variant *, int, struct mbuf *);
77static int ipsecif4_filter4(const struct ip *, struct ipsec_variant *,
78	struct ifnet *);
79
80#ifdef INET6
81static int ipsecif6_input(struct mbuf **, int *, int, void *);
82static int ipsecif6_output(struct ipsec_variant *, int, struct mbuf *);
83static int ipsecif6_filter6(const struct ip6_hdr *, struct ipsec_variant *,
84	struct ifnet *);
85#endif
86
87static int ip_ipsec_ttl = IPSEC_TTL;
88static int ip_ipsec_copy_tos = 0;
89#ifdef INET6
90int ip6_ipsec_hlim = IPSEC_HLIM;
91int ip6_ipsec_pmtu = 0;
92static int ip6_ipsec_copy_tos = 0;
93#endif
94
95static const struct encapsw ipsecif4_encapsw = {
96	.encapsw4 = {
97		.pr_input = ipsecif4_input,
98		.pr_ctlinput = NULL,
99	}
100};
101
102#ifdef INET6
103static const struct encapsw ipsecif6_encapsw;
104#endif
105
106static int
107ipsecif_set_natt_ports(struct ipsec_variant *var, struct mbuf *m)
108{
109
110	KASSERT(if_ipsec_heldref_variant(var));
111
112	if (var->iv_sport || var->iv_dport) {
113		struct m_tag *mtag;
114
115		mtag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
116		    sizeof(uint16_t) + sizeof(uint16_t), M_DONTWAIT);
117		if (mtag) {
118			uint16_t *natt_port;
119
120			natt_port = (uint16_t *)(mtag + 1);
121			natt_port[0] = var->iv_dport;
122			natt_port[1] = var->iv_sport;
123			m_tag_prepend(m, mtag);
124		} else {
125			return ENOBUFS;
126		}
127	}
128
129	return 0;
130}
131
132static struct mbuf *
133ipsecif4_prepend_hdr(struct ipsec_variant *var, struct mbuf *m,
134    uint8_t proto, uint8_t tos)
135{
136	struct ip *ip;
137	struct sockaddr_in *src, *dst;
138
139	src = satosin(var->iv_psrc);
140	dst = satosin(var->iv_pdst);
141
142	if (in_nullhost(src->sin_addr) || in_nullhost(src->sin_addr) ||
143	    src->sin_addr.s_addr == INADDR_BROADCAST ||
144	    dst->sin_addr.s_addr == INADDR_BROADCAST) {
145		m_freem(m);
146		return NULL;
147	}
148	m->m_flags &= ~M_BCAST;
149
150	if (IN_MULTICAST(src->sin_addr.s_addr) ||
151	    IN_MULTICAST(dst->sin_addr.s_addr)) {
152		m_freem(m);
153		return NULL;
154	}
155
156	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
157	if (m && M_UNWRITABLE(m, sizeof(struct ip)))
158		m = m_pullup(m, sizeof(struct ip));
159	if (m == NULL)
160		return NULL;
161
162	ip = mtod(m, struct ip *);
163	ip->ip_v = IPVERSION;
164	ip->ip_off = htons(0);
165	if (m->m_pkthdr.len < IP_MINFRAGSIZE)
166		ip->ip_id = 0;
167	else
168		ip->ip_id = ip_newid(NULL);
169	ip->ip_hl = sizeof(*ip) >> 2;
170	if (ip_ipsec_copy_tos)
171		ip->ip_tos = tos;
172	else
173		ip->ip_tos = 0;
174	ip->ip_sum = 0;
175	ip->ip_src = src->sin_addr;
176	ip->ip_dst = dst->sin_addr;
177	ip->ip_p = proto;
178	ip->ip_ttl = ip_ipsec_ttl;
179	ip->ip_len = htons(m->m_pkthdr.len);
180#ifndef IPSEC_TX_TOS_CLEAR
181	struct ifnet *ifp = &var->iv_softc->ipsec_if;
182	if (ifp->if_flags & IFF_ECN)
183		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
184	else
185		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
186#endif
187
188	return m;
189}
190
191static int
192ipsecif4_needfrag(struct mbuf *m, struct ipsecrequest *isr)
193{
194	struct ip ip0;
195	struct ip *ip;
196	int mtu;
197	struct secasvar *sav;
198
199	sav = key_lookup_sa_bysaidx(&isr->saidx);
200	if (sav == NULL)
201		return 0;
202
203	if (!(sav->natt_type & UDP_ENCAP_ESPINUDP)) {
204		mtu = 0;
205		goto out;
206	}
207
208	if (m->m_len < sizeof(struct ip)) {
209		m_copydata(m, 0, sizeof(ip0), &ip0);
210		ip = &ip0;
211	} else {
212		ip = mtod(m, struct ip *);
213	}
214	mtu = sav->esp_frag;
215	if (ntohs(ip->ip_len) <= mtu)
216		mtu = 0;
217
218out:
219	KEY_SA_UNREF(&sav);
220	return mtu;
221}
222
223static struct mbuf *
224ipsecif4_flowinfo(struct mbuf *m, int family, int *proto0, u_int8_t *tos0)
225{
226	const struct ip *ip;
227	int proto;
228	int tos;
229
230	KASSERT(proto0 != NULL);
231	KASSERT(tos0 != NULL);
232
233	switch (family) {
234	case AF_INET:
235		proto = IPPROTO_IPV4;
236		if (m->m_len < sizeof(*ip)) {
237			m = m_pullup(m, sizeof(*ip));
238			if (m == NULL) {
239				*tos0 = 0;
240				*proto0 = 0;
241				return NULL;
242			}
243		}
244		ip = mtod(m, const struct ip *);
245		tos = ip->ip_tos;
246		/* TODO: support ALTQ for inner packet */
247		break;
248#ifdef INET6
249	case AF_INET6: {
250		const struct ip6_hdr *ip6;
251		proto = IPPROTO_IPV6;
252		if (m->m_len < sizeof(*ip6)) {
253			m = m_pullup(m, sizeof(*ip6));
254			if (m == NULL) {
255				*tos0 = 0;
256				*proto0 = 0;
257				return NULL;
258			}
259		}
260		ip6 = mtod(m, const struct ip6_hdr *);
261		tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
262		/* TODO: support ALTQ for inner packet */
263		break;
264	}
265#endif /* INET6 */
266	default:
267		*tos0 = 0;
268		*proto0 = 0;
269		return NULL;
270	}
271
272	*proto0 = proto;
273	*tos0 = tos;
274	return m;
275}
276
277static int
278ipsecif4_fragout(struct ipsec_variant *var, int family, struct mbuf *m, int mtu)
279{
280	struct ifnet *ifp = &var->iv_softc->ipsec_if;
281	struct mbuf *next;
282	struct m_tag *mtag;
283	int error;
284
285	KASSERT(if_ipsec_heldref_variant(var));
286
287	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS);
288	if (mtag)
289		m_tag_delete(m, mtag);
290
291	/* consider new IP header prepended in ipsecif4_output() */
292	if (mtu <= sizeof(struct ip)) {
293		m_freem(m);
294		return ENETUNREACH;
295	}
296	m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
297	error = ip_fragment(m, ifp, mtu - sizeof(struct ip));
298	if (error)
299		return error;
300
301	for (error = 0; m; m = next) {
302		next = m->m_nextpkt;
303		m->m_nextpkt = NULL;
304		if (error) {
305			m_freem(m);
306			continue;
307		}
308
309		error = ipsecif4_output(var, family, m);
310	}
311	if (error == 0)
312		IP_STATINC(IP_STAT_FRAGMENTED);
313
314	return error;
315}
316
317int
318ipsecif4_encap_func(struct mbuf *m, struct ip *ip, struct ipsec_variant *var)
319{
320	struct m_tag *mtag;
321	struct sockaddr_in *src, *dst;
322	u_int16_t src_port = 0;
323	u_int16_t dst_port = 0;
324
325	KASSERT(var != NULL);
326
327	src = satosin(var->iv_psrc);
328	dst = satosin(var->iv_pdst);
329	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS);
330	if (mtag) {
331		u_int16_t *ports;
332
333		ports = (u_int16_t *)(mtag + 1);
334		src_port = ports[0];
335		dst_port = ports[1];
336	}
337
338	/* address match */
339	if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
340	    dst->sin_addr.s_addr != ip->ip_src.s_addr)
341		return 0;
342
343	/* UDP encap? */
344	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
345		goto match;
346
347	/* port match */
348	if (src_port != var->iv_dport ||
349	    dst_port != var->iv_sport) {
350#ifdef DEBUG
351		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
352		    __func__, ntohs(src_port), ntohs(dst_port),
353		    ntohs(var->iv_sport), ntohs(var->iv_dport));
354#endif
355		return 0;
356	}
357
358match:
359	/*
360	 * hide NAT-T information from encapsulated traffics.
361	 * they don't know about IPsec.
362	 */
363	if (mtag)
364		m_tag_delete(m, mtag);
365	return sizeof(src->sin_addr) + sizeof(dst->sin_addr);
366}
367
368static int
369ipsecif4_output(struct ipsec_variant *var, int family, struct mbuf *m)
370{
371	struct secpolicy *sp = NULL;
372	u_int8_t tos;
373	int proto;
374	int error;
375	int mtu;
376	u_long sa_mtu = 0;
377
378	KASSERT(if_ipsec_heldref_variant(var));
379	KASSERT(if_ipsec_variant_is_configured(var));
380	KASSERT(var->iv_psrc->sa_family == AF_INET);
381	KASSERT(var->iv_pdst->sa_family == AF_INET);
382
383	switch (family) {
384	case AF_INET:
385		sp = IV_SP_OUT(var);
386		break;
387	case AF_INET6:
388		sp = IV_SP_OUT6(var);
389		break;
390	default:
391		m_freem(m);
392		return EAFNOSUPPORT;
393	}
394	KASSERT(sp != NULL);
395	/*
396	 * The SPs in ipsec_variant are prevented from freed by
397	 * ipsec_variant->iv_psref. So, KEY_SP_REF() is unnecessary here.
398	 *
399	 * However, lastused should be updated.
400	 */
401	key_sp_touch(sp);
402
403	KASSERT(sp->policy != IPSEC_POLICY_NONE);
404	KASSERT(sp->policy != IPSEC_POLICY_ENTRUST);
405	KASSERT(sp->policy != IPSEC_POLICY_BYPASS);
406	if (sp->policy != IPSEC_POLICY_IPSEC) {
407		m_freem(m);
408		error = ENETUNREACH;
409		goto done;
410	}
411
412	/* get flowinfo */
413	m = ipsecif4_flowinfo(m, family, &proto, &tos);
414	if (m == NULL) {
415		error = ENETUNREACH;
416		goto done;
417	}
418
419	/* prepend new IP header */
420	m = ipsecif4_prepend_hdr(var, m, proto, tos);
421	if (m == NULL) {
422		error = ENETUNREACH;
423		goto done;
424	}
425
426	/*
427	 * Normal netipsec's NAT-T fragmentation is done in ip_output().
428	 * See "natt_frag" processing.
429	 * However, ipsec(4) interface's one is not done in the same way,
430	 * so we must do NAT-T fragmentation by own code.
431	 */
432	/* NAT-T ESP fragmentation */
433	mtu = ipsecif4_needfrag(m, sp->req);
434	if (mtu > 0)
435		return ipsecif4_fragout(var, family, m, mtu);
436
437	/* set NAT-T ports */
438	error = ipsecif_set_natt_ports(var, m);
439	if (error) {
440		m_freem(m);
441		goto done;
442	}
443
444	/* IPsec output */
445	IP_STATINC(IP_STAT_LOCALOUT);
446	error = ipsec4_process_packet(m, sp->req, &sa_mtu);
447	if (error == ENOENT)
448		error = 0;
449	/*
450	 * frangmentation is already done in ipsecif4_fragout(),
451	 * so ipsec4_process_packet() must not do fragmentation here.
452	 */
453	KASSERT(sa_mtu == 0);
454
455done:
456	return error;
457}
458
459#ifdef INET6
460int
461ipsecif6_encap_func(struct mbuf *m, struct ip6_hdr *ip6, struct ipsec_variant *var)
462{
463	struct m_tag *mtag;
464	struct sockaddr_in6 *src, *dst;
465	u_int16_t src_port = 0;
466	u_int16_t dst_port = 0;
467
468	KASSERT(var != NULL);
469
470	src = satosin6(var->iv_psrc);
471	dst = satosin6(var->iv_pdst);
472	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS);
473	if (mtag) {
474		u_int16_t *ports;
475
476		ports = (u_int16_t *)(mtag + 1);
477		src_port = ports[0];
478		dst_port = ports[1];
479	}
480
481	/* address match */
482	if (!IN6_ARE_ADDR_EQUAL(&src->sin6_addr, &ip6->ip6_dst) ||
483	    !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_src))
484		return 0;
485
486	/* UDP encap? */
487	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
488		goto match;
489
490	/* port match */
491	if (src_port != var->iv_dport ||
492	    dst_port != var->iv_sport) {
493#ifdef DEBUG
494		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
495		    __func__, ntohs(src_port), ntohs(dst_port),
496		    ntohs(var->iv_sport), ntohs(var->iv_dport));
497#endif
498		return 0;
499	}
500
501match:
502	/*
503	 * hide NAT-T information from encapsulated traffics.
504	 * they don't know about IPsec.
505	 */
506	if (mtag)
507		m_tag_delete(m, mtag);
508	return sizeof(src->sin6_addr) + sizeof(dst->sin6_addr);
509}
510
511static int
512ipsecif6_output(struct ipsec_variant *var, int family, struct mbuf *m)
513{
514	struct ifnet *ifp = &var->iv_softc->ipsec_if;
515	struct ipsec_softc *sc = ifp->if_softc;
516	struct route *ro_pc;
517	kmutex_t *lock_pc;
518	struct rtentry *rt;
519	struct sockaddr_in6 *sin6_src;
520	struct sockaddr_in6 *sin6_dst;
521	struct ip6_hdr *ip6;
522	int proto, error, flags;
523	u_int8_t itos, otos;
524	union {
525		struct sockaddr		dst;
526		struct sockaddr_in6	dst6;
527	} u;
528
529	KASSERT(if_ipsec_heldref_variant(var));
530	KASSERT(if_ipsec_variant_is_configured(var));
531
532	sin6_src = satosin6(var->iv_psrc);
533	sin6_dst = satosin6(var->iv_pdst);
534
535	KASSERT(sin6_src->sin6_family == AF_INET6);
536	KASSERT(sin6_dst->sin6_family == AF_INET6);
537
538	switch (family) {
539#ifdef INET
540	case AF_INET:
541	    {
542		struct ip *ip;
543
544		proto = IPPROTO_IPV4;
545		if (m->m_len < sizeof(*ip)) {
546			m = m_pullup(m, sizeof(*ip));
547			if (m == NULL)
548				return ENOBUFS;
549		}
550		ip = mtod(m, struct ip *);
551		itos = ip->ip_tos;
552		/* TODO: support ALTQ for inner packet */
553		break;
554	    }
555#endif /* INET */
556	case AF_INET6:
557	    {
558		struct ip6_hdr *xip6;
559		proto = IPPROTO_IPV6;
560		if (m->m_len < sizeof(*xip6)) {
561			m = m_pullup(m, sizeof(*xip6));
562			if (m == NULL)
563				return ENOBUFS;
564		}
565		xip6 = mtod(m, struct ip6_hdr *);
566		itos = (ntohl(xip6->ip6_flow) >> 20) & 0xff;
567		/* TODO: support ALTQ for inner packet */
568		break;
569	    }
570	default:
571		m_freem(m);
572		return EAFNOSUPPORT;
573	}
574
575	/* prepend new IP header */
576	M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
577	if (m && M_UNWRITABLE(m, sizeof(struct ip6_hdr)))
578		m = m_pullup(m, sizeof(struct ip6_hdr));
579	if (m == NULL)
580		return ENOBUFS;
581
582	ip6 = mtod(m, struct ip6_hdr *);
583	ip6->ip6_flow	= 0;
584	ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
585	ip6->ip6_vfc	|= IPV6_VERSION;
586#if 0	/* ip6->ip6_plen will be filled by ip6_output */
587	ip6->ip6_plen	= htons((u_short)m->m_pkthdr.len - sizeof(*ip6));
588#endif
589	ip6->ip6_nxt	= proto;
590	ip6->ip6_hlim	= ip6_ipsec_hlim;
591	ip6->ip6_src	= sin6_src->sin6_addr;
592	/* bidirectional configured tunnel mode */
593	if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr)) {
594		ip6->ip6_dst = sin6_dst->sin6_addr;
595	} else  {
596		m_freem(m);
597		return ENETUNREACH;
598	}
599#ifndef IPSEC_TX_TOS_CLEAR
600	if (!ip6_ipsec_copy_tos)
601		otos = 0;
602
603	if (ifp->if_flags & IFF_ECN)
604		ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
605	else
606		ip_ecn_ingress(ECN_NOCARE, &otos, &itos);
607#else
608	if (ip6_ipsec_copy_tos)
609		otos = itos;
610	else
611		otos = 0;
612#endif
613	ip6->ip6_flow &= ~ntohl(0xff00000);
614	ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
615
616	sockaddr_in6_init(&u.dst6, &sin6_dst->sin6_addr, 0, 0, 0);
617
618	if_tunnel_get_ro(sc->ipsec_ro_percpu, &ro_pc, &lock_pc);
619	if ((rt = rtcache_lookup(ro_pc, &u.dst)) == NULL) {
620		if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
621		m_freem(m);
622		return ENETUNREACH;
623	}
624
625	if (rt->rt_ifp == ifp) {
626		rtcache_unref(rt, ro_pc);
627		rtcache_free(ro_pc);
628		if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
629		m_freem(m);
630		return ENETUNREACH;
631	}
632	rtcache_unref(rt, ro_pc);
633
634	/* set NAT-T ports */
635	error = ipsecif_set_natt_ports(var, m);
636	if (error) {
637		m_freem(m);
638		goto out;
639	}
640
641	/*
642	 * - IPSEC_PMTU_MINMTU
643	 *   Force fragmentation to minimum MTU to avoid path MTU discovery
644	 * - IPSEC_PMTU_OUTERMTU
645	 *   Trust outer MTU is large enough to send all packets
646	 *
647	 * It is too painful to ask for resend of inner packet, to achieve
648	 * path MTU discovery for encapsulated packets.
649	 *
650	 * See RFC4459.
651	 */
652	if (sc->ipsec_pmtu == IPSEC_PMTU_SYSDEFAULT) {
653		switch (ip6_ipsec_pmtu) {
654		case IPSEC_PMTU_MINMTU:
655			flags = IPV6_MINMTU;
656			break;
657		case IPSEC_PMTU_OUTERMTU:
658			flags = 0;
659			break;
660		default:
661#ifdef DEBUG
662			log(LOG_DEBUG, "%s: ignore unexpected ip6_ipsec_pmtu %d\n",
663			    __func__, ip6_ipsec_pmtu);
664#endif
665			flags = IPV6_MINMTU;
666			break;
667		}
668	} else {
669		switch (sc->ipsec_pmtu) {
670		case IPSEC_PMTU_MINMTU:
671			flags = IPV6_MINMTU;
672			break;
673		case IPSEC_PMTU_OUTERMTU:
674			flags = 0;
675			break;
676		default:
677#ifdef DEBUG
678			log(LOG_DEBUG, "%s: ignore unexpected ipsec_pmtu of %s %d\n",
679			    __func__, ifp->if_xname, sc->ipsec_pmtu);
680#endif
681			flags = IPV6_MINMTU;
682			break;
683		}
684	}
685	error = ip6_output(m, 0, ro_pc, flags, 0, NULL, NULL);
686
687out:
688	if (error)
689		rtcache_free(ro_pc);
690	if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
691
692	return error;
693}
694#endif /* INET6 */
695
696static void
697ipsecif4_input(struct mbuf *m, int off, int proto, void *eparg)
698{
699	struct ifnet *ipsecp;
700	struct ipsec_softc *sc = eparg;
701	struct ipsec_variant *var;
702	const struct ip *ip;
703	int af;
704#ifndef IPSEC_TX_TOS_CLEAR
705	u_int8_t otos;
706#endif
707	struct psref psref_rcvif;
708	struct psref psref_var;
709	struct ifnet *rcvif;
710
711	KASSERT(sc != NULL);
712
713	ipsecp = &sc->ipsec_if;
714	if ((ipsecp->if_flags & IFF_UP) == 0) {
715		m_freem(m);
716		ip_statinc(IP_STAT_NOIPSEC);
717		return;
718	}
719
720	var = if_ipsec_getref_variant(sc, &psref_var);
721	if (if_ipsec_variant_is_unconfigured(var)) {
722		if_ipsec_putref_variant(var, &psref_var);
723		m_freem(m);
724		ip_statinc(IP_STAT_NOIPSEC);
725		return;
726	}
727
728	ip = mtod(m, const struct ip *);
729
730	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
731	if (rcvif == NULL || !ipsecif4_filter4(ip, var, rcvif)) {
732		m_put_rcvif_psref(rcvif, &psref_rcvif);
733		if_ipsec_putref_variant(var, &psref_var);
734		m_freem(m);
735		ip_statinc(IP_STAT_NOIPSEC);
736		return;
737	}
738	m_put_rcvif_psref(rcvif, &psref_rcvif);
739	if_ipsec_putref_variant(var, &psref_var);
740#ifndef IPSEC_TX_TOS_CLEAR
741	otos = ip->ip_tos;
742#endif
743	m_adj(m, off);
744
745	switch (proto) {
746	case IPPROTO_IPV4:
747	    {
748		struct ip *xip;
749		af = AF_INET;
750		if (M_UNWRITABLE(m, sizeof(*xip))) {
751			m = m_pullup(m, sizeof(*xip));
752			if (m == NULL)
753				return;
754		}
755		xip = mtod(m, struct ip *);
756#ifndef IPSEC_TX_TOS_CLEAR
757		if (ipsecp->if_flags & IFF_ECN)
758			ip_ecn_egress(ECN_ALLOWED, &otos, &xip->ip_tos);
759		else
760			ip_ecn_egress(ECN_NOCARE, &otos, &xip->ip_tos);
761#endif
762		break;
763	    }
764#ifdef INET6
765	case IPPROTO_IPV6:
766	    {
767		struct ip6_hdr *ip6;
768		u_int8_t itos;
769		af = AF_INET6;
770		if (M_UNWRITABLE(m, sizeof(*ip6))) {
771			m = m_pullup(m, sizeof(*ip6));
772			if (m == NULL)
773				return;
774		}
775		ip6 = mtod(m, struct ip6_hdr *);
776		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
777#ifndef IPSEC_TX_TOS_CLEAR
778		if (ipsecp->if_flags & IFF_ECN)
779			ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
780		else
781			ip_ecn_egress(ECN_NOCARE, &otos, &itos);
782#endif
783		ip6->ip6_flow &= ~htonl(0xff << 20);
784		ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
785		break;
786	    }
787#endif /* INET6 */
788	default:
789		ip_statinc(IP_STAT_NOIPSEC);
790		m_freem(m);
791		return;
792	}
793	if_ipsec_input(m, af, ipsecp);
794
795	return;
796}
797
798/*
799 * validate and filter the packet
800 */
801static int
802ipsecif4_filter4(const struct ip *ip, struct ipsec_variant *var,
803    struct ifnet *ifp)
804{
805	struct sockaddr_in *src, *dst;
806
807	src = satosin(var->iv_psrc);
808	dst = satosin(var->iv_pdst);
809
810	return in_tunnel_validate(ip, src->sin_addr, dst->sin_addr);
811}
812
813#ifdef INET6
814static int
815ipsecif6_input(struct mbuf **mp, int *offp, int proto, void *eparg)
816{
817	struct mbuf *m = *mp;
818	struct ifnet *ipsecp;
819	struct ipsec_softc *sc = eparg;
820	struct ipsec_variant *var;
821	struct ip6_hdr *ip6;
822	int af = 0;
823#ifndef IPSEC_TX_TOS_CLEAR
824	u_int32_t otos;
825#endif
826	struct psref psref_rcvif;
827	struct psref psref_var;
828	struct ifnet *rcvif;
829
830	KASSERT(eparg != NULL);
831
832	ipsecp = &sc->ipsec_if;
833	if ((ipsecp->if_flags & IFF_UP) == 0) {
834		m_freem(m);
835		IP6_STATINC(IP6_STAT_NOIPSEC);
836		return IPPROTO_DONE;
837	}
838
839	var = if_ipsec_getref_variant(sc, &psref_var);
840	if (if_ipsec_variant_is_unconfigured(var)) {
841		if_ipsec_putref_variant(var, &psref_var);
842		m_freem(m);
843		IP6_STATINC(IP6_STAT_NOIPSEC);
844		return IPPROTO_DONE;
845	}
846
847	ip6 = mtod(m, struct ip6_hdr *);
848
849	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
850	if (rcvif == NULL || !ipsecif6_filter6(ip6, var, rcvif)) {
851		m_put_rcvif_psref(rcvif, &psref_rcvif);
852		if_ipsec_putref_variant(var, &psref_var);
853		m_freem(m);
854		IP6_STATINC(IP6_STAT_NOIPSEC);
855		return IPPROTO_DONE;
856	}
857	m_put_rcvif_psref(rcvif, &psref_rcvif);
858	if_ipsec_putref_variant(var, &psref_var);
859
860#ifndef IPSEC_TX_TOS_CLEAR
861	otos = ip6->ip6_flow;
862#endif
863	m_adj(m, *offp);
864
865	switch (proto) {
866#ifdef INET
867	case IPPROTO_IPV4:
868	    {
869		af = AF_INET;
870#ifndef IPSEC_TX_TOS_CLEAR
871		struct ip *ip;
872		u_int8_t otos8;
873		otos8 = (ntohl(otos) >> 20) & 0xff;
874
875		if (M_UNWRITABLE(m, sizeof(*ip))) {
876			m = m_pullup(m, sizeof(*ip));
877			if (m == NULL)
878				return IPPROTO_DONE;
879		}
880		ip = mtod(m, struct ip *);
881		if (ipsecp->if_flags & IFF_ECN)
882			ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos);
883		else
884			ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
885#endif
886		break;
887	    }
888#endif /* INET */
889	case IPPROTO_IPV6:
890	    {
891		af = AF_INET6;
892#ifndef IPSEC_TX_TOS_CLEAR
893		struct ip6_hdr *xip6;
894
895		if (M_UNWRITABLE(m, sizeof(*xip6))) {
896			m = m_pullup(m, sizeof(*xip6));
897			if (m == NULL)
898				return IPPROTO_DONE;
899		}
900		xip6 = mtod(m, struct ip6_hdr *);
901		if (ipsecp->if_flags & IFF_ECN)
902			ip6_ecn_egress(ECN_ALLOWED, &otos, &xip6->ip6_flow);
903		else
904			ip6_ecn_egress(ECN_NOCARE, &otos, &xip6->ip6_flow);
905		break;
906#endif
907	    }
908	default:
909		IP6_STATINC(IP6_STAT_NOIPSEC);
910		m_freem(m);
911		return IPPROTO_DONE;
912	}
913
914	if_ipsec_input(m, af, ipsecp);
915	return IPPROTO_DONE;
916}
917
918/*
919 * validate and filter the packet.
920 */
921static int
922ipsecif6_filter6(const struct ip6_hdr *ip6, struct ipsec_variant *var,
923    struct ifnet *ifp)
924{
925	struct sockaddr_in6 *src, *dst;
926
927	src = satosin6(var->iv_psrc);
928	dst = satosin6(var->iv_pdst);
929
930	return in6_tunnel_validate(ip6, &src->sin6_addr, &dst->sin6_addr);
931}
932#endif /* INET6 */
933
934int
935ipsecif4_attach(struct ipsec_variant *var)
936{
937	struct ipsec_softc *sc = var->iv_softc;
938
939	KASSERT(if_ipsec_variant_is_configured(var));
940
941	if (var->iv_encap_cookie4 != NULL)
942		return EALREADY;
943
944	var->iv_encap_cookie4 = encap_attach_addr(AF_INET, -1,
945	    var->iv_psrc, var->iv_pdst, if_ipsec_encap_func, &ipsecif4_encapsw,
946	    sc);
947	if (var->iv_encap_cookie4 == NULL)
948		return EEXIST;
949
950	var->iv_output = ipsecif4_output;
951	return 0;
952}
953
954int
955ipsecif4_detach(struct ipsec_variant *var)
956{
957	int error;
958
959	if (var->iv_encap_cookie4 == NULL)
960		return 0;
961
962	var->iv_output = NULL;
963	error = encap_detach(var->iv_encap_cookie4);
964	if (error == 0)
965		var->iv_encap_cookie4 = NULL;
966
967	return error;
968}
969
970#ifdef INET6
971int
972ipsecif6_attach(struct ipsec_variant *var)
973{
974	struct ipsec_softc *sc = var->iv_softc;
975
976	KASSERT(if_ipsec_variant_is_configured(var));
977	KASSERT(var->iv_encap_cookie6 == NULL);
978
979	var->iv_encap_cookie6 = encap_attach_addr(AF_INET6, -1,
980	    var->iv_psrc, var->iv_pdst, if_ipsec_encap_func, &ipsecif6_encapsw,
981	    sc);
982	if (var->iv_encap_cookie6 == NULL)
983		return EEXIST;
984
985	var->iv_output = ipsecif6_output;
986	return 0;
987}
988
989int
990ipsecif6_detach(struct ipsec_variant *var)
991{
992	struct ipsec_softc *sc = var->iv_softc;
993	int error;
994
995	KASSERT(var->iv_encap_cookie6 != NULL);
996
997	if_tunnel_ro_percpu_rtcache_free(sc->ipsec_ro_percpu);
998
999	var->iv_output = NULL;
1000	error = encap_detach(var->iv_encap_cookie6);
1001	if (error == 0)
1002		var->iv_encap_cookie6 = NULL;
1003	return error;
1004}
1005
1006void *
1007ipsecif6_ctlinput(int cmd, const struct sockaddr *sa, void *d, void *eparg)
1008{
1009	struct ipsec_softc *sc = eparg;
1010	struct ip6ctlparam *ip6cp = NULL;
1011	struct ip6_hdr *ip6;
1012	const struct sockaddr_in6 *dst6;
1013	struct route *ro_pc;
1014	kmutex_t *lock_pc;
1015
1016	if (sa->sa_family != AF_INET6 ||
1017	    sa->sa_len != sizeof(struct sockaddr_in6))
1018		return NULL;
1019
1020	if ((unsigned)cmd >= PRC_NCMDS)
1021		return NULL;
1022	if (cmd == PRC_HOSTDEAD)
1023		d = NULL;
1024	else if (inet6ctlerrmap[cmd] == 0)
1025		return NULL;
1026
1027	/* if the parameter is from icmp6, decode it. */
1028	if (d != NULL) {
1029		ip6cp = (struct ip6ctlparam *)d;
1030		ip6 = ip6cp->ip6c_ip6;
1031	} else {
1032		ip6 = NULL;
1033	}
1034
1035	if (!ip6)
1036		return NULL;
1037
1038	if_tunnel_get_ro(sc->ipsec_ro_percpu, &ro_pc, &lock_pc);
1039	dst6 = satocsin6(rtcache_getdst(ro_pc));
1040	/* XXX scope */
1041	if (dst6 == NULL)
1042		;
1043	else if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst6->sin6_addr))
1044		/* flush route cache */
1045		rtcache_free(ro_pc);
1046
1047	if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
1048
1049	return NULL;
1050}
1051
1052ENCAP_PR_WRAP_CTLINPUT(ipsecif6_ctlinput)
1053#define	ipsecif6_ctlinput	ipsecif6_ctlinput_wrapper
1054
1055static const struct encapsw ipsecif6_encapsw = {
1056	.encapsw6 = {
1057		.pr_input = ipsecif6_input,
1058		.pr_ctlinput = ipsecif6_ctlinput,
1059	}
1060};
1061#endif /* INET6 */
1062