1/*	$NetBSD: ipsec_output.c,v 1.86 2023/01/27 09:33:43 ozaki-r Exp $	*/
2
3/*
4 * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $FreeBSD: sys/netipsec/ipsec_output.c,v 1.3.2.2 2003/03/28 20:32:53 sam Exp $
29 */
30
31#include <sys/cdefs.h>
32__KERNEL_RCSID(0, "$NetBSD: ipsec_output.c,v 1.86 2023/01/27 09:33:43 ozaki-r Exp $");
33
34#if defined(_KERNEL_OPT)
35#include "opt_inet.h"
36#include "opt_net_mpsafe.h"
37#endif
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/mbuf.h>
42#include <sys/domain.h>
43#include <sys/protosw.h>
44#include <sys/socket.h>
45#include <sys/errno.h>
46#include <sys/syslog.h>
47
48#include <net/if.h>
49#include <net/route.h>
50
51#include <netinet/in.h>
52#include <netinet/in_systm.h>
53#include <netinet/ip.h>
54#include <netinet/ip_var.h>
55#include <netinet/in_var.h>
56#include <netinet/ip_ecn.h>
57
58#include <netinet/ip6.h>
59#ifdef INET6
60#include <netinet6/ip6_var.h>
61#endif
62#include <netinet/in_pcb.h>
63#ifdef INET6
64#include <netinet/icmp6.h>
65#endif
66#include <netinet/udp.h>
67
68#include <netipsec/ipsec.h>
69#include <netipsec/ipsec_var.h>
70#include <netipsec/ipsec_private.h>
71#ifdef INET6
72#include <netipsec/ipsec6.h>
73#endif
74#include <netipsec/ah_var.h>
75#include <netipsec/esp_var.h>
76#include <netipsec/ipcomp_var.h>
77
78#include <netipsec/xform.h>
79
80#include <netipsec/key.h>
81#include <netipsec/keydb.h>
82#include <netipsec/key_debug.h>
83
/*
 * Route cache handle used when reinjecting processed packets into the
 * IP stack: ipsec_reinject_ipstack() takes a reference on it around
 * ip_output()/ip6_output(), and ipsec_output_init() allocates it.
 */
static percpu_t *ipsec_rtcache_percpu __cacheline_aligned;
85
86/*
87 * Add a IPSEC_OUT_DONE tag to mark that we have finished the ipsec processing
88 * It will be used by ip{,6}_output to check if we have already or not
89 * processed this packet.
90 */
91static int
92ipsec_register_done(struct mbuf *m, int *error)
93{
94	struct m_tag *mtag;
95
96	mtag = m_tag_get(PACKET_TAG_IPSEC_OUT_DONE, 0, M_NOWAIT);
97	if (mtag == NULL) {
98		IPSECLOG(LOG_DEBUG, "could not get packet tag\n");
99		*error = ENOMEM;
100		return -1;
101	}
102
103	m_tag_prepend(m, mtag);
104	return 0;
105}
106
107static int
108ipsec_reinject_ipstack(struct mbuf *m, int af, int flags)
109{
110	int rv = -1;
111	struct route *ro;
112
113	KASSERT(af == AF_INET || af == AF_INET6);
114
115	KERNEL_LOCK_UNLESS_NET_MPSAFE();
116	ro = rtcache_percpu_getref(ipsec_rtcache_percpu);
117	switch (af) {
118#ifdef INET
119	case AF_INET:
120		rv = ip_output(m, NULL, ro, IP_RAWOUTPUT|IP_NOIPNEWID,
121		    NULL, NULL);
122		break;
123#endif
124#ifdef INET6
125	case AF_INET6:
126		/*
127		 * We don't need massage, IPv6 header fields are always in
128		 * net endian.
129		 */
130		rv = ip6_output(m, NULL, ro, flags, NULL, NULL, NULL);
131		break;
132#endif
133	}
134	rtcache_percpu_putref(ipsec_rtcache_percpu);
135	KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
136
137	return rv;
138}
139
140int
141ipsec_process_done(struct mbuf *m, const struct ipsecrequest *isr,
142    struct secasvar *sav, int flags)
143{
144	struct secasindex *saidx;
145	int error;
146#ifdef INET
147	struct ip *ip;
148#endif
149#ifdef INET6
150	struct ip6_hdr *ip6;
151#endif
152	struct mbuf *mo;
153	struct udphdr *udp = NULL;
154	int hlen, roff, iphlen;
155
156	KASSERT(m != NULL);
157	KASSERT(isr != NULL);
158	KASSERT(sav != NULL);
159
160	saidx = &sav->sah->saidx;
161
162	if (sav->natt_type != 0) {
163		hlen = sizeof(struct udphdr);
164
165		switch (saidx->dst.sa.sa_family) {
166#ifdef INET
167		case AF_INET:
168			ip = mtod(m, struct ip *);
169			mo = m_makespace(m, sizeof(struct ip), hlen, &roff);
170			iphlen = ip->ip_hl << 2;
171			break;
172#endif
173#ifdef INET6
174		case AF_INET6:
175			ip6 = mtod(m, struct ip6_hdr *);
176			mo = m_makespace(m, sizeof(struct ip6_hdr), hlen, &roff);
177			iphlen = sizeof(*ip6);
178			break;
179#endif
180		default:
181			IPSECLOG(LOG_DEBUG, "unknown protocol family %u\n",
182			    saidx->dst.sa.sa_family);
183			error = ENXIO;
184			goto bad;
185		}
186
187		if (mo == NULL) {
188			char buf[IPSEC_ADDRSTRLEN];
189			IPSECLOG(LOG_DEBUG,
190			    "failed to inject %u byte UDP for SA %s/%08lx\n",
191			    hlen, ipsec_address(&saidx->dst, buf, sizeof(buf)),
192			    (u_long)ntohl(sav->spi));
193			error = ENOBUFS;
194			goto bad;
195		}
196
197		udp = (struct udphdr *)(mtod(mo, char *) + roff);
198		udp->uh_sport = key_portfromsaddr(&saidx->src);
199		udp->uh_dport = key_portfromsaddr(&saidx->dst);
200		udp->uh_sum = 0;
201		udp->uh_ulen = htons(m->m_pkthdr.len - iphlen);
202	}
203
204	/*
205	 * Fix the header length, for AH processing.
206	 */
207	switch (saidx->dst.sa.sa_family) {
208#ifdef INET
209	case AF_INET:
210		ip = mtod(m, struct ip *);
211		ip->ip_len = htons(m->m_pkthdr.len);
212		/* IPv4 packet does not have to be set UDP checksum. */
213		if (sav->natt_type != 0)
214			ip->ip_p = IPPROTO_UDP;
215		break;
216#endif
217#ifdef INET6
218	case AF_INET6:
219		if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) {
220			error = ENXIO;
221			goto bad;
222		}
223		if (m->m_pkthdr.len - sizeof(struct ip6_hdr) > IPV6_MAXPACKET) {
224			/* No jumbogram support. */
225			error = ENXIO;	/*?*/
226			goto bad;
227		}
228		ip6 = mtod(m, struct ip6_hdr *);
229		ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
230		/* IPv6 packet should be set UDP checksum. */
231		if (sav->natt_type != 0) {
232			ip6->ip6_nxt = IPPROTO_UDP;
233			ipsec6_udp_cksum(m);
234		}
235		break;
236#endif
237	default:
238		IPSECLOG(LOG_DEBUG, "unknown protocol family %u\n",
239		    saidx->dst.sa.sa_family);
240		error = ENXIO;
241		goto bad;
242	}
243
244	key_sa_recordxfer(sav, m);
245
246	/*
247	 * If there's another (bundled) SA to apply, do so.
248	 * Note that this puts a burden on the kernel stack size.
249	 * If this is a problem we'll need to introduce a queue
250	 * to set the packet on so we can unwind the stack before
251	 * doing further processing.
252	 */
253	if (isr->next) {
254		IPSEC_STATINC(IPSEC_STAT_OUT_BUNDLESA);
255		switch (saidx->dst.sa.sa_family) {
256#ifdef INET
257		case AF_INET:
258			return ipsec4_process_packet(m, isr->next, NULL);
259#endif
260#ifdef INET6
261		case AF_INET6:
262			return ipsec6_process_packet(m, isr->next, flags);
263#endif
264		default:
265			IPSECLOG(LOG_DEBUG, "unknown protocol family %u\n",
266			    saidx->dst.sa.sa_family);
267			error = ENXIO;
268			goto bad;
269		}
270	}
271
272	/*
273	 * We're done with IPsec processing, mark the packet as processed,
274	 * and transmit it using the appropriate network protocol
275	 * (IPv4/IPv6).
276	 */
277
278	if (ipsec_register_done(m, &error) < 0)
279		goto bad;
280
281	return ipsec_reinject_ipstack(m, saidx->dst.sa.sa_family, flags);
282
283bad:
284	m_freem(m);
285	return error;
286}
287
288static void
289ipsec_fill_saidx_bymbuf(struct secasindex *saidx, const struct mbuf *m,
290    const int af)
291{
292	struct m_tag *mtag;
293	u_int16_t natt_src = IPSEC_PORT_ANY;
294	u_int16_t natt_dst = IPSEC_PORT_ANY;
295
296	/*
297	 * For NAT-T enabled ipsecif(4), set NAT-T port numbers
298	 * even if the saidx uses transport mode.
299	 *
300	 * See also ipsecif[46]_output().
301	 */
302	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS);
303	if (mtag) {
304		u_int16_t *natt_ports;
305
306		natt_ports = (u_int16_t *)(mtag + 1);
307		natt_src = natt_ports[1];
308		natt_dst = natt_ports[0];
309	}
310
311	if (af == AF_INET) {
312		struct sockaddr_in *sin;
313		struct ip *ip = mtod(m, struct ip *);
314
315		if (saidx->src.sa.sa_len == 0) {
316			sin = &saidx->src.sin;
317			sin->sin_len = sizeof(*sin);
318			sin->sin_family = AF_INET;
319			sin->sin_port = natt_src;
320			sin->sin_addr = ip->ip_src;
321		}
322		if (saidx->dst.sa.sa_len == 0) {
323			sin = &saidx->dst.sin;
324			sin->sin_len = sizeof(*sin);
325			sin->sin_family = AF_INET;
326			sin->sin_port = natt_dst;
327			sin->sin_addr = ip->ip_dst;
328		}
329	} else {
330		struct sockaddr_in6 *sin6;
331		struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
332
333		if (saidx->src.sin6.sin6_len == 0) {
334			sin6 = (struct sockaddr_in6 *)&saidx->src;
335			sin6->sin6_len = sizeof(*sin6);
336			sin6->sin6_family = AF_INET6;
337			sin6->sin6_port = natt_src;
338			sin6->sin6_addr = ip6->ip6_src;
339			if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
340				/* fix scope id for comparing SPD */
341				sin6->sin6_addr.s6_addr16[1] = 0;
342				sin6->sin6_scope_id =
343				    ntohs(ip6->ip6_src.s6_addr16[1]);
344			}
345		}
346		if (saidx->dst.sin6.sin6_len == 0) {
347			sin6 = (struct sockaddr_in6 *)&saidx->dst;
348			sin6->sin6_len = sizeof(*sin6);
349			sin6->sin6_family = AF_INET6;
350			sin6->sin6_port = natt_dst;
351			sin6->sin6_addr = ip6->ip6_dst;
352			if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) {
353				/* fix scope id for comparing SPD */
354				sin6->sin6_addr.s6_addr16[1] = 0;
355				sin6->sin6_scope_id =
356				    ntohs(ip6->ip6_dst.s6_addr16[1]);
357			}
358		}
359	}
360}
361
362struct secasvar *
363ipsec_lookup_sa(const struct ipsecrequest *isr, const struct mbuf *m)
364{
365	struct secasindex saidx;
366
367	saidx = isr->saidx;
368	if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) {
369		/* Fillin unspecified SA peers only for transport mode */
370		ipsec_fill_saidx_bymbuf(&saidx, m, isr->saidx.dst.sa.sa_family);
371	}
372
373	return key_lookup_sa_bysaidx(&saidx);
374}
375
376/*
377 * ipsec_nextisr can return :
378 * - isr == NULL and error != 0 => something is bad : the packet must be
379 *   discarded
380 * - isr == NULL and error == 0 => no more rules to apply, ipsec processing
381 *   is done, reinject it in ip stack
382 * - isr != NULL (error == 0) => we need to apply one rule to the packet
383 */
384static const struct ipsecrequest *
385ipsec_nextisr(struct mbuf *m, const struct ipsecrequest *isr, int af,
386    int *error, struct secasvar **ret)
387{
388#define	IPSEC_OSTAT(type)						\
389do {									\
390	switch (isr->saidx.proto) {					\
391	case IPPROTO_ESP:						\
392		ESP_STATINC(ESP_STAT_ ## type);				\
393		break;							\
394	case IPPROTO_AH:						\
395		AH_STATINC(AH_STAT_ ## type);				\
396		break;							\
397	default:							\
398		IPCOMP_STATINC(IPCOMP_STAT_ ## type);			\
399		break;							\
400	}								\
401} while (/*CONSTCOND*/0)
402
403	struct secasvar *sav = NULL;
404	struct secasindex saidx;
405
406	KASSERTMSG(af == AF_INET || af == AF_INET6,
407	    "invalid address family %u", af);
408again:
409	/*
410	 * Craft SA index to search for proper SA.  Note that
411	 * we only fillin unspecified SA peers for transport
412	 * mode; for tunnel mode they must already be filled in.
413	 */
414	saidx = isr->saidx;
415	if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) {
416		/* Fillin unspecified SA peers only for transport mode */
417		ipsec_fill_saidx_bymbuf(&saidx, m, af);
418	}
419
420	/*
421	 * Lookup SA and validate it.
422	 */
423	*error = key_checkrequest(isr, &saidx, &sav);
424	if (*error != 0) {
425		/*
426		 * IPsec processing is required, but no SA found.
427		 * I assume that key_acquire() had been called
428		 * to get/establish the SA. Here I discard
429		 * this packet because it is responsibility for
430		 * upper layer to retransmit the packet.
431		 */
432		IPSEC_STATINC(IPSEC_STAT_OUT_NOSA);
433		goto bad;
434	}
435	/* sav may be NULL here if we have an USE rule */
436	if (sav == NULL) {
437		KASSERTMSG(ipsec_get_reqlevel(isr) == IPSEC_LEVEL_USE,
438		    "no SA found, but required; level %u",
439		    ipsec_get_reqlevel(isr));
440		isr = isr->next;
441		/*
442		 * No more rules to apply, return NULL isr and no error.
443		 * It can happen when the last rules are USE rules.
444		 */
445		if (isr == NULL) {
446			*ret = NULL;
447			*error = 0;
448			return isr;
449		}
450		goto again;
451	}
452
453	/*
454	 * Check system global policy controls.
455	 */
456	if ((isr->saidx.proto == IPPROTO_ESP && !esp_enable) ||
457	    (isr->saidx.proto == IPPROTO_AH && !ah_enable) ||
458	    (isr->saidx.proto == IPPROTO_IPCOMP && !ipcomp_enable)) {
459		IPSECLOG(LOG_DEBUG, "IPsec outbound packet dropped due"
460		    " to policy (check your sysctls)\n");
461		IPSEC_OSTAT(PDROPS);
462		*error = EHOSTUNREACH;
463		KEY_SA_UNREF(&sav);
464		goto bad;
465	}
466
467	/*
468	 * Sanity check the SA contents for the caller
469	 * before they invoke the xform output method.
470	 */
471	KASSERT(sav->tdb_xform != NULL);
472	*ret = sav;
473	return isr;
474
475bad:
476	KASSERTMSG(*error != 0, "error return w/ no error code");
477	return NULL;
478#undef IPSEC_OSTAT
479}
480
481#ifdef INET
482/*
483 * IPsec output logic for IPv4.
484 */
485int
486ipsec4_process_packet(struct mbuf *m, const struct ipsecrequest *isr,
487    u_long *mtu)
488{
489	struct secasvar *sav = NULL;
490	struct ip *ip;
491	int error, i, off;
492	union sockaddr_union *dst;
493	int setdf;
494
495	KASSERT(m != NULL);
496	KASSERT(m->m_nextpkt == NULL);
497	KASSERT(isr != NULL);
498
499	isr = ipsec_nextisr(m, isr, AF_INET, &error, &sav);
500	if (isr == NULL) {
501		if (error != 0) {
502			goto bad;
503		} else {
504			if (ipsec_register_done(m, &error) < 0)
505				goto bad;
506
507			return ipsec_reinject_ipstack(m, AF_INET, 0);
508		}
509	}
510	KASSERT(sav != NULL);
511
512	if (m->m_len < sizeof(struct ip) &&
513	    (m = m_pullup(m, sizeof(struct ip))) == NULL) {
514		error = ENOBUFS;
515		goto unrefsav;
516	}
517
518	/*
519	 * Check if we need to handle NAT-T fragmentation.
520	 */
521	if (isr == isr->sp->req) { /* Check only if called from ipsec4_output */
522		KASSERT(mtu != NULL);
523		ip = mtod(m, struct ip *);
524		if (!(sav->natt_type & UDP_ENCAP_ESPINUDP)) {
525			goto noneed;
526		}
527		if (ntohs(ip->ip_len) <= sav->esp_frag)
528			goto noneed;
529		*mtu = sav->esp_frag;
530		KEY_SA_UNREF(&sav);
531		return 0;
532	}
533noneed:
534	dst = &sav->sah->saidx.dst;
535
536	/*
537	 * Collect IP_DF state from the outer header.
538	 */
539	if (dst->sa.sa_family == AF_INET) {
540		ip = mtod(m, struct ip *);
541		/* Honor system-wide control of how to handle IP_DF */
542		switch (ip4_ipsec_dfbit) {
543		case 0:			/* clear in outer header */
544		case 1:			/* set in outer header */
545			setdf = ip4_ipsec_dfbit;
546			break;
547		default:		/* propagate to outer header */
548			setdf = ip->ip_off;
549			setdf = ntohs(setdf);
550			setdf = htons(setdf & IP_DF);
551			break;
552		}
553	} else {
554		ip = NULL;		/* keep compiler happy */
555		setdf = 0;
556	}
557
558	/* Do the appropriate encapsulation, if necessary */
559	if (isr->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */
560	    dst->sa.sa_family != AF_INET ||	    /* PF mismatch */
561	    (dst->sa.sa_family == AF_INET &&	    /* Proxy */
562	     dst->sin.sin_addr.s_addr != INADDR_ANY &&
563	     dst->sin.sin_addr.s_addr != ip->ip_dst.s_addr)) {
564		struct mbuf *mp;
565
566		/* Fix IPv4 header checksum and length */
567		ip = mtod(m, struct ip *);
568		ip->ip_len = htons(m->m_pkthdr.len);
569		ip->ip_sum = 0;
570		ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
571
572		/* Encapsulate the packet */
573		error = ipip_output(m, sav, &mp);
574		if (mp == NULL && !error) {
575			/* Should never happen. */
576			IPSECLOG(LOG_DEBUG,
577			    "ipip_output returns no mbuf and no error!");
578			error = EFAULT;
579		}
580		if (error) {
581			if (mp) {
582				/* XXX: Should never happen! */
583				m_freem(mp);
584			}
585			m = NULL; /* ipip_output() already freed it */
586			goto unrefsav;
587		}
588		m = mp, mp = NULL;
589
590		/*
591		 * ipip_output clears IP_DF in the new header.  If
592		 * we need to propagate IP_DF from the outer header,
593		 * then we have to do it here.
594		 *
595		 * XXX shouldn't assume what ipip_output does.
596		 */
597		if (dst->sa.sa_family == AF_INET && setdf) {
598			if (m->m_len < sizeof(struct ip) &&
599			    (m = m_pullup(m, sizeof(struct ip))) == NULL) {
600				error = ENOBUFS;
601				goto unrefsav;
602			}
603			ip = mtod(m, struct ip *);
604			ip->ip_off |= htons(IP_DF);
605		}
606	}
607
608	/*
609	 * Dispatch to the appropriate IPsec transform logic.  The
610	 * packet will be returned for transmission after crypto
611	 * processing, etc. are completed.  For encapsulation we
612	 * bypass this call because of the explicit call done above
613	 * (necessary to deal with IP_DF handling for IPv4).
614	 *
615	 * NB: m & sav are ``passed to caller'' who's responsible for
616	 *     for reclaiming their resources.
617	 */
618	if (sav->tdb_xform->xf_type != XF_IP4) {
619		if (dst->sa.sa_family == AF_INET) {
620			ip = mtod(m, struct ip *);
621			i = ip->ip_hl << 2;
622			off = offsetof(struct ip, ip_p);
623		} else {
624			i = sizeof(struct ip6_hdr);
625			off = offsetof(struct ip6_hdr, ip6_nxt);
626		}
627		error = (*sav->tdb_xform->xf_output)(m, isr, sav, i, off, 0);
628	} else {
629		error = ipsec_process_done(m, isr, sav, 0);
630	}
631	KEY_SA_UNREF(&sav);
632	return error;
633
634unrefsav:
635	KEY_SA_UNREF(&sav);
636bad:
637	if (m)
638		m_freem(m);
639	return error;
640}
641#endif
642
643#ifdef INET6
644static int
645compute_ipsec_pos(struct mbuf *m, int *i, int *off)
646{
647	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
648	struct ip6_ext ip6e;
649	int dstopt = 0;
650	int nxt;
651
652	*i = sizeof(struct ip6_hdr);
653	*off = offsetof(struct ip6_hdr, ip6_nxt);
654	nxt = ip6->ip6_nxt;
655
656	/*
657	 * chase mbuf chain to find the appropriate place to
658	 * put AH/ESP/IPcomp header.
659	 *     IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
660	 */
661	while (1) {
662		switch (nxt) {
663		case IPPROTO_AH:
664		case IPPROTO_ESP:
665		case IPPROTO_IPCOMP:
666			/*
667			 * We should not skip security header added
668			 * beforehand.
669			 */
670			return 0;
671
672		case IPPROTO_HOPOPTS:
673		case IPPROTO_DSTOPTS:
674		case IPPROTO_ROUTING:
675			if (*i + sizeof(ip6e) > m->m_pkthdr.len) {
676				return EINVAL;
677			}
678
679			/*
680			 * If we see 2nd destination option header,
681			 * we should stop there.
682			 */
683			if (nxt == IPPROTO_DSTOPTS && dstopt)
684				return 0;
685
686			if (nxt == IPPROTO_DSTOPTS) {
687				/*
688				 * Seen 1st or 2nd destination option.
689				 * next time we see one, it must be 2nd.
690				 */
691				dstopt = 1;
692			} else if (nxt == IPPROTO_ROUTING) {
693				/*
694				 * If we see destination option next
695				 * time, it must be dest2.
696				 */
697				dstopt = 2;
698			}
699
700			/* skip this header */
701			m_copydata(m, *i, sizeof(ip6e), &ip6e);
702			nxt = ip6e.ip6e_nxt;
703			*off = *i + offsetof(struct ip6_ext, ip6e_nxt);
704			*i += (ip6e.ip6e_len + 1) << 3;
705			if (*i > m->m_pkthdr.len) {
706				return EINVAL;
707			}
708			break;
709		default:
710			return 0;
711		}
712	}
713
714	return 0;
715}
716
717static int
718in6_sa_equal_addrwithscope(const struct sockaddr_in6 *sa,
719    const struct in6_addr *ia)
720{
721	struct in6_addr ia2;
722
723	memcpy(&ia2, &sa->sin6_addr, sizeof(ia2));
724	if (IN6_IS_SCOPE_LINKLOCAL(&sa->sin6_addr))
725		ia2.s6_addr16[1] = htons(sa->sin6_scope_id);
726
727	return IN6_ARE_ADDR_EQUAL(ia, &ia2);
728}
729
730int
731ipsec6_process_packet(struct mbuf *m, const struct ipsecrequest *isr, int flags)
732{
733	struct secasvar *sav = NULL;
734	struct ip6_hdr *ip6;
735	int error, i, off;
736	union sockaddr_union *dst;
737
738	KASSERT(m != NULL);
739	KASSERT(m->m_nextpkt == NULL);
740	KASSERT(isr != NULL);
741
742	isr = ipsec_nextisr(m, isr, AF_INET6, &error, &sav);
743	if (isr == NULL) {
744		if (error != 0) {
745			/* XXX Should we send a notification ? */
746			goto bad;
747		} else {
748			if (ipsec_register_done(m, &error) < 0)
749				goto bad;
750
751			return ipsec_reinject_ipstack(m, AF_INET6, flags);
752		}
753	}
754
755	KASSERT(sav != NULL);
756	dst = &sav->sah->saidx.dst;
757
758	if (m->m_len < sizeof(struct ip6_hdr)) {
759		if ((m = m_pullup(m,sizeof(struct ip6_hdr))) == NULL) {
760			error = ENOBUFS;
761			goto unrefsav;
762		}
763	}
764	ip6 = mtod(m, struct ip6_hdr *);
765
766	/* Do the appropriate encapsulation, if necessary */
767	if (isr->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */
768	    dst->sa.sa_family != AF_INET6 ||        /* AF mismatch */
769	    ((dst->sa.sa_family == AF_INET6) &&
770	     (!IN6_IS_ADDR_UNSPECIFIED(&dst->sin6.sin6_addr)) &&
771	     (!in6_sa_equal_addrwithscope(&dst->sin6, &ip6->ip6_dst)))) {
772		struct mbuf *mp;
773
774		if (m->m_pkthdr.len - sizeof(*ip6) > IPV6_MAXPACKET) {
775			/* No jumbogram support. */
776			error = ENXIO;   /*XXX*/
777			goto unrefsav;
778		}
779
780		/* Fix IPv6 header payload length. */
781		ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
782
783		/* Encapsulate the packet */
784		error = ipip_output(m, sav, &mp);
785		if (mp == NULL && !error) {
786			/* Should never happen. */
787			IPSECLOG(LOG_DEBUG,
788			    "ipip_output returns no mbuf and no error!");
789			error = EFAULT;
790		}
791
792		if (error) {
793			if (mp) {
794				/* XXX: Should never happen! */
795				m_freem(mp);
796			}
797			m = NULL; /* ipip_output() already freed it */
798			goto unrefsav;
799		}
800
801		m = mp;
802		mp = NULL;
803	}
804
805	if (dst->sa.sa_family == AF_INET) {
806		struct ip *ip;
807		ip = mtod(m, struct ip *);
808		i = ip->ip_hl << 2;
809		off = offsetof(struct ip, ip_p);
810	} else {
811		error = compute_ipsec_pos(m, &i, &off);
812		if (error)
813			goto unrefsav;
814	}
815	error = (*sav->tdb_xform->xf_output)(m, isr, sav, i, off, flags);
816	KEY_SA_UNREF(&sav);
817	return error;
818
819unrefsav:
820	KEY_SA_UNREF(&sav);
821bad:
822	if (m)
823		m_freem(m);
824	return error;
825}
826#endif /* INET6 */
827
828void
829ipsec_output_init(void)
830{
831
832	ipsec_rtcache_percpu = rtcache_percpu_alloc();
833}
834