1/*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 *    notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 *    notice, this list of conditions and the following disclaimer in the
40 *    documentation and/or other materials provided with the distribution.
41 * 3. Neither the name of the project nor the names of its contributors
42 *    may be used to endorse or promote products derived from this software
43 *    without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 */
57
58/*
59 * Copyright (c) 1982, 1986, 1988, 1990, 1993
60 *	The Regents of the University of California.  All rights reserved.
61 *
62 * Redistribution and use in source and binary forms, with or without
63 * modification, are permitted provided that the following conditions
64 * are met:
65 * 1. Redistributions of source code must retain the above copyright
66 *    notice, this list of conditions and the following disclaimer.
67 * 2. Redistributions in binary form must reproduce the above copyright
68 *    notice, this list of conditions and the following disclaimer in the
69 *    documentation and/or other materials provided with the distribution.
70 * 3. All advertising materials mentioning features or use of this software
71 *    must display the following acknowledgement:
72 *	This product includes software developed by the University of
73 *	California, Berkeley and its contributors.
74 * 4. Neither the name of the University nor the names of its contributors
75 *    may be used to endorse or promote products derived from this software
76 *    without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
91 */
92/*
93 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
94 * support for mandatory and extensible security protections.  This notice
95 * is included in support of clause 2.2 (b) of the Apple Public License,
96 * Version 2.0.
97 */
98
99#include <sys/param.h>
100#include <sys/malloc.h>
101#include <sys/mbuf.h>
102#include <sys/errno.h>
103#include <sys/protosw.h>
104#include <sys/socket.h>
105#include <sys/socketvar.h>
106#include <sys/systm.h>
107#include <sys/kernel.h>
108#include <sys/proc.h>
109#include <sys/kauth.h>
110#include <sys/mcache.h>
111#include <sys/sysctl.h>
112#include <kern/zalloc.h>
113#include <libkern/OSByteOrder.h>
114
115#include <pexpert/pexpert.h>
116#include <mach/sdt.h>
117
118#include <net/if.h>
119#include <net/route.h>
120#include <net/dlil.h>
121#include <net/net_osdep.h>
122
123#include <netinet/in.h>
124#include <netinet/in_var.h>
125#include <netinet/ip_var.h>
126#include <netinet6/in6_var.h>
127#include <netinet/ip6.h>
128#include <netinet/kpi_ipfilter_var.h>
129
130#include <netinet6/ip6protosw.h>
131#include <netinet/icmp6.h>
132#include <netinet6/ip6_var.h>
133#include <netinet/in_pcb.h>
134#include <netinet6/nd6.h>
135#include <netinet6/scope6_var.h>
136#if IPSEC
137#include <netinet6/ipsec.h>
138#include <netinet6/ipsec6.h>
139#include <netkey/key.h>
140extern int ipsec_bypass;
141#endif /* IPSEC */
142
143#if CONFIG_MACF_NET
144#include <security/mac.h>
145#endif /* CONFIG_MACF_NET */
146
147#if DUMMYNET
148#include <netinet6/ip6_fw.h>
149#include <netinet/ip_fw.h>
150#include <netinet/ip_dummynet.h>
151#endif /* DUMMYNET */
152
153#if PF
154#include <net/pfvar.h>
155#endif /* PF */
156
157static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
158static void ip6_out_cksum_stats(int, u_int32_t);
159static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
160static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
161    struct ip6_frag **);
162static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
163    struct ifnet *, struct in6_addr *, u_int32_t *, boolean_t *);
164static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *,
165    struct sockopt *sopt);
166static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, int);
167static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
168static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
169static void im6o_trace(struct ip6_moptions *, int);
170static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int,
171    int, int);
172static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
173static void ip6_output_checksum(struct ifnet *, uint32_t, struct mbuf *,
174    int, uint32_t, uint32_t);
175
176#define	IM6O_TRACE_HIST_SIZE	32	/* size of trace history */
177
178/* For gdb */
179__private_extern__ unsigned int im6o_trace_hist_size = IM6O_TRACE_HIST_SIZE;
180
181struct ip6_moptions_dbg {
182	struct ip6_moptions	im6o;			/* ip6_moptions */
183	u_int16_t		im6o_refhold_cnt;	/* # of IM6O_ADDREF */
184	u_int16_t		im6o_refrele_cnt;	/* # of IM6O_REMREF */
185	/*
186	 * Alloc and free callers.
187	 */
188	ctrace_t		im6o_alloc;
189	ctrace_t		im6o_free;
190	/*
191	 * Circular lists of IM6O_ADDREF and IM6O_REMREF callers.
192	 */
193	ctrace_t		im6o_refhold[IM6O_TRACE_HIST_SIZE];
194	ctrace_t		im6o_refrele[IM6O_TRACE_HIST_SIZE];
195};
196
#define	IM6O_ZONE_MAX		64		/* zone element limit */
#define	IM6O_ZONE_NAME		"ip6_moptions"	/* name of the zone */

/* ip6_moptions debugging switch; it starts enabled only when DEBUG is set. */
#if DEBUG
static unsigned int im6o_debug = 1;
#else
static unsigned int im6o_debug;
#endif /* DEBUG */

static unsigned int im6o_size;		/* zone element size */
static struct zone *im6o_zone;		/* ip6_moptions zone */
208
209SYSCTL_DECL(_net_inet6_ip6);
210
211static int ip6_maxchainsent = 0;
212SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent,
213	CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxchainsent, 0,
214	"use dlil_output_list");
215
216/*
217 * XXX we don't handle mbuf chains yet in nd6_output() so ip6_output_list() only
218 * walks through the packet chain and sends each mbuf separately.
219 */
220int
221ip6_output_list(struct mbuf *m0, int packetlist, struct ip6_pktopts *opt,
222    struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
223    struct ifnet **ifpp, struct ip6_out_args *ip6oa)
224{
225#pragma unused(packetlist)
226	struct mbuf *m = m0, *nextpkt;
227	int error = 0;
228
229	while (m != NULL) {
230		/*
231		 * Break the chain before calling ip6_output() and free the
232		 * mbufs if there was an error.
233		 */
234		nextpkt = m->m_nextpkt;
235		m->m_nextpkt = NULL;
236		error = ip6_output(m, opt, ro, flags, im6o, ifpp, ip6oa);
237		if (error != 0) {
238			if (nextpkt != NULL)
239				m_freem_list(nextpkt);
240			return (error);
241		}
242		m = nextpkt;
243	}
244
245	return (error);
246}
247
/*
 * IP6 output.  The packet in mbuf chain m0 contains a skeletal IP6
 * header (with pri, len, nxt, hlim, src, dst).
 * This function may modify ver and hlim only.
 * The mbuf chain containing the packet will be freed.
 * The packet options opt (struct ip6_pktopts), if present, will not be
 * freed.
 *
 * If ro is non-NULL and has valid ro->ro_rt, route lookup would be
 * skipped and ro->ro_rt would be used.  Otherwise the result of route
 * lookup is stored in ro->ro_rt.
 *
 * Type of "mtu": rt_rmx.rmx_mtu is u_int32_t, ifnet.ifr_mtu is int, and
 * nd_ifinfo.linkmtu is u_int32_t, so we use u_int32_t to hold the largest
 * one, which is rt_rmx.rmx_mtu.
 */
263int
264ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
265    int flags, struct ip6_moptions *im6o, struct ifnet **ifpp,
266    struct ip6_out_args *ip6oa)
267{
268	struct ip6_hdr *ip6;
269	u_char *nexthdrp;
270	struct ifnet *ifp = NULL, *origifp = NULL;	/* refcnt'd */
271	struct mbuf *m, *mprev;
272	int hlen, tlen, len, off, nxt0;
273	struct route_in6 *ro_pmtu = NULL;
274	struct rtentry *rt = NULL;
275	struct sockaddr_in6 *dst, src_sa, dst_sa;
276	int error = 0;
277	struct in6_ifaddr *ia = NULL, *src_ia = NULL;
278	u_int32_t mtu;
279	boolean_t alwaysfrag = FALSE;
280	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
281	struct ip6_rthdr *rh;
282	struct in6_addr finaldst;
283	ipfilter_t inject_filter_ref;
284	struct ipf_pktopts *ippo = NULL;
285	struct flowadv *adv = NULL;
286#if DUMMYNET
287	struct m_tag *tag;
288	struct ip6_out_args saved_ip6oa;
289	struct sockaddr_in6 dst_buf;
290#endif /* DUMMYNET */
291#if IPSEC
292	struct socket *so = NULL;
293	struct secpolicy *sp = NULL;
294	struct route_in6 *ipsec_saved_route = NULL;
295	boolean_t needipsectun = FALSE;
296#endif /* IPSEC */
297	struct {
298		struct ipf_pktopts ipf_pktopts;
299		struct ip6_exthdrs exthdrs;
300		struct route_in6 ip6route;
301#if IPSEC
302		struct ipsec_output_state ipsec_state;
303#endif /* IPSEC */
304#if DUMMYNET
305		struct route_in6 saved_route;
306		struct route_in6 saved_ro_pmtu;
307		struct ip_fw_args args;
308#endif /* DUMMYNET */
309	} ip6obz;
310#define	ipf_pktopts	ip6obz.ipf_pktopts
311#define	exthdrs		ip6obz.exthdrs
312#define	ip6route	ip6obz.ip6route
313#define	ipsec_state	ip6obz.ipsec_state
314#define	saved_route	ip6obz.saved_route
315#define	saved_ro_pmtu	ip6obz.saved_ro_pmtu
316#define	args		ip6obz.args
317	union {
318		struct {
319			boolean_t select_srcif : 1;
320			boolean_t hdrsplit : 1;
321			boolean_t dontfrag : 1;
322#if IPSEC
323			boolean_t needipsec : 1;
324			boolean_t noipsec : 1;
325#endif /* IPSEC */
326		};
327		uint32_t raw;
328	} ip6obf = { .raw = 0 };
329
330	VERIFY(m0->m_flags & M_PKTHDR);
331
332	/* zero out {saved_route, saved_ro_pmtu, ip6route, exthdrs, args} */
333	bzero(&ip6obz, sizeof (ip6obz));
334
335#if DUMMYNET
336	if (SLIST_EMPTY(&m0->m_pkthdr.tags))
337		goto tags_done;
338
339	/* Grab info from mtags prepended to the chain */
340	if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
341	    KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
342		struct dn_pkt_tag	*dn_tag;
343
344		dn_tag = (struct dn_pkt_tag *)(tag+1);
345		args.fwa_pf_rule = dn_tag->dn_pf_rule;
346
347		bcopy(&dn_tag->dn_dst6, &dst_buf, sizeof (dst_buf));
348		dst = &dst_buf;
349		ifp = dn_tag->dn_ifp;
350		if (ifp != NULL)
351			ifnet_reference(ifp);
352		flags = dn_tag->dn_flags;
353		if (dn_tag->dn_flags & IPV6_OUTARGS) {
354			saved_ip6oa = dn_tag->dn_ip6oa;
355			ip6oa = &saved_ip6oa;
356		}
357
358		saved_route = dn_tag->dn_ro6;
359		ro = &saved_route;
360		saved_ro_pmtu = dn_tag->dn_ro6_pmtu;
361		ro_pmtu = &saved_ro_pmtu;
362		origifp = dn_tag->dn_origifp;
363		if (origifp != NULL)
364			ifnet_reference(origifp);
365		mtu = dn_tag->dn_mtu;
366		alwaysfrag = (dn_tag->dn_alwaysfrag != 0);
367		unfragpartlen = dn_tag->dn_unfragpartlen;
368
369		bcopy(&dn_tag->dn_exthdrs, &exthdrs, sizeof (exthdrs));
370
371		m_tag_delete(m0, tag);
372	}
373
374tags_done:
375#endif /* DUMMYNET */
376
377	m = m0;
378	m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP|PKTF_IFAINFO);
379
380#if IPSEC
381	/* for AH processing. stupid to have "socket" variable in IP layer... */
382	if (ipsec_bypass == 0) {
383		so = ipsec_getsocket(m);
384		(void) ipsec_setsocket(m, NULL);
385
386		/* If packet is bound to an interface, check bound policies */
387		if ((flags & IPV6_OUTARGS) &&
388		    (ip6oa->ip6oa_flags & IPOAF_BOUND_IF) &&
389		    ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
390			/* ip6obf.noipsec is a bitfield, use temp integer */
391			int noipsec = 0;
392
393			if (ipsec6_getpolicybyinterface(m, IPSEC_DIR_OUTBOUND,
394			    flags, ip6oa, &noipsec, &sp) != 0)
395				goto bad;
396
397			ip6obf.noipsec = (noipsec != 0);
398		}
399	}
400#endif /* IPSEC */
401
402	ip6 = mtod(m, struct ip6_hdr *);
403	nxt0 = ip6->ip6_nxt;
404	finaldst = ip6->ip6_dst;
405	inject_filter_ref = ipf_get_inject_filter(m);
406	ippo = &ipf_pktopts;
407
408	if (ip6_doscopedroute && (flags & IPV6_OUTARGS)) {
409		/*
410		 * In the forwarding case, only the ifscope value is used,
411		 * as source interface selection doesn't take place.
412		 */
413		if ((ip6obf.select_srcif = (!(flags & (IPV6_FORWARDING |
414		    IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL)) &&
415		    (ip6oa->ip6oa_flags & IP6OAF_SELECT_SRCIF))))
416			ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF;
417
418		if ((ip6oa->ip6oa_flags & IP6OAF_BOUND_IF) &&
419		    ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
420			ipf_pktopts.ippo_flags |= (IPPOF_BOUND_IF |
421			    (ip6oa->ip6oa_boundif << IPPOF_SHIFT_IFSCOPE));
422		}
423
424		if (ip6oa->ip6oa_flags & IP6OAF_BOUND_SRCADDR)
425			ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR;
426	} else {
427		ip6obf.select_srcif = FALSE;
428		if (flags & IPV6_OUTARGS) {
429			ip6oa->ip6oa_boundif = IFSCOPE_NONE;
430			ip6oa->ip6oa_flags &= ~(IP6OAF_SELECT_SRCIF |
431			    IP6OAF_BOUND_IF | IP6OAF_BOUND_SRCADDR);
432		}
433	}
434
435	if ((flags & IPV6_OUTARGS) && (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR))
436		ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
437
438	if (flags & IPV6_OUTARGS) {
439		adv = &ip6oa->ip6oa_flowadv;
440		adv->code = FADV_SUCCESS;
441		ip6oa->ip6oa_retflags = 0;
442	}
443
444#if DUMMYNET
445	if (args.fwa_pf_rule) {
446		ip6 = mtod(m, struct ip6_hdr *);
447		VERIFY(ro != NULL);	/* ro == saved_route */
448		goto check_with_pf;
449	}
450#endif /* DUMMYNET */
451
452#define	MAKE_EXTHDR(hp, mp) do {					\
453	if (hp != NULL) {						\
454		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
455		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
456		    ((eh)->ip6e_len + 1) << 3);				\
457		if (error)						\
458			goto freehdrs;					\
459	}								\
460} while (0)
461
462	if (opt != NULL) {
463		/* Hop-by-Hop options header */
464		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
465		/* Destination options header(1st part) */
466		if (opt->ip6po_rthdr) {
467			/*
468			 * Destination options header(1st part)
469			 * This only makes sense with a routing header.
470			 * See Section 9.2 of RFC 3542.
471			 * Disabling this part just for MIP6 convenience is
472			 * a bad idea.  We need to think carefully about a
473			 * way to make the advanced API coexist with MIP6
474			 * options, which might automatically be inserted in
475			 * the kernel.
476			 */
477			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
478		}
479		/* Routing header */
480		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
481		/* Destination options header(2nd part) */
482		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
483	}
484
485#undef MAKE_EXTHDR
486
487#if IPSEC
488	if (ipsec_bypass != 0 || ip6obf.noipsec)
489		goto skip_ipsec;
490
491	/* May have been set above if packet was bound */
492	if (sp == NULL) {
493		/* get a security policy for this packet */
494		if (so == NULL) {
495			sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
496			    0, &error);
497		} else {
498			sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND,
499			    so, &error);
500		}
501		if (sp == NULL) {
502			IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
503			goto freehdrs;
504		}
505	}
506
507	error = 0;
508
509	/* check policy */
510	switch (sp->policy) {
511	case IPSEC_POLICY_DISCARD:
512	case IPSEC_POLICY_GENERATE:
513		/*
514		 * This packet is just discarded.
515		 */
516		IPSEC_STAT_INCREMENT(ipsec6stat.out_polvio);
517		goto freehdrs;
518
519	case IPSEC_POLICY_BYPASS:
520	case IPSEC_POLICY_NONE:
521		/* no need to do IPsec. */
522		ip6obf.needipsec = FALSE;
523		break;
524
525	case IPSEC_POLICY_IPSEC:
526		if (sp->req == NULL) {
527			/* acquire a policy */
528			error = key_spdacquire(sp);
529			goto freehdrs;
530		}
531		if (sp->ipsec_if) {
532			/* Verify the redirect to ipsec interface */
533			if (sp->ipsec_if == ifp) {
534				/* Set policy for mbuf */
535				m->m_pkthdr.ipsec_policy = sp->id;
536				goto skip_ipsec;
537			}
538			goto bad;
539		} else {
540			ip6obf.needipsec = TRUE;
541		}
542		break;
543
544	case IPSEC_POLICY_ENTRUST:
545	default:
546		printf("%s: Invalid policy found: %d\n", __func__, sp->policy);
547		break;
548	}
549skip_ipsec:
550#endif /* IPSEC */
551
552	/*
553	 * Calculate the total length of the extension header chain.
554	 * Keep the length of the unfragmentable part for fragmentation.
555	 */
556	optlen = 0;
557	if (exthdrs.ip6e_hbh != NULL)
558		optlen += exthdrs.ip6e_hbh->m_len;
559	if (exthdrs.ip6e_dest1 != NULL)
560		optlen += exthdrs.ip6e_dest1->m_len;
561	if (exthdrs.ip6e_rthdr != NULL)
562		optlen += exthdrs.ip6e_rthdr->m_len;
563	unfragpartlen = optlen + sizeof (struct ip6_hdr);
564
565	/* NOTE: we don't add AH/ESP length here. do that later. */
566	if (exthdrs.ip6e_dest2 != NULL)
567		optlen += exthdrs.ip6e_dest2->m_len;
568
569	/*
570	 * If we need IPsec, or there is at least one extension header,
571	 * separate IP6 header from the payload.
572	 */
573	if ((
574#if IPSEC
575	    ip6obf.needipsec ||
576#endif /* IPSEC */
577	    optlen) && !ip6obf.hdrsplit) {
578		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
579			m = NULL;
580			goto freehdrs;
581		}
582		m = exthdrs.ip6e_ip6;
583		ip6obf.hdrsplit = TRUE;
584	}
585
586	/* adjust pointer */
587	ip6 = mtod(m, struct ip6_hdr *);
588
589	/* adjust mbuf packet header length */
590	m->m_pkthdr.len += optlen;
591	plen = m->m_pkthdr.len - sizeof (*ip6);
592
593	/* If this is a jumbo payload, insert a jumbo payload option. */
594	if (plen > IPV6_MAXPACKET) {
595		if (!ip6obf.hdrsplit) {
596			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
597				m = NULL;
598				goto freehdrs;
599			}
600			m = exthdrs.ip6e_ip6;
601			ip6obf.hdrsplit = TRUE;
602		}
603		/* adjust pointer */
604		ip6 = mtod(m, struct ip6_hdr *);
605		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
606			goto freehdrs;
607		ip6->ip6_plen = 0;
608	} else {
609		ip6->ip6_plen = htons(plen);
610	}
611	/*
612	 * Concatenate headers and fill in next header fields.
613	 * Here we have, on "m"
614	 *	IPv6 payload
615	 * and we insert headers accordingly.  Finally, we should be getting:
616	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
617	 *
618	 * during the header composing process, "m" points to IPv6 header.
619	 * "mprev" points to an extension header prior to esp.
620	 */
621	nexthdrp = &ip6->ip6_nxt;
622	mprev = m;
623
624	/*
625	 * we treat dest2 specially.  this makes IPsec processing
626	 * much easier.  the goal here is to make mprev point the
627	 * mbuf prior to dest2.
628	 *
629	 * result: IPv6 dest2 payload
630	 * m and mprev will point to IPv6 header.
631	 */
632	if (exthdrs.ip6e_dest2 != NULL) {
633		if (!ip6obf.hdrsplit) {
634			panic("assumption failed: hdr not split");
635			/* NOTREACHED */
636		}
637		exthdrs.ip6e_dest2->m_next = m->m_next;
638		m->m_next = exthdrs.ip6e_dest2;
639		*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
640		ip6->ip6_nxt = IPPROTO_DSTOPTS;
641	}
642
643#define	MAKE_CHAIN(m, mp, p, i)	do {					\
644	if (m != NULL) {						\
645		if (!ip6obf.hdrsplit) {					\
646			panic("assumption failed: hdr not split");	\
647			/* NOTREACHED */				\
648		}							\
649		*mtod((m), u_char *) = *(p);				\
650		*(p) = (i);						\
651		p = mtod((m), u_char *);				\
652		(m)->m_next = (mp)->m_next;				\
653		(mp)->m_next = (m);					\
654		(mp) = (m);						\
655	}								\
656} while (0)
657	/*
658	 * result: IPv6 hbh dest1 rthdr dest2 payload
659	 * m will point to IPv6 header.  mprev will point to the
660	 * extension header prior to dest2 (rthdr in the above case).
661	 */
662	MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
663	MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS);
664	MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING);
665
666#undef MAKE_CHAIN
667
668#if IPSEC
669	if (ip6obf.needipsec && (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA))
670		in6_delayed_cksum_offset(m, 0, optlen, nxt0);
671#endif /* IPSEC */
672
673	if (!TAILQ_EMPTY(&ipv6_filters)) {
674		struct ipfilter	*filter;
675		int seen = (inject_filter_ref == NULL);
676		int fixscope = 0;
677
678		if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
679			ippo->ippo_flags |= IPPOF_MCAST_OPTS;
680			IM6O_LOCK(im6o);
681			ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp;
682			ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim;
683			ippo->ippo_mcast_loop = im6o->im6o_multicast_loop;
684			IM6O_UNLOCK(im6o);
685		}
686
687		/* Hack: embed the scope_id in the destination */
688		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) &&
689		    (ip6->ip6_dst.s6_addr16[1] == 0) && (ro != NULL)) {
690			fixscope = 1;
691			ip6->ip6_dst.s6_addr16[1] =
692			    htons(ro->ro_dst.sin6_scope_id);
693		}
694
695		ipf_ref();
696		TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) {
697			/*
698			 * Don't process packet twice if we've already seen it.
699			 */
700			if (seen == 0) {
701				if ((struct ipfilter *)inject_filter_ref ==
702				    filter)
703					seen = 1;
704			} else if (filter->ipf_filter.ipf_output != NULL) {
705				errno_t result;
706
707				result = filter->ipf_filter.ipf_output(
708				    filter->ipf_filter.cookie,
709				    (mbuf_t *)&m, ippo);
710				if (result == EJUSTRETURN) {
711					ipf_unref();
712					goto done;
713				}
714				if (result != 0) {
715					ipf_unref();
716					goto bad;
717				}
718			}
719		}
720		ipf_unref();
721
722		ip6 = mtod(m, struct ip6_hdr *);
723		/* Hack: cleanup embedded scope_id if we put it there */
724		if (fixscope)
725			ip6->ip6_dst.s6_addr16[1] = 0;
726	}
727
728#if IPSEC
729	if (ip6obf.needipsec) {
730		int segleft_org;
731
732		/*
733		 * pointers after IPsec headers are not valid any more.
734		 * other pointers need a great care too.
735		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
736		 */
737		exthdrs.ip6e_dest2 = NULL;
738
739		if (exthdrs.ip6e_rthdr != NULL) {
740			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
741			segleft_org = rh->ip6r_segleft;
742			rh->ip6r_segleft = 0;
743		} else {
744			rh = NULL;
745			segleft_org = 0;
746		}
747
748		ipsec_state.m = m;
749		error = ipsec6_output_trans(&ipsec_state, nexthdrp, mprev,
750		    sp, flags, &needipsectun);
751		m = ipsec_state.m;
752		if (error) {
753			/* mbuf is already reclaimed in ipsec6_output_trans. */
754			m = NULL;
755			switch (error) {
756			case EHOSTUNREACH:
757			case ENETUNREACH:
758			case EMSGSIZE:
759			case ENOBUFS:
760			case ENOMEM:
761				break;
762			default:
763				printf("ip6_output (ipsec): error code %d\n",
764				    error);
765				/* FALLTHRU */
766			case ENOENT:
767				/* don't show these error codes to the user */
768				error = 0;
769				break;
770			}
771			goto bad;
772		}
773		if (exthdrs.ip6e_rthdr != NULL) {
774			/* ah6_output doesn't modify mbuf chain */
775			rh->ip6r_segleft = segleft_org;
776		}
777	}
778#endif /* IPSEC */
779
780	/*
781	 * If there is a routing header, replace the destination address field
782	 * with the first hop of the routing header.
783	 */
784	if (exthdrs.ip6e_rthdr != NULL) {
785		struct ip6_rthdr0 *rh0;
786		struct in6_addr *addr;
787		struct sockaddr_in6 sa;
788
789		rh = (struct ip6_rthdr *)
790		    (mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *));
791		switch (rh->ip6r_type) {
792		case IPV6_RTHDR_TYPE_0:
793			rh0 = (struct ip6_rthdr0 *)rh;
794			addr = (struct in6_addr *)(void *)(rh0 + 1);
795
796			/*
797			 * construct a sockaddr_in6 form of
798			 * the first hop.
799			 *
800			 * XXX: we may not have enough
801			 * information about its scope zone;
802			 * there is no standard API to pass
803			 * the information from the
804			 * application.
805			 */
806			bzero(&sa, sizeof (sa));
807			sa.sin6_family = AF_INET6;
808			sa.sin6_len = sizeof (sa);
809			sa.sin6_addr = addr[0];
810			if ((error = sa6_embedscope(&sa,
811			    ip6_use_defzone)) != 0) {
812				goto bad;
813			}
814			ip6->ip6_dst = sa.sin6_addr;
815			bcopy(&addr[1], &addr[0], sizeof (struct in6_addr) *
816			    (rh0->ip6r0_segleft - 1));
817			addr[rh0->ip6r0_segleft - 1] = finaldst;
818			/* XXX */
819			in6_clearscope(addr + rh0->ip6r0_segleft - 1);
820			break;
821		default:	/* is it possible? */
822			error = EINVAL;
823			goto bad;
824		}
825	}
826
827	/* Source address validation */
828	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
829	    !(flags & IPV6_UNSPECSRC)) {
830		error = EOPNOTSUPP;
831		ip6stat.ip6s_badscope++;
832		goto bad;
833	}
834	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
835		error = EOPNOTSUPP;
836		ip6stat.ip6s_badscope++;
837		goto bad;
838	}
839
840	ip6stat.ip6s_localout++;
841
842	/*
843	 * Route packet.
844	 */
845	if (ro == NULL) {
846		ro = &ip6route;
847		bzero((caddr_t)ro, sizeof (*ro));
848	}
849	VERIFY(ro_pmtu == NULL);	/* must not get here if dummynet */
850	ro_pmtu = ro;
851	if (opt != NULL && opt->ip6po_rthdr)
852		ro = &opt->ip6po_route;
853	dst = SIN6(&ro->ro_dst);
854
855	if (ro->ro_rt != NULL)
856		RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
857	/*
858	 * if specified, try to fill in the traffic class field.
859	 * do not override if a non-zero value is already set.
860	 * we check the diffserv field and the ecn field separately.
861	 */
862	if (opt != NULL && opt->ip6po_tclass >= 0) {
863		int mask = 0;
864
865		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
866			mask |= 0xfc;
867		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
868			mask |= 0x03;
869		if (mask != 0) {
870			ip6->ip6_flow |=
871			    htonl((opt->ip6po_tclass & mask) << 20);
872		}
873	}
874
875	/* fill in or override the hop limit field, if necessary. */
876	if (opt && opt->ip6po_hlim != -1) {
877		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
878	} else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
879		if (im6o != NULL) {
880			IM6O_LOCK(im6o);
881			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
882			IM6O_UNLOCK(im6o);
883		} else {
884			ip6->ip6_hlim = ip6_defmcasthlim;
885		}
886	}
887
888	/*
889	 * If there is a cached route, check that it is to the same
890	 * destination and is still up. If not, free it and try again.
891	 * Test rt_flags without holding rt_lock for performance reasons;
892	 * if the route is down it will hopefully be caught by the layer
893	 * below (since it uses this route as a hint) or during the
894	 * next transmit.
895	 */
896	if (ROUTE_UNUSABLE(ro) || dst->sin6_family != AF_INET6 ||
897	    !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))
898		ROUTE_RELEASE(ro);
899
900	if (ro->ro_rt == NULL) {
901		bzero(dst, sizeof (*dst));
902		dst->sin6_family = AF_INET6;
903		dst->sin6_len = sizeof (struct sockaddr_in6);
904		dst->sin6_addr = ip6->ip6_dst;
905	}
906#if IPSEC
907	if (ip6obf.needipsec && needipsectun) {
908#if CONFIG_DTRACE
909		struct ifnet *trace_ifp = (ifpp != NULL) ? (*ifpp) : NULL;
910#endif /* CONFIG_DTRACE */
911		/*
912		 * All the extension headers will become inaccessible
913		 * (since they can be encrypted).
914		 * Don't panic, we need no more updates to extension headers
915		 * on inner IPv6 packet (since they are now encapsulated).
916		 *
917		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
918		 */
919		bzero(&exthdrs, sizeof (exthdrs));
920		exthdrs.ip6e_ip6 = m;
921
922		ipsec_state.m = m;
923		route_copyout(&ipsec_state.ro, (struct route *)ro,
924		    sizeof (ipsec_state.ro));
925		ipsec_state.dst = SA(dst);
926
927		/* So that we can see packets inside the tunnel */
928		DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
929		    struct ip6_hdr *, ip6, struct ifnet *, trace_ifp,
930		    struct ip *, NULL, struct ip6_hdr *, ip6);
931
932		error = ipsec6_output_tunnel(&ipsec_state, sp, flags);
933		/* tunneled in IPv4? packet is gone */
934		if (ipsec_state.tunneled == 4)
935			goto done;
936		m = ipsec_state.m;
937		ipsec_saved_route = ro;
938		ro = (struct route_in6 *)&ipsec_state.ro;
939		dst = SIN6(ipsec_state.dst);
940		if (error) {
941			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
942			m0 = m = NULL;
943			m = NULL;
944			switch (error) {
945			case EHOSTUNREACH:
946			case ENETUNREACH:
947			case EMSGSIZE:
948			case ENOBUFS:
949			case ENOMEM:
950				break;
951			default:
952				printf("ip6_output (ipsec): error code %d\n",
953				    error);
954				/* FALLTHRU */
955			case ENOENT:
956				/* don't show these error codes to the user */
957				error = 0;
958				break;
959			}
960			goto bad;
961		}
962		/*
963		 * The packet has been encapsulated so the ifscope
964		 * is no longer valid since it does not apply to the
965		 * outer address: ignore the ifscope.
966		 */
967		if (flags & IPV6_OUTARGS) {
968			ip6oa->ip6oa_boundif = IFSCOPE_NONE;
969			ip6oa->ip6oa_flags &= ~IP6OAF_BOUND_IF;
970		}
971		if (opt != NULL && opt->ip6po_pktinfo != NULL) {
972			if (opt->ip6po_pktinfo->ipi6_ifindex != IFSCOPE_NONE)
973				opt->ip6po_pktinfo->ipi6_ifindex = IFSCOPE_NONE;
974		}
975		exthdrs.ip6e_ip6 = m;
976	}
977#endif /* IPSEC */
978
979	/* for safety */
980	if (ifp != NULL) {
981		ifnet_release(ifp);
982		ifp = NULL;
983	}
984
985	/* adjust pointer */
986	ip6 = mtod(m, struct ip6_hdr *);
987
988	if (ip6obf.select_srcif) {
989		bzero(&src_sa, sizeof (src_sa));
990		src_sa.sin6_family = AF_INET6;
991		src_sa.sin6_len = sizeof (src_sa);
992		src_sa.sin6_addr = ip6->ip6_src;
993	}
994	bzero(&dst_sa, sizeof (dst_sa));
995	dst_sa.sin6_family = AF_INET6;
996	dst_sa.sin6_len = sizeof (dst_sa);
997	dst_sa.sin6_addr = ip6->ip6_dst;
998
999	/*
1000	 * in6_selectroute() might return an ifp with its reference held
1001	 * even in the error case, so make sure to release its reference.
1002	 * ip6oa may be NULL if IPV6_OUTARGS isn't set.
1003	 */
1004	if ((error = in6_selectroute(ip6obf.select_srcif ? &src_sa : NULL,
1005	    &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa)) != 0) {
1006		switch (error) {
1007		case EHOSTUNREACH:
1008			ip6stat.ip6s_noroute++;
1009			break;
1010		case EADDRNOTAVAIL:
1011		default:
1012			break; /* XXX statistics? */
1013		}
1014		if (ifp != NULL)
1015			in6_ifstat_inc(ifp, ifs6_out_discard);
1016		/* ifp (if non-NULL) will be released at the end */
1017		goto bad;
1018	}
1019	if (rt == NULL) {
1020		/*
1021		 * If in6_selectroute() does not return a route entry,
1022		 * dst may not have been updated.
1023		 */
1024		*dst = dst_sa;	/* XXX */
1025	}
1026
1027	/*
1028	 * then rt (for unicast) and ifp must be non-NULL valid values.
1029	 */
1030	if (!(flags & IPV6_FORWARDING)) {
1031		/* XXX: the FORWARDING flag can be set for mrouting. */
1032		in6_ifstat_inc_na(ifp, ifs6_out_request);
1033	}
1034	if (rt != NULL) {
1035		RT_LOCK(rt);
1036		ia = (struct in6_ifaddr *)(rt->rt_ifa);
1037		if (ia != NULL)
1038			IFA_ADDREF(&ia->ia_ifa);
1039		rt->rt_use++;
1040		RT_UNLOCK(rt);
1041	}
1042
1043	/*
1044	 * The outgoing interface must be in the zone of source and
1045	 * destination addresses (except local/loopback).  We should
1046	 * use ia_ifp to support the case of sending packets to an
1047	 * address of our own.
1048	 */
1049	if (ia != NULL && ia->ia_ifp) {
1050		ifnet_reference(ia->ia_ifp);	/* for origifp */
1051		if (origifp != NULL)
1052			ifnet_release(origifp);
1053		origifp = ia->ia_ifp;
1054	} else {
1055		if (ifp != NULL)
1056			ifnet_reference(ifp);	/* for origifp */
1057		if (origifp != NULL)
1058			ifnet_release(origifp);
1059		origifp = ifp;
1060	}
1061
1062	/* skip scope enforcements for local/loopback route */
1063	if (rt == NULL || !(rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
1064		struct in6_addr src0, dst0;
1065		u_int32_t zone;
1066
1067		src0 = ip6->ip6_src;
1068		if (in6_setscope(&src0, origifp, &zone))
1069			goto badscope;
1070		bzero(&src_sa, sizeof (src_sa));
1071		src_sa.sin6_family = AF_INET6;
1072		src_sa.sin6_len = sizeof (src_sa);
1073		src_sa.sin6_addr = ip6->ip6_src;
1074		if ((sa6_recoverscope(&src_sa, TRUE) ||
1075		    zone != src_sa.sin6_scope_id))
1076			goto badscope;
1077
1078		dst0 = ip6->ip6_dst;
1079		if ((in6_setscope(&dst0, origifp, &zone)))
1080			goto badscope;
1081		/* re-initialize to be sure */
1082		bzero(&dst_sa, sizeof (dst_sa));
1083		dst_sa.sin6_family = AF_INET6;
1084		dst_sa.sin6_len = sizeof (dst_sa);
1085		dst_sa.sin6_addr = ip6->ip6_dst;
1086		if ((sa6_recoverscope(&dst_sa, TRUE) ||
1087		    zone != dst_sa.sin6_scope_id))
1088			goto badscope;
1089
1090		/* scope check is done. */
1091		goto routefound;
1092
1093badscope:
1094		ip6stat.ip6s_badscope++;
1095		in6_ifstat_inc(origifp, ifs6_out_discard);
1096		if (error == 0)
1097			error = EHOSTUNREACH; /* XXX */
1098		goto bad;
1099	}
1100
1101routefound:
1102	if (rt != NULL && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1103		if (opt != NULL && opt->ip6po_nextroute.ro_rt) {
1104			/*
1105			 * The nexthop is explicitly specified by the
1106			 * application.  We assume the next hop is an IPv6
1107			 * address.
1108			 */
1109			dst = SIN6(opt->ip6po_nexthop);
1110		} else if ((rt->rt_flags & RTF_GATEWAY)) {
1111			dst = SIN6(rt->rt_gateway);
1112		}
		/*
		 * For packets destined to local/loopback, record the
		 * source interface (which owns the source address), as
		 * well as the output interface.  This is needed to
		 * reconstruct the embedded zone for the link-local
		 * address case in ip6_input().
		 */
1120		if (ia != NULL && (ifp->if_flags & IFF_LOOPBACK)) {
1121			uint32_t srcidx;
1122
1123			if (src_ia != NULL)
1124				srcidx = src_ia->ia_ifp->if_index;
1125			else if (ro->ro_srcia != NULL)
1126				srcidx = ro->ro_srcia->ifa_ifp->if_index;
1127			else
1128				srcidx = 0;
1129
1130			ip6_setsrcifaddr_info(m, srcidx, NULL);
1131			ip6_setdstifaddr_info(m, 0, ia);
1132		}
1133	}
1134
1135	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1136		m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
1137	} else {
1138		struct	in6_multi *in6m;
1139
1140		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
1141		in6_ifstat_inc_na(ifp, ifs6_out_mcast);
1142
1143		/*
1144		 * Confirm that the outgoing interface supports multicast.
1145		 */
1146		if (!(ifp->if_flags & IFF_MULTICAST)) {
1147			ip6stat.ip6s_noroute++;
1148			in6_ifstat_inc(ifp, ifs6_out_discard);
1149			error = ENETUNREACH;
1150			goto bad;
1151		}
1152		in6_multihead_lock_shared();
1153		IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, in6m);
1154		in6_multihead_lock_done();
1155		if (im6o != NULL)
1156			IM6O_LOCK(im6o);
1157		if (in6m != NULL &&
1158		    (im6o == NULL || im6o->im6o_multicast_loop)) {
1159			if (im6o != NULL)
1160				IM6O_UNLOCK(im6o);
1161			/*
1162			 * If we belong to the destination multicast group
1163			 * on the outgoing interface, and the caller did not
1164			 * forbid loopback, loop back a copy.
1165			 */
1166			ip6_mloopback(NULL, ifp, m, dst, optlen, nxt0);
1167		} else {
1168			if (im6o != NULL)
1169				IM6O_UNLOCK(im6o);
1170			/*
1171			 * If we are acting as a multicast router, perform
1172			 * multicast forwarding as if the packet had just
1173			 * arrived on the interface to which we are about
1174			 * to send.  The multicast forwarding function
1175			 * recursively calls this function, using the
1176			 * IPV6_FORWARDING flag to prevent infinite recursion.
1177			 *
1178			 * Multicasts that are looped back by ip6_mloopback(),
1179			 * above, will be forwarded by the ip6_input() routine,
1180			 * if necessary.
1181			 */
1182#if MROUTING
1183			if (ip6_mrouter && !(flags & IPV6_FORWARDING)) {
1184				/*
1185				 * XXX: ip6_mforward expects that rcvif is NULL
1186				 * when it is called from the originating path.
1187				 * However, it is not always the case, since
1188				 * some versions of MGETHDR() does not
1189				 * initialize the field.
1190				 */
1191				m->m_pkthdr.rcvif = NULL;
1192				if (ip6_mforward(ip6, ifp, m) != 0) {
1193					m_freem(m);
1194					if (in6m != NULL)
1195						IN6M_REMREF(in6m);
1196					goto done;
1197				}
1198			}
1199#endif /* MROUTING */
1200		}
1201		if (in6m != NULL)
1202			IN6M_REMREF(in6m);
1203		/*
1204		 * Multicasts with a hoplimit of zero may be looped back,
1205		 * above, but must not be transmitted on a network.
1206		 * Also, multicasts addressed to the loopback interface
1207		 * are not sent -- the above call to ip6_mloopback() will
1208		 * loop back a copy if this host actually belongs to the
1209		 * destination group on the loopback interface.
1210		 */
1211		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
1212		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
1213			m_freem(m);
1214			goto done;
1215		}
1216	}
1217
	/*
	 * Fill in the outgoing interface to tell the upper layer
	 * to increment per-interface statistics.
	 */
1222	if (ifpp != NULL) {
1223		ifnet_reference(ifp);	/* for caller */
1224		if (*ifpp != NULL)
1225			ifnet_release(*ifpp);
1226		*ifpp = ifp;
1227	}
1228
1229	/* Determine path MTU. */
1230	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
1231	    &alwaysfrag)) != 0)
1232		goto bad;
1233
1234	/*
1235	 * The caller of this function may specify to use the minimum MTU
1236	 * in some cases.
1237	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
1238	 * setting.  The logic is a bit complicated; by default, unicast
1239	 * packets will follow path MTU while multicast packets will be sent at
1240	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
1241	 * including unicast ones will be sent at the minimum MTU.  Multicast
1242	 * packets will always be sent at the minimum MTU unless
1243	 * IP6PO_MINMTU_DISABLE is explicitly specified.
1244	 * See RFC 3542 for more details.
1245	 */
1246	if (mtu > IPV6_MMTU) {
1247		if ((flags & IPV6_MINMTU)) {
1248			mtu = IPV6_MMTU;
1249		} else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) {
1250			mtu = IPV6_MMTU;
1251		} else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
1252		    (opt == NULL ||
1253		    opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
1254			mtu = IPV6_MMTU;
1255		}
1256	}
1257
1258	/*
1259	 * clear embedded scope identifiers if necessary.
1260	 * in6_clearscope will touch the addresses only when necessary.
1261	 */
1262	in6_clearscope(&ip6->ip6_src);
1263	in6_clearscope(&ip6->ip6_dst);
1264
1265#if IPFW2
1266	/*
1267	 * Check with the firewall...
1268	 */
1269	if (ip6_fw_enable && ip6_fw_chk_ptr) {
1270		u_short port = 0;
1271		m->m_pkthdr.rcvif = NULL;	/* XXX */
1272		/* If ipfw says divert, we have to just drop packet */
1273		if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m)) {
1274			m_freem(m);
1275			goto done;
1276		}
1277		if (m == NULL) {
1278			error = EACCES;
1279			goto done;
1280		}
1281	}
1282#endif /* IPFW2 */
1283
1284	/*
1285	 * If the outgoing packet contains a hop-by-hop options header,
1286	 * it must be examined and processed even by the source node.
1287	 * (RFC 2460, section 4.)
1288	 */
1289	if (exthdrs.ip6e_hbh != NULL) {
1290		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
1291		u_int32_t dummy; /* XXX unused */
1292		uint32_t oplen = 0; /* for ip6_process_hopopts() */
1293#if DIAGNOSTIC
1294		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
1295			panic("ip6e_hbh is not continuous");
1296#endif
1297		/*
1298		 * XXX: If we have to send an ICMPv6 error to the sender,
1299		 * we need the M_LOOP flag since icmp6_error() expects
1300		 * the IPv6 and the hop-by-hop options header are
1301		 * continuous unless the flag is set.
1302		 */
1303		m->m_flags |= M_LOOP;
1304		m->m_pkthdr.rcvif = ifp;
1305		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
1306		    ((hbh->ip6h_len + 1) << 3) - sizeof (struct ip6_hbh),
1307		    &dummy, &oplen) < 0) {
1308			/* m was already freed at this point */
1309			error = EINVAL;	/* better error? */
1310			goto done;
1311		}
1312		m->m_flags &= ~M_LOOP; /* XXX */
1313		m->m_pkthdr.rcvif = NULL;
1314	}
1315
1316#if DUMMYNET
1317check_with_pf:
1318#endif /* DUMMYNET */
1319#if PF
1320	if (PF_IS_ENABLED) {
1321#if DUMMYNET
1322		/*
1323		 * TODO: Need to save opt->ip6po_flags for reinjection
1324		 * rdar://10434993
1325		 */
1326		args.fwa_m = m;
1327		args.fwa_oif = ifp;
1328		args.fwa_oflags = flags;
1329		if (flags & IPV6_OUTARGS)
1330			args.fwa_ip6oa = ip6oa;
1331		args.fwa_ro6 = ro;
1332		args.fwa_dst6 = dst;
1333		args.fwa_ro6_pmtu = ro_pmtu;
1334		args.fwa_origifp = origifp;
1335		args.fwa_mtu = mtu;
1336		args.fwa_alwaysfrag = alwaysfrag;
1337		args.fwa_unfragpartlen = unfragpartlen;
1338		args.fwa_exthdrs = &exthdrs;
1339		/* Invoke outbound packet filter */
1340		error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, &args);
1341#else /* !DUMMYNET */
1342		error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, NULL);
1343#endif /* !DUMMYNET */
1344
1345		if (error != 0 || m == NULL) {
			/*
			 * Note that if we ever handle a packet chain, we will
			 * have to restore the linkage from the previous
			 * packet to the next, as in ip_output_list().
			 */
1351			if (m != NULL) {
1352				panic("%s: unexpected packet %p\n",
1353				    __func__, m);
1354				/* NOTREACHED */
1355			}
1356			/* Already freed by callee */
1357			goto done;
1358		}
1359		ip6 = mtod(m, struct ip6_hdr *);
1360	}
1361#endif /* PF */
1362
1363	/*
1364	 * Send the packet to the outgoing interface.
1365	 * If necessary, do IPv6 fragmentation before sending.
1366	 *
1367	 * the logic here is rather complex:
1368	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
1369	 * 1-a:	send as is if tlen <= path mtu
1370	 * 1-b:	fragment if tlen > path mtu
1371	 *
1372	 * 2: if user asks us not to fragment (dontfrag == 1)
1373	 * 2-a:	send as is if tlen <= interface mtu
1374	 * 2-b:	error if tlen > interface mtu
1375	 *
1376	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
1377	 *	always fragment
1378	 *
1379	 * 4: if dontfrag == 1 && alwaysfrag == 1
1380	 *	error, as we cannot handle this conflicting request
1381	 */
1382	tlen = m->m_pkthdr.len;
1383
1384	if (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG))
1385		ip6obf.dontfrag = TRUE;
1386	else
1387		ip6obf.dontfrag = FALSE;
1388	if (ip6obf.dontfrag && alwaysfrag) {	/* case 4 */
1389		/* conflicting request - can't transmit */
1390		error = EMSGSIZE;
1391		goto bad;
1392	}
1393
1394	lck_rw_lock_shared(nd_if_rwlock);
1395	/* Access without acquiring nd_ifinfo lock for performance */
1396	if (ip6obf.dontfrag && tlen > IN6_LINKMTU(ifp)) {	/* case 2-b */
1397		lck_rw_done(nd_if_rwlock);
1398		/*
1399		 * Even if the DONTFRAG option is specified, we cannot send the
1400		 * packet when the data length is larger than the MTU of the
1401		 * outgoing interface.
1402		 * Notify the error by sending IPV6_PATHMTU ancillary data as
1403		 * well as returning an error code (the latter is not described
1404		 * in the API spec.)
1405		 */
1406		u_int32_t mtu32;
1407		struct ip6ctlparam ip6cp;
1408
1409		mtu32 = (u_int32_t)mtu;
1410		bzero(&ip6cp, sizeof (ip6cp));
1411		ip6cp.ip6c_cmdarg = (void *)&mtu32;
1412		pfctlinput2(PRC_MSGSIZE, SA(&ro_pmtu->ro_dst), (void *)&ip6cp);
1413		error = EMSGSIZE;
1414		goto bad;
1415	} else {
1416		lck_rw_done(nd_if_rwlock);
1417	}
1418
1419	/*
1420	 * transmit packet without fragmentation
1421	 */
1422	if (ip6obf.dontfrag || (!alwaysfrag &&		/* case 1-a and 2-a */
1423	    (tlen <= mtu || TSO_IPV6_OK(ifp, m) ||
1424	    (ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) {
1425#ifdef IPSEC
1426		/* clean ipsec history once it goes out of the node */
1427		ipsec_delaux(m);
1428#endif /* IPSEC */
1429
1430		ip6_output_checksum(ifp, mtu, m, nxt0, tlen, optlen);
1431
1432		if (ro->ro_rt)
1433			RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
1434		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, adv);
1435		goto done;
1436	}
1437
1438	/*
1439	 * try to fragment the packet.  case 1-b and 3
1440	 */
1441	if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) {
1442		/* TSO and fragment aren't compatible */
1443		error = EMSGSIZE;
1444		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1445		goto bad;
1446	} else if (mtu < IPV6_MMTU) {
1447		/* path MTU cannot be less than IPV6_MMTU */
1448		error = EMSGSIZE;
1449		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1450		goto bad;
1451	} else if (ip6->ip6_plen == 0) {
1452		/* jumbo payload cannot be fragmented */
1453		error = EMSGSIZE;
1454		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1455		goto bad;
1456	} else {
1457		struct mbuf **mnext, *m_frgpart;
1458		struct ip6_frag *ip6f;
1459		u_int32_t id = htonl(ip6_randomid());
1460		u_char nextproto;
1461
1462		/*
1463		 * Too large for the destination or interface;
1464		 * fragment if possible.
1465		 * Must be able to put at least 8 bytes per fragment.
1466		 */
1467		hlen = unfragpartlen;
1468		if (mtu > IPV6_MAXPACKET)
1469			mtu = IPV6_MAXPACKET;
1470
1471		len = (mtu - hlen - sizeof (struct ip6_frag)) & ~7;
1472		if (len < 8) {
1473			error = EMSGSIZE;
1474			in6_ifstat_inc(ifp, ifs6_out_fragfail);
1475			goto bad;
1476		}
1477
1478		mnext = &m->m_nextpkt;
1479
1480		/*
1481		 * Change the next header field of the last header in the
1482		 * unfragmentable part.
1483		 */
1484		if (exthdrs.ip6e_rthdr != NULL) {
1485			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1486			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1487		} else if (exthdrs.ip6e_dest1 != NULL) {
1488			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1489			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1490		} else if (exthdrs.ip6e_hbh != NULL) {
1491			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1492			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1493		} else {
1494			nextproto = ip6->ip6_nxt;
1495			ip6->ip6_nxt = IPPROTO_FRAGMENT;
1496		}
1497
1498		if (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)
1499			in6_delayed_cksum_offset(m, 0, optlen, nxt0);
1500
1501		/*
1502		 * Loop through length of segment after first fragment,
1503		 * make new header and copy data of each part and link onto
1504		 * chain.
1505		 */
1506		m0 = m;
1507		for (off = hlen; off < tlen; off += len) {
1508			struct ip6_hdr *mhip6;
1509
1510			MGETHDR(m, M_DONTWAIT, MT_HEADER);	/* MAC-OK */
1511			if (m == NULL) {
1512				error = ENOBUFS;
1513				ip6stat.ip6s_odropped++;
1514				goto sendorfree;
1515			}
1516			m->m_pkthdr.rcvif = NULL;
1517			m->m_flags = m0->m_flags & M_COPYFLAGS;
1518			*mnext = m;
1519			mnext = &m->m_nextpkt;
1520			m->m_data += max_linkhdr;
1521			mhip6 = mtod(m, struct ip6_hdr *);
1522			*mhip6 = *ip6;
1523			m->m_len = sizeof (*mhip6);
1524			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1525			if (error) {
1526				ip6stat.ip6s_odropped++;
1527				goto sendorfree;
1528			}
1529			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1530			if (off + len >= tlen)
1531				len = tlen - off;
1532			else
1533				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1534			mhip6->ip6_plen = htons((u_short)(len + hlen +
1535			    sizeof (*ip6f) - sizeof (struct ip6_hdr)));
1536			if ((m_frgpart = m_copy(m0, off, len)) == NULL) {
1537				error = ENOBUFS;
1538				ip6stat.ip6s_odropped++;
1539				goto sendorfree;
1540			}
1541			m_cat(m, m_frgpart);
1542			m->m_pkthdr.len = len + hlen + sizeof (*ip6f);
1543			m->m_pkthdr.rcvif = NULL;
1544
1545			M_COPY_CLASSIFIER(m, m0);
1546			M_COPY_PFTAG(m, m0);
1547
1548#ifdef notyet
1549#if CONFIG_MACF_NET
1550			mac_create_fragment(m0, m);
1551#endif /* CONFIG_MACF_NET */
1552#endif /* notyet */
1553
1554			ip6f->ip6f_reserved = 0;
1555			ip6f->ip6f_ident = id;
1556			ip6f->ip6f_nxt = nextproto;
1557			ip6stat.ip6s_ofragments++;
1558			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1559		}
1560
1561		in6_ifstat_inc(ifp, ifs6_out_fragok);
1562	}
1563
1564	/*
1565	 * Remove leading garbages.
1566	 */
1567sendorfree:
1568	m = m0->m_nextpkt;
1569	m0->m_nextpkt = NULL;
1570	m_freem(m0);
1571	for (m0 = m; m != NULL; m = m0) {
1572		m0 = m->m_nextpkt;
1573		m->m_nextpkt = NULL;
1574		if (error == 0) {
1575#if IPSEC
1576			/* clean ipsec history once it goes out of the node */
1577			ipsec_delaux(m);
1578#endif /* IPSEC */
1579			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt,
1580			    adv);
1581		} else {
1582			m_freem(m);
1583		}
1584	}
1585
1586	if (error == 0)
1587		ip6stat.ip6s_fragmented++;
1588
1589done:
1590	ROUTE_RELEASE(&ip6route);
1591#if IPSEC
1592	ROUTE_RELEASE(&ipsec_state.ro);
1593	if (sp != NULL)
1594		key_freesp(sp, KEY_SADB_UNLOCKED);
1595#endif /* IPSEC */
1596#if DUMMYNET
1597	ROUTE_RELEASE(&saved_route);
1598	ROUTE_RELEASE(&saved_ro_pmtu);
1599#endif /* DUMMYNET */
1600
1601	if (ia != NULL)
1602		IFA_REMREF(&ia->ia_ifa);
1603	if (src_ia != NULL)
1604		IFA_REMREF(&src_ia->ia_ifa);
1605	if (ifp != NULL)
1606		ifnet_release(ifp);
1607	if (origifp != NULL)
1608		ifnet_release(origifp);
1609	return (error);
1610
1611freehdrs:
1612	if (exthdrs.ip6e_hbh != NULL)
1613		m_freem(exthdrs.ip6e_hbh);
1614	if (exthdrs.ip6e_dest1 != NULL)
1615		m_freem(exthdrs.ip6e_dest1);
1616	if (exthdrs.ip6e_rthdr != NULL)
1617		m_freem(exthdrs.ip6e_rthdr);
1618	if (exthdrs.ip6e_dest2 != NULL)
1619		m_freem(exthdrs.ip6e_dest2);
1620	/* FALLTHRU */
1621bad:
1622	if (m != NULL)
1623		m_freem(m);
1624	goto done;
1625
1626#undef ipf_pktopts
1627#undef exthdrs
1628#undef ip6route
1629#undef ipsec_state
1630#undef saved_route
1631#undef saved_ro_pmtu
1632#undef args
1633}
1634
1635static int
1636ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1637{
1638	struct mbuf *m;
1639
1640	if (hlen > MCLBYTES)
1641		return (ENOBUFS); /* XXX */
1642
1643	MGET(m, M_DONTWAIT, MT_DATA);
1644	if (m == NULL)
1645		return (ENOBUFS);
1646
1647	if (hlen > MLEN) {
1648		MCLGET(m, M_DONTWAIT);
1649		if (!(m->m_flags & M_EXT)) {
1650			m_free(m);
1651			return (ENOBUFS);
1652		}
1653	}
1654	m->m_len = hlen;
1655	if (hdr != NULL)
1656		bcopy(hdr, mtod(m, caddr_t), hlen);
1657
1658	*mp = m;
1659	return (0);
1660}
1661
1662static void
1663ip6_out_cksum_stats(int proto, u_int32_t len)
1664{
1665	switch (proto) {
1666	case IPPROTO_TCP:
1667		tcp_out6_cksum_stats(len);
1668		break;
1669	case IPPROTO_UDP:
1670		udp_out6_cksum_stats(len);
1671		break;
1672	default:
1673		/* keep only TCP or UDP stats for now */
1674		break;
1675	}
1676}
1677
1678/*
1679 * Process a delayed payload checksum calculation (outbound path.)
1680 *
1681 * hoff is the number of bytes beyond the mbuf data pointer which
1682 * points to the IPv6 header.  optlen is the number of bytes, if any,
1683 * between the end of IPv6 header and the beginning of the ULP payload
1684 * header, which represents the extension headers.  If optlen is less
1685 * than zero, this routine will bail when it detects extension headers.
1686 *
1687 * Returns a bitmask representing all the work done in software.
1688 */
1689uint32_t
1690in6_finalize_cksum(struct mbuf *m, uint32_t hoff, int32_t optlen,
1691    int32_t nxt0, uint32_t csum_flags)
1692{
1693	unsigned char buf[sizeof (struct ip6_hdr)] __attribute__((aligned(8)));
1694	struct ip6_hdr *ip6;
1695	uint32_t offset, mlen, hlen, olen, sw_csum;
1696	uint16_t csum, ulpoff, plen;
1697	uint8_t nxt;
1698
1699	_CASSERT(sizeof (csum) == sizeof (uint16_t));
1700	VERIFY(m->m_flags & M_PKTHDR);
1701
1702	sw_csum = (csum_flags & m->m_pkthdr.csum_flags);
1703
1704	if ((sw_csum &= CSUM_DELAY_IPV6_DATA) == 0)
1705		goto done;
1706
1707	mlen = m->m_pkthdr.len;				/* total mbuf len */
1708	hlen = sizeof (*ip6);				/* IPv6 header len */
1709
1710	/* sanity check (need at least IPv6 header) */
1711	if (mlen < (hoff + hlen)) {
1712		panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr "
1713		    "(%u+%u)\n", __func__, m, mlen, hoff, hlen);
1714		/* NOTREACHED */
1715	}
1716
1717	/*
1718	 * In case the IPv6 header is not contiguous, or not 32-bit
1719	 * aligned, copy it to a local buffer.
1720	 */
1721	if ((hoff + hlen) > m->m_len ||
1722	    !IP6_HDR_ALIGNED_P(mtod(m, caddr_t) + hoff)) {
1723		m_copydata(m, hoff, hlen, (caddr_t)buf);
1724		ip6 = (struct ip6_hdr *)(void *)buf;
1725	} else {
1726		ip6 = (struct ip6_hdr *)(void *)(m->m_data + hoff);
1727	}
1728
1729	nxt = ip6->ip6_nxt;
1730	plen = ntohs(ip6->ip6_plen);
1731	if (plen != (mlen - (hoff + hlen))) {
1732		plen = OSSwapInt16(plen);
1733		if (plen != (mlen - (hoff + hlen))) {
1734			/* Don't complain for jumbograms */
1735			if (plen != 0 || nxt != IPPROTO_HOPOPTS) {
1736				printf("%s: mbuf 0x%llx proto %d IPv6 "
1737				    "plen %d (%x) [swapped %d (%x)] doesn't "
1738				    "match actual packet length; %d is used "
1739				    "instead\n", __func__,
1740				    (uint64_t)VM_KERNEL_ADDRPERM(m), nxt,
1741				    ip6->ip6_plen, ip6->ip6_plen, plen, plen,
1742				    (mlen - (hoff + hlen)));
1743			}
1744			plen = mlen - (hoff + hlen);
1745		}
1746	}
1747
1748	if (optlen < 0) {
1749		/* next header isn't TCP/UDP and we don't know optlen, bail */
1750		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
1751			sw_csum = 0;
1752			goto done;
1753		}
1754		olen = 0;
1755	} else {
1756		/* caller supplied the original transport number; use it */
1757		if (nxt0 >= 0)
1758			nxt = nxt0;
1759		olen = optlen;
1760	}
1761
1762	offset = hoff + hlen + olen;			/* ULP header */
1763
1764	/* sanity check */
1765	if (mlen < offset) {
1766		panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr+ext_hdr "
1767		    "(%u+%u+%u)\n", __func__, m, mlen, hoff, hlen, olen);
1768		/* NOTREACHED */
1769	}
1770
1771	/*
1772	 * offset is added to the lower 16-bit value of csum_data,
1773	 * which is expected to contain the ULP offset; therefore
1774	 * CSUM_PARTIAL offset adjustment must be undone.
1775	 */
1776	if ((m->m_pkthdr.csum_flags & (CSUM_PARTIAL|CSUM_DATA_VALID)) ==
1777	    (CSUM_PARTIAL|CSUM_DATA_VALID)) {
1778		/*
1779		 * Get back the original ULP offset (this will
1780		 * undo the CSUM_PARTIAL logic in ip6_output.)
1781		 */
1782		m->m_pkthdr.csum_data = (m->m_pkthdr.csum_tx_stuff -
1783		    m->m_pkthdr.csum_tx_start);
1784	}
1785
1786	ulpoff = (m->m_pkthdr.csum_data & 0xffff);	/* ULP csum offset */
1787
1788	if (mlen < (ulpoff + sizeof (csum))) {
1789		panic("%s: mbuf %p pkt len (%u) proto %d invalid ULP "
1790		    "cksum offset (%u) cksum flags 0x%x\n", __func__,
1791		    m, mlen, nxt, ulpoff, m->m_pkthdr.csum_flags);
1792		/* NOTREACHED */
1793	}
1794
1795	csum = inet6_cksum(m, 0, offset, plen - olen);
1796
1797	/* Update stats */
1798	ip6_out_cksum_stats(nxt, plen - olen);
1799
1800	/* RFC1122 4.1.3.4 */
1801	if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDPIPV6))
1802		csum = 0xffff;
1803
1804	/* Insert the checksum in the ULP csum field */
1805	offset += ulpoff;
1806	if ((offset + sizeof (csum)) > m->m_len) {
1807		m_copyback(m, offset, sizeof (csum), &csum);
1808	} else if (IP6_HDR_ALIGNED_P(mtod(m, char *) + hoff)) {
1809		*(uint16_t *)(void *)(mtod(m, char *) + offset) = csum;
1810	} else {
1811		bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum));
1812	}
1813	m->m_pkthdr.csum_flags &=
1814	    ~(CSUM_DELAY_IPV6_DATA | CSUM_DATA_VALID | CSUM_PARTIAL);
1815
1816done:
1817	return (sw_csum);
1818}
1819
1820/*
1821 * Insert jumbo payload option.
1822 */
1823static int
1824ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1825{
1826	struct mbuf *mopt;
1827	u_char *optbuf;
1828	u_int32_t v;
1829
1830#define	JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1831
1832	/*
1833	 * If there is no hop-by-hop options header, allocate new one.
1834	 * If there is one but it doesn't have enough space to store the
1835	 * jumbo payload option, allocate a cluster to store the whole options.
1836	 * Otherwise, use it to store the options.
1837	 */
1838	if (exthdrs->ip6e_hbh == NULL) {
1839		MGET(mopt, M_DONTWAIT, MT_DATA);
1840		if (mopt == NULL)
1841			return (ENOBUFS);
1842		mopt->m_len = JUMBOOPTLEN;
1843		optbuf = mtod(mopt, u_char *);
1844		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1845		exthdrs->ip6e_hbh = mopt;
1846	} else {
1847		struct ip6_hbh *hbh;
1848
1849		mopt = exthdrs->ip6e_hbh;
1850		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1851			/*
1852			 * XXX assumption:
1853			 * - exthdrs->ip6e_hbh is not referenced from places
1854			 *   other than exthdrs.
1855			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1856			 */
1857			u_int32_t oldoptlen = mopt->m_len;
1858			struct mbuf *n;
1859
1860			/*
1861			 * XXX: give up if the whole (new) hbh header does
1862			 * not fit even in an mbuf cluster.
1863			 */
1864			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1865				return (ENOBUFS);
1866
1867			/*
1868			 * As a consequence, we must always prepare a cluster
1869			 * at this point.
1870			 */
1871			MGET(n, M_DONTWAIT, MT_DATA);
1872			if (n != NULL) {
1873				MCLGET(n, M_DONTWAIT);
1874				if (!(n->m_flags & M_EXT)) {
1875					m_freem(n);
1876					n = NULL;
1877				}
1878			}
1879			if (n == NULL)
1880				return (ENOBUFS);
1881			n->m_len = oldoptlen + JUMBOOPTLEN;
1882			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1883			    oldoptlen);
1884			optbuf = mtod(n, u_char *) + oldoptlen;
1885			m_freem(mopt);
1886			mopt = exthdrs->ip6e_hbh = n;
1887		} else {
1888			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1889			mopt->m_len += JUMBOOPTLEN;
1890		}
1891		optbuf[0] = IP6OPT_PADN;
1892		optbuf[1] = 1;
1893
1894		/*
1895		 * Adjust the header length according to the pad and
1896		 * the jumbo payload option.
1897		 */
1898		hbh = mtod(mopt, struct ip6_hbh *);
1899		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1900	}
1901
1902	/* fill in the option. */
1903	optbuf[2] = IP6OPT_JUMBO;
1904	optbuf[3] = 4;
1905	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1906	bcopy(&v, &optbuf[4], sizeof (u_int32_t));
1907
1908	/* finally, adjust the packet header length */
1909	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1910
1911	return (0);
1912#undef JUMBOOPTLEN
1913}
1914
1915/*
1916 * Insert fragment header and copy unfragmentable header portions.
1917 */
1918static int
1919ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1920    struct ip6_frag **frghdrp)
1921{
1922	struct mbuf *n, *mlast;
1923
1924	if (hlen > sizeof (struct ip6_hdr)) {
1925		n = m_copym(m0, sizeof (struct ip6_hdr),
1926		    hlen - sizeof (struct ip6_hdr), M_DONTWAIT);
1927		if (n == NULL)
1928			return (ENOBUFS);
1929		m->m_next = n;
1930	} else
1931		n = m;
1932
1933	/* Search for the last mbuf of unfragmentable part. */
1934	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1935		;
1936
1937	if (!(mlast->m_flags & M_EXT) &&
1938	    M_TRAILINGSPACE(mlast) >= sizeof (struct ip6_frag)) {
1939		/* use the trailing space of the last mbuf for the frag hdr */
1940		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1941		    mlast->m_len);
1942		mlast->m_len += sizeof (struct ip6_frag);
1943		m->m_pkthdr.len += sizeof (struct ip6_frag);
1944	} else {
1945		/* allocate a new mbuf for the fragment header */
1946		struct mbuf *mfrg;
1947
1948		MGET(mfrg, M_DONTWAIT, MT_DATA);
1949		if (mfrg == NULL)
1950			return (ENOBUFS);
1951		mfrg->m_len = sizeof (struct ip6_frag);
1952		*frghdrp = mtod(mfrg, struct ip6_frag *);
1953		mlast->m_next = mfrg;
1954	}
1955
1956	return (0);
1957}
1958
1959static int
1960ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
1961    struct ifnet *ifp, struct in6_addr *dst, u_int32_t *mtup,
1962    boolean_t *alwaysfragp)
1963{
1964	u_int32_t mtu = 0;
1965	boolean_t alwaysfrag = FALSE;
1966	int error = 0;
1967
1968	if (ro_pmtu != ro) {
1969		/* The first hop and the final destination may differ. */
1970		struct sockaddr_in6 *sa6_dst = SIN6(&ro_pmtu->ro_dst);
1971		if (ROUTE_UNUSABLE(ro_pmtu) ||
1972		    !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))
1973			ROUTE_RELEASE(ro_pmtu);
1974
1975		if (ro_pmtu->ro_rt == NULL) {
1976			bzero(sa6_dst, sizeof (*sa6_dst));
1977			sa6_dst->sin6_family = AF_INET6;
1978			sa6_dst->sin6_len = sizeof (struct sockaddr_in6);
1979			sa6_dst->sin6_addr = *dst;
1980
1981			rtalloc_scoped((struct route *)ro_pmtu,
1982			    ifp != NULL ? ifp->if_index : IFSCOPE_NONE);
1983		}
1984	}
1985
1986	if (ro_pmtu->ro_rt != NULL) {
1987		u_int32_t ifmtu;
1988
1989		lck_rw_lock_shared(nd_if_rwlock);
1990		/* Access without acquiring nd_ifinfo lock for performance */
1991		ifmtu = IN6_LINKMTU(ifp);
1992		lck_rw_done(nd_if_rwlock);
1993
1994		/*
1995		 * Access rmx_mtu without holding the route entry lock,
1996		 * for performance; this isn't something that changes
1997		 * often, so optimize.
1998		 */
1999		mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
2000		if (mtu > ifmtu || mtu == 0) {
2001			/*
2002			 * The MTU on the route is larger than the MTU on
2003			 * the interface!  This shouldn't happen, unless the
2004			 * MTU of the interface has been changed after the
2005			 * interface was brought up.  Change the MTU in the
2006			 * route to match the interface MTU (as long as the
2007			 * field isn't locked).
2008			 *
2009			 * if MTU on the route is 0, we need to fix the MTU.
2010			 * this case happens with path MTU discovery timeouts.
2011			 */
2012			mtu = ifmtu;
2013			if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU))
2014				ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
2015		} else if (mtu < IPV6_MMTU) {
2016			/*
2017			 * RFC2460 section 5, last paragraph:
2018			 * if we record ICMPv6 too big message with
2019			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
2020			 * or smaller, with framgent header attached.
2021			 * (fragment header is needed regardless from the
2022			 * packet size, for translators to identify packets)
2023			 */
2024			alwaysfrag = TRUE;
2025			mtu = IPV6_MMTU;
2026		}
2027	} else {
2028		if (ifp) {
2029			lck_rw_lock_shared(nd_if_rwlock);
2030			/* Don't hold nd_ifinfo lock for performance */
2031			mtu = IN6_LINKMTU(ifp);
2032			lck_rw_done(nd_if_rwlock);
2033		} else {
2034			error = EHOSTUNREACH; /* XXX */
2035		}
2036	}
2037
2038	*mtup = mtu;
2039	if (alwaysfragp != NULL)
2040		*alwaysfragp = alwaysfrag;
2041	return (error);
2042}
2043
/*
 * IP6 socket option processing.
 *
 * Handles get/set requests at level IPPROTO_IPV6 for the socket "so";
 * the request (level, direction, option name, value buffer, requesting
 * process) is described by "sopt", which must not be NULL.
 *
 * Returns 0 on success or an errno value:
 *   EINVAL       - the level is not IPPROTO_IPV6, the option value has
 *                  the wrong size or is out of range, or the option
 *                  cannot be set in the socket's current state;
 *   ENOPROTOOPT  - the option name is not recognized for the requested
 *                  direction;
 *   EPERM        - an unprivileged process tried to set
 *                  IPV6_2292HOPOPTS or IPV6_2292DSTOPTS;
 *   ENOTCONN     - IPV6_PATHMTU was requested on an unconnected socket;
 * or whatever error the copyin/copyout or per-option helper reports.
 *
 * A direction other than SOPT_SET or SOPT_GET matches no case of the
 * outer switch, so the function returns 0 without doing anything.
 */
int
ip6_ctloutput(struct socket *so, struct sockopt *sopt)
{
	int optdatalen, uproto;
	void *optdata;
	int privileged;
	struct inpcb *in6p = sotoinpcb(so);
	int error = 0, optval = 0;
	int level, op = -1, optname = 0;
	int optlen = 0;
	struct proc *p;

	VERIFY(sopt != NULL);

	level = sopt->sopt_level;
	op = sopt->sopt_dir;
	optname = sopt->sopt_name;
	optlen = sopt->sopt_valsize;
	p = sopt->sopt_p;
	uproto = (int)SOCK_PROTO(so);

	/* a zero return from proc_suser() is taken to mean "privileged" */
	privileged = (proc_suser(p) == 0);

	if (level == IPPROTO_IPV6) {
		switch (op) {
		case SOPT_SET:
			switch (optname) {
			case IPV6_2292PKTOPTIONS: {
				struct mbuf *m;

				/*
				 * Stage the option value in an mbuf, hand it
				 * to ip6_pcbopts() and release the mbuf.
				 * NOTE(review): m is not freed here when
				 * soopt_mcopyin() fails; presumably that
				 * routine releases it on error -- confirm.
				 */
				error = soopt_getm(sopt, &m);
				if (error != 0)
					break;
				error = soopt_mcopyin(sopt, m);
				if (error != 0)
					break;
				error = ip6_pcbopts(&in6p->in6p_outputopts,
				    m, so, sopt);
				m_freem(m);
				break;
			}

			/*
			 * Use of some Hop-by-Hop options or some
			 * Destination options, might require special
			 * privilege.  That is, normal applications
			 * (without special privilege) might be forbidden
			 * from setting certain options in outgoing packets,
			 * and might never see certain options in received
			 * packets. [RFC 2292 Section 6]
			 * KAME specific note:
			 *  KAME prevents non-privileged users from sending or
			 *  receiving ANY hbh/dst options in order to avoid
			 *  overhead of parsing options in the kernel.
			 */
			case IPV6_RECVHOPOPTS:
			case IPV6_RECVDSTOPTS:
			case IPV6_RECVRTHDRDSTOPTS:
				/*
				 * error is still 0 here, so an unprivileged
				 * request is ignored and reported as success
				 * rather than failing.
				 */
				if (!privileged)
					break;
				/* FALLTHROUGH */
			case IPV6_UNICAST_HOPS:
			case IPV6_HOPLIMIT:
			case IPV6_RECVPKTINFO:
			case IPV6_RECVHOPLIMIT:
			case IPV6_RECVRTHDR:
			case IPV6_RECVPATHMTU:
			case IPV6_RECVTCLASS:
			case IPV6_V6ONLY:
			case IPV6_AUTOFLOWLABEL:
				/* all of these take exactly one int */
				if (optlen != sizeof (int)) {
					error = EINVAL;
					break;
				}
				error = sooptcopyin(sopt, &optval,
				    sizeof (optval), sizeof (optval));
				if (error)
					break;

				switch (optname) {
				case IPV6_UNICAST_HOPS:
					if (optval < -1 || optval >= 256) {
						error = EINVAL;
					} else {
						/* -1 = kernel default */
						in6p->in6p_hops = optval;
						if (in6p->inp_vflag &
						    INP_IPV4) {
							in6p->inp_ip_ttl =
							    optval;
						}
					}
					break;
/*
 * OPTSET: set "bit" in the pcb flags when optval is non-zero, clear it
 * otherwise.
 */
#define	OPTSET(bit) do {						\
	if (optval)							\
		in6p->inp_flags |= (bit);				\
	else								\
		in6p->inp_flags &= ~(bit);				\
} while (0)

/*
 * OPTSET2292: like OPTSET, but additionally sets IN6P_RFC2292 in the
 * pcb flags; the RFC 3542 options below refuse to operate once that
 * flag is set.
 */
#define	OPTSET2292(bit) do {						\
	in6p->inp_flags |= IN6P_RFC2292;				\
	if (optval)							\
		in6p->inp_flags |= (bit);				\
	else								\
		in6p->inp_flags &= ~(bit);				\
} while (0)

/* OPTBIT: 1 if any of "bit" is set in the pcb flags, else 0 */
#define	OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)

				case IPV6_RECVPKTINFO:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_PKTINFO);
					break;

				case IPV6_HOPLIMIT: {
					struct ip6_pktopts **optp;

					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					optp = &in6p->in6p_outputopts;
					error = ip6_pcbopt(IPV6_HOPLIMIT,
					    (u_char *)&optval, sizeof (optval),
					    optp, uproto);
					break;
				}

				case IPV6_RECVHOPLIMIT:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_HOPLIMIT);
					break;

				case IPV6_RECVHOPOPTS:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_HOPOPTS);
					break;

				case IPV6_RECVDSTOPTS:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_DSTOPTS);
					break;

				case IPV6_RECVRTHDRDSTOPTS:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_RTHDRDSTOPTS);
					break;

				case IPV6_RECVRTHDR:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_RTHDR);
					break;

				case IPV6_RECVPATHMTU:
					/*
					 * We ignore this option for TCP
					 * sockets.
					 * (RFC3542 leaves this case
					 * unspecified.)
					 */
					if (uproto != IPPROTO_TCP)
						OPTSET(IN6P_MTU);
					break;

				case IPV6_V6ONLY:
					/*
					 * make setsockopt(IPV6_V6ONLY)
					 * available only prior to bind(2).
					 * see ipng mailing list, Jun 22 2001.
					 */
					if (in6p->inp_lport ||
					    !IN6_IS_ADDR_UNSPECIFIED(
					    &in6p->in6p_laddr)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_IPV6_V6ONLY);
					/* keep INP_IPV4 the inverse of V6ONLY */
					if (optval)
						in6p->inp_vflag &= ~INP_IPV4;
					else
						in6p->inp_vflag |= INP_IPV4;
					break;

				case IPV6_RECVTCLASS:
					/* we can mix with RFC2292 */
					OPTSET(IN6P_TCLASS);
					break;

				case IPV6_AUTOFLOWLABEL:
					OPTSET(IN6P_AUTOFLOWLABEL);
					break;

				}
				break;

			case IPV6_TCLASS:
			case IPV6_DONTFRAG:
			case IPV6_USE_MIN_MTU:
			case IPV6_PREFER_TEMPADDR: {
				struct ip6_pktopts **optp;

				/* int-valued options stored via ip6_pcbopt() */
				if (optlen != sizeof (optval)) {
					error = EINVAL;
					break;
				}
				error = sooptcopyin(sopt, &optval,
				    sizeof (optval), sizeof (optval));
				if (error)
					break;

				optp = &in6p->in6p_outputopts;
				error = ip6_pcbopt(optname, (u_char *)&optval,
				    sizeof (optval), optp, uproto);
				break;
			}

			case IPV6_2292PKTINFO:
			case IPV6_2292HOPLIMIT:
			case IPV6_2292HOPOPTS:
			case IPV6_2292DSTOPTS:
			case IPV6_2292RTHDR:
				/* RFC 2292 */
				if (optlen != sizeof (int)) {
					error = EINVAL;
					break;
				}
				error = sooptcopyin(sopt, &optval,
				    sizeof (optval), sizeof (optval));
				if (error)
					break;
				switch (optname) {
				case IPV6_2292PKTINFO:
					OPTSET2292(IN6P_PKTINFO);
					break;
				case IPV6_2292HOPLIMIT:
					OPTSET2292(IN6P_HOPLIMIT);
					break;
				case IPV6_2292HOPOPTS:
					/*
					 * Check super-user privilege.
					 * See comments for IPV6_RECVHOPOPTS.
					 */
					if (!privileged)
						return (EPERM);
					OPTSET2292(IN6P_HOPOPTS);
					break;
				case IPV6_2292DSTOPTS:
					if (!privileged)
						return (EPERM);
					OPTSET2292(IN6P_DSTOPTS|
					    IN6P_RTHDRDSTOPTS); /* XXX */
					break;
				case IPV6_2292RTHDR:
					OPTSET2292(IN6P_RTHDR);
					break;
				}
				break;

			case IPV6_3542PKTINFO:
			case IPV6_3542HOPOPTS:
			case IPV6_3542RTHDR:
			case IPV6_3542DSTOPTS:
			case IPV6_RTHDRDSTOPTS:
			case IPV6_3542NEXTHOP: {
				struct ip6_pktopts **optp;
				/* new advanced API (RFC3542) */
				struct mbuf *m;

				/* cannot mix with RFC2292 */
				if (OPTBIT(IN6P_RFC2292)) {
					error = EINVAL;
					break;
				}
				/*
				 * Variable-length value: stage it in an mbuf.
				 * NOTE(review): as above, m is not freed here
				 * when soopt_mcopyin() fails -- confirm that
				 * the callee releases it.
				 */
				error = soopt_getm(sopt, &m);
				if (error != 0)
					break;
				error = soopt_mcopyin(sopt, m);
				if (error != 0)
					break;

				optp = &in6p->in6p_outputopts;
				error = ip6_pcbopt(optname, mtod(m, u_char *),
				    m->m_len, optp, uproto);
				m_freem(m);
				break;
			}
/*
 * Only OPTSET is retired here; OPTSET2292 and OPTBIT remain defined and
 * OPTBIT is still used by the SOPT_GET code below.
 */
#undef OPTSET
			case IPV6_MULTICAST_IF:
			case IPV6_MULTICAST_HOPS:
			case IPV6_MULTICAST_LOOP:
			case IPV6_JOIN_GROUP:
			case IPV6_LEAVE_GROUP:
			case IPV6_MSFILTER:
			case MCAST_BLOCK_SOURCE:
			case MCAST_UNBLOCK_SOURCE:
			case MCAST_JOIN_GROUP:
			case MCAST_LEAVE_GROUP:
			case MCAST_JOIN_SOURCE_GROUP:
			case MCAST_LEAVE_SOURCE_GROUP:
				/* multicast options are handled elsewhere */
				error = ip6_setmoptions(in6p, sopt);
				break;

			case IPV6_PORTRANGE:
				error = sooptcopyin(sopt, &optval,
				    sizeof (optval), sizeof (optval));
				if (error)
					break;

				/* at most one of LOWPORT/HIGHPORT stays set */
				switch (optval) {
				case IPV6_PORTRANGE_DEFAULT:
					in6p->inp_flags &= ~(INP_LOWPORT);
					in6p->inp_flags &= ~(INP_HIGHPORT);
					break;

				case IPV6_PORTRANGE_HIGH:
					in6p->inp_flags &= ~(INP_LOWPORT);
					in6p->inp_flags |= INP_HIGHPORT;
					break;

				case IPV6_PORTRANGE_LOW:
					in6p->inp_flags &= ~(INP_HIGHPORT);
					in6p->inp_flags |= INP_LOWPORT;
					break;

				default:
					error = EINVAL;
					break;
				}
				break;
#if IPSEC
			case IPV6_IPSEC_POLICY: {
				caddr_t req = NULL;
				size_t len = 0;
				struct mbuf *m;

				if ((error = soopt_getm(sopt, &m)) != 0)
					break;
				if ((error = soopt_mcopyin(sopt, m)) != 0)
					break;

				req = mtod(m, caddr_t);
				len = m->m_len;
				error = ipsec6_set_policy(in6p, optname, req,
				    len, privileged);
				m_freem(m);
				break;
			}
#endif /* IPSEC */
#if IPFIREWALL
			case IPV6_FW_ADD:
			case IPV6_FW_DEL:
			case IPV6_FW_FLUSH:
			case IPV6_FW_ZERO: {
				/* try to load the firewall on first use */
				if (ip6_fw_ctl_ptr == NULL)
					load_ip6fw();
				if (ip6_fw_ctl_ptr != NULL)
					error = (*ip6_fw_ctl_ptr)(sopt);
				else
					error = ENOPROTOOPT;
				break;
			}
#endif /* IPFIREWALL */
			/*
			 * IPv6 variant of IP_BOUND_IF; for details see
			 * comments on IP_BOUND_IF in ip_ctloutput().
			 */
			case IPV6_BOUND_IF:
				/* This option is settable only on IPv6 */
				if (!(in6p->inp_vflag & INP_IPV6)) {
					error = EINVAL;
					break;
				}

				error = sooptcopyin(sopt, &optval,
				    sizeof (optval), sizeof (optval));

				if (error)
					break;

				error = inp_bindif(in6p, optval, NULL);
				break;

			case IPV6_NO_IFT_CELLULAR:
				/* This option is settable only for IPv6 */
				if (!(in6p->inp_vflag & INP_IPV6)) {
					error = EINVAL;
					break;
				}

				error = sooptcopyin(sopt, &optval,
				    sizeof (optval), sizeof (optval));

				if (error)
					break;

				/* once set, it cannot be unset */
				if (!optval &&
				    (in6p->inp_flags & INP_NO_IFT_CELLULAR)) {
					error = EINVAL;
					break;
				}

				/*
				 * NOTE(review): a zero optval with the flag
				 * currently clear also reaches this call --
				 * confirm that is intended.
				 */
				error = so_set_restrictions(so,
				    SO_RESTRICT_DENY_CELLULAR);
				break;

			case IPV6_OUT_IF:
				/* This option is not settable */
				error = EINVAL;
				break;

			default:
				error = ENOPROTOOPT;
				break;
			}
			break;

		case SOPT_GET:
			switch (optname) {

			case IPV6_2292PKTOPTIONS:
				/*
				 * RFC3542 (effectively) deprecated the
				 * semantics of the 2292-style pktoptions.
				 * Since it was not reliable in nature (i.e.,
				 * applications had to expect the lack of some
				 * information after all), it would make sense
				 * to simplify this part by always returning
				 * empty data.
				 */
				sopt->sopt_valsize = 0;
				break;

			case IPV6_RECVHOPOPTS:
			case IPV6_RECVDSTOPTS:
			case IPV6_RECVRTHDRDSTOPTS:
			case IPV6_UNICAST_HOPS:
			case IPV6_RECVPKTINFO:
			case IPV6_RECVHOPLIMIT:
			case IPV6_RECVRTHDR:
			case IPV6_RECVPATHMTU:
			case IPV6_V6ONLY:
			case IPV6_PORTRANGE:
			case IPV6_RECVTCLASS:
			case IPV6_AUTOFLOWLABEL:
				/* int-valued options read straight off the pcb */
				switch (optname) {

				case IPV6_RECVHOPOPTS:
					optval = OPTBIT(IN6P_HOPOPTS);
					break;

				case IPV6_RECVDSTOPTS:
					optval = OPTBIT(IN6P_DSTOPTS);
					break;

				case IPV6_RECVRTHDRDSTOPTS:
					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
					break;

				case IPV6_UNICAST_HOPS:
					optval = in6p->in6p_hops;
					break;

				case IPV6_RECVPKTINFO:
					optval = OPTBIT(IN6P_PKTINFO);
					break;

				case IPV6_RECVHOPLIMIT:
					optval = OPTBIT(IN6P_HOPLIMIT);
					break;

				case IPV6_RECVRTHDR:
					optval = OPTBIT(IN6P_RTHDR);
					break;

				case IPV6_RECVPATHMTU:
					optval = OPTBIT(IN6P_MTU);
					break;

				case IPV6_V6ONLY:
					optval = OPTBIT(IN6P_IPV6_V6ONLY);
					break;

				case IPV6_PORTRANGE: {
					int flags;
					flags = in6p->inp_flags;
					if (flags & INP_HIGHPORT)
						optval = IPV6_PORTRANGE_HIGH;
					else if (flags & INP_LOWPORT)
						optval = IPV6_PORTRANGE_LOW;
					else
						optval = 0;
					break;
				}
				case IPV6_RECVTCLASS:
					optval = OPTBIT(IN6P_TCLASS);
					break;

				case IPV6_AUTOFLOWLABEL:
					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
					break;
				}
				/* nothing in the inner switch assigns error */
				if (error)
					break;
				error = sooptcopyout(sopt, &optval,
				    sizeof (optval));
				break;

			case IPV6_PATHMTU: {
				u_int32_t pmtu = 0;
				struct ip6_mtuinfo mtuinfo;
				struct route_in6 sro;

				/* scratch route used only for this lookup */
				bzero(&sro, sizeof (sro));

				if (!(so->so_state & SS_ISCONNECTED))
					return (ENOTCONN);
				/*
				 * XXX: we do not consider the case of source
				 * routing, or optional information to specify
				 * the outgoing interface.
				 */
				error = ip6_getpmtu(&sro, NULL, NULL,
				    &in6p->in6p_faddr, &pmtu, NULL);
				ROUTE_RELEASE(&sro);
				if (error)
					break;
				/* clamp the reported value to IPV6_MAXPACKET */
				if (pmtu > IPV6_MAXPACKET)
					pmtu = IPV6_MAXPACKET;

				bzero(&mtuinfo, sizeof (mtuinfo));
				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
				optdata = (void *)&mtuinfo;
				optdatalen = sizeof (mtuinfo);
				error = sooptcopyout(sopt, optdata,
				    optdatalen);
				break;
			}

			case IPV6_2292PKTINFO:
			case IPV6_2292HOPLIMIT:
			case IPV6_2292HOPOPTS:
			case IPV6_2292RTHDR:
			case IPV6_2292DSTOPTS:
				switch (optname) {
				case IPV6_2292PKTINFO:
					optval = OPTBIT(IN6P_PKTINFO);
					break;
				case IPV6_2292HOPLIMIT:
					optval = OPTBIT(IN6P_HOPLIMIT);
					break;
				case IPV6_2292HOPOPTS:
					optval = OPTBIT(IN6P_HOPOPTS);
					break;
				case IPV6_2292RTHDR:
					optval = OPTBIT(IN6P_RTHDR);
					break;
				case IPV6_2292DSTOPTS:
					/* 1 if either of the two flags is set */
					optval = OPTBIT(IN6P_DSTOPTS|
					    IN6P_RTHDRDSTOPTS);
					break;
				}
				error = sooptcopyout(sopt, &optval,
				    sizeof (optval));
				break;

			case IPV6_PKTINFO:
			case IPV6_HOPOPTS:
			case IPV6_RTHDR:
			case IPV6_DSTOPTS:
			case IPV6_RTHDRDSTOPTS:
			case IPV6_NEXTHOP:
			case IPV6_TCLASS:
			case IPV6_DONTFRAG:
			case IPV6_USE_MIN_MTU:
			case IPV6_PREFER_TEMPADDR:
				/* sticky options stored in in6p_outputopts */
				error = ip6_getpcbopt(in6p->in6p_outputopts,
				    optname, sopt);
				break;

			case IPV6_MULTICAST_IF:
			case IPV6_MULTICAST_HOPS:
			case IPV6_MULTICAST_LOOP:
			case IPV6_MSFILTER:
				error = ip6_getmoptions(in6p, sopt);
				break;
#if IPSEC
			case IPV6_IPSEC_POLICY: {
				caddr_t req = NULL;
				size_t len = 0;
				struct mbuf *m = NULL;
				struct mbuf *mp = NULL;

				/* the request itself is read from the caller */
				error = soopt_getm(sopt, &m);
				if (error != 0)
					break;
				error = soopt_mcopyin(sopt, m);
				if (error != 0)
					break;

				req = mtod(m, caddr_t);
				len = m->m_len;
				error = ipsec6_get_policy(in6p, req, len, &mp);
				if (error == 0)
					error = soopt_mcopyout(sopt, mp);
				if (mp != NULL)
					m_freem(mp);
				m_freem(m);
				break;
			}
#endif /* IPSEC */
#if IPFIREWALL
			case IPV6_FW_GET: {
				/* try to load the firewall on first use */
				if (ip6_fw_ctl_ptr == NULL)
					load_ip6fw();
				if (ip6_fw_ctl_ptr != NULL)
					error = (*ip6_fw_ctl_ptr)(sopt);
				else
					error = ENOPROTOOPT;
				break;
			}
#endif /* IPFIREWALL */
			case IPV6_BOUND_IF:
				/* optval keeps its initial 0 when not bound */
				if (in6p->inp_flags & INP_BOUND_IF)
					optval = in6p->inp_boundifp->if_index;
				error = sooptcopyout(sopt, &optval,
				    sizeof (optval));
				break;

			case IPV6_NO_IFT_CELLULAR:
				optval = (in6p->inp_flags & INP_NO_IFT_CELLULAR)
				    ? 1 : 0;
				error = sooptcopyout(sopt, &optval,
				    sizeof (optval));
				break;

			case IPV6_OUT_IF:
				/* 0 when no last output interface is recorded */
				optval = (in6p->in6p_last_outifp != NULL) ?
				    in6p->in6p_last_outifp->if_index : 0;
				error = sooptcopyout(sopt, &optval,
				    sizeof (optval));
				break;

			default:
				error = ENOPROTOOPT;
				break;
			}
			break;
		}
	} else {
		error = EINVAL;
	}
	return (error);
}
2727
2728int
2729ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
2730{
2731	int error = 0, optval, optlen;
2732	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2733	struct inpcb *in6p = sotoinpcb(so);
2734	int level, op, optname;
2735
2736	level = sopt->sopt_level;
2737	op = sopt->sopt_dir;
2738	optname = sopt->sopt_name;
2739	optlen = sopt->sopt_valsize;
2740
2741	if (level != IPPROTO_IPV6)
2742		return (EINVAL);
2743
2744	switch (optname) {
2745	case IPV6_CHECKSUM:
2746		/*
2747		 * For ICMPv6 sockets, no modification allowed for checksum
2748		 * offset, permit "no change" values to help existing apps.
2749		 *
2750		 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2751		 * for an ICMPv6 socket will fail."
2752		 * The current behavior does not meet RFC3542.
2753		 */
2754		switch (op) {
2755		case SOPT_SET:
2756			if (optlen != sizeof (int)) {
2757				error = EINVAL;
2758				break;
2759			}
2760			error = sooptcopyin(sopt, &optval, sizeof (optval),
2761			    sizeof (optval));
2762			if (error)
2763				break;
2764			if ((optval % 2) != 0) {
2765				/* the API assumes even offset values */
2766				error = EINVAL;
2767			} else if (SOCK_PROTO(so) == IPPROTO_ICMPV6) {
2768				if (optval != icmp6off)
2769					error = EINVAL;
2770			} else {
2771				in6p->in6p_cksum = optval;
2772			}
2773			break;
2774
2775		case SOPT_GET:
2776			if (SOCK_PROTO(so) == IPPROTO_ICMPV6)
2777				optval = icmp6off;
2778			else
2779				optval = in6p->in6p_cksum;
2780
2781			error = sooptcopyout(sopt, &optval, sizeof (optval));
2782			break;
2783
2784		default:
2785			error = EINVAL;
2786			break;
2787		}
2788		break;
2789
2790	default:
2791		error = ENOPROTOOPT;
2792		break;
2793	}
2794
2795	return (error);
2796}
2797
2798/*
2799 * Set up IP6 options in pcb for insertion in output packets or
2800 * specifying behavior of outgoing packets.
2801 */
2802static int
2803ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so,
2804    struct sockopt *sopt)
2805{
2806#pragma unused(sopt)
2807	struct ip6_pktopts *opt = *pktopt;
2808	int error = 0;
2809
2810	/* turn off any old options. */
2811	if (opt != NULL) {
2812#if DIAGNOSTIC
2813		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2814		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2815		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2816			printf("%s: all specified options are cleared.\n",
2817			    __func__);
2818#endif
2819		ip6_clearpktopts(opt, -1);
2820	} else {
2821		opt = _MALLOC(sizeof (*opt), M_IP6OPT, M_WAITOK);
2822		if (opt == NULL)
2823			return (ENOBUFS);
2824	}
2825	*pktopt = NULL;
2826
2827	if (m == NULL || m->m_len == 0) {
2828		/*
2829		 * Only turning off any previous options, regardless of
2830		 * whether the opt is just created or given.
2831		 */
2832		if (opt != NULL)
2833			FREE(opt, M_IP6OPT);
2834		return (0);
2835	}
2836
2837	/*  set options specified by user. */
2838	if ((error = ip6_setpktopts(m, opt, NULL, SOCK_PROTO(so))) != 0) {
2839		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2840		FREE(opt, M_IP6OPT);
2841		return (error);
2842	}
2843	*pktopt = opt;
2844	return (0);
2845}
2846
2847/*
2848 * initialize ip6_pktopts.  beware that there are non-zero default values in
2849 * the struct.
2850 */
2851void
2852ip6_initpktopts(struct ip6_pktopts *opt)
2853{
2854
2855	bzero(opt, sizeof (*opt));
2856	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2857	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2858	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2859	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2860}
2861
2862static int
2863ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2864    int uproto)
2865{
2866	struct ip6_pktopts *opt;
2867
2868	opt = *pktopt;
2869	if (opt == NULL) {
2870		opt = _MALLOC(sizeof (*opt), M_IP6OPT, M_WAITOK);
2871		if (opt == NULL)
2872			return (ENOBUFS);
2873		ip6_initpktopts(opt);
2874		*pktopt = opt;
2875	}
2876
2877	return (ip6_setpktopt(optname, buf, len, opt, 1, 0, uproto));
2878}
2879
2880static int
2881ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2882{
2883	void *optdata = NULL;
2884	int optdatalen = 0;
2885	struct ip6_ext *ip6e;
2886	struct in6_pktinfo null_pktinfo;
2887	int deftclass = 0, on;
2888	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2889	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2890
2891
2892	switch (optname) {
2893	case IPV6_PKTINFO:
2894		if (pktopt && pktopt->ip6po_pktinfo)
2895			optdata = (void *)pktopt->ip6po_pktinfo;
2896		else {
2897			/* XXX: we don't have to do this every time... */
2898			bzero(&null_pktinfo, sizeof (null_pktinfo));
2899			optdata = (void *)&null_pktinfo;
2900		}
2901		optdatalen = sizeof (struct in6_pktinfo);
2902		break;
2903
2904	case IPV6_TCLASS:
2905		if (pktopt && pktopt->ip6po_tclass >= 0)
2906			optdata = (void *)&pktopt->ip6po_tclass;
2907		else
2908			optdata = (void *)&deftclass;
2909		optdatalen = sizeof (int);
2910		break;
2911
2912	case IPV6_HOPOPTS:
2913		if (pktopt && pktopt->ip6po_hbh) {
2914			optdata = (void *)pktopt->ip6po_hbh;
2915			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2916			optdatalen = (ip6e->ip6e_len + 1) << 3;
2917		}
2918		break;
2919
2920	case IPV6_RTHDR:
2921		if (pktopt && pktopt->ip6po_rthdr) {
2922			optdata = (void *)pktopt->ip6po_rthdr;
2923			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2924			optdatalen = (ip6e->ip6e_len + 1) << 3;
2925		}
2926		break;
2927
2928	case IPV6_RTHDRDSTOPTS:
2929		if (pktopt && pktopt->ip6po_dest1) {
2930			optdata = (void *)pktopt->ip6po_dest1;
2931			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2932			optdatalen = (ip6e->ip6e_len + 1) << 3;
2933		}
2934		break;
2935
2936	case IPV6_DSTOPTS:
2937		if (pktopt && pktopt->ip6po_dest2) {
2938			optdata = (void *)pktopt->ip6po_dest2;
2939			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2940			optdatalen = (ip6e->ip6e_len + 1) << 3;
2941		}
2942		break;
2943
2944	case IPV6_NEXTHOP:
2945		if (pktopt && pktopt->ip6po_nexthop) {
2946			optdata = (void *)pktopt->ip6po_nexthop;
2947			optdatalen = pktopt->ip6po_nexthop->sa_len;
2948		}
2949		break;
2950
2951	case IPV6_USE_MIN_MTU:
2952		if (pktopt)
2953			optdata = (void *)&pktopt->ip6po_minmtu;
2954		else
2955			optdata = (void *)&defminmtu;
2956		optdatalen = sizeof (int);
2957		break;
2958
2959	case IPV6_DONTFRAG:
2960		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2961			on = 1;
2962		else
2963			on = 0;
2964		optdata = (void *)&on;
2965		optdatalen = sizeof (on);
2966		break;
2967
2968	case IPV6_PREFER_TEMPADDR:
2969		if (pktopt)
2970			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2971		else
2972			optdata = (void *)&defpreftemp;
2973		optdatalen = sizeof (int);
2974		break;
2975
2976	default:		/* should not happen */
2977#ifdef DIAGNOSTIC
2978		panic("ip6_getpcbopt: unexpected option\n");
2979#endif
2980		return (ENOPROTOOPT);
2981	}
2982
2983	return (sooptcopyout(sopt, optdata, optdatalen));
2984}
2985
2986void
2987ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2988{
2989	if (pktopt == NULL)
2990		return;
2991
2992	if (optname == -1 || optname == IPV6_PKTINFO) {
2993		if (pktopt->ip6po_pktinfo)
2994			FREE(pktopt->ip6po_pktinfo, M_IP6OPT);
2995		pktopt->ip6po_pktinfo = NULL;
2996	}
2997	if (optname == -1 || optname == IPV6_HOPLIMIT)
2998		pktopt->ip6po_hlim = -1;
2999	if (optname == -1 || optname == IPV6_TCLASS)
3000		pktopt->ip6po_tclass = -1;
3001	if (optname == -1 || optname == IPV6_NEXTHOP) {
3002		ROUTE_RELEASE(&pktopt->ip6po_nextroute);
3003		if (pktopt->ip6po_nexthop)
3004			FREE(pktopt->ip6po_nexthop, M_IP6OPT);
3005		pktopt->ip6po_nexthop = NULL;
3006	}
3007	if (optname == -1 || optname == IPV6_HOPOPTS) {
3008		if (pktopt->ip6po_hbh)
3009			FREE(pktopt->ip6po_hbh, M_IP6OPT);
3010		pktopt->ip6po_hbh = NULL;
3011	}
3012	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
3013		if (pktopt->ip6po_dest1)
3014			FREE(pktopt->ip6po_dest1, M_IP6OPT);
3015		pktopt->ip6po_dest1 = NULL;
3016	}
3017	if (optname == -1 || optname == IPV6_RTHDR) {
3018		if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
3019			FREE(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
3020		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
3021		ROUTE_RELEASE(&pktopt->ip6po_route);
3022	}
3023	if (optname == -1 || optname == IPV6_DSTOPTS) {
3024		if (pktopt->ip6po_dest2)
3025			FREE(pktopt->ip6po_dest2, M_IP6OPT);
3026		pktopt->ip6po_dest2 = NULL;
3027	}
3028}
3029
3030#define	PKTOPT_EXTHDRCPY(type) do {					\
3031	if (src->type) {						\
3032		int hlen =						\
3033		    (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;	\
3034		dst->type = _MALLOC(hlen, M_IP6OPT, canwait);		\
3035		if (dst->type == NULL && canwait == M_NOWAIT)		\
3036			goto bad;					\
3037		bcopy(src->type, dst->type, hlen);			\
3038	}								\
3039} while (0)
3040
3041static int
3042copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
3043{
3044	if (dst == NULL || src == NULL)  {
3045		printf("copypktopts: invalid argument\n");
3046		return (EINVAL);
3047	}
3048
3049	dst->ip6po_hlim = src->ip6po_hlim;
3050	dst->ip6po_tclass = src->ip6po_tclass;
3051	dst->ip6po_flags = src->ip6po_flags;
3052	if (src->ip6po_pktinfo) {
3053		dst->ip6po_pktinfo = _MALLOC(sizeof (*dst->ip6po_pktinfo),
3054		    M_IP6OPT, canwait);
3055		if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
3056			goto bad;
3057		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
3058	}
3059	if (src->ip6po_nexthop) {
3060		dst->ip6po_nexthop = _MALLOC(src->ip6po_nexthop->sa_len,
3061		    M_IP6OPT, canwait);
3062		if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
3063			goto bad;
3064		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
3065		    src->ip6po_nexthop->sa_len);
3066	}
3067	PKTOPT_EXTHDRCPY(ip6po_hbh);
3068	PKTOPT_EXTHDRCPY(ip6po_dest1);
3069	PKTOPT_EXTHDRCPY(ip6po_dest2);
3070	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
3071	return (0);
3072
3073bad:
3074	ip6_clearpktopts(dst, -1);
3075	return (ENOBUFS);
3076}
3077#undef PKTOPT_EXTHDRCPY
3078
3079struct ip6_pktopts *
3080ip6_copypktopts(struct ip6_pktopts *src, int canwait)
3081{
3082	int error;
3083	struct ip6_pktopts *dst;
3084
3085	dst = _MALLOC(sizeof (*dst), M_IP6OPT, canwait);
3086	if (dst == NULL)
3087		return (NULL);
3088	ip6_initpktopts(dst);
3089
3090	if ((error = copypktopts(dst, src, canwait)) != 0) {
3091		FREE(dst, M_IP6OPT);
3092		return (NULL);
3093	}
3094
3095	return (dst);
3096}
3097
3098void
3099ip6_freepcbopts(struct ip6_pktopts *pktopt)
3100{
3101	if (pktopt == NULL)
3102		return;
3103
3104	ip6_clearpktopts(pktopt, -1);
3105
3106	FREE(pktopt, M_IP6OPT);
3107}
3108
3109void
3110ip6_moptions_init(void)
3111{
3112	PE_parse_boot_argn("ifa_debug", &im6o_debug, sizeof (im6o_debug));
3113
3114	im6o_size = (im6o_debug == 0) ? sizeof (struct ip6_moptions) :
3115	    sizeof (struct ip6_moptions_dbg);
3116
3117	im6o_zone = zinit(im6o_size, IM6O_ZONE_MAX * im6o_size, 0,
3118	    IM6O_ZONE_NAME);
3119	if (im6o_zone == NULL) {
3120		panic("%s: failed allocating %s", __func__, IM6O_ZONE_NAME);
3121		/* NOTREACHED */
3122	}
3123	zone_change(im6o_zone, Z_EXPAND, TRUE);
3124}
3125
3126void
3127im6o_addref(struct ip6_moptions *im6o, int locked)
3128{
3129	if (!locked)
3130		IM6O_LOCK(im6o);
3131	else
3132		IM6O_LOCK_ASSERT_HELD(im6o);
3133
3134	if (++im6o->im6o_refcnt == 0) {
3135		panic("%s: im6o %p wraparound refcnt\n", __func__, im6o);
3136		/* NOTREACHED */
3137	} else if (im6o->im6o_trace != NULL) {
3138		(*im6o->im6o_trace)(im6o, TRUE);
3139	}
3140
3141	if (!locked)
3142		IM6O_UNLOCK(im6o);
3143}
3144
3145void
3146im6o_remref(struct ip6_moptions *im6o)
3147{
3148	int i;
3149
3150	IM6O_LOCK(im6o);
3151	if (im6o->im6o_refcnt == 0) {
3152		panic("%s: im6o %p negative refcnt", __func__, im6o);
3153		/* NOTREACHED */
3154	} else if (im6o->im6o_trace != NULL) {
3155		(*im6o->im6o_trace)(im6o, FALSE);
3156	}
3157
3158	--im6o->im6o_refcnt;
3159	if (im6o->im6o_refcnt > 0) {
3160		IM6O_UNLOCK(im6o);
3161		return;
3162	}
3163
3164	for (i = 0; i < im6o->im6o_num_memberships; ++i) {
3165		struct in6_mfilter *imf;
3166
3167		imf = im6o->im6o_mfilters ? &im6o->im6o_mfilters[i] : NULL;
3168		if (imf != NULL)
3169			im6f_leave(imf);
3170
3171		(void) in6_mc_leave(im6o->im6o_membership[i], imf);
3172
3173		if (imf != NULL)
3174			im6f_purge(imf);
3175
3176		IN6M_REMREF(im6o->im6o_membership[i]);
3177		im6o->im6o_membership[i] = NULL;
3178	}
3179	im6o->im6o_num_memberships = 0;
3180	if (im6o->im6o_mfilters != NULL) {
3181		FREE(im6o->im6o_mfilters, M_IN6MFILTER);
3182		im6o->im6o_mfilters = NULL;
3183	}
3184	if (im6o->im6o_membership != NULL) {
3185		FREE(im6o->im6o_membership, M_IP6MOPTS);
3186		im6o->im6o_membership = NULL;
3187	}
3188	IM6O_UNLOCK(im6o);
3189
3190	lck_mtx_destroy(&im6o->im6o_lock, ifa_mtx_grp);
3191
3192	if (!(im6o->im6o_debug & IFD_ALLOC)) {
3193		panic("%s: im6o %p cannot be freed", __func__, im6o);
3194		/* NOTREACHED */
3195	}
3196	zfree(im6o_zone, im6o);
3197}
3198
3199static void
3200im6o_trace(struct ip6_moptions *im6o, int refhold)
3201{
3202	struct ip6_moptions_dbg *im6o_dbg = (struct ip6_moptions_dbg *)im6o;
3203	ctrace_t *tr;
3204	u_int32_t idx;
3205	u_int16_t *cnt;
3206
3207	if (!(im6o->im6o_debug & IFD_DEBUG)) {
3208		panic("%s: im6o %p has no debug structure", __func__, im6o);
3209		/* NOTREACHED */
3210	}
3211	if (refhold) {
3212		cnt = &im6o_dbg->im6o_refhold_cnt;
3213		tr = im6o_dbg->im6o_refhold;
3214	} else {
3215		cnt = &im6o_dbg->im6o_refrele_cnt;
3216		tr = im6o_dbg->im6o_refrele;
3217	}
3218
3219	idx = atomic_add_16_ov(cnt, 1) % IM6O_TRACE_HIST_SIZE;
3220	ctrace_record(&tr[idx]);
3221}
3222
3223struct ip6_moptions *
3224ip6_allocmoptions(int how)
3225{
3226	struct ip6_moptions *im6o;
3227
3228	im6o = (how == M_WAITOK) ?
3229	    zalloc(im6o_zone) : zalloc_noblock(im6o_zone);
3230	if (im6o != NULL) {
3231		bzero(im6o, im6o_size);
3232		lck_mtx_init(&im6o->im6o_lock, ifa_mtx_grp, ifa_mtx_attr);
3233		im6o->im6o_debug |= IFD_ALLOC;
3234		if (im6o_debug != 0) {
3235			im6o->im6o_debug |= IFD_DEBUG;
3236			im6o->im6o_trace = im6o_trace;
3237		}
3238		IM6O_ADDREF(im6o);
3239	}
3240
3241	return (im6o);
3242}
3243
3244/*
3245 * Set IPv6 outgoing packet options based on advanced API.
3246 */
3247int
3248ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
3249    struct ip6_pktopts *stickyopt, int uproto)
3250{
3251	struct cmsghdr *cm = NULL;
3252
3253	if (control == NULL || opt == NULL)
3254		return (EINVAL);
3255
3256	ip6_initpktopts(opt);
3257	if (stickyopt) {
3258		int error;
3259
3260		/*
3261		 * If stickyopt is provided, make a local copy of the options
3262		 * for this particular packet, then override them by ancillary
3263		 * objects.
3264		 * XXX: copypktopts() does not copy the cached route to a next
3265		 * hop (if any).  This is not very good in terms of efficiency,
3266		 * but we can allow this since this option should be rarely
3267		 * used.
3268		 */
3269		if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
3270			return (error);
3271	}
3272
3273	/*
3274	 * XXX: Currently, we assume all the optional information is stored
3275	 * in a single mbuf.
3276	 */
3277	if (control->m_next)
3278		return (EINVAL);
3279
3280	if (control->m_len < CMSG_LEN(0))
3281		return (EINVAL);
3282
3283	for (cm = M_FIRST_CMSGHDR(control); cm != NULL;
3284	    cm = M_NXT_CMSGHDR(control, cm)) {
3285		int error;
3286
3287		if (cm->cmsg_len < sizeof (struct cmsghdr) ||
3288		    cm->cmsg_len > control->m_len)
3289			return (EINVAL);
3290		if (cm->cmsg_level != IPPROTO_IPV6)
3291			continue;
3292
3293		error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
3294		    cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto);
3295		if (error)
3296			return (error);
3297	}
3298
3299	return (0);
3300}
3301/*
3302 * Set a particular packet option, as a sticky option or an ancillary data
3303 * item.  "len" can be 0 only when it's a sticky option.
3304 * We have 4 cases of combination of "sticky" and "cmsg":
3305 * "sticky=0, cmsg=0": impossible
3306 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
3307 * "sticky=1, cmsg=0": RFC3542 socket option
3308 * "sticky=1, cmsg=1": RFC2292 socket option
3309 */
3310static int
3311ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
3312    int sticky, int cmsg, int uproto)
3313{
3314	int minmtupolicy, preftemp;
3315	int error;
3316
3317	if (!sticky && !cmsg) {
3318#ifdef DIAGNOSTIC
3319		printf("ip6_setpktopt: impossible case\n");
3320#endif
3321		return (EINVAL);
3322	}
3323
3324	/*
3325	 * Caller must have ensured that the buffer is at least
3326	 * aligned on 32-bit boundary.
3327	 */
3328	VERIFY(IS_P2ALIGNED(buf, sizeof (u_int32_t)));
3329
3330	/*
3331	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3332	 * not be specified in the context of RFC3542.  Conversely,
3333	 * RFC3542 types should not be specified in the context of RFC2292.
3334	 */
3335	if (!cmsg) {
3336		switch (optname) {
3337		case IPV6_2292PKTINFO:
3338		case IPV6_2292HOPLIMIT:
3339		case IPV6_2292NEXTHOP:
3340		case IPV6_2292HOPOPTS:
3341		case IPV6_2292DSTOPTS:
3342		case IPV6_2292RTHDR:
3343		case IPV6_2292PKTOPTIONS:
3344			return (ENOPROTOOPT);
3345		}
3346	}
3347	if (sticky && cmsg) {
3348		switch (optname) {
3349		case IPV6_PKTINFO:
3350		case IPV6_HOPLIMIT:
3351		case IPV6_NEXTHOP:
3352		case IPV6_HOPOPTS:
3353		case IPV6_DSTOPTS:
3354		case IPV6_RTHDRDSTOPTS:
3355		case IPV6_RTHDR:
3356		case IPV6_USE_MIN_MTU:
3357		case IPV6_DONTFRAG:
3358		case IPV6_TCLASS:
3359		case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
3360			return (ENOPROTOOPT);
3361		}
3362	}
3363
3364	switch (optname) {
3365	case IPV6_2292PKTINFO:
3366	case IPV6_PKTINFO: {
3367		struct ifnet *ifp = NULL;
3368		struct in6_pktinfo *pktinfo;
3369
3370		if (len != sizeof (struct in6_pktinfo))
3371			return (EINVAL);
3372
3373		pktinfo = (struct in6_pktinfo *)(void *)buf;
3374
3375		/*
3376		 * An application can clear any sticky IPV6_PKTINFO option by
3377		 * doing a "regular" setsockopt with ipi6_addr being
3378		 * in6addr_any and ipi6_ifindex being zero.
3379		 * [RFC 3542, Section 6]
3380		 */
3381		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3382		    pktinfo->ipi6_ifindex == 0 &&
3383		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3384			ip6_clearpktopts(opt, optname);
3385			break;
3386		}
3387
3388		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3389		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3390			return (EINVAL);
3391		}
3392
3393		/* validate the interface index if specified. */
3394		ifnet_head_lock_shared();
3395
3396		if (pktinfo->ipi6_ifindex > if_index) {
3397			ifnet_head_done();
3398			return (ENXIO);
3399		}
3400
3401		if (pktinfo->ipi6_ifindex) {
3402			ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
3403			if (ifp == NULL) {
3404				ifnet_head_done();
3405				return (ENXIO);
3406			}
3407		}
3408
3409		ifnet_head_done();
3410
3411		/*
3412		 * We store the address anyway, and let in6_selectsrc()
3413		 * validate the specified address.  This is because ipi6_addr
3414		 * may not have enough information about its scope zone, and
3415		 * we may need additional information (such as outgoing
3416		 * interface or the scope zone of a destination address) to
3417		 * disambiguate the scope.
3418		 * XXX: the delay of the validation may confuse the
3419		 * application when it is used as a sticky option.
3420		 */
3421		if (opt->ip6po_pktinfo == NULL) {
3422			opt->ip6po_pktinfo = _MALLOC(sizeof (*pktinfo),
3423			    M_IP6OPT, M_NOWAIT);
3424			if (opt->ip6po_pktinfo == NULL)
3425				return (ENOBUFS);
3426		}
3427		bcopy(pktinfo, opt->ip6po_pktinfo, sizeof (*pktinfo));
3428		break;
3429	}
3430
3431	case IPV6_2292HOPLIMIT:
3432	case IPV6_HOPLIMIT: {
3433		int *hlimp;
3434
3435		/*
3436		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3437		 * to simplify the ordering among hoplimit options.
3438		 */
3439		if (optname == IPV6_HOPLIMIT && sticky)
3440			return (ENOPROTOOPT);
3441
3442		if (len != sizeof (int))
3443			return (EINVAL);
3444		hlimp = (int *)(void *)buf;
3445		if (*hlimp < -1 || *hlimp > 255)
3446			return (EINVAL);
3447
3448		opt->ip6po_hlim = *hlimp;
3449		break;
3450	}
3451
3452	case IPV6_TCLASS: {
3453		int tclass;
3454
3455		if (len != sizeof (int))
3456			return (EINVAL);
3457		tclass = *(int *)(void *)buf;
3458		if (tclass < -1 || tclass > 255)
3459			return (EINVAL);
3460
3461		opt->ip6po_tclass = tclass;
3462		break;
3463	}
3464
3465	case IPV6_2292NEXTHOP:
3466	case IPV6_NEXTHOP:
3467		error = suser(kauth_cred_get(), 0);
3468		if (error)
3469			return (EACCES);
3470
3471		if (len == 0) {	/* just remove the option */
3472			ip6_clearpktopts(opt, IPV6_NEXTHOP);
3473			break;
3474		}
3475
3476		/* check if cmsg_len is large enough for sa_len */
3477		if (len < sizeof (struct sockaddr) || len < *buf)
3478			return (EINVAL);
3479
3480		switch (SA(buf)->sa_family) {
3481		case AF_INET6: {
3482			struct sockaddr_in6 *sa6 = SIN6(buf);
3483
3484			if (sa6->sin6_len != sizeof (struct sockaddr_in6))
3485				return (EINVAL);
3486
3487			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3488			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3489				return (EINVAL);
3490			}
3491			if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3492			    != 0) {
3493				return (error);
3494			}
3495			break;
3496		}
3497		case AF_LINK:	/* should eventually be supported */
3498		default:
3499			return (EAFNOSUPPORT);
3500		}
3501
3502		/* turn off the previous option, then set the new option. */
3503		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3504		opt->ip6po_nexthop = _MALLOC(*buf, M_IP6OPT, M_NOWAIT);
3505		if (opt->ip6po_nexthop == NULL)
3506			return (ENOBUFS);
3507		bcopy(buf, opt->ip6po_nexthop, *buf);
3508		break;
3509
3510	case IPV6_2292HOPOPTS:
3511	case IPV6_HOPOPTS: {
3512		struct ip6_hbh *hbh;
3513		int hbhlen;
3514
3515		/*
3516		 * XXX: We don't allow a non-privileged user to set ANY HbH
3517		 * options, since per-option restriction has too much
3518		 * overhead.
3519		 */
3520		error = suser(kauth_cred_get(), 0);
3521		if (error)
3522			return (EACCES);
3523
3524		if (len == 0) {
3525			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3526			break;	/* just remove the option */
3527		}
3528
3529		/* message length validation */
3530		if (len < sizeof (struct ip6_hbh))
3531			return (EINVAL);
3532		hbh = (struct ip6_hbh *)(void *)buf;
3533		hbhlen = (hbh->ip6h_len + 1) << 3;
3534		if (len != hbhlen)
3535			return (EINVAL);
3536
3537		/* turn off the previous option, then set the new option. */
3538		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3539		opt->ip6po_hbh = _MALLOC(hbhlen, M_IP6OPT, M_NOWAIT);
3540		if (opt->ip6po_hbh == NULL)
3541			return (ENOBUFS);
3542		bcopy(hbh, opt->ip6po_hbh, hbhlen);
3543
3544		break;
3545	}
3546
3547	case IPV6_2292DSTOPTS:
3548	case IPV6_DSTOPTS:
3549	case IPV6_RTHDRDSTOPTS: {
3550		struct ip6_dest *dest, **newdest = NULL;
3551		int destlen;
3552
3553		error = suser(kauth_cred_get(), 0);
3554		if (error)
3555			return (EACCES);
3556
3557		if (len == 0) {
3558			ip6_clearpktopts(opt, optname);
3559			break;	/* just remove the option */
3560		}
3561
3562		/* message length validation */
3563		if (len < sizeof (struct ip6_dest))
3564			return (EINVAL);
3565		dest = (struct ip6_dest *)(void *)buf;
3566		destlen = (dest->ip6d_len + 1) << 3;
3567		if (len != destlen)
3568			return (EINVAL);
3569
3570		/*
3571		 * Determine the position that the destination options header
3572		 * should be inserted; before or after the routing header.
3573		 */
3574		switch (optname) {
3575		case IPV6_2292DSTOPTS:
3576			/*
3577			 * The old advacned API is ambiguous on this point.
3578			 * Our approach is to determine the position based
3579			 * according to the existence of a routing header.
3580			 * Note, however, that this depends on the order of the
3581			 * extension headers in the ancillary data; the 1st
3582			 * part of the destination options header must appear
3583			 * before the routing header in the ancillary data,
3584			 * too.
3585			 * RFC3542 solved the ambiguity by introducing
3586			 * separate ancillary data or option types.
3587			 */
3588			if (opt->ip6po_rthdr == NULL)
3589				newdest = &opt->ip6po_dest1;
3590			else
3591				newdest = &opt->ip6po_dest2;
3592			break;
3593		case IPV6_RTHDRDSTOPTS:
3594			newdest = &opt->ip6po_dest1;
3595			break;
3596		case IPV6_DSTOPTS:
3597			newdest = &opt->ip6po_dest2;
3598			break;
3599		}
3600
3601		/* turn off the previous option, then set the new option. */
3602		ip6_clearpktopts(opt, optname);
3603		*newdest = _MALLOC(destlen, M_IP6OPT, M_NOWAIT);
3604		if (*newdest == NULL)
3605			return (ENOBUFS);
3606		bcopy(dest, *newdest, destlen);
3607		break;
3608	}
3609
3610	case IPV6_2292RTHDR:
3611	case IPV6_RTHDR: {
3612		struct ip6_rthdr *rth;
3613		int rthlen;
3614
3615		if (len == 0) {
3616			ip6_clearpktopts(opt, IPV6_RTHDR);
3617			break;	/* just remove the option */
3618		}
3619
3620		/* message length validation */
3621		if (len < sizeof (struct ip6_rthdr))
3622			return (EINVAL);
3623		rth = (struct ip6_rthdr *)(void *)buf;
3624		rthlen = (rth->ip6r_len + 1) << 3;
3625		if (len != rthlen)
3626			return (EINVAL);
3627
3628		switch (rth->ip6r_type) {
3629		case IPV6_RTHDR_TYPE_0:
3630			if (rth->ip6r_len == 0)	/* must contain one addr */
3631				return (EINVAL);
3632			if (rth->ip6r_len % 2) /* length must be even */
3633				return (EINVAL);
3634			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3635				return (EINVAL);
3636			break;
3637		default:
3638			return (EINVAL);	/* not supported */
3639		}
3640
3641		/* turn off the previous option */
3642		ip6_clearpktopts(opt, IPV6_RTHDR);
3643		opt->ip6po_rthdr = _MALLOC(rthlen, M_IP6OPT, M_NOWAIT);
3644		if (opt->ip6po_rthdr == NULL)
3645			return (ENOBUFS);
3646		bcopy(rth, opt->ip6po_rthdr, rthlen);
3647		break;
3648	}
3649
3650	case IPV6_USE_MIN_MTU:
3651		if (len != sizeof (int))
3652			return (EINVAL);
3653		minmtupolicy = *(int *)(void *)buf;
3654		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3655		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3656		    minmtupolicy != IP6PO_MINMTU_ALL) {
3657			return (EINVAL);
3658		}
3659		opt->ip6po_minmtu = minmtupolicy;
3660		break;
3661
3662	case IPV6_DONTFRAG:
3663		if (len != sizeof (int))
3664			return (EINVAL);
3665
3666		if (uproto == IPPROTO_TCP || *(int *)(void *)buf == 0) {
3667			/*
3668			 * we ignore this option for TCP sockets.
3669			 * (RFC3542 leaves this case unspecified.)
3670			 */
3671			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3672		} else {
3673			opt->ip6po_flags |= IP6PO_DONTFRAG;
3674		}
3675		break;
3676
3677	case IPV6_PREFER_TEMPADDR:
3678		if (len != sizeof (int))
3679			return (EINVAL);
3680		preftemp = *(int *)(void *)buf;
3681		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3682		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3683		    preftemp != IP6PO_TEMPADDR_PREFER) {
3684			return (EINVAL);
3685		}
3686		opt->ip6po_prefer_tempaddr = preftemp;
3687		break;
3688
3689	default:
3690		return (ENOPROTOOPT);
3691	} /* end of switch */
3692
3693	return (0);
3694}
3695
3696/*
3697 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3698 * packet to the input queue of a specified interface.  Note that this
3699 * calls the output routine of the loopback "driver", but with an interface
3700 * pointer that might NOT be &loif -- easier than replicating that code here.
3701 */
3702void
3703ip6_mloopback(struct ifnet *srcifp, struct ifnet *origifp, struct mbuf *m,
3704    struct sockaddr_in6 *dst, uint32_t optlen, int32_t nxt0)
3705{
3706	struct mbuf *copym;
3707	struct ip6_hdr *ip6;
3708	struct in6_addr src;
3709
3710	if (lo_ifp == NULL)
3711		return;
3712
3713	/*
3714	 * Copy the packet header as it's needed for the checksum.
3715	 * Make sure to deep-copy IPv6 header portion in case the data
3716	 * is in an mbuf cluster, so that we can safely override the IPv6
3717	 * header portion later.
3718	 */
3719	copym = m_copym_mode(m, 0, M_COPYALL, M_DONTWAIT, M_COPYM_COPY_HDR);
3720	if (copym != NULL && ((copym->m_flags & M_EXT) ||
3721	    copym->m_len < sizeof (struct ip6_hdr)))
3722		copym = m_pullup(copym, sizeof (struct ip6_hdr));
3723
3724	if (copym == NULL)
3725		return;
3726
3727	ip6 = mtod(copym, struct ip6_hdr *);
3728	src = ip6->ip6_src;
3729	/*
3730	 * clear embedded scope identifiers if necessary.
3731	 * in6_clearscope will touch the addresses only when necessary.
3732	 */
3733	in6_clearscope(&ip6->ip6_src);
3734	in6_clearscope(&ip6->ip6_dst);
3735
3736	if (copym->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)
3737		in6_delayed_cksum_offset(copym, 0, optlen, nxt0);
3738
3739	/*
3740	 * Stuff the 'real' ifp into the pkthdr, to be used in matching
3741	 * in ip6_input(); we need the loopback ifp/dl_tag passed as args
3742	 * to make the loopback driver compliant with the data link
3743	 * requirements.
3744	 */
3745	copym->m_pkthdr.rcvif = origifp;
3746
3747	/*
3748	 * Also record the source interface (which owns the source address).
3749	 * This is basically a stripped down version of ifa_foraddr6().
3750	 */
3751	if (srcifp == NULL) {
3752		struct in6_ifaddr *ia;
3753
3754		lck_rw_lock_shared(&in6_ifaddr_rwlock);
3755		for (ia = in6_ifaddrs; ia != NULL; ia = ia->ia_next) {
3756			IFA_LOCK_SPIN(&ia->ia_ifa);
3757			/* compare against src addr with embedded scope */
3758			if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &src)) {
3759				srcifp = ia->ia_ifp;
3760				IFA_UNLOCK(&ia->ia_ifa);
3761				break;
3762			}
3763			IFA_UNLOCK(&ia->ia_ifa);
3764		}
3765		lck_rw_done(&in6_ifaddr_rwlock);
3766	}
3767	if (srcifp != NULL)
3768		ip6_setsrcifaddr_info(copym, srcifp->if_index, NULL);
3769	ip6_setdstifaddr_info(copym, origifp->if_index, NULL);
3770
3771	dlil_output(lo_ifp, PF_INET6, copym, NULL, SA(dst), 0, NULL);
3772}
3773
3774/*
3775 * Chop IPv6 header off from the payload.
3776 */
3777static int
3778ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3779{
3780	struct mbuf *mh;
3781	struct ip6_hdr *ip6;
3782
3783	ip6 = mtod(m, struct ip6_hdr *);
3784	if (m->m_len > sizeof (*ip6)) {
3785		MGETHDR(mh, M_DONTWAIT, MT_HEADER);	/* MAC-OK */
3786		if (mh == NULL) {
3787			m_freem(m);
3788			return (ENOBUFS);
3789		}
3790		M_COPY_PKTHDR(mh, m);
3791		MH_ALIGN(mh, sizeof (*ip6));
3792		m->m_flags &= ~M_PKTHDR;
3793		m->m_len -= sizeof (*ip6);
3794		m->m_data += sizeof (*ip6);
3795		mh->m_next = m;
3796		m = mh;
3797		m->m_len = sizeof (*ip6);
3798		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof (*ip6));
3799	}
3800	exthdrs->ip6e_ip6 = m;
3801	return (0);
3802}
3803
3804static void
3805ip6_output_checksum(struct ifnet *ifp, uint32_t mtu, struct mbuf *m,
3806    int nxt0, uint32_t tlen, uint32_t optlen)
3807{
3808	uint32_t sw_csum, hwcap = ifp->if_hwassist;
3809	int tso = TSO_IPV6_OK(ifp, m);
3810
3811	if (!hwcksum_tx) {
3812		/* do all in software; checksum offload is disabled */
3813		sw_csum = CSUM_DELAY_IPV6_DATA & m->m_pkthdr.csum_flags;
3814	} else {
3815		/* do in software what the hardware cannot */
3816		sw_csum = m->m_pkthdr.csum_flags &
3817		    ~IF_HWASSIST_CSUM_FLAGS(hwcap);
3818	}
3819
3820	if (optlen != 0) {
3821		sw_csum |= (CSUM_DELAY_IPV6_DATA &
3822		    m->m_pkthdr.csum_flags);
3823	} else if (!(sw_csum & CSUM_DELAY_IPV6_DATA) &&
3824	    (hwcap & CSUM_PARTIAL)) {
3825		/*
3826		 * Partial checksum offload, ere), if no extension
3827		 * headers, and TCP only (no UDP support, as the
3828		 * hardware may not be able to convert +0 to
3829		 * -0 (0xffff) per RFC1122 4.1.3.4.)
3830		 */
3831		if (hwcksum_tx && !tso &&
3832		    (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) &&
3833		    tlen <= mtu) {
3834			uint16_t start = sizeof (struct ip6_hdr);
3835			uint16_t ulpoff =
3836			    m->m_pkthdr.csum_data & 0xffff;
3837			m->m_pkthdr.csum_flags |=
3838			    (CSUM_DATA_VALID | CSUM_PARTIAL);
3839			m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
3840			m->m_pkthdr.csum_tx_start = start;
3841			sw_csum = 0;
3842		} else {
3843			sw_csum |= (CSUM_DELAY_IPV6_DATA &
3844			    m->m_pkthdr.csum_flags);
3845		}
3846	}
3847
3848	if (sw_csum & CSUM_DELAY_IPV6_DATA) {
3849		in6_delayed_cksum_offset(m, 0, optlen, nxt0);
3850		sw_csum &= ~CSUM_DELAY_IPV6_DATA;
3851	}
3852
3853	if (hwcksum_tx) {
3854		/*
3855		 * Drop off bits that aren't supported by hardware;
3856		 * also make sure to preserve non-checksum related bits.
3857		 */
3858		m->m_pkthdr.csum_flags =
3859		    ((m->m_pkthdr.csum_flags &
3860		    (IF_HWASSIST_CSUM_FLAGS(hwcap) | CSUM_DATA_VALID)) |
3861		    (m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_MASK));
3862	} else {
3863		/* drop all bits; checksum offload is disabled */
3864		m->m_pkthdr.csum_flags = 0;
3865	}
3866}
3867
3868/*
3869 * Compute IPv6 extension header length.
3870 */
3871int
3872ip6_optlen(struct in6pcb *in6p)
3873{
3874	int len;
3875
3876	if (!in6p->in6p_outputopts)
3877		return (0);
3878
3879	len = 0;
3880#define	elen(x)								\
3881	(((struct ip6_ext *)(x)) ?					\
3882	(((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3883
3884	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3885	if (in6p->in6p_outputopts->ip6po_rthdr) {
3886		/* dest1 is valid with rthdr only */
3887		len += elen(in6p->in6p_outputopts->ip6po_dest1);
3888	}
3889	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3890	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3891	return (len);
3892#undef elen
3893}
3894