1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*	$FreeBSD: src/sys/netinet6/ip6_output.c,v 1.43 2002/10/31 19:45:48 ume Exp $	*/
30/*	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $	*/
31
32/*
33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the project nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 */
60
61/*
62 * Copyright (c) 1982, 1986, 1988, 1990, 1993
63 *	The Regents of the University of California.  All rights reserved.
64 *
65 * Redistribution and use in source and binary forms, with or without
66 * modification, are permitted provided that the following conditions
67 * are met:
68 * 1. Redistributions of source code must retain the above copyright
69 *    notice, this list of conditions and the following disclaimer.
70 * 2. Redistributions in binary form must reproduce the above copyright
71 *    notice, this list of conditions and the following disclaimer in the
72 *    documentation and/or other materials provided with the distribution.
73 * 3. All advertising materials mentioning features or use of this software
74 *    must display the following acknowledgement:
75 *	This product includes software developed by the University of
76 *	California, Berkeley and its contributors.
77 * 4. Neither the name of the University nor the names of its contributors
78 *    may be used to endorse or promote products derived from this software
79 *    without specific prior written permission.
80 *
81 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
82 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
83 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
84 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
85 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
86 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
87 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
88 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
89 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
90 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
91 * SUCH DAMAGE.
92 *
93 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
94 */
95/*
96 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
97 * support for mandatory and extensible security protections.  This notice
98 * is included in support of clause 2.2 (b) of the Apple Public License,
99 * Version 2.0.
100 */
101
102#include <sys/param.h>
103#include <sys/malloc.h>
104#include <sys/mbuf.h>
105#include <sys/errno.h>
106#include <sys/protosw.h>
107#include <sys/socket.h>
108#include <sys/socketvar.h>
109#include <sys/systm.h>
110#include <sys/kernel.h>
111#include <sys/proc.h>
112#include <sys/kauth.h>
113#include <sys/mcache.h>
114#include <sys/sysctl.h>
115#include <kern/zalloc.h>
116
117#include <pexpert/pexpert.h>
118
119#include <net/if.h>
120#include <net/route.h>
121#include <net/dlil.h>
122
123#include <netinet/in.h>
124#include <netinet/in_var.h>
125#include <netinet/ip_var.h>
126#include <netinet6/in6_var.h>
127#include <netinet/ip6.h>
128#include <netinet6/ip6protosw.h>
129#include <netinet/icmp6.h>
130#include <netinet6/ip6_var.h>
131#include <netinet/in_pcb.h>
132#include <netinet6/nd6.h>
133#include <netinet6/scope6_var.h>
134#include <mach/sdt.h>
135
136#if IPSEC
137#include <netinet6/ipsec.h>
138#if INET6
139#include <netinet6/ipsec6.h>
140#endif
141#include <netkey/key.h>
142extern int ipsec_bypass;
143#endif /* IPSEC */
144
145#if CONFIG_MACF_NET
146#include <security/mac.h>
147#endif /* MAC_NET */
148
149#include <netinet6/ip6_fw.h>
150
151#if DUMMYNET
152#include <netinet/ip_fw.h>
153#include <netinet/ip_dummynet.h>
154#endif /* DUMMYNET */
155
156#include <net/net_osdep.h>
157
158#include <netinet/kpi_ipfilter_var.h>
159
160#if PF
161#include <net/pfvar.h>
162#endif /* PF */
163
164#ifndef __APPLE__
165static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
166#endif
167
168int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt);
169static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
170			    struct socket *, struct sockopt *sopt);
171static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, int uproto);
172static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt);
173static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, int sticky, int cmsg, int uproto);
174static void im6o_trace(struct ip6_moptions *, int);
175static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
176static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
177				  struct ip6_frag **);
178static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
179static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
180static int ip6_getpmtu (struct route_in6 *, struct route_in6 *,
181	struct ifnet *, struct in6_addr *, u_int32_t *, int *);
182
183#define	IM6O_TRACE_HIST_SIZE	32	/* size of trace history */
184
185/* For gdb */
186__private_extern__ unsigned int im6o_trace_hist_size = IM6O_TRACE_HIST_SIZE;
187
188struct ip6_moptions_dbg {
189	struct ip6_moptions	im6o;			/* ip6_moptions */
190	u_int16_t		im6o_refhold_cnt;	/* # of IM6O_ADDREF */
191	u_int16_t		im6o_refrele_cnt;	/* # of IM6O_REMREF */
192	/*
193	 * Alloc and free callers.
194	 */
195	ctrace_t		im6o_alloc;
196	ctrace_t		im6o_free;
197	/*
198	 * Circular lists of IM6O_ADDREF and IM6O_REMREF callers.
199	 */
200	ctrace_t		im6o_refhold[IM6O_TRACE_HIST_SIZE];
201	ctrace_t		im6o_refrele[IM6O_TRACE_HIST_SIZE];
202};
203
204#if DEBUG
205static unsigned int im6o_debug = 1;	/* debugging (enabled) */
206#else
207static unsigned int im6o_debug;		/* debugging (disabled) */
208#endif /* !DEBUG */
209
210static unsigned int im6o_size;		/* size of zone element */
211static struct zone *im6o_zone;		/* zone for ip6_moptions */
212
213#define	IM6O_ZONE_MAX		64		/* maximum elements in zone */
214#define	IM6O_ZONE_NAME		"ip6_moptions"	/* zone name */
215
216SYSCTL_DECL(_net_inet6_ip6);
217
218static int	ip6_maxchainsent = 0;
219SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent, CTLFLAG_RW | CTLFLAG_LOCKED,
220	&ip6_maxchainsent, 0, "use dlil_output_list");
221
222/*
223 * XXX we don't handle mbuf chains yet in nd6_output() so ip6_output_list() only
224 * walks through the packet chain and sends each mbuf separately.
225 */
226int
227ip6_output_list(
228	struct mbuf *m0,
229	int packetlist,
230	struct ip6_pktopts *opt,
231	struct route_in6 *ro,
232	int flags,
233	struct ip6_moptions *im6o,
234	struct ifnet **ifpp,	/* XXX: just for statistics */
235	struct ip6_out_args *ip6oap)
236{
237#pragma unused(packetlist)
238	struct mbuf *m = m0, *nextpkt;
239	int error = 0;
240
241	while (m) {
242		/*
243		 * Break the chain before calling ip6_output() and free the
244		 * mbufs if there was an error.
245		 */
246		nextpkt = m->m_nextpkt;
247		m->m_nextpkt = NULL;
248		error = ip6_output(m, opt, ro, flags, im6o, ifpp, ip6oap);
249		if (error) {
250			if (nextpkt)
251				m_freem_list(nextpkt);
252			return (error);
253		}
254		m = nextpkt;
255	}
256
257	return (error);
258}
259
260/*
261 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
262 * header (with pri, len, nxt, hlim, src, dst).
263 * This function may modify ver and hlim only.
264 * The mbuf chain containing the packet will be freed.
265 * The mbuf opt, if present, will not be freed.
266 *
 * Type of "mtu": rt_rmx.rmx_mtu is u_int32_t, ifnet.ifr_mtu is int, and
 * nd_ifinfo.linkmtu is u_int32_t.  So we use u_int32_t to hold the largest
 * one, which is rt_rmx.rmx_mtu.
270 */
271int
272ip6_output(
273	struct mbuf *m0,
274	struct ip6_pktopts *opt,
275	struct route_in6 *ro,
276	int flags,
277	struct ip6_moptions *im6o,
278	struct ifnet **ifpp,	/* XXX: just for statistics */
279	struct ip6_out_args *ip6oap)
280{
281	struct ip6_hdr *ip6, *mhip6;
282	struct ifnet *ifp = NULL, *origifp = NULL;
283	struct mbuf *m = m0;
284	int hlen, tlen, len, off;
285	struct route_in6 ip6route;
286	struct rtentry *rt = NULL;
287	struct sockaddr_in6 *dst, src_sa, dst_sa;
288	int error = 0;
289	struct in6_ifaddr *ia = NULL;
290	u_int32_t mtu;
291	int alwaysfrag = 0, dontfrag = 0;
292	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
293	struct ip6_exthdrs exthdrs;
294	struct in6_addr finaldst, src0, dst0;
295	u_int32_t zone;
296	struct route_in6 *ro_pmtu = NULL;
297	int hdrsplit = 0;
298	int needipsec = 0;
299	ipfilter_t inject_filter_ref;
300	int tso;
301	boolean_t select_srcif;
302	struct ipf_pktopts *ippo = NULL, ipf_pktopts;
303	struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 }, 0 };
304	struct flowadv *adv = NULL;
305	u_int32_t ifmtu;
306#if DUMMYNET
307	struct m_tag *tag;
308	struct route_in6 saved_route;
309	struct route_in6 saved_ro_pmtu;
310	struct ip_fw_args args;
311	struct sockaddr_in6 dst_buf;
312
313	bzero(&args, sizeof(struct ip_fw_args));
314#endif /* DUMMYNET */
315
316	if ((flags & IPV6_OUTARGS) && ip6oap != NULL) {
317		ip6oa = *ip6oap;
318		adv = &ip6oap->ip6oa_flowadv;
319		adv->code = FADV_SUCCESS;
320	}
321
322#if IPSEC
323	int needipsectun = 0;
324	struct socket *so = NULL;
325	struct secpolicy *sp = NULL;
326	struct route_in6 *ipsec_saved_route = NULL;
327	struct ipsec_output_state ipsec_state;
328
329	bzero(&ipsec_state, sizeof(ipsec_state));
330
331	/* for AH processing. stupid to have "socket" variable in IP layer... */
332	if (ipsec_bypass == 0)
333	{
334		so = ipsec_getsocket(m);
335		(void)ipsec_setsocket(m, NULL);
336	}
337#endif /* IPSEC */
338
339	bzero(&ipf_pktopts, sizeof(struct ipf_pktopts));
340	ippo = &ipf_pktopts;
341
342	ip6 = mtod(m, struct ip6_hdr *);
343	inject_filter_ref = ipf_get_inject_filter(m);
344
345	/* Grab info from mtags prepended to the chain */
346#if DUMMYNET
347	if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
348	    KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
349		struct dn_pkt_tag	*dn_tag;
350
351		dn_tag = (struct dn_pkt_tag *)(tag+1);
352		args.fwa_pf_rule = dn_tag->dn_pf_rule;
353
354		bcopy(&dn_tag->dn_dst6, &dst_buf, sizeof(dst_buf));
355		dst = &dst_buf;
356		ifp = dn_tag->dn_ifp;
357		if (ifp)
358			ifnet_reference(ifp);
359		flags = dn_tag->dn_flags;
360		if (dn_tag->dn_flags & IPV6_OUTARGS)
361			ip6oa = dn_tag->dn_ip6oa;
362
363		saved_route = dn_tag->dn_ro6;
364		ro = &saved_route;
365		saved_ro_pmtu = dn_tag->dn_ro6_pmtu;
366		ro_pmtu = &saved_ro_pmtu;
367		origifp = dn_tag->dn_origifp;
368		if (origifp)
369			ifnet_reference(origifp);
370		mtu = dn_tag->dn_mtu;
371		alwaysfrag = dn_tag->dn_alwaysfrag;
372		unfragpartlen = dn_tag->dn_unfragpartlen;
373
374		bcopy(&dn_tag->dn_exthdrs, &exthdrs, sizeof(exthdrs));
375
376		m_tag_delete(m0, tag);
377	}
378#endif /* DUMMYNET */
379
380	finaldst = ip6->ip6_dst;
381
382	if (ip6_doscopedroute && (flags & IPV6_OUTARGS)) {
383		if ((select_srcif = (!(flags & (IPV6_FORWARDING |
384		    IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL)) &&
385		    (ip6oa.ip6oa_flags & IP6OAF_SELECT_SRCIF))))
386			ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF;
387
388		if ((ip6oa.ip6oa_flags & IP6OAF_BOUND_IF) &&
389		    ip6oa.ip6oa_boundif != IFSCOPE_NONE) {
390			ipf_pktopts.ippo_flags |= (IPPOF_BOUND_IF |
391			    (ip6oa.ip6oa_boundif << IPPOF_SHIFT_IFSCOPE));
392		}
393
394		if (ip6oa.ip6oa_flags & IP6OAF_BOUND_SRCADDR)
395			ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR;
396	} else {
397		select_srcif = FALSE;
398		ip6oa.ip6oa_boundif = IFSCOPE_NONE;
399		ip6oa.ip6oa_flags &= ~(IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_IF |
400		    IP6OAF_BOUND_SRCADDR);
401	}
402
403	if ((flags & IPV6_OUTARGS) && (ip6oa.ip6oa_flags & IP6OAF_NO_CELLULAR))
404		ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
405
406#if DUMMYNET
407	if (args.fwa_pf_rule) {
408		ip6 = mtod(m, struct ip6_hdr *);
409
410		goto check_with_pf;
411	}
412#endif /* DUMMYNET */
413
414#define MAKE_EXTHDR(hp, mp)						\
415    do {								\
416	if (hp) {							\
417		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
418		error = ip6_copyexthdr((mp), (caddr_t)(hp), 		\
419				       ((eh)->ip6e_len + 1) << 3);	\
420		if (error)						\
421			goto freehdrs;					\
422	}								\
423    } while (0)
424
425	bzero(&exthdrs, sizeof(exthdrs));
426
427	if (opt) {
428		/* Hop-by-Hop options header */
429		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
430		/* Destination options header(1st part) */
431		if (opt->ip6po_rthdr) {
432			/*
433			 * Destination options header(1st part)
434			 * This only makes sense with a routing header.
435			 * See Section 9.2 of RFC 3542.
436			 * Disabling this part just for MIP6 convenience is
437			 * a bad idea.  We need to think carefully about a
438			 * way to make the advanced API coexist with MIP6
439			 * options, which might automatically be inserted in
440			 * the kernel.
441			 */
442			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
443		}
444		/* Routing header */
445		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
446		/* Destination options header(2nd part) */
447		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
448	}
449
450#if IPSEC
451	if (ipsec_bypass != 0)
452		goto skip_ipsec;
453
454	/* get a security policy for this packet */
455	if (so == NULL)
456		sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
457	else
458		sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
459
460	if (sp == NULL) {
461		IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
462		goto freehdrs;
463	}
464
465	error = 0;
466
467	/* check policy */
468	switch (sp->policy) {
469	case IPSEC_POLICY_DISCARD:
470	case IPSEC_POLICY_GENERATE:
471		/*
472		 * This packet is just discarded.
473		 */
474		IPSEC_STAT_INCREMENT(ipsec6stat.out_polvio);
475		goto freehdrs;
476
477	case IPSEC_POLICY_BYPASS:
478	case IPSEC_POLICY_NONE:
479		/* no need to do IPsec. */
480		needipsec = 0;
481		break;
482
483	case IPSEC_POLICY_IPSEC:
484		if (sp->req == NULL) {
485			/* acquire a policy */
486			error = key_spdacquire(sp);
487			goto freehdrs;
488		}
489		needipsec = 1;
490		break;
491
492	case IPSEC_POLICY_ENTRUST:
493	default:
494		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
495	}
496	skip_ipsec:
497#endif /* IPSEC */
498
499	/*
500	 * Calculate the total length of the extension header chain.
501	 * Keep the length of the unfragmentable part for fragmentation.
502	 */
503	optlen = 0;
504	if (exthdrs.ip6e_hbh)
505		optlen += exthdrs.ip6e_hbh->m_len;
506	if (exthdrs.ip6e_dest1)
507		optlen += exthdrs.ip6e_dest1->m_len;
508	if (exthdrs.ip6e_rthdr)
509		optlen += exthdrs.ip6e_rthdr->m_len;
510	unfragpartlen = optlen + sizeof(struct ip6_hdr);
511
512	/* NOTE: we don't add AH/ESP length here. do that later. */
513	if (exthdrs.ip6e_dest2)
514		optlen += exthdrs.ip6e_dest2->m_len;
515
516
517	if (needipsec &&
518	    (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
519		in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
520		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
521	}
522
523	/*
524	 * If we need IPsec, or there is at least one extension header,
525	 * separate IP6 header from the payload.
526	 */
527	if ((needipsec || optlen) && !hdrsplit) {
528		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
529			m = NULL;
530			goto freehdrs;
531		}
532		m = exthdrs.ip6e_ip6;
533		hdrsplit++;
534	}
535
536	/* adjust pointer */
537	ip6 = mtod(m, struct ip6_hdr *);
538
539	/* adjust mbuf packet header length */
540	m->m_pkthdr.len += optlen;
541	plen = m->m_pkthdr.len - sizeof(*ip6);
542
543	/* If this is a jumbo payload, insert a jumbo payload option. */
544	if (plen > IPV6_MAXPACKET) {
545		if (!hdrsplit) {
546			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
547				m = NULL;
548				goto freehdrs;
549			}
550			m = exthdrs.ip6e_ip6;
551			hdrsplit++;
552		}
553		/* adjust pointer */
554		ip6 = mtod(m, struct ip6_hdr *);
555		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
556			goto freehdrs;
557		ip6->ip6_plen = 0;
558	} else
559		ip6->ip6_plen = htons(plen);
560
561	/*
562	 * Concatenate headers and fill in next header fields.
563	 * Here we have, on "m"
564	 *	IPv6 payload
565	 * and we insert headers accordingly.  Finally, we should be getting:
566	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
567	 *
568	 * during the header composing process, "m" points to IPv6 header.
569	 * "mprev" points to an extension header prior to esp.
570	 */
571	{
572		u_char *nexthdrp = &ip6->ip6_nxt;
573		struct mbuf *mprev = m;
574
575		/*
576		 * we treat dest2 specially.  this makes IPsec processing
577		 * much easier.  the goal here is to make mprev point the
578		 * mbuf prior to dest2.
579		 *
580		 * result: IPv6 dest2 payload
581		 * m and mprev will point to IPv6 header.
582		 */
583		if (exthdrs.ip6e_dest2) {
584			if (!hdrsplit)
585				panic("assumption failed: hdr not split");
586			exthdrs.ip6e_dest2->m_next = m->m_next;
587			m->m_next = exthdrs.ip6e_dest2;
588			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
589			ip6->ip6_nxt = IPPROTO_DSTOPTS;
590		}
591
592#define MAKE_CHAIN(m, mp, p, i)\
593    do {\
594	if (m) {\
595		if (!hdrsplit) \
596			panic("assumption failed: hdr not split"); \
597		*mtod((m), u_char *) = *(p);\
598		*(p) = (i);\
599		p = mtod((m), u_char *);\
600		(m)->m_next = (mp)->m_next;\
601		(mp)->m_next = (m);\
602		(mp) = (m);\
603	}\
604    } while (0)
605		/*
606		 * result: IPv6 hbh dest1 rthdr dest2 payload
607		 * m will point to IPv6 header.  mprev will point to the
608		 * extension header prior to dest2 (rthdr in the above case).
609		 */
610		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev,
611			   nexthdrp, IPPROTO_HOPOPTS);
612		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev,
613			   nexthdrp, IPPROTO_DSTOPTS);
614		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev,
615			   nexthdrp, IPPROTO_ROUTING);
616
617		if (!TAILQ_EMPTY(&ipv6_filters)) {
618			struct ipfilter	*filter;
619			int seen = (inject_filter_ref == 0);
620			int	fixscope = 0;
621
622			if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
623				ippo->ippo_flags |= IPPOF_MCAST_OPTS;
624				IM6O_LOCK(im6o);
625				ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp;
626				ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim;
627				ippo->ippo_mcast_loop = im6o->im6o_multicast_loop;
628				IM6O_UNLOCK(im6o);
629			}
630
631			/* Hack: embed the scope_id in the destination */
632			if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) &&
633				(ip6->ip6_dst.s6_addr16[1] == 0) && (ro != NULL)) {
634				fixscope = 1;
635				ip6->ip6_dst.s6_addr16[1] = htons(ro->ro_dst.sin6_scope_id);
636			}
637			{
638				ipf_ref();
639				TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) {
640					/*
641					 * No need to proccess packet twice if we've
642					 * already seen it
643					 */
644					if (seen == 0) {
645						if ((struct ipfilter *)inject_filter_ref == filter)
646							seen = 1;
647					} else if (filter->ipf_filter.ipf_output) {
648						errno_t result;
649
650						result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
651						if (result == EJUSTRETURN) {
652							ipf_unref();
653							goto done;
654						}
655						if (result != 0) {
656							ipf_unref();
657							goto bad;
658						}
659					}
660				}
661				ipf_unref();
662			}
663			ip6 = mtod(m, struct ip6_hdr *);
664			/* Hack: cleanup embedded scope_id if we put it there */
665			if (fixscope)
666				ip6->ip6_dst.s6_addr16[1] = 0;
667		}
668
669#if IPSEC
670		if (!needipsec)
671			goto skip_ipsec2;
672
673		/*
674		 * pointers after IPsec headers are not valid any more.
675		 * other pointers need a great care too.
676		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
677		 */
678		exthdrs.ip6e_dest2 = NULL;
679
680	    {
681		struct ip6_rthdr *rh = NULL;
682		int segleft_org = 0;
683
684		if (exthdrs.ip6e_rthdr) {
685			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
686			segleft_org = rh->ip6r_segleft;
687			rh->ip6r_segleft = 0;
688		}
689
690		ipsec_state.m = m;
691		error = ipsec6_output_trans(&ipsec_state, nexthdrp, mprev, sp, flags,
692			&needipsectun);
693		m = ipsec_state.m;
694		if (error) {
695			/* mbuf is already reclaimed in ipsec6_output_trans. */
696			m = NULL;
697			switch (error) {
698			case EHOSTUNREACH:
699			case ENETUNREACH:
700			case EMSGSIZE:
701			case ENOBUFS:
702			case ENOMEM:
703				break;
704			default:
705				printf("ip6_output (ipsec): error code %d\n", error);
706				/* fall through */
707			case ENOENT:
708				/* don't show these error codes to the user */
709				error = 0;
710				break;
711			}
712			goto bad;
713		}
714		if (exthdrs.ip6e_rthdr) {
715			/* ah6_output doesn't modify mbuf chain */
716			rh->ip6r_segleft = segleft_org;
717		}
718	  }
719	}
720skip_ipsec2:
721#endif
722
723	/*
724	 * If there is a routing header, replace the destination address field
725	 * with the first hop of the routing header.
726	 */
727	if (exthdrs.ip6e_rthdr) {
728		struct ip6_rthdr *rh =
729			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
730						  struct ip6_rthdr *));
731		struct ip6_rthdr0 *rh0;
732		struct in6_addr *addr;
733		struct sockaddr_in6 sa;
734
735		switch (rh->ip6r_type) {
736		case IPV6_RTHDR_TYPE_0:
737			 rh0 = (struct ip6_rthdr0 *)rh;
738			 addr = (struct in6_addr *)(void *)(rh0 + 1);
739
740			 /*
741			  * construct a sockaddr_in6 form of
742			  * the first hop.
743			  *
744			  * XXX: we may not have enough
745			  * information about its scope zone;
746			  * there is no standard API to pass
747			  * the information from the
748			  * application.
749			  */
750			 bzero(&sa, sizeof(sa));
751			 sa.sin6_family = AF_INET6;
752			 sa.sin6_len = sizeof(sa);
753			 sa.sin6_addr = addr[0];
754			 if ((error = sa6_embedscope(&sa,
755			     ip6_use_defzone)) != 0) {
756				 goto bad;
757			 }
758			 ip6->ip6_dst = sa.sin6_addr;
759			 bcopy(&addr[1], &addr[0], sizeof(struct in6_addr)
760			     * (rh0->ip6r0_segleft - 1));
761			 addr[rh0->ip6r0_segleft - 1] = finaldst;
762			 /* XXX */
763			 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
764			 break;
765		default:	/* is it possible? */
766			 error = EINVAL;
767			 goto bad;
768		}
769	}
770
771	/* Source address validation */
772	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
773	    (flags & IPV6_UNSPECSRC) == 0) {
774		error = EOPNOTSUPP;
775		ip6stat.ip6s_badscope++;
776		goto bad;
777	}
778	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
779		error = EOPNOTSUPP;
780		ip6stat.ip6s_badscope++;
781		goto bad;
782	}
783
784	ip6stat.ip6s_localout++;
785
786	/*
787	 * Route packet.
788	 */
789	if (ro == 0) {
790		ro = &ip6route;
791		bzero((caddr_t)ro, sizeof(*ro));
792	}
793	ro_pmtu = ro;
794	if (opt && opt->ip6po_rthdr)
795		ro = &opt->ip6po_route;
796	dst = (struct sockaddr_in6 *)&ro->ro_dst;
797
798	if (ro && ro->ro_rt)
799		RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
800	/*
801	 * if specified, try to fill in the traffic class field.
802	 * do not override if a non-zero value is already set.
803	 * we check the diffserv field and the ecn field separately.
804	 */
805	if (opt && opt->ip6po_tclass >= 0) {
806		int mask = 0;
807
808		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
809			mask |= 0xfc;
810		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
811			mask |= 0x03;
812		if (mask != 0)
813			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
814	}
815
816	/* fill in or override the hop limit field, if necessary. */
817	if (opt && opt->ip6po_hlim != -1)
818		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
819	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
820		if (im6o != NULL) {
821			IM6O_LOCK(im6o);
822			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
823			IM6O_UNLOCK(im6o);
824		} else {
825			ip6->ip6_hlim = ip6_defmcasthlim;
826		}
827	}
828
829	/*
830	 * If there is a cached route, check that it is to the same
831	 * destination and is still up. If not, free it and try again.
832	 * Test rt_flags without holding rt_lock for performance reasons;
833	 * if the route is down it will hopefully be caught by the layer
834	 * below (since it uses this route as a hint) or during the
835	 * next transmit.
836	 */
837	if (ro->ro_rt != NULL && (!(ro->ro_rt->rt_flags & RTF_UP) ||
838	    dst->sin6_family != AF_INET6 ||
839	    !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst) ||
840	    ro->ro_rt->generation_id != route_generation)) {
841		rtfree(ro->ro_rt);
842		ro->ro_rt = NULL;
843	}
844	if (ro->ro_rt == NULL) {
845		bzero(dst, sizeof(*dst));
846		dst->sin6_family = AF_INET6;
847		dst->sin6_len = sizeof(struct sockaddr_in6);
848		dst->sin6_addr = ip6->ip6_dst;
849	}
850#if IPSEC
851	if (needipsec && needipsectun) {
852#if CONFIG_DTRACE
853		struct ifnet *trace_ifp = (ifpp != NULL) ? (*ifpp) : NULL;
854#endif
855		/*
856		 * All the extension headers will become inaccessible
857		 * (since they can be encrypted).
858		 * Don't panic, we need no more updates to extension headers
859		 * on inner IPv6 packet (since they are now encapsulated).
860		 *
861		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
862		 */
863		bzero(&exthdrs, sizeof(exthdrs));
864		exthdrs.ip6e_ip6 = m;
865
866		ipsec_state.m = m;
867		route_copyout(&ipsec_state.ro, (struct route *)ro, sizeof(ipsec_state.ro));
868		ipsec_state.dst = (struct sockaddr *)dst;
869
870		/* Added a trace here so that we can see packets inside a tunnel */
871		DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
872			struct ip6_hdr *, ip6, struct ifnet *, trace_ifp,
873			struct ip *, NULL, struct ip6_hdr *, ip6);
874
875		error = ipsec6_output_tunnel(&ipsec_state, sp, flags);
876		if (ipsec_state.tunneled == 4)	/* tunneled in IPv4 - packet is gone */
877			goto done;
878		m = ipsec_state.m;
879		ipsec_saved_route = ro;
880		ro = (struct route_in6 *)&ipsec_state.ro;
881		dst = (struct sockaddr_in6 *)(void *)ipsec_state.dst;
882		if (error) {
883			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
884			m0 = m = NULL;
885			m = NULL;
886			switch (error) {
887			case EHOSTUNREACH:
888			case ENETUNREACH:
889			case EMSGSIZE:
890			case ENOBUFS:
891			case ENOMEM:
892				break;
893			default:
894				printf("ip6_output (ipsec): error code %d\n", error);
895				/* fall through */
896			case ENOENT:
897				/* don't show these error codes to the user */
898				error = 0;
899				break;
900			}
901			goto bad;
902		}
903		/*
904		 * The packet has been encapsulated so the ifscope is no longer valid
905		 * since it does not apply to the outer address: ignore the ifscope.
906		 */
907		ip6oa.ip6oa_boundif = IFSCOPE_NONE;
908		ip6oa.ip6oa_flags &= ~IP6OAF_BOUND_IF;
909		if (opt != NULL && opt->ip6po_pktinfo != NULL) {
910			if (opt->ip6po_pktinfo->ipi6_ifindex != IFSCOPE_NONE)
911				opt->ip6po_pktinfo->ipi6_ifindex = IFSCOPE_NONE;
912		}
913		exthdrs.ip6e_ip6 = m;
914	}
915#endif /* IPSEC */
916
917	/* for safety */
918	if (ifp != NULL) {
919		ifnet_release(ifp);
920		ifp = NULL;
921	}
922
923	/* adjust pointer */
924	ip6 = mtod(m, struct ip6_hdr *);
925
926	if (select_srcif) {
927		bzero(&src_sa, sizeof(src_sa));
928		src_sa.sin6_family = AF_INET6;
929		src_sa.sin6_len = sizeof(src_sa);
930		src_sa.sin6_addr = ip6->ip6_src;
931	}
932	bzero(&dst_sa, sizeof(dst_sa));
933	dst_sa.sin6_family = AF_INET6;
934	dst_sa.sin6_len = sizeof(dst_sa);
935	dst_sa.sin6_addr = ip6->ip6_dst;
936
937	/*
938	 * in6_selectroute() might return an ifp with its reference held
939	 * even in the error case, so make sure to release its reference.
940	 */
941	if ((error = in6_selectroute(select_srcif ? &src_sa : NULL,
942	    &dst_sa, opt, im6o, ro, &ifp, &rt, 0, &ip6oa)) != 0) {
943		switch (error) {
944		case EHOSTUNREACH:
945			ip6stat.ip6s_noroute++;
946			break;
947		case EADDRNOTAVAIL:
948		default:
949			break; /* XXX statistics? */
950		}
951		if (ifp != NULL)
952			in6_ifstat_inc(ifp, ifs6_out_discard);
953		/* ifp (if non-NULL) will be released at the end */
954		goto bad;
955	}
956	if (rt == NULL) {
957		/*
958		 * If in6_selectroute() does not return a route entry,
959		 * dst may not have been updated.
960		 */
961		*dst = dst_sa;	/* XXX */
962	}
963
964	/*
965	 * then rt (for unicast) and ifp must be non-NULL valid values.
966	 */
967	if ((flags & IPV6_FORWARDING) == 0) {
968		/* XXX: the FORWARDING flag can be set for mrouting. */
969		in6_ifstat_inc(ifp, ifs6_out_request);
970	}
971	if (rt != NULL) {
972		RT_LOCK(rt);
973		ia = (struct in6_ifaddr *)(rt->rt_ifa);
974		if (ia != NULL)
975			IFA_ADDREF(&ia->ia_ifa);
976		rt->rt_use++;
977		RT_UNLOCK(rt);
978	}
979
980	/*
981	 * The outgoing interface must be in the zone of source and
982	 * destination addresses.  We should use ia_ifp to support the
983	 * case of sending packets to an address of our own.
984	 */
985	if (ia != NULL && ia->ia_ifp) {
986		ifnet_reference(ia->ia_ifp);	/* for origifp */
987		if (origifp != NULL)
988			ifnet_release(origifp);
989		origifp = ia->ia_ifp;
990	} else {
991		if (ifp != NULL)
992			ifnet_reference(ifp);	/* for origifp */
993		if (origifp != NULL)
994			ifnet_release(origifp);
995		origifp = ifp;
996	}
997	src0 = ip6->ip6_src;
998	if (in6_setscope(&src0, origifp, &zone))
999		goto badscope;
1000	bzero(&src_sa, sizeof(src_sa));
1001	src_sa.sin6_family = AF_INET6;
1002	src_sa.sin6_len = sizeof(src_sa);
1003	src_sa.sin6_addr = ip6->ip6_src;
1004	if (sa6_recoverscope(&src_sa, TRUE) || zone != src_sa.sin6_scope_id)
1005		goto badscope;
1006
1007	dst0 = ip6->ip6_dst;
1008	if (in6_setscope(&dst0, origifp, &zone))
1009		goto badscope;
1010	/* re-initialize to be sure */
1011	bzero(&dst_sa, sizeof(dst_sa));
1012	dst_sa.sin6_family = AF_INET6;
1013	dst_sa.sin6_len = sizeof(dst_sa);
1014	dst_sa.sin6_addr = ip6->ip6_dst;
1015	if (sa6_recoverscope(&dst_sa, TRUE) || zone != dst_sa.sin6_scope_id) {
1016		goto badscope;
1017	}
1018
1019	/* scope check is done. */
1020	goto routefound;
1021
1022  badscope:
1023	ip6stat.ip6s_badscope++;
1024	in6_ifstat_inc(origifp, ifs6_out_discard);
1025	if (error == 0)
1026		error = EHOSTUNREACH; /* XXX */
1027	goto bad;
1028
1029  routefound:
1030	if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1031		if (opt && opt->ip6po_nextroute.ro_rt) {
1032			/*
1033			 * The nexthop is explicitly specified by the
1034			 * application.  We assume the next hop is an IPv6
1035			 * address.
1036			 */
1037			dst = (struct sockaddr_in6 *)(void *)opt->ip6po_nexthop;
1038		}
1039		else if ((rt->rt_flags & RTF_GATEWAY))
1040			dst = (struct sockaddr_in6 *)(void *)rt->rt_gateway;
1041	}
1042
1043	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1044		m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
1045	} else {
1046		struct	in6_multi *in6m;
1047
1048		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
1049
1050		in6_ifstat_inc(ifp, ifs6_out_mcast);
1051
1052		/*
1053		 * Confirm that the outgoing interface supports multicast.
1054		 */
1055		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1056			ip6stat.ip6s_noroute++;
1057			in6_ifstat_inc(ifp, ifs6_out_discard);
1058			error = ENETUNREACH;
1059			goto bad;
1060		}
1061		in6_multihead_lock_shared();
1062		IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, in6m);
1063		in6_multihead_lock_done();
1064		if (im6o != NULL)
1065			IM6O_LOCK(im6o);
1066		if (in6m != NULL &&
1067		   (im6o == NULL || im6o->im6o_multicast_loop)) {
1068			if (im6o != NULL)
1069				IM6O_UNLOCK(im6o);
1070			/*
1071			 * If we belong to the destination multicast group
1072			 * on the outgoing interface, and the caller did not
1073			 * forbid loopback, loop back a copy.
1074			 */
1075			ip6_mloopback(ifp, m, dst);
1076		} else {
1077			if (im6o != NULL)
1078				IM6O_UNLOCK(im6o);
1079			/*
1080			 * If we are acting as a multicast router, perform
1081			 * multicast forwarding as if the packet had just
1082			 * arrived on the interface to which we are about
1083			 * to send.  The multicast forwarding function
1084			 * recursively calls this function, using the
1085			 * IPV6_FORWARDING flag to prevent infinite recursion.
1086			 *
1087			 * Multicasts that are looped back by ip6_mloopback(),
1088			 * above, will be forwarded by the ip6_input() routine,
1089			 * if necessary.
1090			 */
1091#if MROUTING
1092			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
1093				/*
1094				 * XXX: ip6_mforward expects that rcvif is NULL
1095				 * when it is called from the originating path.
1096				 * However, it is not always the case, since
				 * some versions of MGETHDR() do not
1098				 * initialize the field.
1099				 */
1100				m->m_pkthdr.rcvif = NULL;
1101				if (ip6_mforward(ip6, ifp, m) != 0) {
1102					m_freem(m);
1103					if (in6m != NULL)
1104						IN6M_REMREF(in6m);
1105					goto done;
1106				}
1107			}
1108#endif
1109		}
1110		if (in6m != NULL)
1111			IN6M_REMREF(in6m);
1112		/*
1113		 * Multicasts with a hoplimit of zero may be looped back,
1114		 * above, but must not be transmitted on a network.
1115		 * Also, multicasts addressed to the loopback interface
1116		 * are not sent -- the above call to ip6_mloopback() will
1117		 * loop back a copy if this host actually belongs to the
1118		 * destination group on the loopback interface.
1119		 */
1120		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
1121		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
1122			m_freem(m);
1123			goto done;
1124		}
1125	}
1126
1127	/*
	 * Fill the outgoing interface to tell the upper layer
1129	 * to increment per-interface statistics.
1130	 */
1131	if (ifpp != NULL) {
1132		ifnet_reference(ifp);	/* for caller */
1133		if (*ifpp != NULL)
1134			ifnet_release(*ifpp);
1135		*ifpp = ifp;
1136	}
1137
1138	/* Determine path MTU. */
1139	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
1140	    &alwaysfrag)) != 0)
1141		goto bad;
1142
1143	/*
1144	 * The caller of this function may specify to use the minimum MTU
1145	 * in some cases.
1146	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
1147	 * setting.  The logic is a bit complicated; by default, unicast
1148	 * packets will follow path MTU while multicast packets will be sent at
1149	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
1150	 * including unicast ones will be sent at the minimum MTU.  Multicast
1151	 * packets will always be sent at the minimum MTU unless
1152	 * IP6PO_MINMTU_DISABLE is explicitly specified.
1153	 * See RFC 3542 for more details.
1154	 */
1155	if (mtu > IPV6_MMTU) {
1156		if ((flags & IPV6_MINMTU))
1157			mtu = IPV6_MMTU;
1158		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
1159			mtu = IPV6_MMTU;
1160		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
1161			 (opt == NULL ||
1162			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
1163			mtu = IPV6_MMTU;
1164		}
1165	}
1166
1167	/*
1168	 * clear embedded scope identifiers if necessary.
1169	 * in6_clearscope will touch the addresses only when necessary.
1170	 */
1171	in6_clearscope(&ip6->ip6_src);
1172	in6_clearscope(&ip6->ip6_dst);
1173
1174#if IPFW2
1175	/*
1176	 * Check with the firewall...
1177	 */
1178        if (ip6_fw_enable && ip6_fw_chk_ptr) {
1179		u_short port = 0;
1180		m->m_pkthdr.rcvif = NULL;	/* XXX */
1181		/* If ipfw says divert, we have to just drop packet */
1182		if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m)) {
1183			m_freem(m);
1184			goto done;
1185		}
1186		if (!m) {
1187			error = EACCES;
1188			goto done;
1189		}
1190	}
1191#endif
1192
1193	/*
1194	 * If the outgoing packet contains a hop-by-hop options header,
1195	 * it must be examined and processed even by the source node.
1196	 * (RFC 2460, section 4.)
1197	 */
1198	if (exthdrs.ip6e_hbh) {
1199		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
1200		u_int32_t dummy; /* XXX unused */
1201
1202#if DIAGNOSTIC
1203		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
1204			panic("ip6e_hbh is not continuous");
1205#endif
1206		/*
1207		 *  XXX: if we have to send an ICMPv6 error to the sender,
1208		 *       we need the M_LOOP flag since icmp6_error() expects
1209		 *       the IPv6 and the hop-by-hop options header are
1210		 *       continuous unless the flag is set.
1211		 */
1212		m->m_flags |= M_LOOP;
1213		m->m_pkthdr.rcvif = ifp;
1214		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
1215		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
1216		    &dummy, &plen) < 0) {
1217			/* m was already freed at this point */
1218			error = EINVAL;/* better error? */
1219			goto done;
1220		}
1221		m->m_flags &= ~M_LOOP; /* XXX */
1222		m->m_pkthdr.rcvif = NULL;
1223	}
1224
1225#if DUMMYNET
1226check_with_pf:
1227#endif
1228#if PF
1229	if (PF_IS_ENABLED) {
1230#if DUMMYNET
1231		/*
1232		 * TBD: Need to save opt->ip6po_flags for reinjection rdar://10434993
1233		 */
1234		args.fwa_m = m;
1235		args.fwa_oif = ifp;
1236		args.fwa_oflags = flags;
1237		if ((flags & IPV6_OUTARGS))
1238			args.fwa_ip6oa = &ip6oa;
1239		args.fwa_ro6 = ro;
1240		args.fwa_dst6 = dst;
1241		args.fwa_ro6_pmtu = ro_pmtu;
1242		args.fwa_origifp = origifp;
1243		args.fwa_mtu = mtu;
1244		args.fwa_alwaysfrag = alwaysfrag;
1245		args.fwa_unfragpartlen = unfragpartlen;
1246		args.fwa_exthdrs = &exthdrs;
1247		/* Invoke outbound packet filter */
1248		error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, &args);
1249#else
1250		error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, NULL);
1251#endif /* DUMMYNET */
1252
1253		if (error != 0 || m == NULL) {
1254			/*
1255			 * Note that if we ever handle packet chain, we will
1256			 * have to restore the linkage from the previous
			 * packet to the next like in ip_output_list()
1258			 */
1259			if (m != NULL) {
1260				panic("%s: unexpected packet %p\n", __func__, m);
1261				/* NOTREACHED */
1262			}
1263			/* Already freed by callee */
1264			goto done;
1265		}
1266		ip6 = mtod(m, struct ip6_hdr *);
1267	}
1268#endif /* PF */
1269
1270	/*
1271	 * Send the packet to the outgoing interface.
1272	 * If necessary, do IPv6 fragmentation before sending.
1273	 *
1274	 * the logic here is rather complex:
1275	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
1276	 * 1-a:	send as is if tlen <= path mtu
1277	 * 1-b:	fragment if tlen > path mtu
1278	 *
1279	 * 2: if user asks us not to fragment (dontfrag == 1)
1280	 * 2-a:	send as is if tlen <= interface mtu
1281	 * 2-b:	error if tlen > interface mtu
1282	 *
1283	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
1284	 *	always fragment
1285	 *
1286	 * 4: if dontfrag == 1 && alwaysfrag == 1
1287	 *	error, as we cannot handle this conflicting request
1288	 */
1289	tlen = m->m_pkthdr.len;
1290
1291	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
1292		dontfrag = 1;
1293	else
1294		dontfrag = 0;
1295	if (dontfrag && alwaysfrag) {	/* case 4 */
1296		/* conflicting request - can't transmit */
1297		error = EMSGSIZE;
1298		goto bad;
1299	}
1300
1301	lck_rw_lock_shared(nd_if_rwlock);
1302	/* Access without acquiring nd_ifinfo lock for performance */
1303	ifmtu = IN6_LINKMTU(ifp);
1304	lck_rw_done(nd_if_rwlock);
1305
1306	if (dontfrag && tlen > ifmtu) {	/* case 2-b */
1307		/*
1308		 * Even if the DONTFRAG option is specified, we cannot send the
1309		 * packet when the data length is larger than the MTU of the
1310		 * outgoing interface.
1311		 * Notify the error by sending IPV6_PATHMTU ancillary data as
1312		 * well as returning an error code (the latter is not described
1313		 * in the API spec.)
1314		 */
1315		u_int32_t mtu32;
1316		struct ip6ctlparam ip6cp;
1317
1318		mtu32 = (u_int32_t)mtu;
1319		bzero(&ip6cp, sizeof(ip6cp));
1320		ip6cp.ip6c_cmdarg = (void *)&mtu32;
1321		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1322		    (void *)&ip6cp);
1323
1324		error = EMSGSIZE;
1325		goto bad;
1326	}
1327
1328	/*
1329	 * transmit packet without fragmentation
1330	 */
1331	tso = (ifp->if_hwassist & IFNET_TSO_IPV6) &&
1332	    (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6);
1333	if (dontfrag || (!alwaysfrag &&		/* case 1-a and 2-a */
1334	    (tlen <= mtu || tso || (ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) {
1335		int sw_csum;
1336
1337		ip6 = mtod(m, struct ip6_hdr *);
1338#ifdef IPSEC
1339		/* clean ipsec history once it goes out of the node */
1340		ipsec_delaux(m);
1341#endif
1342
1343		if (apple_hwcksum_tx == 0) /* Do not let HW handle cksum */
1344			sw_csum = m->m_pkthdr.csum_flags;
1345		else
1346			sw_csum = m->m_pkthdr.csum_flags &
1347			    ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1348
1349		if ((sw_csum & CSUM_DELAY_IPV6_DATA) != 0) {
1350			in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
1351			m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
1352		}
1353		if (ro->ro_rt)
1354			RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
1355		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, adv);
1356		goto done;
1357	}
1358
1359	/*
1360	 * try to fragment the packet.  case 1-b and 3
1361	 */
1362	if (mtu < IPV6_MMTU) {
1363		/* path MTU cannot be less than IPV6_MMTU */
1364		error = EMSGSIZE;
1365		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1366		goto bad;
1367	} else if (ip6->ip6_plen == 0) {
1368		/* jumbo payload cannot be fragmented */
1369		error = EMSGSIZE;
1370		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1371		goto bad;
1372	} else {
1373		struct mbuf **mnext, *m_frgpart;
1374		struct ip6_frag *ip6f;
1375		u_int32_t id = htonl(ip6_randomid());
1376		u_char nextproto;
1377
1378		/*
1379		 * Too large for the destination or interface;
1380		 * fragment if possible.
1381		 * Must be able to put at least 8 bytes per fragment.
1382		 */
1383		hlen = unfragpartlen;
1384		if (mtu > IPV6_MAXPACKET)
1385			mtu = IPV6_MAXPACKET;
1386
1387		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1388		if (len < 8) {
1389			error = EMSGSIZE;
1390			in6_ifstat_inc(ifp, ifs6_out_fragfail);
1391			goto bad;
1392		}
1393
1394		mnext = &m->m_nextpkt;
1395
1396		/*
1397		 * Change the next header field of the last header in the
1398		 * unfragmentable part.
1399		 */
1400		if (exthdrs.ip6e_rthdr) {
1401			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1402			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1403		} else if (exthdrs.ip6e_dest1) {
1404			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1405			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1406		} else if (exthdrs.ip6e_hbh) {
1407			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1408			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1409		} else {
1410			nextproto = ip6->ip6_nxt;
1411			ip6->ip6_nxt = IPPROTO_FRAGMENT;
1412		}
1413
1414		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
1415			in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
1416			m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
1417		}
1418
1419		/*
1420		 * Loop through length of segment after first fragment,
1421		 * make new header and copy data of each part and link onto
1422		 * chain.
1423		 */
1424		m0 = m;
1425		for (off = hlen; off < tlen; off += len) {
1426			MGETHDR(m, M_DONTWAIT, MT_HEADER);	/* MAC-OK */
1427			if (!m) {
1428				error = ENOBUFS;
1429				ip6stat.ip6s_odropped++;
1430				goto sendorfree;
1431			}
1432			m->m_pkthdr.rcvif = NULL;
1433			m->m_flags = m0->m_flags & M_COPYFLAGS;
1434			*mnext = m;
1435			mnext = &m->m_nextpkt;
1436			m->m_data += max_linkhdr;
1437			mhip6 = mtod(m, struct ip6_hdr *);
1438			*mhip6 = *ip6;
1439			m->m_len = sizeof(*mhip6);
1440 			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1441 			if (error) {
1442				ip6stat.ip6s_odropped++;
1443				goto sendorfree;
1444			}
1445			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1446			if (off + len >= tlen)
1447				len = tlen - off;
1448			else
1449				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1450			mhip6->ip6_plen = htons((u_short)(len + hlen +
1451							  sizeof(*ip6f) -
1452							  sizeof(struct ip6_hdr)));
1453			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1454				error = ENOBUFS;
1455				ip6stat.ip6s_odropped++;
1456				goto sendorfree;
1457			}
1458			m_cat(m, m_frgpart);
1459			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1460			m->m_pkthdr.rcvif = 0;
1461			m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
1462
1463			M_COPY_PFTAG(m, m0);
1464			m_set_service_class(m, m0->m_pkthdr.svc);
1465
1466#ifdef __darwin8_notyet
1467#if CONFIG_MACF_NET
1468			mac_create_fragment(m0, m);
1469#endif
1470#endif
1471			ip6f->ip6f_reserved = 0;
1472			ip6f->ip6f_ident = id;
1473			ip6f->ip6f_nxt = nextproto;
1474			ip6stat.ip6s_ofragments++;
1475			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1476		}
1477
1478		in6_ifstat_inc(ifp, ifs6_out_fragok);
1479	}
1480
1481	/*
1482	 * Remove leading garbages.
1483	 */
1484sendorfree:
1485	m = m0->m_nextpkt;
1486	m0->m_nextpkt = 0;
1487	m_freem(m0);
1488	for (m0 = m; m; m = m0) {
1489		m0 = m->m_nextpkt;
1490		m->m_nextpkt = 0;
1491		if (error == 0) {
1492 			/* Record statistics for this interface address. */
1493 			if (ia) {
1494#ifndef __APPLE__
1495 				ia->ia_ifa.if_opackets++;
1496 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1497#endif
1498 			}
1499#if IPSEC
1500			/* clean ipsec history once it goes out of the node */
1501			ipsec_delaux(m);
1502#endif
1503			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt,
1504			    adv);
1505
1506		} else
1507			m_freem(m);
1508	}
1509
1510	if (error == 0)
1511		ip6stat.ip6s_fragmented++;
1512
1513done:
1514#if IPSEC
1515	if (ipsec_saved_route) {
1516		ro = ipsec_saved_route;
1517		if (ipsec_state.ro.ro_rt) {
1518			rtfree(ipsec_state.ro.ro_rt);
1519		}
1520	}
1521#endif /* IPSEC */
1522	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for rtfree */
1523		rtfree(ro->ro_rt);
1524	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1525		rtfree(ro_pmtu->ro_rt);
1526	}
1527
1528#if IPSEC
1529	if (sp != NULL)
1530		key_freesp(sp, KEY_SADB_UNLOCKED);
1531#endif /* IPSEC */
1532
1533	if (ia != NULL)
1534		IFA_REMREF(&ia->ia_ifa);
1535	if (ifp != NULL)
1536		ifnet_release(ifp);
1537	if (origifp != NULL)
1538		ifnet_release(origifp);
1539	return (error);
1540
1541freehdrs:
1542	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1543	m_freem(exthdrs.ip6e_dest1);
1544	m_freem(exthdrs.ip6e_rthdr);
1545	m_freem(exthdrs.ip6e_dest2);
1546	/* fall through */
1547bad:
1548	m_freem(m);
1549	goto done;
1550}
1551
1552static int
1553ip6_copyexthdr(mp, hdr, hlen)
1554	struct mbuf **mp;
1555	caddr_t hdr;
1556	int hlen;
1557{
1558	struct mbuf *m;
1559
1560	if (hlen > MCLBYTES)
1561		return(ENOBUFS); /* XXX */
1562
1563	MGET(m, M_DONTWAIT, MT_DATA);
1564	if (!m)
1565		return(ENOBUFS);
1566
1567	if (hlen > MLEN) {
1568		MCLGET(m, M_DONTWAIT);
1569		if ((m->m_flags & M_EXT) == 0) {
1570			m_free(m);
1571			return (ENOBUFS);
1572		}
1573	}
1574	m->m_len = hlen;
1575	if (hdr)
1576		bcopy(hdr, mtod(m, caddr_t), hlen);
1577
1578	*mp = m;
1579	return (0);
1580}
1581
1582/*
1583 * Process a delayed payload checksum calculation.
1584 */
1585void
1586in6_delayed_cksum(struct mbuf *m, uint16_t offset)
1587{
1588	uint16_t csum;
1589
1590	csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset);
1591	if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDPIPV6) != 0) {
1592		csum = 0xffff;
1593	}
1594
1595	offset += (m->m_pkthdr.csum_data & 0xffff);
1596	if ((offset + sizeof(csum)) > m->m_len) {
1597		m_copyback(m, offset, sizeof(csum), &csum);
1598	} else if (IP6_HDR_ALIGNED_P(mtod(m, char *))) {
1599		*(uint16_t *)(void *)(mtod(m, char *) + offset) = csum;
1600	} else {
1601		bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum));
1602	}
1603}
1604/*
1605 * Insert jumbo payload option.
1606 */
1607static int
1608ip6_insert_jumboopt(exthdrs, plen)
1609	struct ip6_exthdrs *exthdrs;
1610	u_int32_t plen;
1611{
1612	struct mbuf *mopt;
1613	u_char *optbuf;
1614	u_int32_t v;
1615
1616#define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1617
1618	/*
1619	 * If there is no hop-by-hop options header, allocate new one.
1620	 * If there is one but it doesn't have enough space to store the
1621	 * jumbo payload option, allocate a cluster to store the whole options.
1622	 * Otherwise, use it to store the options.
1623	 */
1624	if (exthdrs->ip6e_hbh == 0) {
1625		MGET(mopt, M_DONTWAIT, MT_DATA);
1626		if (mopt == 0)
1627			return (ENOBUFS);
1628		mopt->m_len = JUMBOOPTLEN;
1629		optbuf = mtod(mopt, u_char *);
1630		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1631		exthdrs->ip6e_hbh = mopt;
1632	} else {
1633		struct ip6_hbh *hbh;
1634
1635		mopt = exthdrs->ip6e_hbh;
1636		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1637			/*
1638			 * XXX assumption:
1639			 * - exthdrs->ip6e_hbh is not referenced from places
1640			 *   other than exthdrs.
1641			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1642			 */
1643			u_int32_t oldoptlen = mopt->m_len;
1644			struct mbuf *n;
1645
1646			/*
1647			 * XXX: give up if the whole (new) hbh header does
1648			 * not fit even in an mbuf cluster.
1649			 */
1650			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1651				return (ENOBUFS);
1652
1653			/*
1654			 * As a consequence, we must always prepare a cluster
1655			 * at this point.
1656			 */
1657			MGET(n, M_DONTWAIT, MT_DATA);
1658			if (n) {
1659				MCLGET(n, M_DONTWAIT);
1660				if ((n->m_flags & M_EXT) == 0) {
1661					m_freem(n);
1662					n = NULL;
1663				}
1664			}
1665			if (!n)
1666				return (ENOBUFS);
1667			n->m_len = oldoptlen + JUMBOOPTLEN;
1668			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1669			    oldoptlen);
1670			optbuf = mtod(n, u_char *) + oldoptlen;
1671			m_freem(mopt);
1672			mopt = exthdrs->ip6e_hbh = n;
1673		} else {
1674			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1675			mopt->m_len += JUMBOOPTLEN;
1676		}
1677		optbuf[0] = IP6OPT_PADN;
1678		optbuf[1] = 1;
1679
1680		/*
1681		 * Adjust the header length according to the pad and
1682		 * the jumbo payload option.
1683		 */
1684		hbh = mtod(mopt, struct ip6_hbh *);
1685		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1686	}
1687
1688	/* fill in the option. */
1689	optbuf[2] = IP6OPT_JUMBO;
1690	optbuf[3] = 4;
1691	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1692	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1693
1694	/* finally, adjust the packet header length */
1695	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1696
1697	return (0);
1698#undef JUMBOOPTLEN
1699}
1700
1701/*
1702 * Insert fragment header and copy unfragmentable header portions.
1703 */
1704static int
1705ip6_insertfraghdr(m0, m, hlen, frghdrp)
1706	struct mbuf *m0, *m;
1707	int hlen;
1708	struct ip6_frag **frghdrp;
1709{
1710	struct mbuf *n, *mlast;
1711
1712	if (hlen > sizeof(struct ip6_hdr)) {
1713		n = m_copym(m0, sizeof(struct ip6_hdr),
1714		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1715		if (n == 0)
1716			return (ENOBUFS);
1717		m->m_next = n;
1718	} else
1719		n = m;
1720
1721	/* Search for the last mbuf of unfragmentable part. */
1722	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1723		;
1724
1725	if ((mlast->m_flags & M_EXT) == 0 &&
1726	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1727		/* use the trailing space of the last mbuf for the fragment hdr */
1728		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1729		    mlast->m_len);
1730		mlast->m_len += sizeof(struct ip6_frag);
1731		m->m_pkthdr.len += sizeof(struct ip6_frag);
1732	} else {
1733		/* allocate a new mbuf for the fragment header */
1734		struct mbuf *mfrg;
1735
1736		MGET(mfrg, M_DONTWAIT, MT_DATA);
1737		if (mfrg == 0)
1738			return (ENOBUFS);
1739		mfrg->m_len = sizeof(struct ip6_frag);
1740		*frghdrp = mtod(mfrg, struct ip6_frag *);
1741		mlast->m_next = mfrg;
1742	}
1743
1744	return (0);
1745}
1746
1747extern int load_ipfw(void);
1748static int
1749ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
1750    struct ifnet *ifp, struct in6_addr *dst, u_int32_t *mtup,
1751    int *alwaysfragp)
1752{
1753	u_int32_t mtu = 0;
1754	int alwaysfrag = 0;
1755	int error = 0;
1756
1757	if (ro_pmtu != ro) {
1758		/* The first hop and the final destination may differ. */
1759		struct sockaddr_in6 *sa6_dst =
1760		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1761		if (ro_pmtu->ro_rt &&
1762		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1763		     ro_pmtu->ro_rt->generation_id != route_generation ||
1764		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1765			rtfree(ro_pmtu->ro_rt);
1766			ro_pmtu->ro_rt = (struct rtentry *)NULL;
1767		}
1768		if (ro_pmtu->ro_rt == NULL) {
1769			bzero(sa6_dst, sizeof(*sa6_dst));
1770			sa6_dst->sin6_family = AF_INET6;
1771			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1772			sa6_dst->sin6_addr = *dst;
1773
1774			rtalloc_scoped((struct route *)ro_pmtu,
1775			    ifp != NULL ? ifp->if_index : IFSCOPE_NONE);
1776		}
1777	}
1778
1779
1780	if (ro_pmtu->ro_rt != NULL) {
1781		u_int32_t ifmtu;
1782
1783		lck_rw_lock_shared(nd_if_rwlock);
1784		/* Access without acquiring nd_ifinfo lock for performance */
1785		ifmtu = IN6_LINKMTU(ifp);
1786		lck_rw_done(nd_if_rwlock);
1787
1788		RT_LOCK_SPIN(ro_pmtu->ro_rt);
1789		mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1790		if (mtu > ifmtu || mtu == 0) {
1791			/*
1792			 * The MTU on the route is larger than the MTU on
1793			 * the interface!  This shouldn't happen, unless the
1794			 * MTU of the interface has been changed after the
1795			 * interface was brought up.  Change the MTU in the
1796			 * route to match the interface MTU (as long as the
1797			 * field isn't locked).
1798			 *
1799			 * if MTU on the route is 0, we need to fix the MTU.
1800			 * this case happens with path MTU discovery timeouts.
1801			 */
1802			 mtu = ifmtu;
1803			 if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
1804				 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
1805		}
1806		else if (mtu < IPV6_MMTU) {
1807			/*
1808			 * RFC2460 section 5, last paragraph:
1809			 * if we record ICMPv6 too big message with
1810			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1811			 * or smaller, with framgent header attached.
1812			 * (fragment header is needed regardless from the
1813			 * packet size, for translators to identify packets)
1814			 */
1815			alwaysfrag = 1;
1816			mtu = IPV6_MMTU;
1817		}
1818		RT_UNLOCK(ro_pmtu->ro_rt);
1819	} else {
1820		if (ifp) {
1821			lck_rw_lock_shared(nd_if_rwlock);
1822			/* Don't hold nd_ifinfo lock for performance */
1823			mtu = IN6_LINKMTU(ifp);
1824			lck_rw_done(nd_if_rwlock);
1825		} else
1826			error = EHOSTUNREACH; /* XXX */
1827	}
1828
1829	*mtup = mtu;
1830	if (alwaysfragp)
1831		*alwaysfragp = alwaysfrag;
1832	return (error);
1833}
1834
1835/*
1836 * IP6 socket option processing.
1837 */
1838int
1839ip6_ctloutput(so, sopt)
1840	struct socket *so;
1841	struct sockopt *sopt;
1842{
1843	int optdatalen, uproto;
1844	void *optdata;
1845	int privileged;
1846	struct inpcb *in6p = sotoinpcb(so);
1847	int error = 0, optval = 0;
1848	int level, op = -1, optname = 0;
1849	int optlen = 0;
1850	struct proc *p;
1851
1852	if (sopt == NULL) {
1853		panic("ip6_ctloutput: arg soopt is NULL");
1854		/* NOTREACHED */
1855	}
1856	level = sopt->sopt_level;
1857	op = sopt->sopt_dir;
1858	optname = sopt->sopt_name;
1859	optlen = sopt->sopt_valsize;
1860	p = sopt->sopt_p;
1861	uproto = (int)so->so_proto->pr_protocol;
1862
1863	privileged = (proc_suser(p) == 0);
1864
1865	if (level == IPPROTO_IPV6) {
1866		switch (op) {
1867
1868		case SOPT_SET:
1869			switch (optname) {
1870			case IPV6_2292PKTOPTIONS:
1871			{
1872				struct mbuf *m;
1873
1874				error = soopt_getm(sopt, &m); /* XXX */
1875				if (error != 0)
1876					break;
1877				error = soopt_mcopyin(sopt, m); /* XXX */
1878				if (error != 0)
1879					break;
1880				error = ip6_pcbopts(&in6p->in6p_outputopts,
1881						    m, so, sopt);
1882				m_freem(m); /* XXX */
1883				break;
1884			}
1885
1886			/*
1887			 * Use of some Hop-by-Hop options or some
1888			 * Destination options, might require special
1889			 * privilege.  That is, normal applications
1890			 * (without special privilege) might be forbidden
1891			 * from setting certain options in outgoing packets,
1892			 * and might never see certain options in received
1893			 * packets. [RFC 2292 Section 6]
1894			 * KAME specific note:
1895			 *  KAME prevents non-privileged users from sending or
1896			 *  receiving ANY hbh/dst options in order to avoid
1897			 *  overhead of parsing options in the kernel.
1898			 */
1899			case IPV6_RECVHOPOPTS:
1900			case IPV6_RECVDSTOPTS:
1901			case IPV6_RECVRTHDRDSTOPTS:
1902					if (!privileged)
1903						break;
1904				/* FALLTHROUGH */
1905			case IPV6_UNICAST_HOPS:
1906			case IPV6_HOPLIMIT:
1907
1908			case IPV6_RECVPKTINFO:
1909			case IPV6_RECVHOPLIMIT:
1910			case IPV6_RECVRTHDR:
1911			case IPV6_RECVPATHMTU:
1912			case IPV6_RECVTCLASS:
1913			case IPV6_V6ONLY:
1914			case IPV6_AUTOFLOWLABEL:
1915				if (optlen != sizeof(int)) {
1916					error = EINVAL;
1917					break;
1918				}
1919				error = sooptcopyin(sopt, &optval,
1920					sizeof optval, sizeof optval);
1921				if (error)
1922					break;
1923				switch (optname) {
1924
1925				case IPV6_UNICAST_HOPS:
1926					if (optval < -1 || optval >= 256)
1927						error = EINVAL;
1928					else {
1929						/* -1 = kernel default */
1930						in6p->in6p_hops = optval;
1931						if ((in6p->inp_vflag &
1932						     INP_IPV4) != 0)
1933							in6p->inp_ip_ttl = optval;
1934					}
1935					break;
1936#define OPTSET(bit) \
1937do { \
1938	if (optval) \
1939		in6p->inp_flags |= (bit); \
1940	else \
1941		in6p->inp_flags &= ~(bit); \
1942} while (/*CONSTCOND*/ 0)
1943#define OPTSET2292(bit) \
1944do { \
1945	in6p->inp_flags |= IN6P_RFC2292; \
1946	if (optval) \
1947		in6p->inp_flags |= (bit); \
1948	else \
1949		in6p->inp_flags &= ~(bit); \
1950} while (/*CONSTCOND*/ 0)
1951#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
1952
1953				case IPV6_RECVPKTINFO:
1954					/* cannot mix with RFC2292 */
1955					if (OPTBIT(IN6P_RFC2292)) {
1956						error = EINVAL;
1957						break;
1958					}
1959					OPTSET(IN6P_PKTINFO);
1960					break;
1961
1962				case IPV6_HOPLIMIT:
1963				{
1964					struct ip6_pktopts **optp;
1965
1966					/* cannot mix with RFC2292 */
1967					if (OPTBIT(IN6P_RFC2292)) {
1968						error = EINVAL;
1969						break;
1970					}
1971					optp = &in6p->in6p_outputopts;
1972					error = ip6_pcbopt(IPV6_HOPLIMIT,
1973					    (u_char *)&optval, sizeof(optval),
1974					    optp, uproto);
1975					break;
1976				}
1977
1978				case IPV6_RECVHOPLIMIT:
1979					/* cannot mix with RFC2292 */
1980					if (OPTBIT(IN6P_RFC2292)) {
1981						error = EINVAL;
1982						break;
1983					}
1984					OPTSET(IN6P_HOPLIMIT);
1985					break;
1986
1987				case IPV6_RECVHOPOPTS:
1988					/* cannot mix with RFC2292 */
1989					if (OPTBIT(IN6P_RFC2292)) {
1990						error = EINVAL;
1991						break;
1992					}
1993					OPTSET(IN6P_HOPOPTS);
1994					break;
1995
1996				case IPV6_RECVDSTOPTS:
1997					/* cannot mix with RFC2292 */
1998					if (OPTBIT(IN6P_RFC2292)) {
1999						error = EINVAL;
2000						break;
2001					}
2002					OPTSET(IN6P_DSTOPTS);
2003					break;
2004
2005				case IPV6_RECVRTHDRDSTOPTS:
2006					/* cannot mix with RFC2292 */
2007					if (OPTBIT(IN6P_RFC2292)) {
2008						error = EINVAL;
2009						break;
2010					}
2011					OPTSET(IN6P_RTHDRDSTOPTS);
2012					break;
2013
2014				case IPV6_RECVRTHDR:
2015					/* cannot mix with RFC2292 */
2016					if (OPTBIT(IN6P_RFC2292)) {
2017						error = EINVAL;
2018						break;
2019					}
2020					OPTSET(IN6P_RTHDR);
2021					break;
2022
2023				case IPV6_RECVPATHMTU:
2024					/*
2025					 * We ignore this option for TCP
2026					 * sockets.
2027					 * (RFC3542 leaves this case
2028					 * unspecified.)
2029					 */
2030					if (uproto != IPPROTO_TCP)
2031						OPTSET(IN6P_MTU);
2032					break;
2033
2034				case IPV6_V6ONLY:
2035					/*
2036					 * make setsockopt(IPV6_V6ONLY)
2037					 * available only prior to bind(2).
2038					 * see ipng mailing list, Jun 22 2001.
2039					 */
2040					if (in6p->inp_lport ||
2041					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
2042						error = EINVAL;
2043						break;
2044					}
2045					OPTSET(IN6P_IPV6_V6ONLY);
2046					if (optval)
2047						in6p->inp_vflag &= ~INP_IPV4;
2048					else
2049						in6p->inp_vflag |= INP_IPV4;
2050					break;
2051				case IPV6_RECVTCLASS:
2052					/* we can mix with RFC2292 */
2053					OPTSET(IN6P_TCLASS);
2054					break;
2055				case IPV6_AUTOFLOWLABEL:
2056					OPTSET(IN6P_AUTOFLOWLABEL);
2057					break;
2058
2059				}
2060				break;
2061
2062			case IPV6_TCLASS:
2063			case IPV6_DONTFRAG:
2064			case IPV6_USE_MIN_MTU:
2065			case IPV6_PREFER_TEMPADDR:
2066				if (optlen != sizeof(optval)) {
2067					error = EINVAL;
2068					break;
2069				}
2070				error = sooptcopyin(sopt, &optval,
2071					sizeof optval, sizeof optval);
2072				if (error)
2073					break;
2074				{
2075					struct ip6_pktopts **optp;
2076					optp = &in6p->in6p_outputopts;
2077					error = ip6_pcbopt(optname,
2078					    (u_char *)&optval, sizeof(optval),
2079					    optp, uproto);
2080					break;
2081				}
2082
2083			case IPV6_2292PKTINFO:
2084			case IPV6_2292HOPLIMIT:
2085			case IPV6_2292HOPOPTS:
2086			case IPV6_2292DSTOPTS:
2087			case IPV6_2292RTHDR:
2088				/* RFC 2292 */
2089				if (optlen != sizeof(int)) {
2090					error = EINVAL;
2091					break;
2092				}
2093				error = sooptcopyin(sopt, &optval,
2094					sizeof optval, sizeof optval);
2095				if (error)
2096					break;
2097				switch (optname) {
2098				case IPV6_2292PKTINFO:
2099					OPTSET2292(IN6P_PKTINFO);
2100					break;
2101				case IPV6_2292HOPLIMIT:
2102					OPTSET2292(IN6P_HOPLIMIT);
2103					break;
2104				case IPV6_2292HOPOPTS:
2105					/*
2106					 * Check super-user privilege.
2107					 * See comments for IPV6_RECVHOPOPTS.
2108					 */
2109					if (!privileged)
2110						return(EPERM);
2111					OPTSET2292(IN6P_HOPOPTS);
2112					break;
2113				case IPV6_2292DSTOPTS:
2114					if (!privileged)
2115						return(EPERM);
2116					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
2117					break;
2118				case IPV6_2292RTHDR:
2119					OPTSET2292(IN6P_RTHDR);
2120					break;
2121				}
2122				break;
2123			case IPV6_3542PKTINFO:
2124			case IPV6_3542HOPOPTS:
2125			case IPV6_3542RTHDR:
2126			case IPV6_3542DSTOPTS:
2127			case IPV6_RTHDRDSTOPTS:
2128			case IPV6_3542NEXTHOP:
2129			{
2130				struct ip6_pktopts **optp;
2131				/* new advanced API (RFC3542) */
2132				struct mbuf *m;
2133
2134				/* cannot mix with RFC2292 */
2135				if (OPTBIT(IN6P_RFC2292)) {
2136					error = EINVAL;
2137					break;
2138				}
2139				error = soopt_getm(sopt, &m);
2140				if (error != 0)
2141					break;
2142				error = soopt_mcopyin(sopt, m);
2143				if (error) {
2144					m_freem(m);
2145					break;
2146				}
2147				optp = &in6p->in6p_outputopts;
2148				error = ip6_pcbopt(optname, mtod(m, u_char *),
2149					m->m_len, optp, uproto);
2150				m_freem(m);
2151				break;
2152			}
2153#undef OPTSET
2154
2155			case IPV6_MULTICAST_IF:
2156			case IPV6_MULTICAST_HOPS:
2157			case IPV6_MULTICAST_LOOP:
2158			case IPV6_JOIN_GROUP:
2159			case IPV6_LEAVE_GROUP:
2160			case IPV6_MSFILTER:
2161			case MCAST_BLOCK_SOURCE:
2162			case MCAST_UNBLOCK_SOURCE:
2163			case MCAST_JOIN_GROUP:
2164			case MCAST_LEAVE_GROUP:
2165			case MCAST_JOIN_SOURCE_GROUP:
2166			case MCAST_LEAVE_SOURCE_GROUP:
2167				error = ip6_setmoptions(in6p, sopt);
2168				break;
2169
2170			case IPV6_PORTRANGE:
2171				error = sooptcopyin(sopt, &optval,
2172				    sizeof optval, sizeof optval);
2173				if (error)
2174					break;
2175
2176				switch (optval) {
2177				case IPV6_PORTRANGE_DEFAULT:
2178					in6p->inp_flags &= ~(INP_LOWPORT);
2179					in6p->inp_flags &= ~(INP_HIGHPORT);
2180					break;
2181
2182				case IPV6_PORTRANGE_HIGH:
2183					in6p->inp_flags &= ~(INP_LOWPORT);
2184					in6p->inp_flags |= INP_HIGHPORT;
2185					break;
2186
2187				case IPV6_PORTRANGE_LOW:
2188					in6p->inp_flags &= ~(INP_HIGHPORT);
2189					in6p->inp_flags |= INP_LOWPORT;
2190					break;
2191
2192				default:
2193					error = EINVAL;
2194					break;
2195				}
2196				break;
2197
2198#if IPSEC
2199			case IPV6_IPSEC_POLICY:
2200			    {
2201				caddr_t req = NULL;
2202				size_t len = 0;
2203				struct mbuf *m;
2204
2205				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
2206					break;
2207				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
2208					break;
2209				if (m) {
2210					req = mtod(m, caddr_t);
2211					len = m->m_len;
2212				}
2213				error = ipsec6_set_policy(in6p, optname, req,
2214				                          len, privileged);
2215				m_freem(m);
2216			    }
2217				break;
2218#endif /* KAME IPSEC */
2219
2220#if IPFIREWALL
2221			case IPV6_FW_ADD:
2222			case IPV6_FW_DEL:
2223			case IPV6_FW_FLUSH:
2224			case IPV6_FW_ZERO:
2225				{
2226				if (ip6_fw_ctl_ptr == NULL)
2227					load_ip6fw();
2228				if (ip6_fw_ctl_ptr != NULL)
2229					error = (*ip6_fw_ctl_ptr)(sopt);
2230				else
2231					return ENOPROTOOPT;
2232				}
2233				break;
2234#endif /* IPFIREWALL */
2235
2236			/*
2237			 * IPv6 variant of IP_BOUND_IF; for details see
2238			 * comments on IP_BOUND_IF in ip_ctloutput().
2239			 */
2240			case IPV6_BOUND_IF:
2241				/* This option is settable only on IPv6 */
2242				if (!(in6p->inp_vflag & INP_IPV6)) {
2243					error = EINVAL;
2244					break;
2245				}
2246
2247				error = sooptcopyin(sopt, &optval,
2248				    sizeof (optval), sizeof (optval));
2249
2250				if (error)
2251					break;
2252
2253				error = inp_bindif(in6p, optval);
2254				break;
2255
2256			case IPV6_NO_IFT_CELLULAR:
2257				/* This option is settable only for IPv6 */
2258				if (!(in6p->inp_vflag & INP_IPV6)) {
2259					error = EINVAL;
2260					break;
2261				}
2262
2263				error = sooptcopyin(sopt, &optval,
2264				    sizeof (optval), sizeof (optval));
2265
2266				if (error)
2267					break;
2268
2269				error = inp_nocellular(in6p, optval);
2270				break;
2271
2272			case IPV6_OUT_IF:
2273				/* This option is not settable */
2274				error = EINVAL;
2275				break;
2276
2277			default:
2278				error = ENOPROTOOPT;
2279				break;
2280			}
2281			break;
2282
2283		case SOPT_GET:
2284			switch (optname) {
2285
2286			case IPV6_2292PKTOPTIONS:
2287				/*
2288				 * RFC3542 (effectively) deprecated the
2289				 * semantics of the 2292-style pktoptions.
2290				 * Since it was not reliable in nature (i.e.,
2291				 * applications had to expect the lack of some
2292				 * information after all), it would make sense
2293				 * to simplify this part by always returning
2294				 * empty data.
2295				 */
2296				sopt->sopt_valsize = 0;
2297				break;
2298
2299			case IPV6_RECVHOPOPTS:
2300			case IPV6_RECVDSTOPTS:
2301			case IPV6_RECVRTHDRDSTOPTS:
2302			case IPV6_UNICAST_HOPS:
2303			case IPV6_RECVPKTINFO:
2304			case IPV6_RECVHOPLIMIT:
2305			case IPV6_RECVRTHDR:
2306			case IPV6_RECVPATHMTU:
2307
2308			case IPV6_V6ONLY:
2309			case IPV6_PORTRANGE:
2310			case IPV6_RECVTCLASS:
2311			case IPV6_AUTOFLOWLABEL:
2312				switch (optname) {
2313
2314				case IPV6_RECVHOPOPTS:
2315					optval = OPTBIT(IN6P_HOPOPTS);
2316					break;
2317
2318				case IPV6_RECVDSTOPTS:
2319					optval = OPTBIT(IN6P_DSTOPTS);
2320					break;
2321
2322				case IPV6_RECVRTHDRDSTOPTS:
2323					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
2324					break;
2325
2326				case IPV6_UNICAST_HOPS:
2327					optval = in6p->in6p_hops;
2328					break;
2329
2330				case IPV6_RECVPKTINFO:
2331					optval = OPTBIT(IN6P_PKTINFO);
2332					break;
2333
2334				case IPV6_RECVHOPLIMIT:
2335					optval = OPTBIT(IN6P_HOPLIMIT);
2336					break;
2337
2338				case IPV6_RECVRTHDR:
2339					optval = OPTBIT(IN6P_RTHDR);
2340					break;
2341
2342				case IPV6_RECVPATHMTU:
2343					optval = OPTBIT(IN6P_MTU);
2344					break;
2345
2346				case IPV6_V6ONLY:
2347					optval = OPTBIT(IN6P_IPV6_V6ONLY);
2348					break;
2349
2350				case IPV6_PORTRANGE:
2351				    {
2352					int flags;
2353					flags = in6p->inp_flags;
2354					if (flags & INP_HIGHPORT)
2355						optval = IPV6_PORTRANGE_HIGH;
2356					else if (flags & INP_LOWPORT)
2357						optval = IPV6_PORTRANGE_LOW;
2358					else
2359						optval = 0;
2360					break;
2361				    }
2362				case IPV6_RECVTCLASS:
2363					optval = OPTBIT(IN6P_TCLASS);
2364					break;
2365
2366				case IPV6_AUTOFLOWLABEL:
2367					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2368					break;
2369				}
2370				if (error)
2371					break;
2372				error = sooptcopyout(sopt, &optval,
2373					sizeof optval);
2374				break;
2375
2376			case IPV6_PATHMTU:
2377			{
2378				u_int32_t pmtu = 0;
2379				struct ip6_mtuinfo mtuinfo;
2380				struct route_in6 sro;
2381
2382				bzero(&sro, sizeof(sro));
2383
2384				if (!(so->so_state & SS_ISCONNECTED))
2385					return (ENOTCONN);
2386				/*
				 * XXX: we do not consider the case of source
2388				 * routing, or optional information to specify
2389				 * the outgoing interface.
2390				 */
2391				error = ip6_getpmtu(&sro, NULL, NULL,
2392				    &in6p->in6p_faddr, &pmtu, NULL);
2393				if (sro.ro_rt)
2394					rtfree(sro.ro_rt);
2395				if (error)
2396					break;
2397				if (pmtu > IPV6_MAXPACKET)
2398					pmtu = IPV6_MAXPACKET;
2399
2400				bzero(&mtuinfo, sizeof(mtuinfo));
2401				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2402				optdata = (void *)&mtuinfo;
2403				optdatalen = sizeof(mtuinfo);
2404				error = sooptcopyout(sopt, optdata,
2405				    optdatalen);
2406				break;
2407			}
2408
2409			case IPV6_2292PKTINFO:
2410			case IPV6_2292HOPLIMIT:
2411			case IPV6_2292HOPOPTS:
2412			case IPV6_2292RTHDR:
2413			case IPV6_2292DSTOPTS:
2414				switch (optname) {
2415				case IPV6_2292PKTINFO:
2416					optval = OPTBIT(IN6P_PKTINFO);
2417					break;
2418				case IPV6_2292HOPLIMIT:
2419					optval = OPTBIT(IN6P_HOPLIMIT);
2420					break;
2421				case IPV6_2292HOPOPTS:
2422					optval = OPTBIT(IN6P_HOPOPTS);
2423					break;
2424				case IPV6_2292RTHDR:
2425					optval = OPTBIT(IN6P_RTHDR);
2426					break;
2427				case IPV6_2292DSTOPTS:
2428					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2429					break;
2430				}
2431				error = sooptcopyout(sopt, &optval,
2432				    sizeof optval);
2433				break;
2434			case IPV6_PKTINFO:
2435			case IPV6_HOPOPTS:
2436			case IPV6_RTHDR:
2437			case IPV6_DSTOPTS:
2438			case IPV6_RTHDRDSTOPTS:
2439			case IPV6_NEXTHOP:
2440			case IPV6_TCLASS:
2441			case IPV6_DONTFRAG:
2442			case IPV6_USE_MIN_MTU:
2443			case IPV6_PREFER_TEMPADDR:
2444				error = ip6_getpcbopt(in6p->in6p_outputopts,
2445				    optname, sopt);
2446				break;
2447
2448			case IPV6_MULTICAST_IF:
2449			case IPV6_MULTICAST_HOPS:
2450			case IPV6_MULTICAST_LOOP:
2451			case IPV6_MSFILTER:
2452				error = ip6_getmoptions(in6p, sopt);
2453				break;
2454
2455#if IPSEC
2456			case IPV6_IPSEC_POLICY:
2457			  {
2458				caddr_t req = NULL;
2459				size_t len = 0;
2460				struct mbuf *m = NULL;
2461				struct mbuf **mp = &m;
2462
2463				error = soopt_getm(sopt, &m); /* XXX */
2464				if (error != 0)
2465					break;
2466				error = soopt_mcopyin(sopt, m); /* XXX */
2467				if (error != 0)
2468					break;
2469				if (m) {
2470					req = mtod(m, caddr_t);
2471					len = m->m_len;
2472				}
2473				error = ipsec6_get_policy(in6p, req, len, mp);
2474				if (error == 0)
2475					error = soopt_mcopyout(sopt, m); /*XXX*/
2476				if (error == 0 && m)
2477					m_freem(m);
2478				break;
2479			  }
2480#endif /* KAME IPSEC */
2481
2482#if IPFIREWALL
2483			case IPV6_FW_GET:
2484				{
2485				if (ip6_fw_ctl_ptr == NULL)
2486					load_ip6fw();
2487				if (ip6_fw_ctl_ptr != NULL)
2488					error = (*ip6_fw_ctl_ptr)(sopt);
2489				else
2490					return ENOPROTOOPT;
2491				}
2492				break;
2493#endif /* IPFIREWALL */
2494
2495			case IPV6_BOUND_IF:
2496				if (in6p->inp_flags & INP_BOUND_IF)
2497					optval = in6p->inp_boundifp->if_index;
2498				error = sooptcopyout(sopt, &optval,
2499				    sizeof (optval));
2500				break;
2501
2502			case IPV6_NO_IFT_CELLULAR:
2503				optval = (in6p->inp_flags & INP_NO_IFT_CELLULAR)
2504				    ? 1 : 0;
2505				error = sooptcopyout(sopt, &optval,
2506				    sizeof (optval));
2507				break;
2508
2509			case IPV6_OUT_IF:
2510				optval = (in6p->in6p_last_outifp != NULL) ?
2511				    in6p->in6p_last_outifp->if_index : 0;
2512				error = sooptcopyout(sopt, &optval,
2513				    sizeof (optval));
2514				break;
2515
2516			default:
2517				error = ENOPROTOOPT;
2518				break;
2519			}
2520			break;
2521		}
2522	} else {
2523		error = EINVAL;
2524	}
2525	return(error);
2526}
2527
2528int
2529ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
2530{
2531	int error = 0, optval, optlen;
2532	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2533	struct inpcb *in6p = sotoinpcb(so);
2534	int level, op, optname;
2535
2536	level = sopt->sopt_level;
2537	op = sopt->sopt_dir;
2538	optname = sopt->sopt_name;
2539	optlen = sopt->sopt_valsize;
2540
2541	if (level != IPPROTO_IPV6) {
2542		return (EINVAL);
2543	}
2544
2545	switch (optname) {
2546	case IPV6_CHECKSUM:
2547		/*
2548		 * For ICMPv6 sockets, no modification allowed for checksum
2549		 * offset, permit "no change" values to help existing apps.
2550		 *
2551		 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2552		 * for an ICMPv6 socket will fail."
2553		 * The current behavior does not meet RFC3542.
2554		 */
2555		switch (op) {
2556		case SOPT_SET:
2557			if (optlen != sizeof(int)) {
2558				error = EINVAL;
2559				break;
2560			}
2561			error = sooptcopyin(sopt, &optval, sizeof(optval),
2562					    sizeof(optval));
2563			if (error)
2564				break;
2565			if ((optval % 2) != 0) {
2566				/* the API assumes even offset values */
2567				error = EINVAL;
2568			} else if (so->so_proto->pr_protocol ==
2569			    IPPROTO_ICMPV6) {
2570				if (optval != icmp6off)
2571					error = EINVAL;
2572			} else
2573				in6p->in6p_cksum = optval;
2574			break;
2575
2576		case SOPT_GET:
2577			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2578				optval = icmp6off;
2579			else
2580				optval = in6p->in6p_cksum;
2581
2582			error = sooptcopyout(sopt, &optval, sizeof(optval));
2583			break;
2584
2585		default:
2586			error = EINVAL;
2587			break;
2588		}
2589		break;
2590
2591	default:
2592		error = ENOPROTOOPT;
2593		break;
2594	}
2595
2596	return (error);
2597}
2598
2599/*
2600 * Set up IP6 options in pcb for insertion in output packets or
2601 * specifying behavior of outgoing packets.
2602 */
2603static int
2604ip6_pcbopts(
2605	struct ip6_pktopts **pktopt,
2606	struct mbuf *m,
2607	__unused struct socket *so,
2608	__unused struct sockopt *sopt)
2609{
2610	struct ip6_pktopts *opt = *pktopt;
2611	int error = 0;
2612
2613	/* turn off any old options. */
2614	if (opt) {
2615#if DIAGNOSTIC
2616		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2617		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2618		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2619			printf("ip6_pcbopts: all specified options are cleared.\n");
2620#endif
2621		ip6_clearpktopts(opt, -1);
2622	} else {
2623		opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
2624		if (opt == NULL)
2625			return ENOBUFS;
2626	}
2627	*pktopt = NULL;
2628
2629	if (!m || m->m_len == 0) {
2630		/*
2631		 * Only turning off any previous options, regardless of
2632		 * whether the opt is just created or given.
2633		 */
2634		if (opt)
2635			FREE(opt, M_IP6OPT);
2636		return(0);
2637	}
2638
2639	/*  set options specified by user. */
2640	if ((error = ip6_setpktopts(m, opt, NULL, so->so_proto->pr_protocol)) != 0) {
2641		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2642		FREE(opt, M_IP6OPT);
2643		return(error);
2644	}
2645	*pktopt = opt;
2646	return(0);
2647}
2648
2649/*
2650 * initialize ip6_pktopts.  beware that there are non-zero default values in
2651 * the struct.
2652 */
2653void
2654ip6_initpktopts(struct ip6_pktopts *opt)
2655{
2656
2657	bzero(opt, sizeof(*opt));
2658	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2659	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2660	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2661	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2662}
2663
2664static int
2665ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2666    int uproto)
2667{
2668	struct ip6_pktopts *opt;
2669
2670	opt = *pktopt;
2671	if (opt == NULL) {
2672		opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
2673		if (opt == NULL)
2674			return(ENOBUFS);
2675		ip6_initpktopts(opt);
2676		*pktopt = opt;
2677	}
2678
2679	return (ip6_setpktopt(optname, buf, len, opt, 1, 0, uproto));
2680}
2681
2682static int
2683ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2684{
2685	void *optdata = NULL;
2686	int optdatalen = 0;
2687	struct ip6_ext *ip6e;
2688	int error = 0;
2689	struct in6_pktinfo null_pktinfo;
2690	int deftclass = 0, on;
2691	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2692	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2693
2694
2695	switch (optname) {
2696	case IPV6_PKTINFO:
2697		if (pktopt && pktopt->ip6po_pktinfo)
2698			optdata = (void *)pktopt->ip6po_pktinfo;
2699		else {
2700			/* XXX: we don't have to do this every time... */
2701			bzero(&null_pktinfo, sizeof(null_pktinfo));
2702			optdata = (void *)&null_pktinfo;
2703		}
2704		optdatalen = sizeof(struct in6_pktinfo);
2705		break;
2706	case IPV6_TCLASS:
2707		if (pktopt && pktopt->ip6po_tclass >= 0)
2708			optdata = (void *)&pktopt->ip6po_tclass;
2709		else
2710			optdata = (void *)&deftclass;
2711		optdatalen = sizeof(int);
2712		break;
2713	case IPV6_HOPOPTS:
2714		if (pktopt && pktopt->ip6po_hbh) {
2715			optdata = (void *)pktopt->ip6po_hbh;
2716			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2717			optdatalen = (ip6e->ip6e_len + 1) << 3;
2718		}
2719		break;
2720	case IPV6_RTHDR:
2721		if (pktopt && pktopt->ip6po_rthdr) {
2722			optdata = (void *)pktopt->ip6po_rthdr;
2723			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2724			optdatalen = (ip6e->ip6e_len + 1) << 3;
2725		}
2726		break;
2727	case IPV6_RTHDRDSTOPTS:
2728		if (pktopt && pktopt->ip6po_dest1) {
2729			optdata = (void *)pktopt->ip6po_dest1;
2730			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2731			optdatalen = (ip6e->ip6e_len + 1) << 3;
2732		}
2733		break;
2734	case IPV6_DSTOPTS:
2735		if (pktopt && pktopt->ip6po_dest2) {
2736			optdata = (void *)pktopt->ip6po_dest2;
2737			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2738			optdatalen = (ip6e->ip6e_len + 1) << 3;
2739		}
2740		break;
2741	case IPV6_NEXTHOP:
2742		if (pktopt && pktopt->ip6po_nexthop) {
2743			optdata = (void *)pktopt->ip6po_nexthop;
2744			optdatalen = pktopt->ip6po_nexthop->sa_len;
2745		}
2746		break;
2747	case IPV6_USE_MIN_MTU:
2748		if (pktopt)
2749			optdata = (void *)&pktopt->ip6po_minmtu;
2750		else
2751			optdata = (void *)&defminmtu;
2752		optdatalen = sizeof(int);
2753		break;
2754	case IPV6_DONTFRAG:
2755		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2756			on = 1;
2757		else
2758			on = 0;
2759		optdata = (void *)&on;
2760		optdatalen = sizeof(on);
2761		break;
2762	case IPV6_PREFER_TEMPADDR:
2763		if (pktopt)
2764			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2765		else
2766			optdata = (void *)&defpreftemp;
2767		optdatalen = sizeof(int);
2768		break;
2769	default:		/* should not happen */
2770#ifdef DIAGNOSTIC
2771		panic("ip6_getpcbopt: unexpected option\n");
2772#endif
2773		return (ENOPROTOOPT);
2774	}
2775
2776	error = sooptcopyout(sopt, optdata, optdatalen);
2777
2778	return (error);
2779}
2780
2781void
2782ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2783{
2784	if (pktopt == NULL)
2785		return;
2786
2787	if (optname == -1 || optname == IPV6_PKTINFO) {
2788		if (pktopt->ip6po_pktinfo)
2789			FREE(pktopt->ip6po_pktinfo, M_IP6OPT);
2790		pktopt->ip6po_pktinfo = NULL;
2791	}
2792	if (optname == -1 || optname == IPV6_HOPLIMIT)
2793		pktopt->ip6po_hlim = -1;
2794	if (optname == -1 || optname == IPV6_TCLASS)
2795		pktopt->ip6po_tclass = -1;
2796	if (optname == -1 || optname == IPV6_NEXTHOP) {
2797		if (pktopt->ip6po_nextroute.ro_rt) {
2798			rtfree(pktopt->ip6po_nextroute.ro_rt);
2799			pktopt->ip6po_nextroute.ro_rt = NULL;
2800		}
2801		if (pktopt->ip6po_nexthop)
2802			FREE(pktopt->ip6po_nexthop, M_IP6OPT);
2803		pktopt->ip6po_nexthop = NULL;
2804	}
2805	if (optname == -1 || optname == IPV6_HOPOPTS) {
2806		if (pktopt->ip6po_hbh)
2807			FREE(pktopt->ip6po_hbh, M_IP6OPT);
2808		pktopt->ip6po_hbh = NULL;
2809	}
2810	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2811		if (pktopt->ip6po_dest1)
2812			FREE(pktopt->ip6po_dest1, M_IP6OPT);
2813		pktopt->ip6po_dest1 = NULL;
2814	}
2815	if (optname == -1 || optname == IPV6_RTHDR) {
2816		if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2817			FREE(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2818		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2819		if (pktopt->ip6po_route.ro_rt) {
2820			rtfree(pktopt->ip6po_route.ro_rt);
2821			pktopt->ip6po_route.ro_rt = NULL;
2822		}
2823	}
2824	if (optname == -1 || optname == IPV6_DSTOPTS) {
2825		if (pktopt->ip6po_dest2)
2826			FREE(pktopt->ip6po_dest2, M_IP6OPT);
2827		pktopt->ip6po_dest2 = NULL;
2828	}
2829}
2830
2831#define PKTOPT_EXTHDRCPY(type) \
2832do {\
2833	if (src->type) {\
2834		int hlen =\
2835			(((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2836		dst->type = _MALLOC(hlen, M_IP6OPT, canwait);\
2837		if (dst->type == NULL && canwait == M_NOWAIT)\
2838			goto bad;\
2839		bcopy(src->type, dst->type, hlen);\
2840	}\
2841} while (0)
2842
2843static int
2844copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2845{
2846	if (dst == NULL || src == NULL)  {
2847		printf("copypktopts: invalid argument\n");
2848		return (EINVAL);
2849	}
2850
2851	dst->ip6po_hlim = src->ip6po_hlim;
2852	dst->ip6po_tclass = src->ip6po_tclass;
2853	dst->ip6po_flags = src->ip6po_flags;
2854	if (src->ip6po_pktinfo) {
2855		dst->ip6po_pktinfo = _MALLOC(sizeof(*dst->ip6po_pktinfo),
2856					    M_IP6OPT, canwait);
2857		if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2858			goto bad;
2859		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2860	}
2861	if (src->ip6po_nexthop) {
2862		dst->ip6po_nexthop = _MALLOC(src->ip6po_nexthop->sa_len,
2863					    M_IP6OPT, canwait);
2864		if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
2865			goto bad;
2866		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2867		      src->ip6po_nexthop->sa_len);
2868	}
2869	PKTOPT_EXTHDRCPY(ip6po_hbh);
2870	PKTOPT_EXTHDRCPY(ip6po_dest1);
2871	PKTOPT_EXTHDRCPY(ip6po_dest2);
2872	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2873	return (0);
2874
2875  bad:
2876	ip6_clearpktopts(dst, -1);
2877	return (ENOBUFS);
2878}
2879#undef PKTOPT_EXTHDRCPY
2880
2881struct ip6_pktopts *
2882ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2883{
2884	int error;
2885	struct ip6_pktopts *dst;
2886
2887	dst = _MALLOC(sizeof(*dst), M_IP6OPT, canwait);
2888	if (dst == NULL)
2889		return (NULL);
2890	ip6_initpktopts(dst);
2891
2892	if ((error = copypktopts(dst, src, canwait)) != 0) {
2893		FREE(dst, M_IP6OPT);
2894		return (NULL);
2895	}
2896
2897	return (dst);
2898}
2899
2900void
2901ip6_freepcbopts(struct ip6_pktopts *pktopt)
2902{
2903	if (pktopt == NULL)
2904		return;
2905
2906	ip6_clearpktopts(pktopt, -1);
2907
2908	FREE(pktopt, M_IP6OPT);
2909}
2910
2911void
2912ip6_moptions_init(void)
2913{
2914	PE_parse_boot_argn("ifa_debug", &im6o_debug, sizeof (im6o_debug));
2915
2916	im6o_size = (im6o_debug == 0) ? sizeof (struct ip6_moptions) :
2917	    sizeof (struct ip6_moptions_dbg);
2918
2919	im6o_zone = zinit(im6o_size, IM6O_ZONE_MAX * im6o_size, 0,
2920	    IM6O_ZONE_NAME);
2921	if (im6o_zone == NULL) {
2922		panic("%s: failed allocating %s", __func__, IM6O_ZONE_NAME);
2923		/* NOTREACHED */
2924	}
2925	zone_change(im6o_zone, Z_EXPAND, TRUE);
2926}
2927
2928void
2929im6o_addref(struct ip6_moptions *im6o, int locked)
2930{
2931	if (!locked)
2932		IM6O_LOCK(im6o);
2933	else
2934		IM6O_LOCK_ASSERT_HELD(im6o);
2935
2936	if (++im6o->im6o_refcnt == 0) {
2937		panic("%s: im6o %p wraparound refcnt\n", __func__, im6o);
2938		/* NOTREACHED */
2939	} else if (im6o->im6o_trace != NULL) {
2940		(*im6o->im6o_trace)(im6o, TRUE);
2941	}
2942
2943	if (!locked)
2944		IM6O_UNLOCK(im6o);
2945}
2946
2947void
2948im6o_remref(struct ip6_moptions *im6o)
2949{
2950	int i;
2951
2952	IM6O_LOCK(im6o);
2953	if (im6o->im6o_refcnt == 0) {
2954		panic("%s: im6o %p negative refcnt", __func__, im6o);
2955		/* NOTREACHED */
2956	} else if (im6o->im6o_trace != NULL) {
2957		(*im6o->im6o_trace)(im6o, FALSE);
2958	}
2959
2960	--im6o->im6o_refcnt;
2961	if (im6o->im6o_refcnt > 0) {
2962		IM6O_UNLOCK(im6o);
2963		return;
2964	}
2965
2966	for (i = 0; i < im6o->im6o_num_memberships; ++i) {
2967		struct in6_mfilter *imf;
2968
2969		imf = im6o->im6o_mfilters ? &im6o->im6o_mfilters[i] : NULL;
2970		if (imf != NULL)
2971			im6f_leave(imf);
2972
2973		(void) in6_mc_leave(im6o->im6o_membership[i], imf);
2974
2975		if (imf != NULL)
2976			im6f_purge(imf);
2977
2978		IN6M_REMREF(im6o->im6o_membership[i]);
2979		im6o->im6o_membership[i] = NULL;
2980	}
2981	im6o->im6o_num_memberships = 0;
2982	if (im6o->im6o_mfilters != NULL) {
2983		FREE(im6o->im6o_mfilters, M_IN6MFILTER);
2984		im6o->im6o_mfilters = NULL;
2985	}
2986	if (im6o->im6o_membership != NULL) {
2987		FREE(im6o->im6o_membership, M_IP6MOPTS);
2988		im6o->im6o_membership = NULL;
2989	}
2990	IM6O_UNLOCK(im6o);
2991
2992	lck_mtx_destroy(&im6o->im6o_lock, ifa_mtx_grp);
2993
2994	if (!(im6o->im6o_debug & IFD_ALLOC)) {
2995		panic("%s: im6o %p cannot be freed", __func__, im6o);
2996		/* NOTREACHED */
2997	}
2998	zfree(im6o_zone, im6o);
2999}
3000
3001static void
3002im6o_trace(struct ip6_moptions *im6o, int refhold)
3003{
3004	struct ip6_moptions_dbg *im6o_dbg = (struct ip6_moptions_dbg *)im6o;
3005	ctrace_t *tr;
3006	u_int32_t idx;
3007	u_int16_t *cnt;
3008
3009	if (!(im6o->im6o_debug & IFD_DEBUG)) {
3010		panic("%s: im6o %p has no debug structure", __func__, im6o);
3011		/* NOTREACHED */
3012	}
3013	if (refhold) {
3014		cnt = &im6o_dbg->im6o_refhold_cnt;
3015		tr = im6o_dbg->im6o_refhold;
3016	} else {
3017		cnt = &im6o_dbg->im6o_refrele_cnt;
3018		tr = im6o_dbg->im6o_refrele;
3019	}
3020
3021	idx = atomic_add_16_ov(cnt, 1) % IM6O_TRACE_HIST_SIZE;
3022	ctrace_record(&tr[idx]);
3023}
3024
3025struct ip6_moptions *
3026ip6_allocmoptions(int how)
3027{
3028	struct ip6_moptions *im6o;
3029
3030	im6o = (how == M_WAITOK) ?
3031	    zalloc(im6o_zone) : zalloc_noblock(im6o_zone);
3032	if (im6o != NULL) {
3033		bzero(im6o, im6o_size);
3034		lck_mtx_init(&im6o->im6o_lock, ifa_mtx_grp, ifa_mtx_attr);
3035		im6o->im6o_debug |= IFD_ALLOC;
3036		if (im6o_debug != 0) {
3037			im6o->im6o_debug |= IFD_DEBUG;
3038			im6o->im6o_trace = im6o_trace;
3039		}
3040		IM6O_ADDREF(im6o);
3041	}
3042
3043	return (im6o);
3044}
3045
3046/*
3047 * Set IPv6 outgoing packet options based on advanced API.
3048 */
3049int
3050ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
3051    struct ip6_pktopts *stickyopt, int uproto)
3052{
3053	struct cmsghdr *cm = 0;
3054
3055	if (control == NULL || opt == NULL)
3056		return (EINVAL);
3057
3058	ip6_initpktopts(opt);
3059	if (stickyopt) {
3060		int error;
3061
3062		/*
3063		 * If stickyopt is provided, make a local copy of the options
3064		 * for this particular packet, then override them by ancillary
3065		 * objects.
3066		 * XXX: copypktopts() does not copy the cached route to a next
3067		 * hop (if any).  This is not very good in terms of efficiency,
3068		 * but we can allow this since this option should be rarely
3069		 * used.
3070		 */
3071		if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
3072			return (error);
3073	}
3074
3075	/*
3076	 * XXX: Currently, we assume all the optional information is stored
3077	 * in a single mbuf.
3078	 */
3079	if (control->m_next)
3080		return (EINVAL);
3081
3082	if (control->m_len < CMSG_LEN(0))
3083		return (EINVAL);
3084
3085	for (cm = M_FIRST_CMSGHDR(control); cm; cm = M_NXT_CMSGHDR(control, cm)) {
3086		int error;
3087
3088		if (cm->cmsg_len < sizeof(struct cmsghdr) || cm->cmsg_len > control->m_len)
3089			return (EINVAL);
3090		if (cm->cmsg_level != IPPROTO_IPV6)
3091			continue;
3092
3093		error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
3094		    cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto);
3095		if (error)
3096			return (error);
3097	}
3098
3099	return (0);
3100}
3101/*
3102 * Set a particular packet option, as a sticky option or an ancillary data
3103 * item.  "len" can be 0 only when it's a sticky option.
3104 * We have 4 cases of combination of "sticky" and "cmsg":
3105 * "sticky=0, cmsg=0": impossible
3106 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
3107 * "sticky=1, cmsg=0": RFC3542 socket option
3108 * "sticky=1, cmsg=1": RFC2292 socket option
3109 */
3110static int
3111ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
3112    int sticky, int cmsg, int uproto)
3113{
3114	int minmtupolicy, preftemp;
3115	int error;
3116
3117	if (!sticky && !cmsg) {
3118#ifdef DIAGNOSTIC
3119		printf("ip6_setpktopt: impossible case\n");
3120#endif
3121		return (EINVAL);
3122	}
3123
3124	/*
3125	 * Caller must have ensured that the buffer is at least
3126	 * aligned on 32-bit boundary.
3127	 */
3128	VERIFY(IS_P2ALIGNED(buf, sizeof (u_int32_t)));
3129
3130	/*
3131	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3132	 * not be specified in the context of RFC3542.  Conversely,
3133	 * RFC3542 types should not be specified in the context of RFC2292.
3134	 */
3135	if (!cmsg) {
3136		switch (optname) {
3137		case IPV6_2292PKTINFO:
3138		case IPV6_2292HOPLIMIT:
3139		case IPV6_2292NEXTHOP:
3140		case IPV6_2292HOPOPTS:
3141		case IPV6_2292DSTOPTS:
3142		case IPV6_2292RTHDR:
3143		case IPV6_2292PKTOPTIONS:
3144			return (ENOPROTOOPT);
3145		}
3146	}
3147	if (sticky && cmsg) {
3148		switch (optname) {
3149		case IPV6_PKTINFO:
3150		case IPV6_HOPLIMIT:
3151		case IPV6_NEXTHOP:
3152		case IPV6_HOPOPTS:
3153		case IPV6_DSTOPTS:
3154		case IPV6_RTHDRDSTOPTS:
3155		case IPV6_RTHDR:
3156		case IPV6_USE_MIN_MTU:
3157		case IPV6_DONTFRAG:
3158		case IPV6_TCLASS:
3159		case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
3160			return (ENOPROTOOPT);
3161		}
3162	}
3163
3164	switch (optname) {
3165	case IPV6_2292PKTINFO:
3166	case IPV6_PKTINFO:
3167	{
3168		struct ifnet *ifp = NULL;
3169		struct in6_pktinfo *pktinfo;
3170
3171		if (len != sizeof(struct in6_pktinfo))
3172			return (EINVAL);
3173
3174		pktinfo = (struct in6_pktinfo *)(void *)buf;
3175
3176		/*
3177		 * An application can clear any sticky IPV6_PKTINFO option by
3178		 * doing a "regular" setsockopt with ipi6_addr being
3179		 * in6addr_any and ipi6_ifindex being zero.
3180		 * [RFC 3542, Section 6]
3181		 */
3182		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3183		    pktinfo->ipi6_ifindex == 0 &&
3184		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3185			ip6_clearpktopts(opt, optname);
3186			break;
3187		}
3188
3189		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3190		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3191			return (EINVAL);
3192		}
3193
3194		/* validate the interface index if specified. */
3195		ifnet_head_lock_shared();
3196
3197		if (pktinfo->ipi6_ifindex > if_index) {
3198			ifnet_head_done();
3199			return (ENXIO);
3200		}
3201
3202		if (pktinfo->ipi6_ifindex) {
3203			ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
3204			if (ifp == NULL) {
3205				ifnet_head_done();
3206				return (ENXIO);
3207			}
3208		}
3209
3210		ifnet_head_done();
3211
3212		/*
3213		 * We store the address anyway, and let in6_selectsrc()
3214		 * validate the specified address.  This is because ipi6_addr
3215		 * may not have enough information about its scope zone, and
3216		 * we may need additional information (such as outgoing
3217		 * interface or the scope zone of a destination address) to
3218		 * disambiguate the scope.
3219		 * XXX: the delay of the validation may confuse the
3220		 * application when it is used as a sticky option.
3221		 */
3222		if (opt->ip6po_pktinfo == NULL) {
3223			opt->ip6po_pktinfo = _MALLOC(sizeof(*pktinfo),
3224			    M_IP6OPT, M_NOWAIT);
3225			if (opt->ip6po_pktinfo == NULL)
3226				return (ENOBUFS);
3227		}
3228		bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
3229		break;
3230	}
3231
3232	case IPV6_2292HOPLIMIT:
3233	case IPV6_HOPLIMIT:
3234	{
3235		int *hlimp;
3236
3237		/*
3238		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3239		 * to simplify the ordering among hoplimit options.
3240		 */
3241		if (optname == IPV6_HOPLIMIT && sticky)
3242			return (ENOPROTOOPT);
3243
3244		if (len != sizeof(int))
3245			return (EINVAL);
3246		hlimp = (int *)(void *)buf;
3247		if (*hlimp < -1 || *hlimp > 255)
3248			return (EINVAL);
3249
3250		opt->ip6po_hlim = *hlimp;
3251		break;
3252	}
3253
3254	case IPV6_TCLASS:
3255	{
3256		int tclass;
3257
3258		if (len != sizeof(int))
3259			return (EINVAL);
3260		tclass = *(int *)(void *)buf;
3261		if (tclass < -1 || tclass > 255)
3262			return (EINVAL);
3263
3264		opt->ip6po_tclass = tclass;
3265		break;
3266	}
3267
3268	case IPV6_2292NEXTHOP:
3269	case IPV6_NEXTHOP:
3270		error = suser(kauth_cred_get(), 0);
3271		if (error)
3272			return (EACCES);
3273
3274		if (len == 0) {	/* just remove the option */
3275			ip6_clearpktopts(opt, IPV6_NEXTHOP);
3276			break;
3277		}
3278
3279		/* check if cmsg_len is large enough for sa_len */
3280		if (len < sizeof(struct sockaddr) || len < *buf)
3281			return (EINVAL);
3282
3283		switch (((struct sockaddr *)buf)->sa_family) {
3284		case AF_INET6:
3285		{
3286			struct sockaddr_in6 *sa6 =
3287			    (struct sockaddr_in6 *)(void *)buf;
3288
3289			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3290				return (EINVAL);
3291
3292			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3293			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3294				return (EINVAL);
3295			}
3296			if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3297			    != 0) {
3298				return (error);
3299			}
3300			break;
3301		}
3302		case AF_LINK:	/* should eventually be supported */
3303		default:
3304			return (EAFNOSUPPORT);
3305		}
3306
3307		/* turn off the previous option, then set the new option. */
3308		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3309		opt->ip6po_nexthop = _MALLOC(*buf, M_IP6OPT, M_NOWAIT);
3310		if (opt->ip6po_nexthop == NULL)
3311			return (ENOBUFS);
3312		bcopy(buf, opt->ip6po_nexthop, *buf);
3313		break;
3314
3315	case IPV6_2292HOPOPTS:
3316	case IPV6_HOPOPTS:
3317	{
3318		struct ip6_hbh *hbh;
3319		int hbhlen;
3320
3321		/*
3322		 * XXX: We don't allow a non-privileged user to set ANY HbH
3323		 * options, since per-option restriction has too much
3324		 * overhead.
3325		 */
3326		error = suser(kauth_cred_get(), 0);
3327		if (error)
3328			return (EACCES);
3329
3330		if (len == 0) {
3331			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3332			break;	/* just remove the option */
3333		}
3334
3335		/* message length validation */
3336		if (len < sizeof(struct ip6_hbh))
3337			return (EINVAL);
3338		hbh = (struct ip6_hbh *)(void *)buf;
3339		hbhlen = (hbh->ip6h_len + 1) << 3;
3340		if (len != hbhlen)
3341			return (EINVAL);
3342
3343		/* turn off the previous option, then set the new option. */
3344		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3345		opt->ip6po_hbh = _MALLOC(hbhlen, M_IP6OPT, M_NOWAIT);
3346		if (opt->ip6po_hbh == NULL)
3347			return (ENOBUFS);
3348		bcopy(hbh, opt->ip6po_hbh, hbhlen);
3349
3350		break;
3351	}
3352
3353	case IPV6_2292DSTOPTS:
3354	case IPV6_DSTOPTS:
3355	case IPV6_RTHDRDSTOPTS:
3356	{
3357		struct ip6_dest *dest, **newdest = NULL;
3358		int destlen;
3359
3360		error = suser(kauth_cred_get(), 0);
3361		if (error)
3362			return (EACCES);
3363
3364		if (len == 0) {
3365			ip6_clearpktopts(opt, optname);
3366			break;	/* just remove the option */
3367		}
3368
3369		/* message length validation */
3370		if (len < sizeof(struct ip6_dest))
3371			return (EINVAL);
3372		dest = (struct ip6_dest *)(void *)buf;
3373		destlen = (dest->ip6d_len + 1) << 3;
3374		if (len != destlen)
3375			return (EINVAL);
3376
3377		/*
3378		 * Determine the position that the destination options header
3379		 * should be inserted; before or after the routing header.
3380		 */
3381		switch (optname) {
3382		case IPV6_2292DSTOPTS:
3383			/*
			 * The old advanced API is ambiguous on this point.
3385			 * Our approach is to determine the position based
3386			 * according to the existence of a routing header.
3387			 * Note, however, that this depends on the order of the
3388			 * extension headers in the ancillary data; the 1st
3389			 * part of the destination options header must appear
3390			 * before the routing header in the ancillary data,
3391			 * too.
3392			 * RFC3542 solved the ambiguity by introducing
3393			 * separate ancillary data or option types.
3394			 */
3395			if (opt->ip6po_rthdr == NULL)
3396				newdest = &opt->ip6po_dest1;
3397			else
3398				newdest = &opt->ip6po_dest2;
3399			break;
3400		case IPV6_RTHDRDSTOPTS:
3401			newdest = &opt->ip6po_dest1;
3402			break;
3403		case IPV6_DSTOPTS:
3404			newdest = &opt->ip6po_dest2;
3405			break;
3406		}
3407
3408		/* turn off the previous option, then set the new option. */
3409		ip6_clearpktopts(opt, optname);
3410		*newdest = _MALLOC(destlen, M_IP6OPT, M_NOWAIT);
3411		if (*newdest == NULL)
3412			return (ENOBUFS);
3413		bcopy(dest, *newdest, destlen);
3414
3415		break;
3416	}
3417
3418	case IPV6_2292RTHDR:
3419	case IPV6_RTHDR:
3420	{
3421		struct ip6_rthdr *rth;
3422		int rthlen;
3423
3424		if (len == 0) {
3425			ip6_clearpktopts(opt, IPV6_RTHDR);
3426			break;	/* just remove the option */
3427		}
3428
3429		/* message length validation */
3430		if (len < sizeof(struct ip6_rthdr))
3431			return (EINVAL);
3432		rth = (struct ip6_rthdr *)(void *)buf;
3433		rthlen = (rth->ip6r_len + 1) << 3;
3434		if (len != rthlen)
3435			return (EINVAL);
3436
3437		switch (rth->ip6r_type) {
3438		case IPV6_RTHDR_TYPE_0:
3439			if (rth->ip6r_len == 0)	/* must contain one addr */
3440				return (EINVAL);
3441			if (rth->ip6r_len % 2) /* length must be even */
3442				return (EINVAL);
3443			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3444				return (EINVAL);
3445			break;
3446		default:
3447			return (EINVAL);	/* not supported */
3448		}
3449
3450		/* turn off the previous option */
3451		ip6_clearpktopts(opt, IPV6_RTHDR);
3452		opt->ip6po_rthdr = _MALLOC(rthlen, M_IP6OPT, M_NOWAIT);
3453		if (opt->ip6po_rthdr == NULL)
3454			return (ENOBUFS);
3455		bcopy(rth, opt->ip6po_rthdr, rthlen);
3456
3457		break;
3458	}
3459
3460	case IPV6_USE_MIN_MTU:
3461		if (len != sizeof(int))
3462			return (EINVAL);
3463		minmtupolicy = *(int *)(void *)buf;
3464		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3465		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3466		    minmtupolicy != IP6PO_MINMTU_ALL) {
3467			return (EINVAL);
3468		}
3469		opt->ip6po_minmtu = minmtupolicy;
3470		break;
3471
3472	case IPV6_DONTFRAG:
3473		if (len != sizeof(int))
3474			return (EINVAL);
3475
3476		if (uproto == IPPROTO_TCP || *(int *)(void *)buf == 0) {
3477			/*
3478			 * we ignore this option for TCP sockets.
3479			 * (RFC3542 leaves this case unspecified.)
3480			 */
3481			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3482		} else
3483			opt->ip6po_flags |= IP6PO_DONTFRAG;
3484		break;
3485
3486	case IPV6_PREFER_TEMPADDR:
3487		if (len != sizeof(int))
3488			return (EINVAL);
3489		preftemp = *(int *)(void *)buf;
3490		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3491		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3492		    preftemp != IP6PO_TEMPADDR_PREFER) {
3493			return (EINVAL);
3494		}
3495		opt->ip6po_prefer_tempaddr = preftemp;
3496		break;
3497
3498	default:
3499		return (ENOPROTOOPT);
3500	} /* end of switch */
3501
3502	return (0);
3503}
3504
3505/*
3506 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3507 * packet to the input queue of a specified interface.  Note that this
3508 * calls the output routine of the loopback "driver", but with an interface
3509 * pointer that might NOT be &loif -- easier than replicating that code here.
3510 */
3511void
3512ip6_mloopback(
3513	struct ifnet *ifp,
3514	struct mbuf *m,
3515	struct sockaddr_in6 *dst)
3516{
3517	struct mbuf *copym;
3518	struct ip6_hdr *ip6;
3519
3520	copym = m_copy(m, 0, M_COPYALL);
3521	if (copym == NULL)
3522		return;
3523
3524	/*
3525	 * Make sure to deep-copy IPv6 header portion in case the data
3526	 * is in an mbuf cluster, so that we can safely override the IPv6
3527	 * header portion later.
3528	 */
3529	if ((copym->m_flags & M_EXT) != 0 ||
3530	    copym->m_len < sizeof(struct ip6_hdr)) {
3531		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3532		if (copym == NULL)
3533			return;
3534	}
3535
3536#if DIAGNOSTIC
3537	if (copym->m_len < sizeof(*ip6)) {
3538		m_freem(copym);
3539		return;
3540	}
3541#endif
3542
3543	ip6 = mtod(copym, struct ip6_hdr *);
3544	/*
3545	 * clear embedded scope identifiers if necessary.
3546	 * in6_clearscope will touch the addresses only when necessary.
3547	 */
3548	in6_clearscope(&ip6->ip6_src);
3549	in6_clearscope(&ip6->ip6_dst);
3550
3551#ifdef __APPLE__
3552
3553	/* Makes sure the HW checksum flags are cleaned before sending the packet */
3554
3555	if ((copym->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
3556		in6_delayed_cksum(copym, sizeof(struct ip6_hdr));
3557		copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
3558	}
3559	copym->m_pkthdr.rcvif = 0;
3560	copym->m_pkthdr.csum_data = 0;
3561	copym->m_pkthdr.csum_flags = 0;
3562
3563	if (lo_ifp) {
3564		copym->m_pkthdr.rcvif = ifp;
3565		dlil_output(lo_ifp, PF_INET6, copym, 0,
3566		    (struct sockaddr *)dst, 0, NULL);
3567	} else
3568		m_free(copym);
3569#else
3570	(void)if_simloop(ifp, copym, dst->sin6_family, NULL);
3571#endif
3572}
3573
3574/*
3575 * Chop IPv6 header off from the payload.
3576 */
3577static int
3578ip6_splithdr(m, exthdrs)
3579	struct mbuf *m;
3580	struct ip6_exthdrs *exthdrs;
3581{
3582	struct mbuf *mh;
3583	struct ip6_hdr *ip6;
3584
3585	ip6 = mtod(m, struct ip6_hdr *);
3586	if (m->m_len > sizeof(*ip6)) {
3587		MGETHDR(mh, M_DONTWAIT, MT_HEADER);	/* MAC-OK */
3588		if (mh == 0) {
3589			m_freem(m);
3590			return ENOBUFS;
3591		}
3592		M_COPY_PKTHDR(mh, m);
3593		MH_ALIGN(mh, sizeof(*ip6));
3594		m->m_flags &= ~M_PKTHDR;
3595		m->m_len -= sizeof(*ip6);
3596		m->m_data += sizeof(*ip6);
3597		mh->m_next = m;
3598		m = mh;
3599		m->m_len = sizeof(*ip6);
3600		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3601	}
3602	exthdrs->ip6e_ip6 = m;
3603	return 0;
3604}
3605
3606/*
3607 * Compute IPv6 extension header length.
3608 */
3609int
3610ip6_optlen(in6p)
3611	struct in6pcb *in6p;
3612{
3613	int len;
3614
3615	if (!in6p->in6p_outputopts)
3616		return 0;
3617
3618	len = 0;
3619#define elen(x) \
3620	(((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3621
3622	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3623	if (in6p->in6p_outputopts->ip6po_rthdr)
3624		/* dest1 is valid with rthdr only */
3625		len += elen(in6p->in6p_outputopts->ip6po_dest1);
3626	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3627	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3628	return len;
3629#undef elen
3630}
3631