ip6_output.c revision 125595
11556Srgrimes/*	$FreeBSD: head/sys/netinet6/ip6_output.c 125595 2004-02-08 18:22:27Z ume $	*/
21556Srgrimes/*	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $	*/
31556Srgrimes
41556Srgrimes/*
51556Srgrimes * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
61556Srgrimes * All rights reserved.
71556Srgrimes *
81556Srgrimes * Redistribution and use in source and binary forms, with or without
91556Srgrimes * modification, are permitted provided that the following conditions
101556Srgrimes * are met:
111556Srgrimes * 1. Redistributions of source code must retain the above copyright
121556Srgrimes *    notice, this list of conditions and the following disclaimer.
131556Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
141556Srgrimes *    notice, this list of conditions and the following disclaimer in the
151556Srgrimes *    documentation and/or other materials provided with the distribution.
161556Srgrimes * 3. Neither the name of the project nor the names of its contributors
171556Srgrimes *    may be used to endorse or promote products derived from this software
181556Srgrimes *    without specific prior written permission.
191556Srgrimes *
201556Srgrimes * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
211556Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
221556Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
231556Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
241556Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
251556Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
261556Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
271556Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28127499Sgad * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29127499Sgad * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30127499Sgad * SUCH DAMAGE.
31127499Sgad */
32127499Sgad
33127499Sgad/*
34127499Sgad * Copyright (c) 1982, 1986, 1988, 1990, 1993
351556Srgrimes *	The Regents of the University of California.  All rights reserved.
361556Srgrimes *
371556Srgrimes * Redistribution and use in source and binary forms, with or without
3890143Smarkm * modification, are permitted provided that the following conditions
391556Srgrimes * are met:
401556Srgrimes * 1. Redistributions of source code must retain the above copyright
411556Srgrimes *    notice, this list of conditions and the following disclaimer.
421556Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
4390143Smarkm *    notice, this list of conditions and the following disclaimer in the
441556Srgrimes *    documentation and/or other materials provided with the distribution.
4536049Scharnier * 3. All advertising materials mentioning features or use of this software
4690143Smarkm *    must display the following acknowledgement:
4736049Scharnier *	This product includes software developed by the University of
48110391Scharnier *	California, Berkeley and its contributors.
4999110Sobrien * 4. Neither the name of the University nor the names of its contributors
5099110Sobrien *    may be used to endorse or promote products derived from this software
511556Srgrimes *    without specific prior written permission.
521556Srgrimes *
53127546Sgad * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
543296Sdg * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
551556Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
561556Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
571556Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
581556Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
591556Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
601556Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61127149Sgad * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
621556Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63127499Sgad * SUCH DAMAGE.
641556Srgrimes *
6513514Smpp *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
6673367Sache */
671556Srgrimes
6890143Smarkm#include "opt_ip6fw.h"
691556Srgrimes#include "opt_inet.h"
701556Srgrimes#include "opt_inet6.h"
711556Srgrimes#include "opt_ipsec.h"
721556Srgrimes#include "opt_pfil_hooks.h"
731556Srgrimes#include "opt_random_ip_id.h"
741556Srgrimes
751556Srgrimes#include <sys/param.h>
76127499Sgad#include <sys/malloc.h>
77127499Sgad#include <sys/mbuf.h>
7866377Sbrian#include <sys/proc.h>
79127537Sgad#include <sys/errno.h>
80127555Sgad#include <sys/protosw.h>
81127537Sgad#include <sys/socket.h>
82127537Sgad#include <sys/socketvar.h>
83127555Sgad#include <sys/systm.h>
84127537Sgad#include <sys/kernel.h>
85127537Sgad
86127537Sgad#include <net/if.h>
87129914Sgad#include <net/route.h>
88129914Sgad#ifdef PFIL_HOOKS
89129914Sgad#include <net/pfil.h>
90129914Sgad#endif
91129914Sgad
92129914Sgad#include <netinet/in.h>
93127537Sgad#include <netinet/in_var.h>
94127537Sgad#include <netinet6/in6_var.h>
95127537Sgad#include <netinet/ip6.h>
96127537Sgad#include <netinet/icmp6.h>
97127537Sgad#include <netinet6/ip6_var.h>
98127537Sgad#include <netinet/in_pcb.h>
99127537Sgad#include <netinet/tcp_var.h>
100127537Sgad#include <netinet6/nd6.h>
10190143Smarkm
1021556Srgrimes#ifdef IPSEC
103127537Sgad#include <netinet6/ipsec.h>
104127537Sgad#ifdef INET6
105127537Sgad#include <netinet6/ipsec6.h>
106127537Sgad#endif
107127537Sgad#include <netkey/key.h>
108127537Sgad#endif /* IPSEC */
109127537Sgad
1101556Srgrimes#ifdef FAST_IPSEC
111127537Sgad#include <netipsec/ipsec.h>
11297966Sjmallett#include <netipsec/ipsec6.h>
113127499Sgad#include <netipsec/key.h>
114127537Sgad#endif /* FAST_IPSEC */
115127499Sgad
116127499Sgad#include <netinet6/ip6_fw.h>
117127499Sgad
118127499Sgad#include <net/net_osdep.h>
119127499Sgad
120127499Sgad#include <netinet6/ip6protosw.h>
121127499Sgad
/*
 * Defines the file-local malloc type M_IPMOPTS, with short name
 * "ip6_moptions" and description "internet multicast options".
 * NOTE(review): presumably the type tag for struct ip6_moptions
 * allocations; the allocation sites are not visible here -- confirm.
 */
122127499Sgadstatic MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
123127499Sgad
/*
 * Set of mbufs from which ip6_output() builds the outgoing header chain:
 * the IPv6 header plus one mbuf per optional extension header.
 * ip6_output() zeroes the whole structure before filling it in and treats
 * a NULL member as "this header is not present".
 */
124127499Sgadstruct ip6_exthdrs {
125127499Sgad	struct mbuf *ip6e_ip6;	/* IPv6 header; taken as packet head after ip6_splithdr() */
126127499Sgad	struct mbuf *ip6e_hbh;	/* Hop-by-Hop options header */
127127499Sgad	struct mbuf *ip6e_dest1;	/* Destination options, 1st part (built only with a routing header) */
128127823Sgad	struct mbuf *ip6e_rthdr;	/* Routing header */
129127499Sgad	struct mbuf *ip6e_dest2;	/* Destination options, 2nd part */
130127499Sgad};
131127499Sgad
/*
 * Forward declarations for the file-local (static) helper routines.
 * Within the visible part of ip6_output(), ip6_copyexthdr() is called
 * through the MAKE_EXTHDR macro, and ip6_splithdr() and
 * ip6_insert_jumboopt() are called directly; every helper named here
 * returns an int.
 */
132127499Sgadstatic int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
133127499Sgad			   int, int));
134127499Sgadstatic int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
135127499Sgad	struct socket *, struct sockopt *));
136127536Sgadstatic int ip6_getpcbopt __P((struct ip6_pktopts *, int, struct sockopt *));
137127499Sgadstatic int ip6_setpktoption __P((int, u_char *, int, struct ip6_pktopts *, int,
138127598Sgad	int, int, int));
139127598Sgad
140127536Sgadstatic int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
141127499Sgadstatic int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
/* ip6_copyexthdr(): invoked by MAKE_EXTHDR with (mbuf **, header pointer, byte length) */
142127499Sgadstatic int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));
143129914Sgadstatic int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
144127536Sgad	struct ip6_frag **));
/* ip6_insert_jumboopt(): called when the payload length exceeds IPV6_MAXPACKET */
145127536Sgadstatic int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));
/* ip6_splithdr(): on success the caller continues with exthdrs.ip6e_ip6 as the packet head */
146127536Sgadstatic int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));
147127536Sgadstatic int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
148127536Sgad	struct ifnet *, struct in6_addr *, u_long *, int *));
149127499Sgad
15097875Sjmallett
151129635Sgad/*
152127538Sgad * IP6 output. The packet in mbuf chain m contains a skeletal IP6
153127538Sgad * header (with pri, len, nxt, hlim, src, dst).
15490143Smarkm * This function may modify ver and hlim only.
15597875Sjmallett * The mbuf chain containing the packet will be freed.
15697875Sjmallett * The mbuf opt, if present, will not be freed.
157127538Sgad *
158127538Sgad * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
159105831Srwatson * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
1601556Srgrimes * which is rt_rmx.rmx_mtu.
161127843Sgad */
16298494Ssobomaxint
1631556Srgrimesip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
16490110Simp	struct mbuf *m0;
1651556Srgrimes	struct ip6_pktopts *opt;
166127499Sgad	struct route_in6 *ro;
167127499Sgad	int flags;
1681556Srgrimes	struct ip6_moptions *im6o;
1691556Srgrimes	struct ifnet **ifpp;		/* XXX: just for statistics */
1701556Srgrimes	struct inpcb *inp;
171129914Sgad{
172127539Sgad	struct ip6_hdr *ip6, *mhip6;
173127499Sgad	struct ifnet *ifp, *origifp;
174129914Sgad	struct mbuf *m = m0;
175127499Sgad	int hlen, tlen, len, off;
17690143Smarkm	struct route_in6 ip6route;
1771556Srgrimes	struct sockaddr_in6 *dst;
17811809Sache	int error = 0;
179127542Sgad	struct in6_ifaddr *ia = NULL;
18011809Sache	u_long mtu;
18197804Stjr	int alwaysfrag, dontfrag;
18297804Stjr	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
18397804Stjr	struct ip6_exthdrs exthdrs;
1841556Srgrimes	struct in6_addr finaldst;
1851556Srgrimes	struct route_in6 *ro_pmtu = NULL;
1861556Srgrimes	int hdrsplit = 0;
1871556Srgrimes	int needipsec = 0;
1881556Srgrimes#ifdef FAST_IPSEC
1891556Srgrimes	int needipsectun = 0;
1901556Srgrimes	struct secpolicy *sp = NULL;
19198494Ssobomax#endif /* FAST_IPSEC */
192129914Sgad#ifdef IPSEC
193129914Sgad	int needipsectun = 0;
19498494Ssobomax	struct secpolicy *sp = NULL;
195129914Sgad#endif /* IPSEC */
196129914Sgad
1971556Srgrimes	ip6 = mtod(m, struct ip6_hdr *);
198127542Sgad	finaldst = ip6->ip6_dst;
199127542Sgad
200127542Sgad#define MAKE_EXTHDR(hp, mp)						\
201127499Sgad    do {								\
202127499Sgad	if (hp) {							\
203127499Sgad		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
204127499Sgad		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
205127499Sgad		    ((eh)->ip6e_len + 1) << 3);				\
206127499Sgad		if (error)						\
207127499Sgad			goto freehdrs;					\
20889909Sru	}								\
20998494Ssobomax    } while (/*CONSTCOND*/ 0)
2101556Srgrimes
211127499Sgad	bzero(&exthdrs, sizeof(exthdrs));
212127499Sgad
213127499Sgad	if (opt) {
214127499Sgad		/* Hop-by-Hop options header */
215127499Sgad		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
216127499Sgad		/* Destination options header(1st part) */
217127499Sgad		if (opt->ip6po_rthdr) {
218127499Sgad			/*
219127499Sgad			 * Destination options header(1st part)
2201556Srgrimes			 * This only makes sense with a routing header.
221127499Sgad			 * See Section 9.2 of RFC 3542.
2221556Srgrimes			 * Disabling this part just for MIP6 convenience is
2231556Srgrimes			 * a bad idea.  We need to think carefully about a
22419068Speter			 * way to make the advanced API coexist with MIP6
22519068Speter			 * options, which might automatically be inserted in
22619068Speter			 * the kernel.
22719068Speter			 */
22819068Speter			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
22919068Speter		}
2301556Srgrimes		/* Routing header */
2311556Srgrimes		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
2321556Srgrimes		/* Destination options header(2nd part) */
233127506Sgad		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
234127506Sgad	}
235127506Sgad
236127542Sgad#ifdef IPSEC
237127506Sgad	/* get a security policy for this packet */
238127506Sgad	if (inp == NULL)
239127499Sgad		sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
240127499Sgad	else
241127499Sgad		sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
242127499Sgad
243127499Sgad	if (sp == NULL) {
244127542Sgad		ipsec6stat.out_inval++;
245127499Sgad		goto freehdrs;
246127597Sgad	}
247127542Sgad
248127542Sgad	error = 0;
249127542Sgad
250127542Sgad	/* check policy */
251127499Sgad	switch (sp->policy) {
252127499Sgad	case IPSEC_POLICY_DISCARD:
253127499Sgad		/*
254127499Sgad		 * This packet is just discarded.
255127499Sgad		 */
256127542Sgad		ipsec6stat.out_polvio++;
2571556Srgrimes		goto freehdrs;
258127499Sgad
259116265Sscottl	case IPSEC_POLICY_BYPASS:
260126127Sdeischen	case IPSEC_POLICY_NONE:
261116265Sscottl		/* no need to do IPsec. */
2621556Srgrimes		needipsec = 0;
2631556Srgrimes		break;
2641556Srgrimes
2651556Srgrimes	case IPSEC_POLICY_IPSEC:
266109502Sjmallett		if (sp->req == NULL) {
26790143Smarkm			/* acquire a policy */
2681556Srgrimes			error = key_spdacquire(sp);
2691556Srgrimes			goto freehdrs;
2701556Srgrimes		}
2711556Srgrimes		needipsec = 1;
2721556Srgrimes		break;
2731556Srgrimes
274109502Sjmallett	case IPSEC_POLICY_ENTRUST:
27590143Smarkm	default:
2761556Srgrimes		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
2771556Srgrimes	}
2781556Srgrimes#endif /* IPSEC */
2791556Srgrimes#ifdef FAST_IPSEC
28037317Sphk	/* get a security policy for this packet */
2811556Srgrimes	if (inp == NULL)
2821556Srgrimes		sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
2831556Srgrimes	else
2841556Srgrimes		sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
2851556Srgrimes
2861556Srgrimes	if (sp == NULL) {
28737317Sphk		newipsecstat.ips_out_inval++;
2881556Srgrimes		goto freehdrs;
2891556Srgrimes	}
290109502Sjmallett
291109502Sjmallett	error = 0;
292109502Sjmallett
2931556Srgrimes	/* check policy */
29490143Smarkm	switch (sp->policy) {
2951556Srgrimes	case IPSEC_POLICY_DISCARD:
2961556Srgrimes		/*
297109502Sjmallett		 * This packet is just discarded.
29890143Smarkm		 */
2991556Srgrimes		newipsecstat.ips_out_polvio++;
3001556Srgrimes		goto freehdrs;
301127499Sgad
302127499Sgad	case IPSEC_POLICY_BYPASS:
303127499Sgad	case IPSEC_POLICY_NONE:
304127499Sgad		/* no need to do IPsec. */
305127499Sgad		needipsec = 0;
306127499Sgad		break;
307127499Sgad
308127499Sgad	case IPSEC_POLICY_IPSEC:
3091556Srgrimes		if (sp->req == NULL) {
310127499Sgad			/* acquire a policy */
311127499Sgad			error = key_spdacquire(sp);
312127597Sgad			goto freehdrs;
313127542Sgad		}
314127542Sgad		needipsec = 1;
315127542Sgad		break;
316127542Sgad
317127542Sgad	case IPSEC_POLICY_ENTRUST:
318127542Sgad	default:
319127499Sgad		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
320127499Sgad	}
321127499Sgad#endif /* FAST_IPSEC */
322127499Sgad
323127499Sgad	/*
3241556Srgrimes	 * Calculate the total length of the extension header chain.
3251556Srgrimes	 * Keep the length of the unfragmentable part for fragmentation.
3261556Srgrimes	 */
3271556Srgrimes	optlen = 0;
3281556Srgrimes	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
3291556Srgrimes	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
330127499Sgad	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
331127499Sgad	unfragpartlen = optlen + sizeof(struct ip6_hdr);
332127597Sgad	/* NOTE: we don't add AH/ESP length here. do that later. */
333127542Sgad	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
334127542Sgad
335127542Sgad	/*
336127542Sgad	 * If we need IPsec, or there is at least one extension header,
337127542Sgad	 * separate IP6 header from the payload.
338127499Sgad	 */
339127499Sgad	if ((needipsec || optlen) && !hdrsplit) {
340127499Sgad		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
341127499Sgad			m = NULL;
342127499Sgad			goto freehdrs;
3431556Srgrimes		}
3441556Srgrimes		m = exthdrs.ip6e_ip6;
3451556Srgrimes		hdrsplit++;
3461556Srgrimes	}
347127499Sgad
348127499Sgad	/* adjust pointer */
349127499Sgad	ip6 = mtod(m, struct ip6_hdr *);
350127499Sgad
3511556Srgrimes	/* adjust mbuf packet header length */
35213020Speter	m->m_pkthdr.len += optlen;
353127499Sgad	plen = m->m_pkthdr.len - sizeof(*ip6);
354127499Sgad
355127499Sgad	/* If this is a jumbo payload, insert a jumbo payload option. */
356127499Sgad	if (plen > IPV6_MAXPACKET) {
35713020Speter		if (!hdrsplit) {
3581556Srgrimes			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
359109502Sjmallett				m = NULL;
3601556Srgrimes				goto freehdrs;
36190143Smarkm			}
3621556Srgrimes			m = exthdrs.ip6e_ip6;
3631556Srgrimes			hdrsplit++;
3641556Srgrimes		}
365109502Sjmallett		/* adjust pointer */
3661556Srgrimes		ip6 = mtod(m, struct ip6_hdr *);
36790143Smarkm		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
3681556Srgrimes			goto freehdrs;
3691556Srgrimes		ip6->ip6_plen = 0;
3701556Srgrimes	} else
3711556Srgrimes		ip6->ip6_plen = htons(plen);
3721556Srgrimes
3731556Srgrimes	/*
3741556Srgrimes	 * Concatenate headers and fill in next header fields.
3751556Srgrimes	 * Here we have, on "m"
3761556Srgrimes	 *	IPv6 payload
377127499Sgad	 * and we insert headers accordingly.  Finally, we should be getting:
378127499Sgad	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
379127499Sgad	 *
380127499Sgad	 * during the header composing process, "m" points to IPv6 header.
381127499Sgad	 * "mprev" points to an extension header prior to esp.
382127499Sgad	 */
383127499Sgad	{
384127499Sgad		u_char *nexthdrp = &ip6->ip6_nxt;
385127499Sgad		struct mbuf *mprev = m;
386127499Sgad
387127499Sgad		/*
388127499Sgad		 * we treat dest2 specially.  this makes IPsec processing
389127499Sgad		 * much easier.  the goal here is to make mprev point the
390127499Sgad		 * mbuf prior to dest2.
3911556Srgrimes		 *
392127499Sgad		 * result: IPv6 dest2 payload
3931556Srgrimes		 * m and mprev will point to IPv6 header.
39486922Sgreen		 */
395109502Sjmallett		if (exthdrs.ip6e_dest2) {
39686922Sgreen			if (!hdrsplit)
39786922Sgreen				panic("assumption failed: hdr not split");
3981556Srgrimes			exthdrs.ip6e_dest2->m_next = m->m_next;
3991556Srgrimes			m->m_next = exthdrs.ip6e_dest2;
4001556Srgrimes			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
4011556Srgrimes			ip6->ip6_nxt = IPPROTO_DSTOPTS;
4021556Srgrimes		}
4031556Srgrimes
404129914Sgad#define MAKE_CHAIN(m, mp, p, i)\
405129914Sgad    do {\
406129914Sgad	if (m) {\
407129914Sgad		if (!hdrsplit) \
408129914Sgad			panic("assumption failed: hdr not split"); \
409129914Sgad		*mtod((m), u_char *) = *(p);\
410129914Sgad		*(p) = (i);\
411129914Sgad		p = mtod((m), u_char *);\
412129914Sgad		(m)->m_next = (mp)->m_next;\
413129914Sgad		(mp)->m_next = (m);\
414129914Sgad		(mp) = (m);\
415129914Sgad	}\
416129914Sgad    } while (/*CONSTCOND*/ 0)
417129914Sgad		/*
418129914Sgad		 * result: IPv6 hbh dest1 rthdr dest2 payload
419129914Sgad		 * m will point to IPv6 header.  mprev will point to the
420127499Sgad		 * extension header prior to dest2 (rthdr in the above case).
421127542Sgad		 */
422127542Sgad		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
423127499Sgad		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
424127499Sgad		    IPPROTO_DSTOPTS);
425129914Sgad		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
4261556Srgrimes		    IPPROTO_ROUTING);
4271556Srgrimes
4281556Srgrimes#if defined(IPSEC) || defined(FAST_IPSEC)
4291556Srgrimes		if (!needipsec)
43037317Sphk			goto skip_ipsec2;
4311556Srgrimes
43237317Sphk		/*
43337317Sphk		 * pointers after IPsec headers are not valid any more.
4341556Srgrimes		 * other pointers need a great care too.
43589909Sru		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
4361556Srgrimes		 */
4371556Srgrimes		exthdrs.ip6e_dest2 = NULL;
4381556Srgrimes
43990143Smarkm	    {
440109502Sjmallett		struct ip6_rthdr *rh = NULL;
4411556Srgrimes		int segleft_org = 0;
442127499Sgad		struct ipsec_output_state state;
443127823Sgad
444127823Sgad		if (exthdrs.ip6e_rthdr) {
44597877Sjmallett			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
446127499Sgad			segleft_org = rh->ip6r_segleft;
447127499Sgad			rh->ip6r_segleft = 0;
448127823Sgad		}
44966377Sbrian
4501556Srgrimes		bzero(&state, sizeof(state));
4511556Srgrimes		state.m = m;
4521556Srgrimes		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
45353170Skris		    &needipsectun);
4541556Srgrimes		m = state.m;
4551556Srgrimes		if (error) {
456127499Sgad			/* mbuf is already reclaimed in ipsec6_output_trans. */
4571556Srgrimes			m = NULL;
458127499Sgad			switch (error) {
459127499Sgad			case EHOSTUNREACH:
460127499Sgad			case ENETUNREACH:
461127499Sgad			case EMSGSIZE:
462127499Sgad			case ENOBUFS:
4631556Srgrimes			case ENOMEM:
464127499Sgad				break;
465127499Sgad			default:
466127499Sgad				printf("ip6_output (ipsec): error code %d\n", error);
467129600Sgad				/* FALLTHROUGH */
468129600Sgad			case ENOENT:
469129600Sgad				/* don't show these error codes to the user */
470129600Sgad				error = 0;
471129600Sgad				break;
472127499Sgad			}
473127823Sgad			goto bad;
474127499Sgad		}
475127499Sgad		if (exthdrs.ip6e_rthdr) {
476127499Sgad			/* ah6_output doesn't modify mbuf chain */
477127823Sgad			rh->ip6r_segleft = segleft_org;
478127499Sgad		}
479127499Sgad	    }
480127499Sgadskip_ipsec2:;
481127823Sgad#endif
482127499Sgad	}
483127499Sgad
484127499Sgad	/*
485127823Sgad	 * If there is a routing header, replace the destination address field
486127499Sgad	 * with the first hop of the routing header.
487127499Sgad	 */
488127499Sgad	if (exthdrs.ip6e_rthdr) {
489127823Sgad		struct ip6_rthdr *rh =
490127499Sgad			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
491127499Sgad						  struct ip6_rthdr *));
492127499Sgad		struct ip6_rthdr0 *rh0;
493127823Sgad		struct in6_addr *addrs;
494127499Sgad
495127499Sgad		switch (rh->ip6r_type) {
496127499Sgad		case IPV6_RTHDR_TYPE_0:
497127499Sgad			 rh0 = (struct ip6_rthdr0 *)rh;
498127499Sgad			 addrs = (struct in6_addr *)(rh0 + 1);
4991556Srgrimes
500126127Sdeischen			 ip6->ip6_dst = *addrs;
5011556Srgrimes			 bcopy((caddr_t)(addrs + 1), (caddr_t)addrs,
5021556Srgrimes			       sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1)
5031556Srgrimes				 );
504127499Sgad			 *(addrs + rh0->ip6r0_segleft - 1) = finaldst;
505127149Sgad			 break;
506127544Sgad		default:	/* is it possible? */
5071556Srgrimes			 error = EINVAL;
508127499Sgad			 goto bad;
509127149Sgad		}
510127149Sgad	}
511127149Sgad
512127149Sgad	/* Source address validation */
513127499Sgad	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
514127499Sgad	    (flags & IPV6_DADOUTPUT) == 0) {
515127499Sgad		error = EOPNOTSUPP;
516127499Sgad		ip6stat.ip6s_badscope++;
517127499Sgad		goto bad;
518127499Sgad	}
519127499Sgad	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
520127823Sgad		error = EOPNOTSUPP;
521127499Sgad		ip6stat.ip6s_badscope++;
522127499Sgad		goto bad;
523127499Sgad	}
524127499Sgad
525127499Sgad	ip6stat.ip6s_localout++;
526127499Sgad
527127499Sgad	/*
528127499Sgad	 * Route packet.
529127499Sgad	 */
530127499Sgad	if (ro == 0) {
531127499Sgad		ro = &ip6route;
532127499Sgad		bzero((caddr_t)ro, sizeof(*ro));
533127499Sgad	}
534127499Sgad	ro_pmtu = ro;
535127499Sgad	if (opt && opt->ip6po_rthdr)
536127499Sgad		ro = &opt->ip6po_route;
537127823Sgad	dst = (struct sockaddr_in6 *)&ro->ro_dst;
538127499Sgad
539127499Sgad	/*
540127499Sgad	 * If there is a cached route,
541127499Sgad	 * check that it is to the same destination
542127823Sgad	 * and is still up. If not, free it and try again.
543127823Sgad	 */
544127499Sgad	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
545127499Sgad			 dst->sin6_family != AF_INET6 ||
546127499Sgad			 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
547127499Sgad		RTFREE(ro->ro_rt);
548127823Sgad		ro->ro_rt = (struct rtentry *)0;
549127823Sgad	}
550127499Sgad	if (ro->ro_rt == 0) {
551127499Sgad		bzero(dst, sizeof(*dst));
552127499Sgad		dst->sin6_family = AF_INET6;
553127499Sgad		dst->sin6_len = sizeof(struct sockaddr_in6);
554127823Sgad		dst->sin6_addr = ip6->ip6_dst;
555127499Sgad	}
556127499Sgad
557127499Sgad 	/*
558127499Sgad	 * if specified, try to fill in the traffic class field.
559127823Sgad	 * do not override if a non-zero value is already set.
560127499Sgad	 * we check the diffserv field and the ecn field separately.
561127499Sgad	 */
562127499Sgad	if (opt && opt->ip6po_tclass >= 0) {
563127499Sgad		int mask = 0;
564127823Sgad
565127499Sgad		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
566127499Sgad			mask |= 0xfc;
567127499Sgad		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
568127499Sgad			mask |= 0x03;
569127499Sgad		if (mask != 0)
570127499Sgad			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
571127499Sgad	}
572127499Sgad
573127499Sgad	/* fill in or override the hop limit field, if necessary. */
574127499Sgad	if (opt && opt->ip6po_hlim != -1)
575127149Sgad		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
576127499Sgad	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
577127499Sgad		if (im6o != NULL)
578127499Sgad			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
579127149Sgad		else
5801556Srgrimes			ip6->ip6_hlim = ip6_defmcasthlim;
58125271Sjkh	}
58225271Sjkh
58325271Sjkh#if defined(IPSEC) || defined(FAST_IPSEC)
5841556Srgrimes	if (needipsec && needipsectun) {
5851556Srgrimes		struct ipsec_output_state state;
5861556Srgrimes
5871556Srgrimes		/*
588127499Sgad		 * All the extension headers will become inaccessible
58962803Swill		 * (since they can be encrypted).
590127499Sgad		 * Don't panic, we need no more updates to extension headers
5911556Srgrimes		 * on inner IPv6 packet (since they are now encapsulated).
5921556Srgrimes		 *
5931556Srgrimes		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
594127499Sgad		 */
5951556Srgrimes		bzero(&exthdrs, sizeof(exthdrs));
596127499Sgad		exthdrs.ip6e_ip6 = m;
5971556Srgrimes
598127499Sgad		bzero(&state, sizeof(state));
5991556Srgrimes		state.m = m;
6001556Srgrimes		state.ro = (struct route *)ro;
6011556Srgrimes		state.dst = (struct sockaddr *)dst;
6021556Srgrimes
6031556Srgrimes		error = ipsec6_output_tunnel(&state, sp, flags);
6041556Srgrimes
6051556Srgrimes		m = state.m;
6061556Srgrimes		ro = (struct route_in6 *)state.ro;
6071556Srgrimes		dst = (struct sockaddr_in6 *)state.dst;
6081556Srgrimes		if (error) {
6091556Srgrimes			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
6101556Srgrimes			m0 = m = NULL;
611127499Sgad			m = NULL;
612127499Sgad			switch (error) {
613127499Sgad			case EHOSTUNREACH:
614127499Sgad			case ENETUNREACH:
615127499Sgad			case EMSGSIZE:
616127499Sgad			case ENOBUFS:
617127499Sgad			case ENOMEM:
61866377Sbrian				break;
6191556Srgrimes			default:
6201556Srgrimes				printf("ip6_output (ipsec): error code %d\n", error);
6211556Srgrimes				/* FALLTHROUGH */
622127499Sgad			case ENOENT:
623127499Sgad				/* don't show these error codes to the user */
624127499Sgad				error = 0;
625127499Sgad				break;
626127499Sgad			}
627127499Sgad			goto bad;
628127602Sgad		}
629127499Sgad
630127499Sgad		exthdrs.ip6e_ip6 = m;
631127499Sgad	}
632127499Sgad#endif /* IPSEC */
633127499Sgad
634127499Sgad	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
635127499Sgad		/* Unicast */
636127542Sgad
637127499Sgad#define ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
638127499Sgad#define sin6tosa(sin6)	((struct sockaddr *)(sin6))
639127499Sgad		/* xxx
640127499Sgad		 * interface selection comes here
641127499Sgad		 * if an interface is specified from an upper layer,
642127499Sgad		 * ifp must point it.
643127499Sgad		 */
644127499Sgad		if (ro->ro_rt == 0) {
645127499Sgad			/*
646127499Sgad			 * non-bsdi always clone routes, if parent is
647127499Sgad			 * PRF_CLONING.
648127499Sgad			 */
649127499Sgad			rtalloc((struct route *)ro);
650127499Sgad		}
651127602Sgad		if (ro->ro_rt == 0) {
652127602Sgad			ip6stat.ip6s_noroute++;
653127499Sgad			error = EHOSTUNREACH;
654127602Sgad			/* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
655127499Sgad			goto bad;
656127499Sgad		}
657127499Sgad		/* XXX rt not locked */
658127499Sgad		ia = ifatoia6(ro->ro_rt->rt_ifa);
659127499Sgad		ifp = ro->ro_rt->rt_ifp;
660127499Sgad		ro->ro_rt->rt_rmx.rmx_pksent++;
661127542Sgad		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
662127499Sgad			dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
663127499Sgad		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
664127499Sgad
665127499Sgad		in6_ifstat_inc(ifp, ifs6_out_request);
666127823Sgad
667127499Sgad		/*
668127499Sgad		 * Check if the outgoing interface conflicts with
669127499Sgad		 * the interface specified by ifi6_ifindex (if specified).
670127597Sgad		 * Note that loopback interface is always okay.
671127499Sgad		 * (this may happen when we are sending a packet to one of
672127499Sgad		 *  our own addresses.)
673127149Sgad		 */
674127539Sgad		if (opt && opt->ip6po_pktinfo
675127149Sgad		 && opt->ip6po_pktinfo->ipi6_ifindex) {
676127149Sgad			if (!(ifp->if_flags & IFF_LOOPBACK)
677127499Sgad			 && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
678127499Sgad				ip6stat.ip6s_noroute++;
679127499Sgad				in6_ifstat_inc(ifp, ifs6_out_discard);
680127499Sgad				error = EHOSTUNREACH;
681127499Sgad				goto bad;
682127499Sgad			}
683127499Sgad		}
684127499Sgad
685127499Sgad		if (opt && opt->ip6po_hlim != -1)
686127499Sgad			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
687127499Sgad	} else {
688127149Sgad		/* Multicast */
689127499Sgad		struct	in6_multi *in6m;
690127499Sgad
691127542Sgad		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
692127149Sgad
693127149Sgad		/*
694127149Sgad		 * See if the caller provided any multicast options
695127499Sgad		 */
696127499Sgad		ifp = NULL;
697127823Sgad		if (im6o != NULL) {
698127499Sgad			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
699127499Sgad			if (im6o->im6o_multicast_ifp != NULL)
700127499Sgad				ifp = im6o->im6o_multicast_ifp;
701127149Sgad		} else
702127499Sgad			ip6->ip6_hlim = ip6_defmcasthlim;
703127499Sgad
704127499Sgad		/*
705127539Sgad		 * See if the caller provided the outgoing interface
706127539Sgad		 * as an ancillary data.
707127499Sgad		 * Boundary check for ifindex is assumed to be already done.
708127499Sgad		 */
709127499Sgad		if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex)
710127499Sgad			ifp = ifnet_byindex(opt->ip6po_pktinfo->ipi6_ifindex);
711127499Sgad
712127499Sgad		/*
713127499Sgad		 * If the destination is a node-local scope multicast,
714127499Sgad		 * the packet should be loop-backed only.
715127499Sgad		 */
716127499Sgad		if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
717127499Sgad			/*
718127499Sgad			 * If the outgoing interface is already specified,
719127499Sgad			 * it should be a loopback interface.
720127499Sgad			 */
721127499Sgad			if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
722127542Sgad				ip6stat.ip6s_badscope++;
723127499Sgad				error = ENETUNREACH; /* XXX: better error? */
724127499Sgad				/* XXX correct ifp? */
725127499Sgad				in6_ifstat_inc(ifp, ifs6_out_discard);
726127499Sgad				goto bad;
727127542Sgad			} else {
728127499Sgad				ifp = &loif[0];
729127499Sgad			}
730127499Sgad		}
731127499Sgad
732127823Sgad		if (opt && opt->ip6po_hlim != -1)
733127499Sgad			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
734127149Sgad
735127149Sgad		/*
736127499Sgad		 * If caller did not provide an interface lookup a
737127499Sgad		 * default in the routing table.  This is either a
73866377Sbrian		 * default for the specified group (i.e. a host
73966377Sbrian		 * route), or a multicast default (a route for the
740127539Sgad		 * ``net'' ff00::/8).
741127602Sgad		 */
74266377Sbrian		if (ifp == NULL) {
743127499Sgad			if (ro->ro_rt == 0)
744127499Sgad				ro->ro_rt = rtalloc1((struct sockaddr *)
745127499Sgad						&ro->ro_dst, 0, 0UL);
746127499Sgad			else
747127499Sgad				RT_LOCK(ro->ro_rt);
748127499Sgad			if (ro->ro_rt == 0) {
749127542Sgad				ip6stat.ip6s_noroute++;
750127499Sgad				error = EHOSTUNREACH;
75166377Sbrian				/* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
752127499Sgad				goto bad;
753127499Sgad			}
754127499Sgad			ia = ifatoia6(ro->ro_rt->rt_ifa);
755127602Sgad			ifp = ro->ro_rt->rt_ifp;
756127602Sgad			ro->ro_rt->rt_rmx.rmx_pksent++;
757127499Sgad			RT_UNLOCK(ro->ro_rt);
758127499Sgad		}
759127499Sgad
760127602Sgad		if ((flags & IPV6_FORWARDING) == 0)
761127499Sgad			in6_ifstat_inc(ifp, ifs6_out_request);
762127499Sgad		in6_ifstat_inc(ifp, ifs6_out_mcast);
763127499Sgad
76466377Sbrian		/*
765127499Sgad		 * Confirm that the outgoing interface supports multicast.
766127499Sgad		 */
767127509Sgad		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
768127509Sgad			ip6stat.ip6s_noroute++;
769127509Sgad			in6_ifstat_inc(ifp, ifs6_out_discard);
770127509Sgad			error = ENETUNREACH;
771127509Sgad			goto bad;
772127509Sgad		}
773127542Sgad		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
774127499Sgad		if (in6m != NULL &&
775127499Sgad		   (im6o == NULL || im6o->im6o_multicast_loop)) {
776127499Sgad			/*
777127499Sgad			 * If we belong to the destination multicast group
778127823Sgad			 * on the outgoing interface, and the caller did not
779127499Sgad			 * forbid loopback, loop back a copy.
780127499Sgad			 */
781127499Sgad			ip6_mloopback(ifp, m, dst);
782127499Sgad		} else {
783127499Sgad			/*
784127499Sgad			 * If we are acting as a multicast router, perform
785127499Sgad			 * multicast forwarding as if the packet had just
786127499Sgad			 * arrived on the interface to which we are about
787127499Sgad			 * to send.  The multicast forwarding function
788127539Sgad			 * recursively calls this function, using the
789127499Sgad			 * IPV6_FORWARDING flag to prevent infinite recursion.
790127499Sgad			 *
791127499Sgad			 * Multicasts that are looped back by ip6_mloopback(),
792127499Sgad			 * above, will be forwarded by the ip6_input() routine,
793127499Sgad			 * if necessary.
794127499Sgad			 */
795127499Sgad			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
796127499Sgad				if (ip6_mforward(ip6, ifp, m) != 0) {
797127499Sgad					m_freem(m);
798127499Sgad					goto done;
799127499Sgad				}
800127499Sgad			}
801127499Sgad		}
802127499Sgad		/*
80366377Sbrian		 * Multicasts with a hoplimit of zero may be looped back,
804127499Sgad		 * above, but must not be transmitted on a network.
805127499Sgad		 * Also, multicasts addressed to the loopback interface
806127499Sgad		 * are not sent -- the above call to ip6_mloopback() will
807127542Sgad		 * loop back a copy if this host actually belongs to the
808127542Sgad		 * destination group on the loopback interface.
809127542Sgad		 */
810127542Sgad		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
811127499Sgad		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
812127499Sgad			m_freem(m);
813127597Sgad			goto done;
814127542Sgad		}
815127542Sgad	}
816127542Sgad
817127542Sgad	/*
818127542Sgad	 * Fill the outgoing interface to tell the upper layer
819127542Sgad	 * to increment per-interface statistics.
820127542Sgad	 */
821127542Sgad	if (ifpp)
822127542Sgad		*ifpp = ifp;
823127542Sgad
824127542Sgad	/* Determine path MTU. */
825127542Sgad	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
826127499Sgad	    &alwaysfrag)) != 0)
827127499Sgad		goto bad;
828127499Sgad
829127499Sgad	/*
830127499Sgad	 * The caller of this function may specify to use the minimum MTU
831127499Sgad	 * in some cases.
832127499Sgad	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
833127499Sgad	 * setting.  The logic is a bit complicated; by default, unicast
834127499Sgad	 * packets will follow path MTU while multicast packets will be sent at
835127499Sgad	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
836127499Sgad	 * including unicast ones will be sent at the minimum MTU.  Multicast
837127499Sgad	 * packets will always be sent at the minimum MTU unless
838127499Sgad	 * IP6PO_MINMTU_DISABLE is explicitly specified.
839127499Sgad	 * See RFC 3542 for more details.
840127499Sgad	 */
841127499Sgad	if (mtu > IPV6_MMTU) {
842127499Sgad		if ((flags & IPV6_MINMTU))
843127499Sgad			mtu = IPV6_MMTU;
844127499Sgad		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
845127499Sgad			mtu = IPV6_MMTU;
84666377Sbrian		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
847127499Sgad			 (opt == NULL ||
848127499Sgad			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
849127499Sgad			mtu = IPV6_MMTU;
850127499Sgad		}
851127499Sgad	}
852127499Sgad
853127499Sgad	/* Fake scoped addresses */
854127499Sgad	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
855127499Sgad		/*
856127499Sgad		 * If source or destination address is a scoped address, and
857127499Sgad		 * the packet is going to be sent to a loopback interface,
858127499Sgad		 * we should keep the original interface.
859127499Sgad		 */
86066377Sbrian
861127499Sgad		/*
86266377Sbrian		 * XXX: this is a very experimental and temporary solution.
863127499Sgad		 * We eventually have sockaddr_in6 and use the sin6_scope_id
864127499Sgad		 * field of the structure here.
865127499Sgad		 * We rely on the consistency between two scope zone ids
866127539Sgad		 * of source and destination, which should already be assured.
867127499Sgad		 * Larger scopes than link will be supported in the future.
86866377Sbrian		 */
869127499Sgad		origifp = NULL;
870127823Sgad		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
871127499Sgad			origifp = ifnet_byindex(ntohs(ip6->ip6_src.s6_addr16[1]));
872127823Sgad		else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
873127499Sgad			origifp = ifnet_byindex(ntohs(ip6->ip6_dst.s6_addr16[1]));
874127499Sgad		/*
875127499Sgad		 * XXX: origifp can be NULL even in those two cases above.
876127499Sgad		 * For example, if we remove the (only) link-local address
877127823Sgad		 * from the loopback interface, and try to send a link-local
878127499Sgad		 * address without link-id information.  Then the source
879127499Sgad		 * address is ::1, and the destination address is the
88066377Sbrian		 * link-local address with its s6_addr16[1] being zero.
88166377Sbrian		 * What is worse, if the packet goes to the loopback interface
882127499Sgad		 * by a default rejected route, the null pointer would be
883127499Sgad		 * passed to looutput, and the kernel would hang.
884127499Sgad		 * The following last resort would prevent such disaster.
885127499Sgad		 */
886127499Sgad		if (origifp == NULL)
887127823Sgad			origifp = ifp;
888127823Sgad	}
889127499Sgad	else
890127499Sgad		origifp = ifp;
891127823Sgad	/*
892127499Sgad	 * clear embedded scope identifiers if necessary.
893127499Sgad	 * in6_clearscope will touch the addresses only when necessary.
894127499Sgad	 */
895127499Sgad	in6_clearscope(&ip6->ip6_src);
896127499Sgad	in6_clearscope(&ip6->ip6_dst);
897127499Sgad
898127499Sgad	/*
899127499Sgad	 * Check with the firewall...
900127499Sgad	 */
901127499Sgad	if (ip6_fw_enable && ip6_fw_chk_ptr) {
902127499Sgad		u_short port = 0;
903127823Sgad		m->m_pkthdr.rcvif = NULL;	/* XXX */
904127499Sgad		/* If ipfw says divert, we have to just drop packet */
905127499Sgad		if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
906109502Sjmallett			m_freem(m);
907109502Sjmallett			goto done;
908109502Sjmallett		}
909109502Sjmallett		if (!m) {
910109502Sjmallett			error = EACCES;
911109502Sjmallett			goto done;
912109502Sjmallett		}
913109502Sjmallett	}
914109502Sjmallett
915109502Sjmallett	/*
916109502Sjmallett	 * If the outgoing packet contains a hop-by-hop options header,
917109502Sjmallett	 * it must be examined and processed even by the source node.
9181556Srgrimes	 * (RFC 2460, section 4.)
91990110Simp	 */
9201556Srgrimes	if (exthdrs.ip6e_hbh) {
9211556Srgrimes		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
9221556Srgrimes		u_int32_t dummy1; /* XXX unused */
92325271Sjkh		u_int32_t dummy2; /* XXX unused */
92425271Sjkh
92525271Sjkh#ifdef DIAGNOSTIC
92625271Sjkh		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
92725271Sjkh			panic("ip6e_hbh is not continuous");
92825271Sjkh#endif
92925271Sjkh		/*
93025271Sjkh		 *  XXX: if we have to send an ICMPv6 error to the sender,
93125271Sjkh		 *       we need the M_LOOP flag since icmp6_error() expects
93225271Sjkh		 *       the IPv6 and the hop-by-hop options header are
93325271Sjkh		 *       continuous unless the flag is set.
93425271Sjkh		 */
93525271Sjkh		m->m_flags |= M_LOOP;
93625271Sjkh		m->m_pkthdr.rcvif = ifp;
93725271Sjkh		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
93890110Simp		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
93925271Sjkh		    &dummy1, &dummy2) < 0) {
94025271Sjkh			/* m was already freed at this point */
94125271Sjkh			error = EINVAL;/* better error? */
9421556Srgrimes			goto done;
9431556Srgrimes		}
9441556Srgrimes		m->m_flags &= ~M_LOOP; /* XXX */
9451556Srgrimes		m->m_pkthdr.rcvif = NULL;
94625271Sjkh	}
94725271Sjkh
94825271Sjkh#ifdef PFIL_HOOKS
94925271Sjkh	/*
95025271Sjkh	 * Run through list of hooks for output packets.
95125271Sjkh	 */
95225271Sjkh	error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT);
95325271Sjkh	if (error != 0 || m == NULL)
95425271Sjkh		goto done;
95525271Sjkh	ip6 = mtod(m, struct ip6_hdr *);
95625271Sjkh#endif /* PFIL_HOOKS */
95790110Simp
95825271Sjkh	/*
95925271Sjkh	 * Send the packet to the outgoing interface.
96025271Sjkh	 * If necessary, do IPv6 fragmentation before sending.
96125271Sjkh	 *
96225271Sjkh	 * the logic here is rather complex:
96325271Sjkh	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
96425271Sjkh	 * 1-a:	send as is if tlen <= path mtu
965109504Sjmallett	 * 1-b:	fragment if tlen > path mtu
9661556Srgrimes	 *
9671556Srgrimes	 * 2: if user asks us not to fragment (dontfrag == 1)
9681556Srgrimes	 * 2-a:	send as is if tlen <= interface mtu
9691556Srgrimes	 * 2-b:	error if tlen > interface mtu
9701556Srgrimes	 *
9711556Srgrimes	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
9721556Srgrimes	 *	always fragment
97390143Smarkm	 *
97490110Simp	 * 4: if dontfrag == 1 && alwaysfrag == 1
975127542Sgad	 *	error, as we cannot handle this conflicting request
9761556Srgrimes	 */
97790143Smarkm	tlen = m->m_pkthdr.len;
9781556Srgrimes
97990143Smarkm	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
9801556Srgrimes		dontfrag = 1;
9811556Srgrimes	else
9821556Srgrimes		dontfrag = 0;
98371578Sjhb	if (dontfrag && alwaysfrag) {	/* case 4 */
98431552Sdyson		/* conflicting request - can't transmit */
9851556Srgrimes		error = EMSGSIZE;
98690110Simp		goto bad;
9871556Srgrimes	}
9881556Srgrimes	if (dontfrag && tlen > IN6_LINKMTU(ifp)) {	/* case 2-b */
98971578Sjhb		/*
9901556Srgrimes		 * Even if the DONTFRAG option is specified, we cannot send the
9911556Srgrimes		 * packet when the data length is larger than the MTU of the
9921556Srgrimes		 * outgoing interface.
9931556Srgrimes		 * Notify the error by sending IPV6_PATHMTU ancillary data as
9941556Srgrimes		 * well as returning an error code (the latter is not described
9951556Srgrimes		 * in the API spec.)
99669896Smckusick		 */
9971556Srgrimes		u_int32_t mtu32;
99869896Smckusick		struct ip6ctlparam ip6cp;
9991556Srgrimes
10001556Srgrimes		mtu32 = (u_int32_t)mtu;
10011556Srgrimes		bzero(&ip6cp, sizeof(ip6cp));
100269896Smckusick		ip6cp.ip6c_cmdarg = (void *)&mtu32;
100390143Smarkm		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
100490143Smarkm		    (void *)&ip6cp);
100531552Sdyson
100690143Smarkm		error = EMSGSIZE;
100753276Speter		goto bad;
100853276Speter	}
100953276Speter
101053276Speter	/*
101190143Smarkm	 * transmit packet without fragmentation
101253276Speter	 */
101353276Speter	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
101453276Speter		struct in6_ifaddr *ia6;
101553276Speter
101653276Speter		ip6 = mtod(m, struct ip6_hdr *);
101753276Speter		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
10181556Srgrimes		if (ia6) {
10191556Srgrimes			/* Record statistics for this interface address. */
10201556Srgrimes			ia6->ia_ifa.if_opackets++;
102190110Simp			ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
10221556Srgrimes		}
1023127596Sgad#ifdef IPSEC
1024127596Sgad		/* clean ipsec history once it goes out of the node */
1025127596Sgad		ipsec_delaux(m);
10261556Srgrimes#endif
1027127596Sgad		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1028127596Sgad		goto done;
1029127596Sgad	}
1030127596Sgad
1031127596Sgad	/*
1032127596Sgad	 * try to fragment the packet.  case 1-b and 3
1033127596Sgad	 */
1034127596Sgad	if (mtu < IPV6_MMTU) {
1035127596Sgad		/* path MTU cannot be less than IPV6_MMTU */
1036127596Sgad		error = EMSGSIZE;
1037127596Sgad		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1038127596Sgad		goto bad;
1039127596Sgad	} else if (ip6->ip6_plen == 0) {
1040127596Sgad		/* jumbo payload cannot be fragmented */
1041127596Sgad		error = EMSGSIZE;
1042127596Sgad		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1043127596Sgad		goto bad;
1044127596Sgad	} else {
1045127596Sgad		struct mbuf **mnext, *m_frgpart;
1046127596Sgad		struct ip6_frag *ip6f;
1047127596Sgad#ifdef RANDOM_IP_ID
1048127596Sgad		u_int32_t id = htonl(ip6_randomid());
1049127596Sgad#else
1050127596Sgad		u_int32_t id = htonl(ip6_id++);
1051127596Sgad#endif
1052127596Sgad		u_char nextproto;
1053127596Sgad		struct ip6ctlparam ip6cp;
1054127596Sgad		u_int32_t mtu32;
1055127596Sgad
1056127596Sgad		/*
1057127596Sgad		 * Too large for the destination or interface;
1058127596Sgad		 * fragment if possible.
1059127596Sgad		 * Must be able to put at least 8 bytes per fragment.
1060127596Sgad		 */
1061127596Sgad		hlen = unfragpartlen;
1062127596Sgad		if (mtu > IPV6_MAXPACKET)
1063127596Sgad			mtu = IPV6_MAXPACKET;
1064127596Sgad
1065127596Sgad		/* Notify a proper path MTU to applications. */
10661556Srgrimes		mtu32 = (u_int32_t)mtu;
10671556Srgrimes		bzero(&ip6cp, sizeof(ip6cp));
10681556Srgrimes		ip6cp.ip6c_cmdarg = (void *)&mtu32;
10691556Srgrimes		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
10701556Srgrimes		    (void *)&ip6cp);
10711556Srgrimes
10721556Srgrimes		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
10731556Srgrimes		if (len < 8) {
10741556Srgrimes			error = EMSGSIZE;
10751556Srgrimes			in6_ifstat_inc(ifp, ifs6_out_fragfail);
10761556Srgrimes			goto bad;
10771556Srgrimes		}
10781556Srgrimes
10791556Srgrimes		mnext = &m->m_nextpkt;
1080129914Sgad
10811556Srgrimes		/*
10821556Srgrimes		 * Change the next header field of the last header in the
1083129914Sgad		 * unfragmentable part.
10841556Srgrimes		 */
1085102886Sjmallett		if (exthdrs.ip6e_rthdr) {
1086129914Sgad			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1087129914Sgad			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1088102886Sjmallett		} else if (exthdrs.ip6e_dest1) {
1089129914Sgad			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1090129914Sgad			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1091129914Sgad		} else if (exthdrs.ip6e_hbh) {
1092129914Sgad			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1093129914Sgad			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1094129914Sgad		} else {
1095129914Sgad			nextproto = ip6->ip6_nxt;
1096129914Sgad			ip6->ip6_nxt = IPPROTO_FRAGMENT;
1097129914Sgad		}
1098129914Sgad
1099129914Sgad		/*
1100129914Sgad		 * Loop through length of segment after first fragment,
1101102886Sjmallett		 * make new header and copy data of each part and link onto
11021556Srgrimes		 * chain.
11031556Srgrimes		 */
11041556Srgrimes		m0 = m;
11051556Srgrimes		for (off = hlen; off < tlen; off += len) {
110681743Sbrian			MGETHDR(m, M_DONTWAIT, MT_HEADER);
1107129634Sgad			if (!m) {
1108129634Sgad				error = ENOBUFS;
1109129634Sgad				ip6stat.ip6s_odropped++;
1110129914Sgad				goto sendorfree;
1111129914Sgad			}
1112129914Sgad			m->m_pkthdr.rcvif = NULL;
11131556Srgrimes			m->m_flags = m0->m_flags & M_COPYFLAGS;
1114129914Sgad			*mnext = m;
1115129914Sgad			mnext = &m->m_nextpkt;
1116129914Sgad			m->m_data += max_linkhdr;
1117129914Sgad			mhip6 = mtod(m, struct ip6_hdr *);
11181556Srgrimes			*mhip6 = *ip6;
1119129914Sgad			m->m_len = sizeof(*mhip6);
11201556Srgrimes			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1121129914Sgad			if (error) {
1122129914Sgad				ip6stat.ip6s_odropped++;
1123129914Sgad				goto sendorfree;
1124129914Sgad			}
11251556Srgrimes			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1126129914Sgad			if (off + len >= tlen)
1127129914Sgad				len = tlen - off;
1128129914Sgad			else
1129129914Sgad				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1130129914Sgad			mhip6->ip6_plen = htons((u_short)(len + hlen +
11311556Srgrimes			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1132129914Sgad			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
11331556Srgrimes				error = ENOBUFS;
1134129914Sgad				ip6stat.ip6s_odropped++;
1135129914Sgad				goto sendorfree;
11361556Srgrimes			}
1137129914Sgad			m_cat(m, m_frgpart);
1138129914Sgad			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1139129914Sgad			m->m_pkthdr.rcvif = (struct ifnet *)0;
1140129914Sgad			ip6f->ip6f_reserved = 0;
1141129914Sgad			ip6f->ip6f_ident = id;
1142129914Sgad			ip6f->ip6f_nxt = nextproto;
1143129914Sgad			ip6stat.ip6s_ofragments++;
1144129914Sgad			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1145129914Sgad		}
1146129914Sgad
1147129914Sgad		in6_ifstat_inc(ifp, ifs6_out_fragok);
1148129914Sgad	}
1149129914Sgad
1150129914Sgad	/*
1151129914Sgad	 * Remove leading garbages.
1152129914Sgad	 */
1153129914Sgadsendorfree:
1154129914Sgad	m = m0->m_nextpkt;
1155129914Sgad	m0->m_nextpkt = 0;
1156129914Sgad	m_freem(m0);
1157129914Sgad	for (m0 = m; m; m = m0) {
1158129914Sgad		m0 = m->m_nextpkt;
1159129914Sgad		m->m_nextpkt = 0;
11601556Srgrimes		if (error == 0) {
1161129914Sgad 			/* Record statistics for this interface address. */
1162129914Sgad 			if (ia) {
11631556Srgrimes 				ia->ia_ifa.if_opackets++;
11641556Srgrimes 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
11651556Srgrimes 			}
11661556Srgrimes#ifdef IPSEC
11671556Srgrimes			/* clean ipsec history once it goes out of the node */
116890110Simp			ipsec_delaux(m);
11691556Srgrimes#endif
1170127507Sgad			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
11711556Srgrimes		} else
1172127499Sgad			m_freem(m);
1173127507Sgad	}
1174127499Sgad
1175127499Sgad	if (error == 0)
117626465Scharnier		ip6stat.ip6s_fragmented++;
11771556Srgrimes
11781556Srgrimesdone:
1179	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1180		RTFREE(ro->ro_rt);
1181	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1182		RTFREE(ro_pmtu->ro_rt);
1183	}
1184
1185#ifdef IPSEC
1186	if (sp != NULL)
1187		key_freesp(sp);
1188#endif /* IPSEC */
1189#ifdef FAST_IPSEC
1190	if (sp != NULL)
1191		KEY_FREESP(&sp);
1192#endif /* FAST_IPSEC */
1193
1194	return (error);
1195
1196freehdrs:
1197	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1198	m_freem(exthdrs.ip6e_dest1);
1199	m_freem(exthdrs.ip6e_rthdr);
1200	m_freem(exthdrs.ip6e_dest2);
1201	/* FALLTHROUGH */
1202bad:
1203	m_freem(m);
1204	goto done;
1205}
1206
1207static int
1208ip6_copyexthdr(mp, hdr, hlen)
1209	struct mbuf **mp;
1210	caddr_t hdr;
1211	int hlen;
1212{
1213	struct mbuf *m;
1214
1215	if (hlen > MCLBYTES)
1216		return (ENOBUFS); /* XXX */
1217
1218	MGET(m, M_DONTWAIT, MT_DATA);
1219	if (!m)
1220		return (ENOBUFS);
1221
1222	if (hlen > MLEN) {
1223		MCLGET(m, M_DONTWAIT);
1224		if ((m->m_flags & M_EXT) == 0) {
1225			m_free(m);
1226			return (ENOBUFS);
1227		}
1228	}
1229	m->m_len = hlen;
1230	if (hdr)
1231		bcopy(hdr, mtod(m, caddr_t), hlen);
1232
1233	*mp = m;
1234	return (0);
1235}
1236
1237/*
1238 * Insert jumbo payload option.
1239 */
1240static int
1241ip6_insert_jumboopt(exthdrs, plen)
1242	struct ip6_exthdrs *exthdrs;
1243	u_int32_t plen;
1244{
1245	struct mbuf *mopt;
1246	u_char *optbuf;
1247	u_int32_t v;
1248
1249#define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1250
1251	/*
1252	 * If there is no hop-by-hop options header, allocate new one.
1253	 * If there is one but it doesn't have enough space to store the
1254	 * jumbo payload option, allocate a cluster to store the whole options.
1255	 * Otherwise, use it to store the options.
1256	 */
1257	if (exthdrs->ip6e_hbh == 0) {
1258		MGET(mopt, M_DONTWAIT, MT_DATA);
1259		if (mopt == 0)
1260			return (ENOBUFS);
1261		mopt->m_len = JUMBOOPTLEN;
1262		optbuf = mtod(mopt, u_char *);
1263		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1264		exthdrs->ip6e_hbh = mopt;
1265	} else {
1266		struct ip6_hbh *hbh;
1267
1268		mopt = exthdrs->ip6e_hbh;
1269		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1270			/*
1271			 * XXX assumption:
1272			 * - exthdrs->ip6e_hbh is not referenced from places
1273			 *   other than exthdrs.
1274			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1275			 */
1276			int oldoptlen = mopt->m_len;
1277			struct mbuf *n;
1278
1279			/*
1280			 * XXX: give up if the whole (new) hbh header does
1281			 * not fit even in an mbuf cluster.
1282			 */
1283			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1284				return (ENOBUFS);
1285
1286			/*
1287			 * As a consequence, we must always prepare a cluster
1288			 * at this point.
1289			 */
1290			MGET(n, M_DONTWAIT, MT_DATA);
1291			if (n) {
1292				MCLGET(n, M_DONTWAIT);
1293				if ((n->m_flags & M_EXT) == 0) {
1294					m_freem(n);
1295					n = NULL;
1296				}
1297			}
1298			if (!n)
1299				return (ENOBUFS);
1300			n->m_len = oldoptlen + JUMBOOPTLEN;
1301			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1302			    oldoptlen);
1303			optbuf = mtod(n, caddr_t) + oldoptlen;
1304			m_freem(mopt);
1305			mopt = exthdrs->ip6e_hbh = n;
1306		} else {
1307			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1308			mopt->m_len += JUMBOOPTLEN;
1309		}
1310		optbuf[0] = IP6OPT_PADN;
1311		optbuf[1] = 1;
1312
1313		/*
1314		 * Adjust the header length according to the pad and
1315		 * the jumbo payload option.
1316		 */
1317		hbh = mtod(mopt, struct ip6_hbh *);
1318		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1319	}
1320
1321	/* fill in the option. */
1322	optbuf[2] = IP6OPT_JUMBO;
1323	optbuf[3] = 4;
1324	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1325	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1326
1327	/* finally, adjust the packet header length */
1328	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1329
1330	return (0);
1331#undef JUMBOOPTLEN
1332}
1333
1334/*
1335 * Insert fragment header and copy unfragmentable header portions.
1336 */
1337static int
1338ip6_insertfraghdr(m0, m, hlen, frghdrp)
1339	struct mbuf *m0, *m;
1340	int hlen;
1341	struct ip6_frag **frghdrp;
1342{
1343	struct mbuf *n, *mlast;
1344
1345	if (hlen > sizeof(struct ip6_hdr)) {
1346		n = m_copym(m0, sizeof(struct ip6_hdr),
1347		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1348		if (n == 0)
1349			return (ENOBUFS);
1350		m->m_next = n;
1351	} else
1352		n = m;
1353
1354	/* Search for the last mbuf of unfragmentable part. */
1355	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1356		;
1357
1358	if ((mlast->m_flags & M_EXT) == 0 &&
1359	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1360		/* use the trailing space of the last mbuf for the fragment hdr */
1361		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1362		    mlast->m_len);
1363		mlast->m_len += sizeof(struct ip6_frag);
1364		m->m_pkthdr.len += sizeof(struct ip6_frag);
1365	} else {
1366		/* allocate a new mbuf for the fragment header */
1367		struct mbuf *mfrg;
1368
1369		MGET(mfrg, M_DONTWAIT, MT_DATA);
1370		if (mfrg == 0)
1371			return (ENOBUFS);
1372		mfrg->m_len = sizeof(struct ip6_frag);
1373		*frghdrp = mtod(mfrg, struct ip6_frag *);
1374		mlast->m_next = mfrg;
1375	}
1376
1377	return (0);
1378}
1379
1380static int
1381ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
1382	struct route_in6 *ro_pmtu, *ro;
1383	struct ifnet *ifp;
1384	struct in6_addr *dst;
1385	u_long *mtup;
1386	int *alwaysfragp;
1387{
1388	u_int32_t mtu = 0;
1389	int alwaysfrag = 0;
1390	int error = 0;
1391
1392	if (ro_pmtu != ro) {
1393		/* The first hop and the final destination may differ. */
1394		struct sockaddr_in6 *sa6_dst =
1395		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1396		if (ro_pmtu->ro_rt &&
1397		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1398		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1399			RTFREE(ro_pmtu->ro_rt);
1400			ro_pmtu->ro_rt = (struct rtentry *)NULL;
1401		}
1402		if (ro_pmtu->ro_rt == NULL) {
1403			bzero(sa6_dst, sizeof(*sa6_dst));
1404			sa6_dst->sin6_family = AF_INET6;
1405			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1406			sa6_dst->sin6_addr = *dst;
1407
1408			rtalloc((struct route *)ro_pmtu);
1409		}
1410	}
1411	if (ro_pmtu->ro_rt) {
1412		u_int32_t ifmtu;
1413		struct in_conninfo inc;
1414
1415		bzero(&inc, sizeof(inc));
1416		inc.inc_flags = 1; /* IPv6 */
1417		inc.inc6_faddr = *dst;
1418
1419		if (ifp == NULL)
1420			ifp = ro_pmtu->ro_rt->rt_ifp;
1421		ifmtu = IN6_LINKMTU(ifp);
1422		mtu = tcp_hc_getmtu(&inc);
1423		if (mtu)
1424			mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
1425		else
1426			mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1427		if (mtu == 0)
1428			mtu = ifmtu;
1429		else if (mtu < IPV6_MMTU) {
1430			/*
1431			 * RFC2460 section 5, last paragraph:
1432			 * if we record ICMPv6 too big message with
1433			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1434			 * or smaller, with framgent header attached.
1435			 * (fragment header is needed regardless from the
1436			 * packet size, for translators to identify packets)
1437			 */
1438			alwaysfrag = 1;
1439			mtu = IPV6_MMTU;
1440		} else if (mtu > ifmtu) {
1441			/*
1442			 * The MTU on the route is larger than the MTU on
1443			 * the interface!  This shouldn't happen, unless the
1444			 * MTU of the interface has been changed after the
1445			 * interface was brought up.  Change the MTU in the
1446			 * route to match the interface MTU (as long as the
1447			 * field isn't locked).
1448			 */
1449			mtu = ifmtu;
1450			ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1451		}
1452	} else if (ifp) {
1453		mtu = IN6_LINKMTU(ifp);
1454	} else
1455		error = EHOSTUNREACH; /* XXX */
1456
1457	*mtup = mtu;
1458	if (alwaysfragp)
1459		*alwaysfragp = alwaysfrag;
1460	return (error);
1461}
1462
1463/*
1464 * IP6 socket option processing.
1465 */
1466int
1467ip6_ctloutput(so, sopt)
1468	struct socket *so;
1469	struct sockopt *sopt;
1470{
1471	int privileged, optdatalen, uproto;
1472	void *optdata;
1473	struct inpcb *in6p = sotoinpcb(so);
1474	int error, optval;
1475	int level, op, optname;
1476	int optlen;
1477	struct thread *td;
1478
1479	if (sopt) {
1480		level = sopt->sopt_level;
1481		op = sopt->sopt_dir;
1482		optname = sopt->sopt_name;
1483		optlen = sopt->sopt_valsize;
1484		td = sopt->sopt_td;
1485	} else {
1486		panic("ip6_ctloutput: arg soopt is NULL");
1487	}
1488	error = optval = 0;
1489
1490	privileged = (td == 0 || suser(td)) ? 0 : 1;
1491	uproto = (int)so->so_proto->pr_protocol;
1492
1493	if (level == IPPROTO_IPV6) {
1494		switch (op) {
1495
1496		case SOPT_SET:
1497			switch (optname) {
1498			case IPV6_2292PKTOPTIONS:
1499#ifdef IPV6_PKTOPTIONS
1500			case IPV6_PKTOPTIONS:
1501#endif
1502			{
1503				struct mbuf *m;
1504
1505				error = soopt_getm(sopt, &m); /* XXX */
1506				if (error != 0)
1507					break;
1508				error = soopt_mcopyin(sopt, m); /* XXX */
1509				if (error != 0)
1510					break;
1511				error = ip6_pcbopts(&in6p->in6p_outputopts,
1512						    m, so, sopt);
1513				m_freem(m); /* XXX */
1514				break;
1515			}
1516
1517			/*
1518			 * Use of some Hop-by-Hop options or some
1519			 * Destination options, might require special
1520			 * privilege.  That is, normal applications
1521			 * (without special privilege) might be forbidden
1522			 * from setting certain options in outgoing packets,
1523			 * and might never see certain options in received
1524			 * packets. [RFC 2292 Section 6]
1525			 * KAME specific note:
1526			 *  KAME prevents non-privileged users from sending or
1527			 *  receiving ANY hbh/dst options in order to avoid
1528			 *  overhead of parsing options in the kernel.
1529			 */
1530			case IPV6_RECVHOPOPTS:
1531			case IPV6_RECVDSTOPTS:
1532			case IPV6_RECVRTHDRDSTOPTS:
1533				if (!privileged) {
1534					error = EPERM;
1535					break;
1536				}
1537				/* FALLTHROUGH */
1538			case IPV6_UNICAST_HOPS:
1539			case IPV6_HOPLIMIT:
1540			case IPV6_FAITH:
1541
1542			case IPV6_RECVPKTINFO:
1543			case IPV6_RECVHOPLIMIT:
1544			case IPV6_RECVRTHDR:
1545			case IPV6_RECVPATHMTU:
1546			case IPV6_RECVTCLASS:
1547			case IPV6_V6ONLY:
1548			case IPV6_AUTOFLOWLABEL:
1549				if (optlen != sizeof(int)) {
1550					error = EINVAL;
1551					break;
1552				}
1553				error = sooptcopyin(sopt, &optval,
1554					sizeof optval, sizeof optval);
1555				if (error)
1556					break;
1557				switch (optname) {
1558
1559				case IPV6_UNICAST_HOPS:
1560					if (optval < -1 || optval >= 256)
1561						error = EINVAL;
1562					else {
1563						/* -1 = kernel default */
1564						in6p->in6p_hops = optval;
1565						if ((in6p->in6p_vflag &
1566						     INP_IPV4) != 0)
1567							in6p->inp_ip_ttl = optval;
1568					}
1569					break;
1570#define OPTSET(bit) \
1571do { \
1572	if (optval) \
1573		in6p->in6p_flags |= (bit); \
1574	else \
1575		in6p->in6p_flags &= ~(bit); \
1576} while (/*CONSTCOND*/ 0)
1577#define OPTSET2292(bit) \
1578do { \
1579	in6p->in6p_flags |= IN6P_RFC2292; \
1580	if (optval) \
1581		in6p->in6p_flags |= (bit); \
1582	else \
1583		in6p->in6p_flags &= ~(bit); \
1584} while (/*CONSTCOND*/ 0)
1585#define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1586
1587				case IPV6_RECVPKTINFO:
1588					/* cannot mix with RFC2292 */
1589					if (OPTBIT(IN6P_RFC2292)) {
1590						error = EINVAL;
1591						break;
1592					}
1593					OPTSET(IN6P_PKTINFO);
1594					break;
1595
1596				case IPV6_HOPLIMIT:
1597				{
1598					struct ip6_pktopts **optp;
1599
1600					/* cannot mix with RFC2292 */
1601					if (OPTBIT(IN6P_RFC2292)) {
1602						error = EINVAL;
1603						break;
1604					}
1605					optp = &in6p->in6p_outputopts;
1606					error = ip6_pcbopt(IPV6_HOPLIMIT,
1607							   (u_char *)&optval,
1608							   sizeof(optval),
1609							   optp,
1610							   privileged, uproto);
1611					break;
1612				}
1613
1614				case IPV6_RECVHOPLIMIT:
1615					/* cannot mix with RFC2292 */
1616					if (OPTBIT(IN6P_RFC2292)) {
1617						error = EINVAL;
1618						break;
1619					}
1620					OPTSET(IN6P_HOPLIMIT);
1621					break;
1622
1623				case IPV6_RECVHOPOPTS:
1624					/* cannot mix with RFC2292 */
1625					if (OPTBIT(IN6P_RFC2292)) {
1626						error = EINVAL;
1627						break;
1628					}
1629					OPTSET(IN6P_HOPOPTS);
1630					break;
1631
1632				case IPV6_RECVDSTOPTS:
1633					/* cannot mix with RFC2292 */
1634					if (OPTBIT(IN6P_RFC2292)) {
1635						error = EINVAL;
1636						break;
1637					}
1638					OPTSET(IN6P_DSTOPTS);
1639					break;
1640
1641				case IPV6_RECVRTHDRDSTOPTS:
1642					/* cannot mix with RFC2292 */
1643					if (OPTBIT(IN6P_RFC2292)) {
1644						error = EINVAL;
1645						break;
1646					}
1647					OPTSET(IN6P_RTHDRDSTOPTS);
1648					break;
1649
1650				case IPV6_RECVRTHDR:
1651					/* cannot mix with RFC2292 */
1652					if (OPTBIT(IN6P_RFC2292)) {
1653						error = EINVAL;
1654						break;
1655					}
1656					OPTSET(IN6P_RTHDR);
1657					break;
1658
1659				case IPV6_FAITH:
1660					OPTSET(IN6P_FAITH);
1661					break;
1662
1663				case IPV6_RECVPATHMTU:
1664					/*
1665					 * We ignore this option for TCP
1666					 * sockets.
1667					 * (rfc2292bis leaves this case
1668					 * unspecified.)
1669					 */
1670					if (uproto != IPPROTO_TCP)
1671						OPTSET(IN6P_MTU);
1672					break;
1673
1674				case IPV6_V6ONLY:
1675					/*
1676					 * make setsockopt(IPV6_V6ONLY)
1677					 * available only prior to bind(2).
1678					 * see ipng mailing list, Jun 22 2001.
1679					 */
1680					if (in6p->in6p_lport ||
1681					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1682						error = EINVAL;
1683						break;
1684					}
1685					OPTSET(IN6P_IPV6_V6ONLY);
1686					if (optval)
1687						in6p->in6p_vflag &= ~INP_IPV4;
1688					else
1689						in6p->in6p_vflag |= INP_IPV4;
1690					break;
1691				case IPV6_RECVTCLASS:
1692					/* cannot mix with RFC2292 XXX */
1693					if (OPTBIT(IN6P_RFC2292)) {
1694						error = EINVAL;
1695						break;
1696					}
1697					OPTSET(IN6P_TCLASS);
1698					break;
1699				case IPV6_AUTOFLOWLABEL:
1700					OPTSET(IN6P_AUTOFLOWLABEL);
1701					break;
1702
1703				}
1704				break;
1705
1706			case IPV6_TCLASS:
1707			case IPV6_DONTFRAG:
1708			case IPV6_USE_MIN_MTU:
1709			case IPV6_PREFER_TEMPADDR:
1710				if (optlen != sizeof(optval)) {
1711					error = EINVAL;
1712					break;
1713				}
1714				error = sooptcopyin(sopt, &optval,
1715					sizeof optval, sizeof optval);
1716				if (error)
1717					break;
1718				{
1719					struct ip6_pktopts **optp;
1720					optp = &in6p->in6p_outputopts;
1721					error = ip6_pcbopt(optname,
1722							   (u_char *)&optval,
1723							   sizeof(optval),
1724							   optp,
1725							   privileged, uproto);
1726					break;
1727				}
1728
1729			case IPV6_2292PKTINFO:
1730			case IPV6_2292HOPLIMIT:
1731			case IPV6_2292HOPOPTS:
1732			case IPV6_2292DSTOPTS:
1733			case IPV6_2292RTHDR:
1734				/* RFC 2292 */
1735				if (optlen != sizeof(int)) {
1736					error = EINVAL;
1737					break;
1738				}
1739				error = sooptcopyin(sopt, &optval,
1740					sizeof optval, sizeof optval);
1741				if (error)
1742					break;
1743				switch (optname) {
1744				case IPV6_2292PKTINFO:
1745					OPTSET2292(IN6P_PKTINFO);
1746					break;
1747				case IPV6_2292HOPLIMIT:
1748					OPTSET2292(IN6P_HOPLIMIT);
1749					break;
1750				case IPV6_2292HOPOPTS:
1751					/*
1752					 * Check super-user privilege.
1753					 * See comments for IPV6_RECVHOPOPTS.
1754					 */
1755					if (!privileged)
1756						return (EPERM);
1757					OPTSET2292(IN6P_HOPOPTS);
1758					break;
1759				case IPV6_2292DSTOPTS:
1760					if (!privileged)
1761						return (EPERM);
1762					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1763					break;
1764				case IPV6_2292RTHDR:
1765					OPTSET2292(IN6P_RTHDR);
1766					break;
1767				}
1768				break;
1769			case IPV6_PKTINFO:
1770			case IPV6_HOPOPTS:
1771			case IPV6_RTHDR:
1772			case IPV6_DSTOPTS:
1773			case IPV6_RTHDRDSTOPTS:
1774			case IPV6_NEXTHOP:
1775			{
1776				/* new advanced API (2292bis) */
1777				u_char *optbuf;
1778				int optlen;
1779				struct ip6_pktopts **optp;
1780
1781				/* cannot mix with RFC2292 */
1782				if (OPTBIT(IN6P_RFC2292)) {
1783					error = EINVAL;
1784					break;
1785				}
1786
1787				optbuf = sopt->sopt_val;
1788				optlen = sopt->sopt_valsize;
1789				optp = &in6p->in6p_outputopts;
1790				error = ip6_pcbopt(optname,
1791						   optbuf, optlen,
1792						   optp, privileged, uproto);
1793				break;
1794			}
1795#undef OPTSET
1796
1797			case IPV6_MULTICAST_IF:
1798			case IPV6_MULTICAST_HOPS:
1799			case IPV6_MULTICAST_LOOP:
1800			case IPV6_JOIN_GROUP:
1801			case IPV6_LEAVE_GROUP:
1802			    {
1803				if (sopt->sopt_valsize > MLEN) {
1804					error = EMSGSIZE;
1805					break;
1806				}
1807				/* XXX */
1808			    }
1809			    /* FALLTHROUGH */
1810			    {
1811				struct mbuf *m;
1812
1813				if (sopt->sopt_valsize > MCLBYTES) {
1814					error = EMSGSIZE;
1815					break;
1816				}
1817				/* XXX */
1818				MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_HEADER);
1819				if (m == 0) {
1820					error = ENOBUFS;
1821					break;
1822				}
1823				if (sopt->sopt_valsize > MLEN) {
1824					MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
1825					if ((m->m_flags & M_EXT) == 0) {
1826						m_free(m);
1827						error = ENOBUFS;
1828						break;
1829					}
1830				}
1831				m->m_len = sopt->sopt_valsize;
1832				error = sooptcopyin(sopt, mtod(m, char *),
1833						    m->m_len, m->m_len);
1834				if (error) {
1835					(void)m_free(m);
1836					break;
1837				}
1838				error =	ip6_setmoptions(sopt->sopt_name,
1839							&in6p->in6p_moptions,
1840							m);
1841				(void)m_free(m);
1842			    }
1843				break;
1844
1845			case IPV6_PORTRANGE:
1846				error = sooptcopyin(sopt, &optval,
1847				    sizeof optval, sizeof optval);
1848				if (error)
1849					break;
1850
1851				switch (optval) {
1852				case IPV6_PORTRANGE_DEFAULT:
1853					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1854					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1855					break;
1856
1857				case IPV6_PORTRANGE_HIGH:
1858					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1859					in6p->in6p_flags |= IN6P_HIGHPORT;
1860					break;
1861
1862				case IPV6_PORTRANGE_LOW:
1863					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1864					in6p->in6p_flags |= IN6P_LOWPORT;
1865					break;
1866
1867				default:
1868					error = EINVAL;
1869					break;
1870				}
1871				break;
1872
1873#if defined(IPSEC) || defined(FAST_IPSEC)
1874			case IPV6_IPSEC_POLICY:
1875			    {
1876				caddr_t req = NULL;
1877				size_t len = 0;
1878				struct mbuf *m;
1879
1880				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1881					break;
1882				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1883					break;
1884				if (m) {
1885					req = mtod(m, caddr_t);
1886					len = m->m_len;
1887				}
1888				error = ipsec6_set_policy(in6p, optname, req,
1889							  len, privileged);
1890				m_freem(m);
1891			    }
1892				break;
1893#endif /* KAME IPSEC */
1894
1895			case IPV6_FW_ADD:
1896			case IPV6_FW_DEL:
1897			case IPV6_FW_FLUSH:
1898			case IPV6_FW_ZERO:
1899			    {
1900				struct mbuf *m;
1901				struct mbuf **mp = &m;
1902
1903				if (ip6_fw_ctl_ptr == NULL)
1904					return EINVAL;
1905				/* XXX */
1906				if ((error = soopt_getm(sopt, &m)) != 0)
1907					break;
1908				/* XXX */
1909				if ((error = soopt_mcopyin(sopt, m)) != 0)
1910					break;
1911				error = (*ip6_fw_ctl_ptr)(optname, mp);
1912				m = *mp;
1913			    }
1914				break;
1915
1916			default:
1917				error = ENOPROTOOPT;
1918				break;
1919			}
1920			break;
1921
1922		case SOPT_GET:
1923			switch (optname) {
1924
1925			case IPV6_2292PKTOPTIONS:
1926#ifdef IPV6_PKTOPTIONS
1927			case IPV6_PKTOPTIONS:
1928#endif
1929				/*
1930				 * RFC3542 (effectively) deprecated the
1931				 * semantics of the 2292-style pktoptions.
1932				 * Since it was not reliable in nature (i.e.,
1933				 * applications had to expect the lack of some
1934				 * information after all), it would make sense
1935				 * to simplify this part by always returning
1936				 * empty data.
1937				 */
1938				sopt->sopt_valsize = 0;
1939				break;
1940
1941			case IPV6_RECVHOPOPTS:
1942			case IPV6_RECVDSTOPTS:
1943			case IPV6_RECVRTHDRDSTOPTS:
1944			case IPV6_UNICAST_HOPS:
1945			case IPV6_RECVPKTINFO:
1946			case IPV6_RECVHOPLIMIT:
1947			case IPV6_RECVRTHDR:
1948			case IPV6_RECVPATHMTU:
1949
1950			case IPV6_FAITH:
1951			case IPV6_V6ONLY:
1952			case IPV6_PORTRANGE:
1953			case IPV6_RECVTCLASS:
1954			case IPV6_AUTOFLOWLABEL:
1955				switch (optname) {
1956
1957				case IPV6_RECVHOPOPTS:
1958					optval = OPTBIT(IN6P_HOPOPTS);
1959					break;
1960
1961				case IPV6_RECVDSTOPTS:
1962					optval = OPTBIT(IN6P_DSTOPTS);
1963					break;
1964
1965				case IPV6_RECVRTHDRDSTOPTS:
1966					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1967					break;
1968
1969				case IPV6_UNICAST_HOPS:
1970					optval = in6p->in6p_hops;
1971					break;
1972
1973				case IPV6_RECVPKTINFO:
1974					optval = OPTBIT(IN6P_PKTINFO);
1975					break;
1976
1977				case IPV6_RECVHOPLIMIT:
1978					optval = OPTBIT(IN6P_HOPLIMIT);
1979					break;
1980
1981				case IPV6_RECVRTHDR:
1982					optval = OPTBIT(IN6P_RTHDR);
1983					break;
1984
1985				case IPV6_RECVPATHMTU:
1986					optval = OPTBIT(IN6P_MTU);
1987					break;
1988
1989				case IPV6_FAITH:
1990					optval = OPTBIT(IN6P_FAITH);
1991					break;
1992
1993				case IPV6_V6ONLY:
1994					optval = OPTBIT(IN6P_IPV6_V6ONLY);
1995					break;
1996
1997				case IPV6_PORTRANGE:
1998				    {
1999					int flags;
2000					flags = in6p->in6p_flags;
2001					if (flags & IN6P_HIGHPORT)
2002						optval = IPV6_PORTRANGE_HIGH;
2003					else if (flags & IN6P_LOWPORT)
2004						optval = IPV6_PORTRANGE_LOW;
2005					else
2006						optval = 0;
2007					break;
2008				    }
2009				case IPV6_RECVTCLASS:
2010					optval = OPTBIT(IN6P_TCLASS);
2011					break;
2012
2013				case IPV6_AUTOFLOWLABEL:
2014					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2015					break;
2016				}
2017				if (error)
2018					break;
2019				error = sooptcopyout(sopt, &optval,
2020					sizeof optval);
2021				break;
2022
2023			case IPV6_PATHMTU:
2024			{
2025				u_long pmtu = 0;
2026				struct ip6_mtuinfo mtuinfo;
2027				struct route_in6 sro;
2028
2029				bzero(&sro, sizeof(sro));
2030
2031				if (!(so->so_state & SS_ISCONNECTED))
2032					return (ENOTCONN);
2033				/*
2034				 * XXX: we dot not consider the case of source
2035				 * routing, or optional information to specify
2036				 * the outgoing interface.
2037				 */
2038				error = ip6_getpmtu(&sro, NULL, NULL,
2039				    &in6p->in6p_faddr, &pmtu, NULL);
2040				if (sro.ro_rt)
2041					RTFREE(sro.ro_rt);
2042				if (error)
2043					break;
2044				if (pmtu > IPV6_MAXPACKET)
2045					pmtu = IPV6_MAXPACKET;
2046
2047				bzero(&mtuinfo, sizeof(mtuinfo));
2048				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2049				optdata = (void *)&mtuinfo;
2050				optdatalen = sizeof(mtuinfo);
2051				error = sooptcopyout(sopt, optdata,
2052				    optdatalen);
2053				break;
2054			}
2055
2056			case IPV6_2292PKTINFO:
2057			case IPV6_2292HOPLIMIT:
2058			case IPV6_2292HOPOPTS:
2059			case IPV6_2292RTHDR:
2060			case IPV6_2292DSTOPTS:
2061				switch (optname) {
2062				case IPV6_2292PKTINFO:
2063					optval = OPTBIT(IN6P_PKTINFO);
2064					break;
2065				case IPV6_2292HOPLIMIT:
2066					optval = OPTBIT(IN6P_HOPLIMIT);
2067					break;
2068				case IPV6_2292HOPOPTS:
2069					optval = OPTBIT(IN6P_HOPOPTS);
2070					break;
2071				case IPV6_2292RTHDR:
2072					optval = OPTBIT(IN6P_RTHDR);
2073					break;
2074				case IPV6_2292DSTOPTS:
2075					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2076					break;
2077				}
2078				error = sooptcopyout(sopt, &optval,
2079				    sizeof optval);
2080				break;
2081			case IPV6_PKTINFO:
2082			case IPV6_HOPOPTS:
2083			case IPV6_RTHDR:
2084			case IPV6_DSTOPTS:
2085			case IPV6_RTHDRDSTOPTS:
2086			case IPV6_NEXTHOP:
2087			case IPV6_TCLASS:
2088			case IPV6_DONTFRAG:
2089			case IPV6_USE_MIN_MTU:
2090			case IPV6_PREFER_TEMPADDR:
2091				error = ip6_getpcbopt(in6p->in6p_outputopts,
2092				    optname, sopt);
2093				break;
2094
2095			case IPV6_MULTICAST_IF:
2096			case IPV6_MULTICAST_HOPS:
2097			case IPV6_MULTICAST_LOOP:
2098			case IPV6_JOIN_GROUP:
2099			case IPV6_LEAVE_GROUP:
2100			    {
2101				struct mbuf *m;
2102				error = ip6_getmoptions(sopt->sopt_name,
2103				    in6p->in6p_moptions, &m);
2104				if (error == 0)
2105					error = sooptcopyout(sopt,
2106					    mtod(m, char *), m->m_len);
2107				m_freem(m);
2108			    }
2109				break;
2110
2111#if defined(IPSEC) || defined(FAST_IPSEC)
2112			case IPV6_IPSEC_POLICY:
2113			  {
2114				caddr_t req = NULL;
2115				size_t len = 0;
2116				struct mbuf *m = NULL;
2117				struct mbuf **mp = &m;
2118				size_t ovalsize = sopt->sopt_valsize;
2119				caddr_t oval = (caddr_t)sopt->sopt_val;
2120
2121				error = soopt_getm(sopt, &m); /* XXX */
2122				if (error != 0)
2123					break;
2124				error = soopt_mcopyin(sopt, m); /* XXX */
2125				if (error != 0)
2126					break;
2127				sopt->sopt_valsize = ovalsize;
2128				sopt->sopt_val = oval;
2129				if (m) {
2130					req = mtod(m, caddr_t);
2131					len = m->m_len;
2132				}
2133				error = ipsec6_get_policy(in6p, req, len, mp);
2134				if (error == 0)
2135					error = soopt_mcopyout(sopt, m); /* XXX */
2136				if (error == 0 && m)
2137					m_freem(m);
2138				break;
2139			  }
2140#endif /* KAME IPSEC */
2141
2142			case IPV6_FW_GET:
2143			  {
2144				struct mbuf *m;
2145				struct mbuf **mp = &m;
2146
2147				if (ip6_fw_ctl_ptr == NULL)
2148			        {
2149					return EINVAL;
2150				}
2151				error = (*ip6_fw_ctl_ptr)(optname, mp);
2152				if (error == 0)
2153					error = soopt_mcopyout(sopt, m); /* XXX */
2154				if (error == 0 && m)
2155					m_freem(m);
2156			  }
2157				break;
2158
2159			default:
2160				error = ENOPROTOOPT;
2161				break;
2162			}
2163			break;
2164		}
2165	} else {		/* level != IPPROTO_IPV6 */
2166		error = EINVAL;
2167	}
2168	return (error);
2169}
2170
2171int
2172ip6_raw_ctloutput(so, sopt)
2173	struct socket *so;
2174	struct sockopt *sopt;
2175{
2176	int error = 0, optval, optlen;
2177	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2178	struct in6pcb *in6p = sotoin6pcb(so);
2179	int level, op, optname;
2180
2181	if (sopt) {
2182		level = sopt->sopt_level;
2183		op = sopt->sopt_dir;
2184		optname = sopt->sopt_name;
2185		optlen = sopt->sopt_valsize;
2186	} else
2187		panic("ip6_raw_ctloutput: arg soopt is NULL");
2188
2189	if (level != IPPROTO_IPV6) {
2190		return (EINVAL);
2191	}
2192
2193	switch (optname) {
2194	case IPV6_CHECKSUM:
2195		/*
2196		 * For ICMPv6 sockets, no modification allowed for checksum
2197		 * offset, permit "no change" values to help existing apps.
2198		 *
2199		 * XXX 2292bis says: "An attempt to set IPV6_CHECKSUM
2200		 * for an ICMPv6 socket will fail."
2201		 * The current behavior does not meet 2292bis.
2202		 */
2203		switch (op) {
2204		case SOPT_SET:
2205			if (optlen != sizeof(int)) {
2206				error = EINVAL;
2207				break;
2208			}
2209			error = sooptcopyin(sopt, &optval, sizeof(optval),
2210					    sizeof(optval));
2211			if (error)
2212				break;
2213			if ((optval % 2) != 0) {
2214				/* the API assumes even offset values */
2215				error = EINVAL;
2216			} else if (so->so_proto->pr_protocol ==
2217			    IPPROTO_ICMPV6) {
2218				if (optval != icmp6off)
2219					error = EINVAL;
2220			} else
2221				in6p->in6p_cksum = optval;
2222			break;
2223
2224		case SOPT_GET:
2225			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2226				optval = icmp6off;
2227			else
2228				optval = in6p->in6p_cksum;
2229
2230			error = sooptcopyout(sopt, &optval, sizeof(optval));
2231			break;
2232
2233		default:
2234			error = EINVAL;
2235			break;
2236		}
2237		break;
2238
2239	default:
2240		error = ENOPROTOOPT;
2241		break;
2242	}
2243
2244	return (error);
2245}
2246
2247/*
2248 * Set up IP6 options in pcb for insertion in output packets or
2249 * specifying behavior of outgoing packets.
2250 */
2251static int
2252ip6_pcbopts(pktopt, m, so, sopt)
2253	struct ip6_pktopts **pktopt;
2254	struct mbuf *m;
2255	struct socket *so;
2256	struct sockopt *sopt;
2257{
2258	struct ip6_pktopts *opt = *pktopt;
2259	int error = 0;
2260	struct thread *td = sopt->sopt_td;
2261	int priv = 0;
2262
2263	/* turn off any old options. */
2264	if (opt) {
2265#ifdef DIAGNOSTIC
2266		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2267		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2268		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2269			printf("ip6_pcbopts: all specified options are cleared.\n");
2270#endif
2271		ip6_clearpktopts(opt, -1);
2272	} else
2273		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2274	*pktopt = NULL;
2275
2276	if (!m || m->m_len == 0) {
2277		/*
2278		 * Only turning off any previous options, regardless of
2279		 * whether the opt is just created or given.
2280		 */
2281		free(opt, M_IP6OPT);
2282		return (0);
2283	}
2284
2285	/*  set options specified by user. */
2286	if (td && !suser(td))
2287		priv = 1;
2288	if ((error = ip6_setpktoptions(m, opt, NULL, priv, 1,
2289	    so->so_proto->pr_protocol)) != 0) {
2290		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2291		free(opt, M_IP6OPT);
2292		return (error);
2293	}
2294	*pktopt = opt;
2295	return (0);
2296}
2297
2298/*
2299 * initialize ip6_pktopts.  beware that there are non-zero default values in
2300 * the struct.
2301 */
2302void
2303init_ip6pktopts(opt)
2304	struct ip6_pktopts *opt;
2305{
2306
2307	bzero(opt, sizeof(*opt));
2308	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2309	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2310	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2311	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2312}
2313
2314static int
2315ip6_pcbopt(optname, buf, len, pktopt, priv, uproto)
2316	int optname, len, priv;
2317	u_char *buf;
2318	struct ip6_pktopts **pktopt;
2319	int uproto;
2320{
2321	struct ip6_pktopts *opt;
2322
2323	if (*pktopt == NULL) {
2324		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2325		    M_WAITOK);
2326		init_ip6pktopts(*pktopt);
2327		(*pktopt)->needfree = 1;
2328	}
2329	opt = *pktopt;
2330
2331	return (ip6_setpktoption(optname, buf, len, opt, priv, 1, 0, uproto));
2332}
2333
2334static int
2335ip6_getpcbopt(pktopt, optname, sopt)
2336	struct ip6_pktopts *pktopt;
2337	struct sockopt *sopt;
2338	int optname;
2339{
2340	void *optdata = NULL;
2341	int optdatalen = 0;
2342	struct ip6_ext *ip6e;
2343	int error = 0;
2344	struct in6_pktinfo null_pktinfo;
2345	int deftclass = 0, on;
2346	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2347	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2348
2349	switch (optname) {
2350	case IPV6_PKTINFO:
2351		if (pktopt && pktopt->ip6po_pktinfo)
2352			optdata = (void *)pktopt->ip6po_pktinfo;
2353		else {
2354			/* XXX: we don't have to do this every time... */
2355			bzero(&null_pktinfo, sizeof(null_pktinfo));
2356			optdata = (void *)&null_pktinfo;
2357		}
2358		optdatalen = sizeof(struct in6_pktinfo);
2359		break;
2360	case IPV6_TCLASS:
2361		if (pktopt && pktopt->ip6po_tclass >= 0)
2362			optdata = (void *)&pktopt->ip6po_tclass;
2363		else
2364			optdata = (void *)&deftclass;
2365		optdatalen = sizeof(int);
2366		break;
2367	case IPV6_HOPOPTS:
2368		if (pktopt && pktopt->ip6po_hbh) {
2369			optdata = (void *)pktopt->ip6po_hbh;
2370			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2371			optdatalen = (ip6e->ip6e_len + 1) << 3;
2372		}
2373		break;
2374	case IPV6_RTHDR:
2375		if (pktopt && pktopt->ip6po_rthdr) {
2376			optdata = (void *)pktopt->ip6po_rthdr;
2377			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2378			optdatalen = (ip6e->ip6e_len + 1) << 3;
2379		}
2380		break;
2381	case IPV6_RTHDRDSTOPTS:
2382		if (pktopt && pktopt->ip6po_dest1) {
2383			optdata = (void *)pktopt->ip6po_dest1;
2384			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2385			optdatalen = (ip6e->ip6e_len + 1) << 3;
2386		}
2387		break;
2388	case IPV6_DSTOPTS:
2389		if (pktopt && pktopt->ip6po_dest2) {
2390			optdata = (void *)pktopt->ip6po_dest2;
2391			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2392			optdatalen = (ip6e->ip6e_len + 1) << 3;
2393		}
2394		break;
2395	case IPV6_NEXTHOP:
2396		if (pktopt && pktopt->ip6po_nexthop) {
2397			optdata = (void *)pktopt->ip6po_nexthop;
2398			optdatalen = pktopt->ip6po_nexthop->sa_len;
2399		}
2400		break;
2401	case IPV6_USE_MIN_MTU:
2402		if (pktopt)
2403			optdata = (void *)&pktopt->ip6po_minmtu;
2404		else
2405			optdata = (void *)&defminmtu;
2406		optdatalen = sizeof(int);
2407		break;
2408	case IPV6_DONTFRAG:
2409		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2410			on = 1;
2411		else
2412			on = 0;
2413		optdata = (void *)&on;
2414		optdatalen = sizeof(on);
2415		break;
2416	case IPV6_PREFER_TEMPADDR:
2417		if (pktopt)
2418			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2419		else
2420			optdata = (void *)&defpreftemp;
2421		optdatalen = sizeof(int);
2422		break;
2423	default:		/* should not happen */
2424#ifdef DIAGNOSTIC
2425		panic("ip6_getpcbopt: unexpected option\n");
2426#endif
2427		return (ENOPROTOOPT);
2428	}
2429
2430	error = sooptcopyout(sopt, optdata, optdatalen);
2431
2432	return (error);
2433}
2434
2435void
2436ip6_clearpktopts(pktopt, optname)
2437	struct ip6_pktopts *pktopt;
2438	int optname;
2439{
2440	int needfree;
2441
2442	if (pktopt == NULL)
2443		return;
2444
2445	needfree = pktopt->needfree;
2446
2447	if (optname == -1 || optname == IPV6_PKTINFO) {
2448		if (needfree && pktopt->ip6po_pktinfo)
2449			free(pktopt->ip6po_pktinfo, M_IP6OPT);
2450		pktopt->ip6po_pktinfo = NULL;
2451	}
2452	if (optname == -1 || optname == IPV6_HOPLIMIT)
2453		pktopt->ip6po_hlim = -1;
2454	if (optname == -1 || optname == IPV6_TCLASS)
2455		pktopt->ip6po_tclass = -1;
2456	if (optname == -1 || optname == IPV6_NEXTHOP) {
2457		if (pktopt->ip6po_nextroute.ro_rt) {
2458			RTFREE(pktopt->ip6po_nextroute.ro_rt);
2459			pktopt->ip6po_nextroute.ro_rt = NULL;
2460		}
2461		if (needfree && pktopt->ip6po_nexthop)
2462			free(pktopt->ip6po_nexthop, M_IP6OPT);
2463		pktopt->ip6po_nexthop = NULL;
2464	}
2465	if (optname == -1 || optname == IPV6_HOPOPTS) {
2466		if (needfree && pktopt->ip6po_hbh)
2467			free(pktopt->ip6po_hbh, M_IP6OPT);
2468		pktopt->ip6po_hbh = NULL;
2469	}
2470	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2471		if (needfree && pktopt->ip6po_dest1)
2472			free(pktopt->ip6po_dest1, M_IP6OPT);
2473		pktopt->ip6po_dest1 = NULL;
2474	}
2475	if (optname == -1 || optname == IPV6_RTHDR) {
2476		if (needfree && pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2477			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2478		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2479		if (pktopt->ip6po_route.ro_rt) {
2480			RTFREE(pktopt->ip6po_route.ro_rt);
2481			pktopt->ip6po_route.ro_rt = NULL;
2482		}
2483	}
2484	if (optname == -1 || optname == IPV6_DSTOPTS) {
2485		if (needfree && pktopt->ip6po_dest2)
2486			free(pktopt->ip6po_dest2, M_IP6OPT);
2487		pktopt->ip6po_dest2 = NULL;
2488	}
2489}
2490
2491#define PKTOPT_EXTHDRCPY(type) \
2492do {\
2493	if (src->type) {\
2494		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2495		dst->type = malloc(hlen, M_IP6OPT, canwait);\
2496		if (dst->type == NULL && canwait == M_NOWAIT)\
2497			goto bad;\
2498		bcopy(src->type, dst->type, hlen);\
2499	}\
2500} while (/*CONSTCOND*/ 0)
2501
2502struct ip6_pktopts *
2503ip6_copypktopts(src, canwait)
2504	struct ip6_pktopts *src;
2505	int canwait;
2506{
2507	struct ip6_pktopts *dst;
2508
2509	if (src == NULL) {
2510		printf("ip6_clearpktopts: invalid argument\n");
2511		return (NULL);
2512	}
2513
2514	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2515	if (dst == NULL && canwait == M_NOWAIT)
2516		return (NULL);
2517	bzero(dst, sizeof(*dst));
2518	dst->needfree = 1;
2519
2520	dst->ip6po_hlim = src->ip6po_hlim;
2521	dst->ip6po_tclass = src->ip6po_tclass;
2522	dst->ip6po_flags = src->ip6po_flags;
2523	if (src->ip6po_pktinfo) {
2524		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2525		    M_IP6OPT, canwait);
2526		if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2527			goto bad;
2528		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2529	}
2530	if (src->ip6po_nexthop) {
2531		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2532		    M_IP6OPT, canwait);
2533		if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
2534			goto bad;
2535		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2536		    src->ip6po_nexthop->sa_len);
2537	}
2538	PKTOPT_EXTHDRCPY(ip6po_hbh);
2539	PKTOPT_EXTHDRCPY(ip6po_dest1);
2540	PKTOPT_EXTHDRCPY(ip6po_dest2);
2541	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2542	return (dst);
2543
2544  bad:
2545	if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2546	if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2547	if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2548	if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2549	if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2550	if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2551	free(dst, M_IP6OPT);
2552	return (NULL);
2553}
2554#undef PKTOPT_EXTHDRCPY
2555
2556void
2557ip6_freepcbopts(pktopt)
2558	struct ip6_pktopts *pktopt;
2559{
2560	if (pktopt == NULL)
2561		return;
2562
2563	ip6_clearpktopts(pktopt, -1);
2564
2565	free(pktopt, M_IP6OPT);
2566}
2567
2568/*
2569 * Set the IP6 multicast options in response to user setsockopt().
2570 */
2571static int
2572ip6_setmoptions(optname, im6op, m)
2573	int optname;
2574	struct ip6_moptions **im6op;
2575	struct mbuf *m;
2576{
2577	int error = 0;
2578	u_int loop, ifindex;
2579	struct ipv6_mreq *mreq;
2580	struct ifnet *ifp;
2581	struct ip6_moptions *im6o = *im6op;
2582	struct route_in6 ro;
2583	struct sockaddr_in6 *dst;
2584	struct in6_multi_mship *imm;
2585	struct thread *td = curthread;
2586
2587	if (im6o == NULL) {
2588		/*
2589		 * No multicast option buffer attached to the pcb;
2590		 * allocate one and initialize to default values.
2591		 */
2592		im6o = (struct ip6_moptions *)
2593			malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
2594
2595		if (im6o == NULL)
2596			return (ENOBUFS);
2597		*im6op = im6o;
2598		im6o->im6o_multicast_ifp = NULL;
2599		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2600		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2601		LIST_INIT(&im6o->im6o_memberships);
2602	}
2603
2604	switch (optname) {
2605
2606	case IPV6_MULTICAST_IF:
2607		/*
2608		 * Select the interface for outgoing multicast packets.
2609		 */
2610		if (m == NULL || m->m_len != sizeof(u_int)) {
2611			error = EINVAL;
2612			break;
2613		}
2614		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2615		if (ifindex < 0 || if_index < ifindex) {
2616			error = ENXIO;	/* XXX EINVAL? */
2617			break;
2618		}
2619		ifp = ifnet_byindex(ifindex);
2620		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2621			error = EADDRNOTAVAIL;
2622			break;
2623		}
2624		im6o->im6o_multicast_ifp = ifp;
2625		break;
2626
2627	case IPV6_MULTICAST_HOPS:
2628	    {
2629		/*
2630		 * Set the IP6 hoplimit for outgoing multicast packets.
2631		 */
2632		int optval;
2633		if (m == NULL || m->m_len != sizeof(int)) {
2634			error = EINVAL;
2635			break;
2636		}
2637		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2638		if (optval < -1 || optval >= 256)
2639			error = EINVAL;
2640		else if (optval == -1)
2641			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2642		else
2643			im6o->im6o_multicast_hlim = optval;
2644		break;
2645	    }
2646
2647	case IPV6_MULTICAST_LOOP:
2648		/*
2649		 * Set the loopback flag for outgoing multicast packets.
2650		 * Must be zero or one.
2651		 */
2652		if (m == NULL || m->m_len != sizeof(u_int)) {
2653			error = EINVAL;
2654			break;
2655		}
2656		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2657		if (loop > 1) {
2658			error = EINVAL;
2659			break;
2660		}
2661		im6o->im6o_multicast_loop = loop;
2662		break;
2663
2664	case IPV6_JOIN_GROUP:
2665		/*
2666		 * Add a multicast group membership.
2667		 * Group must be a valid IP6 multicast address.
2668		 */
2669		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2670			error = EINVAL;
2671			break;
2672		}
2673		mreq = mtod(m, struct ipv6_mreq *);
2674		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2675			/*
2676			 * We use the unspecified address to specify to accept
2677			 * all multicast addresses. Only super user is allowed
2678			 * to do this.
2679			 */
2680			if (suser(td)) {
2681				error = EACCES;
2682				break;
2683			}
2684		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2685			error = EINVAL;
2686			break;
2687		}
2688
2689		/*
2690		 * If the interface is specified, validate it.
2691		 */
2692		if (mreq->ipv6mr_interface < 0 ||
2693		    if_index < mreq->ipv6mr_interface) {
2694			error = ENXIO;	/* XXX EINVAL? */
2695			break;
2696		}
2697		/*
2698		 * If no interface was explicitly specified, choose an
2699		 * appropriate one according to the given multicast address.
2700		 */
2701		if (mreq->ipv6mr_interface == 0) {
2702			/*
2703			 * If the multicast address is in node-local scope,
2704			 * the interface should be a loopback interface.
2705			 * Otherwise, look up the routing table for the
2706			 * address, and choose the outgoing interface.
2707			 *   XXX: is it a good approach?
2708			 */
2709			if (IN6_IS_ADDR_MC_INTFACELOCAL(&mreq->ipv6mr_multiaddr)) {
2710				ifp = &loif[0];
2711			} else {
2712				ro.ro_rt = NULL;
2713				dst = (struct sockaddr_in6 *)&ro.ro_dst;
2714				bzero(dst, sizeof(*dst));
2715				dst->sin6_len = sizeof(struct sockaddr_in6);
2716				dst->sin6_family = AF_INET6;
2717				dst->sin6_addr = mreq->ipv6mr_multiaddr;
2718				rtalloc((struct route *)&ro);
2719				if (ro.ro_rt == NULL) {
2720					error = EADDRNOTAVAIL;
2721					break;
2722				}
2723				ifp = ro.ro_rt->rt_ifp;
2724				RTFREE(ro.ro_rt);
2725			}
2726		} else
2727			ifp = ifnet_byindex(mreq->ipv6mr_interface);
2728
2729		/*
2730		 * See if we found an interface, and confirm that it
2731		 * supports multicast
2732		 */
2733		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2734			error = EADDRNOTAVAIL;
2735			break;
2736		}
2737		/*
2738		 * Put interface index into the multicast address,
2739		 * if the address has link-local scope.
2740		 */
2741		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2742			mreq->ipv6mr_multiaddr.s6_addr16[1] =
2743			    htons(ifp->if_index);
2744		}
2745		/*
2746		 * See if the membership already exists.
2747		 */
2748		for (imm = im6o->im6o_memberships.lh_first;
2749		     imm != NULL; imm = imm->i6mm_chain.le_next)
2750			if (imm->i6mm_maddr->in6m_ifp == ifp &&
2751			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2752					       &mreq->ipv6mr_multiaddr))
2753				break;
2754		if (imm != NULL) {
2755			error = EADDRINUSE;
2756			break;
2757		}
2758		/*
2759		 * Everything looks good; add a new record to the multicast
2760		 * address list for the given interface.
2761		 */
2762		imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK);
2763		if (imm == NULL) {
2764			error = ENOBUFS;
2765			break;
2766		}
2767		if ((imm->i6mm_maddr =
2768		     in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
2769			free(imm, M_IPMADDR);
2770			break;
2771		}
2772		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2773		break;
2774
2775	case IPV6_LEAVE_GROUP:
2776		/*
2777		 * Drop a multicast group membership.
2778		 * Group must be a valid IP6 multicast address.
2779		 */
2780		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2781			error = EINVAL;
2782			break;
2783		}
2784		mreq = mtod(m, struct ipv6_mreq *);
2785		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2786			if (suser(td)) {
2787				error = EACCES;
2788				break;
2789			}
2790		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2791			error = EINVAL;
2792			break;
2793		}
2794		/*
2795		 * If an interface address was specified, get a pointer
2796		 * to its ifnet structure.
2797		 */
2798		if (mreq->ipv6mr_interface < 0
2799		 || if_index < mreq->ipv6mr_interface) {
2800			error = ENXIO;	/* XXX EINVAL? */
2801			break;
2802		}
2803		ifp = ifnet_byindex(mreq->ipv6mr_interface);
2804		/*
2805		 * Put interface index into the multicast address,
2806		 * if the address has link-local scope.
2807		 */
2808		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2809			mreq->ipv6mr_multiaddr.s6_addr16[1]
2810				= htons(mreq->ipv6mr_interface);
2811		}
2812
2813		/*
2814		 * Find the membership in the membership list.
2815		 */
2816		for (imm = im6o->im6o_memberships.lh_first;
2817		     imm != NULL; imm = imm->i6mm_chain.le_next) {
2818			if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2819			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2820			    &mreq->ipv6mr_multiaddr))
2821				break;
2822		}
2823		if (imm == NULL) {
2824			/* Unable to resolve interface */
2825			error = EADDRNOTAVAIL;
2826			break;
2827		}
2828		/*
2829		 * Give up the multicast address record to which the
2830		 * membership points.
2831		 */
2832		LIST_REMOVE(imm, i6mm_chain);
2833		in6_delmulti(imm->i6mm_maddr);
2834		free(imm, M_IPMADDR);
2835		break;
2836
2837	default:
2838		error = EOPNOTSUPP;
2839		break;
2840	}
2841
2842	/*
2843	 * If all options have default values, no need to keep the mbuf.
2844	 */
2845	if (im6o->im6o_multicast_ifp == NULL &&
2846	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2847	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2848	    im6o->im6o_memberships.lh_first == NULL) {
2849		free(*im6op, M_IPMOPTS);
2850		*im6op = NULL;
2851	}
2852
2853	return (error);
2854}
2855
2856/*
2857 * Return the IP6 multicast options in response to user getsockopt().
2858 */
2859static int
2860ip6_getmoptions(optname, im6o, mp)
2861	int optname;
2862	struct ip6_moptions *im6o;
2863	struct mbuf **mp;
2864{
2865	u_int *hlim, *loop, *ifindex;
2866
2867	*mp = m_get(M_TRYWAIT, MT_HEADER);		/* XXX */
2868
2869	switch (optname) {
2870
2871	case IPV6_MULTICAST_IF:
2872		ifindex = mtod(*mp, u_int *);
2873		(*mp)->m_len = sizeof(u_int);
2874		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2875			*ifindex = 0;
2876		else
2877			*ifindex = im6o->im6o_multicast_ifp->if_index;
2878		return (0);
2879
2880	case IPV6_MULTICAST_HOPS:
2881		hlim = mtod(*mp, u_int *);
2882		(*mp)->m_len = sizeof(u_int);
2883		if (im6o == NULL)
2884			*hlim = ip6_defmcasthlim;
2885		else
2886			*hlim = im6o->im6o_multicast_hlim;
2887		return (0);
2888
2889	case IPV6_MULTICAST_LOOP:
2890		loop = mtod(*mp, u_int *);
2891		(*mp)->m_len = sizeof(u_int);
2892		if (im6o == NULL)
2893			*loop = ip6_defmcasthlim;
2894		else
2895			*loop = im6o->im6o_multicast_loop;
2896		return (0);
2897
2898	default:
2899		return (EOPNOTSUPP);
2900	}
2901}
2902
2903/*
2904 * Discard the IP6 multicast options.
2905 */
2906void
2907ip6_freemoptions(im6o)
2908	struct ip6_moptions *im6o;
2909{
2910	struct in6_multi_mship *imm;
2911
2912	if (im6o == NULL)
2913		return;
2914
2915	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2916		LIST_REMOVE(imm, i6mm_chain);
2917		if (imm->i6mm_maddr)
2918			in6_delmulti(imm->i6mm_maddr);
2919		free(imm, M_IPMADDR);
2920	}
2921	free(im6o, M_IPMOPTS);
2922}
2923
2924/*
2925 * Set IPv6 outgoing packet options based on advanced API.
2926 */
2927int
2928ip6_setpktoptions(control, opt, stickyopt, priv, needcopy, uproto)
2929	struct mbuf *control;
2930	struct ip6_pktopts *opt, *stickyopt;
2931	int priv, needcopy, uproto;
2932{
2933	struct cmsghdr *cm = 0;
2934
2935	if (control == 0 || opt == 0)
2936		return (EINVAL);
2937
2938	if (stickyopt) {
2939		/*
2940		 * If stickyopt is provided, make a local copy of the options
2941		 * for this particular packet, then override them by ancillary
2942		 * objects.
2943		 * XXX: need to gain a reference for the cached route of the
2944		 * next hop in case of the overriding.
2945		 */
2946		*opt = *stickyopt;
2947		if (opt->ip6po_nextroute.ro_rt) {
2948			RT_LOCK(opt->ip6po_nextroute.ro_rt);
2949			RT_ADDREF(opt->ip6po_nextroute.ro_rt);
2950			RT_UNLOCK(opt->ip6po_nextroute.ro_rt);
2951		}
2952	} else
2953		init_ip6pktopts(opt);
2954	opt->needfree = needcopy;
2955
2956	/*
2957	 * XXX: Currently, we assume all the optional information is stored
2958	 * in a single mbuf.
2959	 */
2960	if (control->m_next)
2961		return (EINVAL);
2962
2963	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2964	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2965		int error;
2966
2967		if (control->m_len < CMSG_LEN(0))
2968			return (EINVAL);
2969
2970		cm = mtod(control, struct cmsghdr *);
2971		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2972			return (EINVAL);
2973		if (cm->cmsg_level != IPPROTO_IPV6)
2974			continue;
2975
2976		error = ip6_setpktoption(cm->cmsg_type, CMSG_DATA(cm),
2977		    cm->cmsg_len - CMSG_LEN(0), opt, priv, needcopy, 1, uproto);
2978		if (error)
2979			return (error);
2980	}
2981
2982	return (0);
2983}
2984
2985/*
2986 * Set a particular packet option, as a sticky option or an ancillary data
2987 * item.  "len" can be 0 only when it's a sticky option.
2988 * We have 4 cases of combination of "sticky" and "cmsg":
2989 * "sticky=0, cmsg=0": impossible
2990 * "sticky=0, cmsg=1": RFC2292 or rfc2292bis ancillary data
2991 * "sticky=1, cmsg=0": rfc2292bis socket option
2992 * "sticky=1, cmsg=1": RFC2292 socket option
2993 */
2994static int
2995ip6_setpktoption(optname, buf, len, opt, priv, sticky, cmsg, uproto)
2996	int optname, len, priv, sticky, cmsg, uproto;
2997	u_char *buf;
2998	struct ip6_pktopts *opt;
2999{
3000	int minmtupolicy, preftemp;
3001
3002	if (!sticky && !cmsg) {
3003#ifdef DIAGNOSTIC
3004		printf("ip6_setpktoption: impossible case\n");
3005#endif
3006		return (EINVAL);
3007	}
3008
3009	/*
3010	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3011	 * not be specified in the context of rfc2292bis.  Conversely,
3012	 * rfc2292bis types should not be specified in the context of RFC2292.
3013	 */
3014	if (!cmsg) {
3015		switch (optname) {
3016		case IPV6_2292PKTINFO:
3017		case IPV6_2292HOPLIMIT:
3018		case IPV6_2292NEXTHOP:
3019		case IPV6_2292HOPOPTS:
3020		case IPV6_2292DSTOPTS:
3021		case IPV6_2292RTHDR:
3022		case IPV6_2292PKTOPTIONS:
3023			return (ENOPROTOOPT);
3024		}
3025	}
3026	if (sticky && cmsg) {
3027		switch (optname) {
3028		case IPV6_PKTINFO:
3029		case IPV6_HOPLIMIT:
3030		case IPV6_NEXTHOP:
3031		case IPV6_HOPOPTS:
3032		case IPV6_DSTOPTS:
3033		case IPV6_RTHDRDSTOPTS:
3034		case IPV6_RTHDR:
3035		case IPV6_USE_MIN_MTU:
3036		case IPV6_DONTFRAG:
3037		case IPV6_TCLASS:
3038		case IPV6_PREFER_TEMPADDR: /* XXX: not an rfc2292bis option */
3039			return (ENOPROTOOPT);
3040		}
3041	}
3042
3043	switch (optname) {
3044	case IPV6_2292PKTINFO:
3045	case IPV6_PKTINFO:
3046	{
3047		struct ifnet *ifp = NULL;
3048		struct in6_pktinfo *pktinfo;
3049
3050		if (len != sizeof(struct in6_pktinfo))
3051			return (EINVAL);
3052
3053		pktinfo = (struct in6_pktinfo *)buf;
3054
3055		/*
3056		 * An application can clear any sticky IPV6_PKTINFO option by
3057		 * doing a "regular" setsockopt with ipi6_addr being
3058		 * in6addr_any and ipi6_ifindex being zero.
3059		 * [RFC 3542, Section 6]
3060		 */
3061		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3062		    pktinfo->ipi6_ifindex == 0 &&
3063		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3064			ip6_clearpktopts(opt, optname);
3065			break;
3066		}
3067
3068		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3069		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3070			return (EINVAL);
3071		}
3072
3073		/* validate the interface index if specified. */
3074		if (pktinfo->ipi6_ifindex > if_index ||
3075		    pktinfo->ipi6_ifindex < 0) {
3076			 return (ENXIO);
3077		}
3078		if (pktinfo->ipi6_ifindex) {
3079			ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
3080			if (ifp == NULL)
3081				return (ENXIO);
3082		}
3083
3084		/*
3085		 * We store the address anyway, and let in6_selectsrc()
3086		 * validate the specified address.  This is because ipi6_addr
3087		 * may not have enough information about its scope zone, and
3088		 * we may need additional information (such as outgoing
3089		 * interface or the scope zone of a destination address) to
3090		 * disambiguate the scope.
3091		 * XXX: the delay of the validation may confuse the
3092		 * application when it is used as a sticky option.
3093		 */
3094		if (sticky) {
3095			if (opt->ip6po_pktinfo == NULL) {
3096				opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
3097				    M_IP6OPT, M_WAITOK);
3098			}
3099			bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
3100		} else
3101			opt->ip6po_pktinfo = pktinfo;
3102		break;
3103	}
3104
3105	case IPV6_2292HOPLIMIT:
3106	case IPV6_HOPLIMIT:
3107	{
3108		int *hlimp;
3109
3110		/*
3111		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3112		 * to simplify the ordering among hoplimit options.
3113		 */
3114		if (optname == IPV6_HOPLIMIT && sticky)
3115			return (ENOPROTOOPT);
3116
3117		if (len != sizeof(int))
3118			return (EINVAL);
3119		hlimp = (int *)buf;
3120		if (*hlimp < -1 || *hlimp > 255)
3121			return (EINVAL);
3122
3123		opt->ip6po_hlim = *hlimp;
3124		break;
3125	}
3126
3127	case IPV6_TCLASS:
3128	{
3129		int tclass;
3130
3131		if (len != sizeof(int))
3132			return (EINVAL);
3133		tclass = *(int *)buf;
3134		if (tclass < -1 || tclass > 255)
3135			return (EINVAL);
3136
3137		opt->ip6po_tclass = tclass;
3138		break;
3139	}
3140
3141	case IPV6_2292NEXTHOP:
3142	case IPV6_NEXTHOP:
3143		if (!priv)
3144			return (EPERM);
3145
3146		if (len == 0) {	/* just remove the option */
3147			ip6_clearpktopts(opt, IPV6_NEXTHOP);
3148			break;
3149		}
3150
3151		/* check if cmsg_len is large enough for sa_len */
3152		if (len < sizeof(struct sockaddr) || len < *buf)
3153			return (EINVAL);
3154
3155		switch (((struct sockaddr *)buf)->sa_family) {
3156		case AF_INET6:
3157		{
3158			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3159#if 0
3160			int error;
3161#endif
3162
3163			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3164				return (EINVAL);
3165
3166			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3167			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3168				return (EINVAL);
3169			}
3170#if 0
3171			if ((error = scope6_check_id(sa6, ip6_use_defzone))
3172			    != 0) {
3173				return (error);
3174			}
3175#endif
3176			sa6->sin6_scope_id = 0; /* XXX */
3177			break;
3178		}
3179		case AF_LINK:	/* should eventually be supported */
3180		default:
3181			return (EAFNOSUPPORT);
3182		}
3183
3184		/* turn off the previous option, then set the new option. */
3185		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3186		if (sticky) {
3187			opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_WAITOK);
3188			bcopy(buf, opt->ip6po_nexthop, *buf);
3189		} else
3190			opt->ip6po_nexthop = (struct sockaddr *)buf;
3191		break;
3192
3193	case IPV6_2292HOPOPTS:
3194	case IPV6_HOPOPTS:
3195	{
3196		struct ip6_hbh *hbh;
3197		int hbhlen;
3198
3199		/*
3200		 * XXX: We don't allow a non-privileged user to set ANY HbH
3201		 * options, since per-option restriction has too much
3202		 * overhead.
3203		 */
3204		if (!priv)
3205			return (EPERM);
3206
3207		if (len == 0) {
3208			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3209			break;	/* just remove the option */
3210		}
3211
3212		/* message length validation */
3213		if (len < sizeof(struct ip6_hbh))
3214			return (EINVAL);
3215		hbh = (struct ip6_hbh *)buf;
3216		hbhlen = (hbh->ip6h_len + 1) << 3;
3217		if (len != hbhlen)
3218			return (EINVAL);
3219
3220		/* turn off the previous option, then set the new option. */
3221		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3222		if (sticky) {
3223			opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_WAITOK);
3224			bcopy(hbh, opt->ip6po_hbh, hbhlen);
3225		} else
3226			opt->ip6po_hbh = hbh;
3227
3228		break;
3229	}
3230
3231	case IPV6_2292DSTOPTS:
3232	case IPV6_DSTOPTS:
3233	case IPV6_RTHDRDSTOPTS:
3234	{
3235		struct ip6_dest *dest, **newdest = NULL;
3236		int destlen;
3237
3238		if (!priv)	/* XXX: see the comment for IPV6_HOPOPTS */
3239			return (EPERM);
3240
3241		if (len == 0) {
3242			ip6_clearpktopts(opt, optname);
3243			break;	/* just remove the option */
3244		}
3245
3246		/* message length validation */
3247		if (len < sizeof(struct ip6_dest))
3248			return (EINVAL);
3249		dest = (struct ip6_dest *)buf;
3250		destlen = (dest->ip6d_len + 1) << 3;
3251		if (len != destlen)
3252			return (EINVAL);
3253
3254		/*
3255		 * Determine the position that the destination options header
3256		 * should be inserted; before or after the routing header.
3257		 */
3258		switch (optname) {
3259		case IPV6_2292DSTOPTS:
3260			/*
3261			 * The old advacned API is ambiguous on this point.
3262			 * Our approach is to determine the position based
3263			 * according to the existence of a routing header.
3264			 * Note, however, that this depends on the order of the
3265			 * extension headers in the ancillary data; the 1st
3266			 * part of the destination options header must appear
3267			 * before the routing header in the ancillary data,
3268			 * too.
3269			 * RFC2292bis solved the ambiguity by introducing
3270			 * separate ancillary data or option types.
3271			 */
3272			if (opt->ip6po_rthdr == NULL)
3273				newdest = &opt->ip6po_dest1;
3274			else
3275				newdest = &opt->ip6po_dest2;
3276			break;
3277		case IPV6_RTHDRDSTOPTS:
3278			newdest = &opt->ip6po_dest1;
3279			break;
3280		case IPV6_DSTOPTS:
3281			newdest = &opt->ip6po_dest2;
3282			break;
3283		}
3284
3285		/* turn off the previous option, then set the new option. */
3286		ip6_clearpktopts(opt, optname);
3287		if (sticky) {
3288			*newdest = malloc(destlen, M_IP6OPT, M_WAITOK);
3289			bcopy(dest, *newdest, destlen);
3290		} else
3291			*newdest = dest;
3292
3293		break;
3294	}
3295
3296	case IPV6_2292RTHDR:
3297	case IPV6_RTHDR:
3298	{
3299		struct ip6_rthdr *rth;
3300		int rthlen;
3301
3302		if (len == 0) {
3303			ip6_clearpktopts(opt, IPV6_RTHDR);
3304			break;	/* just remove the option */
3305		}
3306
3307		/* message length validation */
3308		if (len < sizeof(struct ip6_rthdr))
3309			return (EINVAL);
3310		rth = (struct ip6_rthdr *)buf;
3311		rthlen = (rth->ip6r_len + 1) << 3;
3312		if (len != rthlen)
3313			return (EINVAL);
3314
3315		switch (rth->ip6r_type) {
3316		case IPV6_RTHDR_TYPE_0:
3317			if (rth->ip6r_len == 0)	/* must contain one addr */
3318				return (EINVAL);
3319			if (rth->ip6r_len % 2) /* length must be even */
3320				return (EINVAL);
3321			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3322				return (EINVAL);
3323			break;
3324		default:
3325			return (EINVAL);	/* not supported */
3326		}
3327
3328		/* turn off the previous option */
3329		ip6_clearpktopts(opt, IPV6_RTHDR);
3330		if (sticky) {
3331			opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_WAITOK);
3332			bcopy(rth, opt->ip6po_rthdr, rthlen);
3333		} else
3334			opt->ip6po_rthdr = rth;
3335
3336		break;
3337	}
3338
3339	case IPV6_USE_MIN_MTU:
3340		if (len != sizeof(int))
3341			return (EINVAL);
3342		minmtupolicy = *(int *)buf;
3343		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3344		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3345		    minmtupolicy != IP6PO_MINMTU_ALL) {
3346			return (EINVAL);
3347		}
3348		opt->ip6po_minmtu = minmtupolicy;
3349		break;
3350
3351	case IPV6_DONTFRAG:
3352		if (len != sizeof(int))
3353			return (EINVAL);
3354
3355		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3356			/*
3357			 * we ignore this option for TCP sockets.
3358			 * (rfc2292bis leaves this case unspecified.)
3359			 */
3360			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3361		} else
3362			opt->ip6po_flags |= IP6PO_DONTFRAG;
3363		break;
3364
3365	case IPV6_PREFER_TEMPADDR:
3366		if (len != sizeof(int))
3367			return (EINVAL);
3368		preftemp = *(int *)buf;
3369		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3370		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3371		    preftemp != IP6PO_TEMPADDR_PREFER) {
3372			return (EINVAL);
3373		}
3374		opt->ip6po_prefer_tempaddr = preftemp;
3375		break;
3376
3377	default:
3378		return (ENOPROTOOPT);
3379	} /* end of switch */
3380
3381	return (0);
3382}
3383
3384/*
3385 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3386 * packet to the input queue of a specified interface.  Note that this
3387 * calls the output routine of the loopback "driver", but with an interface
3388 * pointer that might NOT be &loif -- easier than replicating that code here.
3389 */
3390void
3391ip6_mloopback(ifp, m, dst)
3392	struct ifnet *ifp;
3393	struct mbuf *m;
3394	struct sockaddr_in6 *dst;
3395{
3396	struct mbuf *copym;
3397	struct ip6_hdr *ip6;
3398
3399	copym = m_copy(m, 0, M_COPYALL);
3400	if (copym == NULL)
3401		return;
3402
3403	/*
3404	 * Make sure to deep-copy IPv6 header portion in case the data
3405	 * is in an mbuf cluster, so that we can safely override the IPv6
3406	 * header portion later.
3407	 */
3408	if ((copym->m_flags & M_EXT) != 0 ||
3409	    copym->m_len < sizeof(struct ip6_hdr)) {
3410		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3411		if (copym == NULL)
3412			return;
3413	}
3414
3415#ifdef DIAGNOSTIC
3416	if (copym->m_len < sizeof(*ip6)) {
3417		m_freem(copym);
3418		return;
3419	}
3420#endif
3421
3422	ip6 = mtod(copym, struct ip6_hdr *);
3423	/*
3424	 * clear embedded scope identifiers if necessary.
3425	 * in6_clearscope will touch the addresses only when necessary.
3426	 */
3427	in6_clearscope(&ip6->ip6_src);
3428	in6_clearscope(&ip6->ip6_dst);
3429
3430	(void)if_simloop(ifp, copym, dst->sin6_family, 0);
3431}
3432
3433/*
3434 * Chop IPv6 header off from the payload.
3435 */
3436static int
3437ip6_splithdr(m, exthdrs)
3438	struct mbuf *m;
3439	struct ip6_exthdrs *exthdrs;
3440{
3441	struct mbuf *mh;
3442	struct ip6_hdr *ip6;
3443
3444	ip6 = mtod(m, struct ip6_hdr *);
3445	if (m->m_len > sizeof(*ip6)) {
3446		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3447		if (mh == 0) {
3448			m_freem(m);
3449			return ENOBUFS;
3450		}
3451		M_MOVE_PKTHDR(mh, m);
3452		MH_ALIGN(mh, sizeof(*ip6));
3453		m->m_len -= sizeof(*ip6);
3454		m->m_data += sizeof(*ip6);
3455		mh->m_next = m;
3456		m = mh;
3457		m->m_len = sizeof(*ip6);
3458		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3459	}
3460	exthdrs->ip6e_ip6 = m;
3461	return 0;
3462}
3463
3464/*
3465 * Compute IPv6 extension header length.
3466 */
3467int
3468ip6_optlen(in6p)
3469	struct in6pcb *in6p;
3470{
3471	int len;
3472
3473	if (!in6p->in6p_outputopts)
3474		return 0;
3475
3476	len = 0;
3477#define elen(x) \
3478    (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3479
3480	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3481	if (in6p->in6p_outputopts->ip6po_rthdr)
3482		/* dest1 is valid with rthdr only */
3483		len += elen(in6p->in6p_outputopts->ip6po_dest1);
3484	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3485	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3486	return len;
3487#undef elen
3488}
3489