ip6_output.c revision 146228
/*	$FreeBSD: head/sys/netinet6/ip6_output.c 146228 2005-05-15 02:28:30Z gnn $	*/
/*	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $	*/
3103285Sikob
/*-
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
32103285Sikob
/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
 */
63127468Ssimokawa
64127468Ssimokawa#include "opt_ip6fw.h"
65127468Ssimokawa#include "opt_inet.h"
66127468Ssimokawa#include "opt_inet6.h"
67127468Ssimokawa#include "opt_ipsec.h"
68127468Ssimokawa
69127468Ssimokawa#include <sys/param.h>
70119290Ssimokawa#include <sys/malloc.h>
71119290Ssimokawa#include <sys/mbuf.h>
72119290Ssimokawa#include <sys/proc.h>
73119290Ssimokawa#include <sys/errno.h>
74119277Simp#include <sys/protosw.h>
75119277Simp#include <sys/socket.h>
76119290Ssimokawa#include <sys/socketvar.h>
77103285Sikob#include <sys/systm.h>
78103285Sikob#include <sys/kernel.h>
79103285Sikob
80103285Sikob#include <net/if.h>
81113584Ssimokawa#include <net/netisr.h>
82103285Sikob#include <net/route.h>
83103285Sikob#include <net/pfil.h>
84127468Ssimokawa
85103285Sikob#include <netinet/in.h>
86103285Sikob#include <netinet/in_var.h>
87103285Sikob#include <netinet6/in6_var.h>
88103285Sikob#include <netinet/ip6.h>
89103285Sikob#include <netinet/icmp6.h>
90103285Sikob#include <netinet6/ip6_var.h>
91103285Sikob#include <netinet/in_pcb.h>
92103285Sikob#include <netinet/tcp_var.h>
93103285Sikob#include <netinet6/nd6.h>
94103285Sikob
95103285Sikob#ifdef IPSEC
96129585Sdfr#include <netinet6/ipsec.h>
97108504Ssimokawa#ifdef INET6
98111076Ssimokawa#include <netinet6/ipsec6.h>
99132283Ssimokawa#endif
100132283Ssimokawa#include <netkey/key.h>
101143161Simp#endif /* IPSEC */
102132283Ssimokawa
103108504Ssimokawa#ifdef FAST_IPSEC
104103285Sikob#include <netipsec/ipsec.h>
105143161Simp#include <netipsec/ipsec6.h>
106103285Sikob#include <netipsec/key.h>
107111075Ssimokawa#endif /* FAST_IPSEC */
108111075Ssimokawa
109143161Simp#include <netinet6/ip6_fw.h>
110111075Ssimokawa
111113957Ssimokawa#include <net/net_osdep.h>
112113957Ssimokawa
113143161Simp#include <netinet6/ip6protosw.h>
114113957Ssimokawa
115125239Ssimokawastatic MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
116125239Ssimokawa
/*
 * Working set of mbufs used by ip6_output() while it assembles the header
 * chain of an outgoing packet: one mbuf for the IPv6 header and one for
 * each optional extension header.  ip6_output() zeroes the structure
 * first, so a NULL member means that header is not present.
 */
struct ip6_exthdrs {
	struct mbuf *ip6e_ip6;		/* IPv6 header (set once split off) */
	struct mbuf *ip6e_hbh;		/* Hop-by-Hop options header */
	struct mbuf *ip6e_dest1;	/* Destination options header, 1st part */
	struct mbuf *ip6e_rthdr;	/* Routing header */
	struct mbuf *ip6e_dest2;	/* Destination options header, 2nd part */
};
124132283Ssimokawa
125132283Ssimokawastatic int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
126143161Simp			   int, int));
127132283Ssimokawastatic int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
128108504Ssimokawa	struct socket *, struct sockopt *));
129103285Sikobstatic int ip6_getpcbopt __P((struct ip6_pktopts *, int, struct sockopt *));
130143161Simpstatic int ip6_setpktoption __P((int, u_char *, int, struct ip6_pktopts *, int,
131103285Sikob	int, int, int));
132108504Ssimokawa
133103285Sikobstatic int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
134143161Simpstatic int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
135103285Sikobstatic int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));
136108504Ssimokawastatic int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
137103285Sikob	struct ip6_frag **));
138143161Simpstatic int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));
139103285Sikobstatic int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));
140108504Ssimokawastatic int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
141103285Sikob	struct ifnet *, struct in6_addr *, u_long *, int *));
142143161Simp
143103285Sikob
/*
 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
 * header (with pri, len, nxt, hlim, src, dst).
 * This function may modify ver and hlim only.
 * The mbuf chain containing the packet will be freed.
 * The mbuf opt, if present, will not be freed.
 *
 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
 * nd_ifinfo.linkmtu is u_int32_t.  So we use u_long to hold the largest one,
 * which is rt_rmx.rmx_mtu.
 */
155113957Ssimokawaint
156115806Ssimokawaip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
157115806Ssimokawa	struct mbuf *m0;
158143161Simp	struct ip6_pktopts *opt;
159115806Ssimokawa	struct route_in6 *ro;
160108504Ssimokawa	int flags;
161108504Ssimokawa	struct ip6_moptions *im6o;
162143161Simp	struct ifnet **ifpp;		/* XXX: just for statistics */
163108504Ssimokawa	struct inpcb *inp;
164108504Ssimokawa{
165108504Ssimokawa	struct ip6_hdr *ip6, *mhip6;
166143161Simp	struct ifnet *ifp, *origifp;
167108504Ssimokawa	struct mbuf *m = m0;
168113957Ssimokawa	int hlen, tlen, len, off;
169113957Ssimokawa	struct route_in6 ip6route;
170143161Simp	struct sockaddr_in6 *dst;
171113957Ssimokawa	struct in6_addr odst;
172133116Ssimokawa	int error = 0;
173159535Simp	struct in6_ifaddr *ia = NULL;
174148106Simp	u_long mtu;
175103285Sikob	int alwaysfrag, dontfrag;
176133116Ssimokawa	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
177133116Ssimokawa	struct ip6_exthdrs exthdrs;
178143161Simp	struct in6_addr finaldst;
179133116Ssimokawa	struct route_in6 *ro_pmtu = NULL;
180108504Ssimokawa	int hdrsplit = 0;
181133116Ssimokawa	int needipsec = 0;
182143161Simp#if defined(IPSEC) || defined(FAST_IPSEC)
183103285Sikob	int needipsectun = 0;
184113957Ssimokawa	struct secpolicy *sp = NULL;
185113957Ssimokawa#endif /*IPSEC || FAST_IPSEC*/
186143161Simp
187113957Ssimokawa	ip6 = mtod(m, struct ip6_hdr *);
188108504Ssimokawa	finaldst = ip6->ip6_dst;
189103285Sikob
190143161Simp#define MAKE_EXTHDR(hp, mp)						\
191103285Sikob    do {								\
192108504Ssimokawa	if (hp) {							\
193103485Sikob		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
194143161Simp		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
195103485Sikob		    ((eh)->ip6e_len + 1) << 3);				\
196108504Ssimokawa		if (error)						\
197103485Sikob			goto freehdrs;					\
198143161Simp	}								\
199103485Sikob    } while (/*CONSTCOND*/ 0)
200108504Ssimokawa
201103485Sikob	bzero(&exthdrs, sizeof(exthdrs));
202143161Simp
203103485Sikob	if (opt) {
204132283Ssimokawa		/* Hop-by-Hop options header */
205132283Ssimokawa		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
206143161Simp		/* Destination options header(1st part) */
207132283Ssimokawa		if (opt->ip6po_rthdr) {
			/*
			 * Destination options header(1st part)
			 * This only makes sense with a routing header.
			 * See Section 9.2 of RFC 3542.
			 * Disabling this part just for MIP6 convenience is
			 * a bad idea.  We need to think carefully about a
			 * way to make the advanced API coexist with MIP6
			 * options, which might automatically be inserted in
			 * the kernel.
			 */
218103285Sikob			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
219103285Sikob		}
220131398Sjhb		/* Routing header */
221131398Sjhb		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
222131398Sjhb		/* Destination options header(2nd part) */
223103285Sikob		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
224143161Simp	}
225103285Sikob
226103285Sikob#ifdef IPSEC
227103285Sikob	/* get a security policy for this packet */
228103285Sikob	if (inp == NULL)
229103285Sikob		sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
230127468Ssimokawa	else
231103285Sikob		sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
232103285Sikob
233103285Sikob	if (sp == NULL) {
234103285Sikob		ipsec6stat.out_inval++;
235103285Sikob		goto freehdrs;
236103285Sikob	}
237103285Sikob
238103285Sikob	error = 0;
239108642Ssimokawa
240103285Sikob	/* check policy */
241113584Ssimokawa	switch (sp->policy) {
242129585Sdfr	case IPSEC_POLICY_DISCARD:
243103285Sikob		/*
244103285Sikob		 * This packet is just discarded.
245254263Sscottl		 */
246139759Ssimokawa		ipsec6stat.out_polvio++;
247112522Ssimokawa		goto freehdrs;
248112522Ssimokawa
249103285Sikob	case IPSEC_POLICY_BYPASS:
250103285Sikob	case IPSEC_POLICY_NONE:
251146439Smarius		/* no need to do IPsec. */
252146439Smarius		needipsec = 0;
253146439Smarius		break;
254146439Smarius
255146439Smarius	case IPSEC_POLICY_IPSEC:
256146439Smarius		if (sp->req == NULL) {
257146439Smarius			/* acquire a policy */
258146439Smarius			error = key_spdacquire(sp);
259113584Ssimokawa			goto freehdrs;
260112522Ssimokawa		}
261113584Ssimokawa		needipsec = 1;
262103285Sikob		break;
263113584Ssimokawa
264103285Sikob	case IPSEC_POLICY_ENTRUST:
265108642Ssimokawa	default:
266113584Ssimokawa		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
267113584Ssimokawa	}
268113584Ssimokawa#endif /* IPSEC */
269113584Ssimokawa#ifdef FAST_IPSEC
270113584Ssimokawa	/* get a security policy for this packet */
271113584Ssimokawa	if (inp == NULL)
272113584Ssimokawa		sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
273113584Ssimokawa	else
274113584Ssimokawa		sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
275113584Ssimokawa
276113584Ssimokawa	if (sp == NULL) {
277113584Ssimokawa		newipsecstat.ips_out_inval++;
278113584Ssimokawa		goto freehdrs;
279113584Ssimokawa	}
280108642Ssimokawa
281108642Ssimokawa	error = 0;
282108642Ssimokawa
283108642Ssimokawa	/* check policy */
284108642Ssimokawa	switch (sp->policy) {
285108642Ssimokawa	case IPSEC_POLICY_DISCARD:
286108642Ssimokawa		/*
287108642Ssimokawa		 * This packet is just discarded.
288125238Ssimokawa		 */
289127468Ssimokawa		newipsecstat.ips_out_polvio++;
290108642Ssimokawa		goto freehdrs;
291108642Ssimokawa
292108642Ssimokawa	case IPSEC_POLICY_BYPASS:
293108642Ssimokawa	case IPSEC_POLICY_NONE:
294108642Ssimokawa		/* no need to do IPsec. */
295108642Ssimokawa		needipsec = 0;
296108642Ssimokawa		break;
297108642Ssimokawa
298108642Ssimokawa	case IPSEC_POLICY_IPSEC:
299108642Ssimokawa		if (sp->req == NULL) {
300108642Ssimokawa			/* acquire a policy */
301132432Ssimokawa			error = key_spdacquire(sp);
302113584Ssimokawa			goto freehdrs;
303113584Ssimokawa		}
304132432Ssimokawa		needipsec = 1;
305113584Ssimokawa		break;
306170374Ssimokawa
307108642Ssimokawa	case IPSEC_POLICY_ENTRUST:
308108642Ssimokawa	default:
309103285Sikob		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
310127468Ssimokawa	}
311127135Snjl#endif /* FAST_IPSEC */
312127468Ssimokawa
313127468Ssimokawa	/*
314127468Ssimokawa	 * Calculate the total length of the extension header chain.
315127468Ssimokawa	 * Keep the length of the unfragmentable part for fragmentation.
316103285Sikob	 */
317103285Sikob	optlen = 0;
318103285Sikob	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
319103285Sikob	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
320103285Sikob	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
321103285Sikob	unfragpartlen = optlen + sizeof(struct ip6_hdr);
322103285Sikob	/* NOTE: we don't add AH/ESP length here. do that later. */
323103285Sikob	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
324103285Sikob
325127468Ssimokawa	/*
326127135Snjl	 * If we need IPsec, or there is at least one extension header,
327103285Sikob	 * separate IP6 header from the payload.
328127468Ssimokawa	 */
329127468Ssimokawa	if ((needipsec || optlen) && !hdrsplit) {
330127468Ssimokawa		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
331127468Ssimokawa			m = NULL;
332103285Sikob			goto freehdrs;
333103285Sikob		}
334103285Sikob		m = exthdrs.ip6e_ip6;
335103285Sikob		hdrsplit++;
336103285Sikob	}
337103285Sikob
338187993Ssbruno	/* adjust pointer */
339187993Ssbruno	ip6 = mtod(m, struct ip6_hdr *);
340187993Ssbruno
341187993Ssbruno	/* adjust mbuf packet header length */
342103285Sikob	m->m_pkthdr.len += optlen;
343127468Ssimokawa	plen = m->m_pkthdr.len - sizeof(*ip6);
344103285Sikob
345103285Sikob	/* If this is a jumbo payload, insert a jumbo payload option. */
346103285Sikob	if (plen > IPV6_MAXPACKET) {
347121506Ssimokawa		if (!hdrsplit) {
348121506Ssimokawa			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
349121506Ssimokawa				m = NULL;
350103285Sikob				goto freehdrs;
351103285Sikob			}
352103285Sikob			m = exthdrs.ip6e_ip6;
353103285Sikob			hdrsplit++;
354103285Sikob		}
355103285Sikob		/* adjust pointer */
356103285Sikob		ip6 = mtod(m, struct ip6_hdr *);
357166165Smarius		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
358166165Smarius			goto freehdrs;
359166165Smarius		ip6->ip6_plen = 0;
360166165Smarius	} else
361166165Smarius		ip6->ip6_plen = htons(plen);
362166165Smarius
363166165Smarius	/*
364113584Ssimokawa	 * Concatenate headers and fill in next header fields.
365113584Ssimokawa	 * Here we have, on "m"
366113584Ssimokawa	 *	IPv6 payload
367113584Ssimokawa	 * and we insert headers accordingly.  Finally, we should be getting:
368113584Ssimokawa	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
369113584Ssimokawa	 *
370113584Ssimokawa	 * during the header composing process, "m" points to IPv6 header.
371113584Ssimokawa	 * "mprev" points to an extension header prior to esp.
372113584Ssimokawa	 */
373113584Ssimokawa	{
374113584Ssimokawa		u_char *nexthdrp = &ip6->ip6_nxt;
375113584Ssimokawa		struct mbuf *mprev = m;
376127468Ssimokawa
377117126Sscottl		/*
378170374Ssimokawa		 * we treat dest2 specially.  this makes IPsec processing
379117228Ssimokawa		 * much easier.  the goal here is to make mprev point the
380117228Ssimokawa		 * mbuf prior to dest2.
381113584Ssimokawa		 *
382113584Ssimokawa		 * result: IPv6 dest2 payload
383113584Ssimokawa		 * m and mprev will point to IPv6 header.
384113584Ssimokawa		 */
385113584Ssimokawa		if (exthdrs.ip6e_dest2) {
386113584Ssimokawa			if (!hdrsplit)
387103285Sikob				panic("assumption failed: hdr not split");
388103285Sikob			exthdrs.ip6e_dest2->m_next = m->m_next;
389124877Ssimokawa			m->m_next = exthdrs.ip6e_dest2;
390124877Ssimokawa			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
391124877Ssimokawa			ip6->ip6_nxt = IPPROTO_DSTOPTS;
392124877Ssimokawa		}
393124877Ssimokawa
394103285Sikob#define MAKE_CHAIN(m, mp, p, i)\
395125238Ssimokawa    do {\
396125238Ssimokawa	if (m) {\
397125238Ssimokawa		if (!hdrsplit) \
398124877Ssimokawa			panic("assumption failed: hdr not split"); \
399103285Sikob		*mtod((m), u_char *) = *(p);\
400103285Sikob		*(p) = (i);\
401103285Sikob		p = mtod((m), u_char *);\
402103285Sikob		(m)->m_next = (mp)->m_next;\
403103285Sikob		(mp)->m_next = (m);\
404103285Sikob		(mp) = (m);\
405103285Sikob	}\
406103285Sikob    } while (/*CONSTCOND*/ 0)
407103285Sikob		/*
408103285Sikob		 * result: IPv6 hbh dest1 rthdr dest2 payload
409103285Sikob		 * m will point to IPv6 header.  mprev will point to the
410108530Ssimokawa		 * extension header prior to dest2 (rthdr in the above case).
411118416Ssimokawa		 */
412118416Ssimokawa		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
413118416Ssimokawa		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
414103285Sikob		    IPPROTO_DSTOPTS);
415118416Ssimokawa		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
416118416Ssimokawa		    IPPROTO_ROUTING);
417118416Ssimokawa
418118416Ssimokawa#if defined(IPSEC) || defined(FAST_IPSEC)
419103285Sikob		if (!needipsec)
420103285Sikob			goto skip_ipsec2;
421103285Sikob
422103285Sikob		/*
423103285Sikob		 * pointers after IPsec headers are not valid any more.
424103285Sikob		 * other pointers need a great care too.
425103285Sikob		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
426168099Ssimokawa		 */
427168099Ssimokawa		exthdrs.ip6e_dest2 = NULL;
428168099Ssimokawa
429168099Ssimokawa	    {
430168099Ssimokawa		struct ip6_rthdr *rh = NULL;
431168099Ssimokawa		int segleft_org = 0;
432127468Ssimokawa		struct ipsec_output_state state;
433168099Ssimokawa
434168099Ssimokawa		if (exthdrs.ip6e_rthdr) {
435103285Sikob			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
436168099Ssimokawa			segleft_org = rh->ip6r_segleft;
437168099Ssimokawa			rh->ip6r_segleft = 0;
438103285Sikob		}
439103285Sikob
440103285Sikob		bzero(&state, sizeof(state));
441103285Sikob		state.m = m;
442103285Sikob		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
443103285Sikob		    &needipsectun);
444103285Sikob		m = state.m;
445103285Sikob		if (error) {
446103285Sikob			/* mbuf is already reclaimed in ipsec6_output_trans. */
447103285Sikob			m = NULL;
448108527Ssimokawa			switch (error) {
449108527Ssimokawa			case EHOSTUNREACH:
450170374Ssimokawa			case ENETUNREACH:
451103285Sikob			case EMSGSIZE:
452103285Sikob			case ENOBUFS:
453103285Sikob			case ENOMEM:
454103285Sikob				break;
455103285Sikob			default:
456106809Ssimokawa				printf("ip6_output (ipsec): error code %d\n", error);
457106809Ssimokawa				/* FALLTHROUGH */
458106809Ssimokawa			case ENOENT:
459116978Ssimokawa				/* don't show these error codes to the user */
460108705Ssimokawa				error = 0;
461108705Ssimokawa				break;
462108642Ssimokawa			}
463108642Ssimokawa			goto bad;
464108642Ssimokawa		}
465108642Ssimokawa		if (exthdrs.ip6e_rthdr) {
466116978Ssimokawa			/* ah6_output doesn't modify mbuf chain */
467106809Ssimokawa			rh->ip6r_segleft = segleft_org;
468106809Ssimokawa		}
469106809Ssimokawa	    }
470106809Ssimokawaskip_ipsec2:;
471106809Ssimokawa#endif
472106809Ssimokawa	}
473108530Ssimokawa
474108530Ssimokawa	/*
475108642Ssimokawa	 * If there is a routing header, replace the destination address field
476108642Ssimokawa	 * with the first hop of the routing header.
477106809Ssimokawa	 */
478106809Ssimokawa	if (exthdrs.ip6e_rthdr) {
479106809Ssimokawa		struct ip6_rthdr *rh =
480108642Ssimokawa			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
481108642Ssimokawa						  struct ip6_rthdr *));
482108642Ssimokawa		struct ip6_rthdr0 *rh0;
483108642Ssimokawa		struct in6_addr *addrs;
484108642Ssimokawa
485110145Ssimokawa		switch (rh->ip6r_type) {
486110145Ssimokawa		case IPV6_RTHDR_TYPE_0:
487108642Ssimokawa			 rh0 = (struct ip6_rthdr0 *)rh;
488108642Ssimokawa			 addrs = (struct in6_addr *)(rh0 + 1);
489108642Ssimokawa
490125238Ssimokawa			 ip6->ip6_dst = *addrs;
491212413Savg			 bcopy((caddr_t)(addrs + 1), (caddr_t)addrs,
492125238Ssimokawa			       sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1)
493125238Ssimokawa				 );
494125238Ssimokawa			 *(addrs + rh0->ip6r0_segleft - 1) = finaldst;
495170374Ssimokawa			 break;
496125238Ssimokawa		default:	/* is it possible? */
497125238Ssimokawa			 error = EINVAL;
498125238Ssimokawa			 goto bad;
499125238Ssimokawa		}
500125238Ssimokawa	}
501125238Ssimokawa
502125238Ssimokawa	/* Source address validation */
503125238Ssimokawa	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
504125238Ssimokawa	    (flags & IPV6_DADOUTPUT) == 0) {
505125238Ssimokawa		error = EOPNOTSUPP;
506125238Ssimokawa		ip6stat.ip6s_badscope++;
507125238Ssimokawa		goto bad;
508125238Ssimokawa	}
509125238Ssimokawa	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
510125238Ssimokawa		error = EOPNOTSUPP;
511125238Ssimokawa		ip6stat.ip6s_badscope++;
512125238Ssimokawa		goto bad;
513125238Ssimokawa	}
514125238Ssimokawa
515125238Ssimokawa	ip6stat.ip6s_localout++;
516125238Ssimokawa
517125238Ssimokawa	/*
518167686Ssimokawa	 * Route packet.
519170374Ssimokawa	 */
520167686Ssimokawa	if (ro == 0) {
521167686Ssimokawa		ro = &ip6route;
522167686Ssimokawa		bzero((caddr_t)ro, sizeof(*ro));
523167686Ssimokawa	}
524167686Ssimokawa	ro_pmtu = ro;
525125238Ssimokawa	if (opt && opt->ip6po_rthdr)
526125238Ssimokawa		ro = &opt->ip6po_route;
527125238Ssimokawa	dst = (struct sockaddr_in6 *)&ro->ro_dst;
528125238Ssimokawa
529103285Sikobagain:
530103285Sikob	/*
531103285Sikob	 * If there is a cached route,
532103285Sikob	 * check that it is to the same destination
533103285Sikob	 * and is still up. If not, free it and try again.
534106809Ssimokawa	 */
535106809Ssimokawa	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
536108642Ssimokawa			 dst->sin6_family != AF_INET6 ||
537103285Sikob			 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
538103285Sikob		RTFREE(ro->ro_rt);
539125238Ssimokawa		ro->ro_rt = (struct rtentry *)0;
540103285Sikob	}
541227843Smarius	if (ro->ro_rt == 0) {
542103285Sikob		bzero(dst, sizeof(*dst));
543103285Sikob		dst->sin6_family = AF_INET6;
544103285Sikob		dst->sin6_len = sizeof(struct sockaddr_in6);
545103285Sikob		dst->sin6_addr = ip6->ip6_dst;
546103285Sikob	}
547103285Sikob
548103285Sikob 	/*
549103285Sikob	 * if specified, try to fill in the traffic class field.
550103285Sikob	 * do not override if a non-zero value is already set.
551103285Sikob	 * we check the diffserv field and the ecn field separately.
552127468Ssimokawa	 */
553125238Ssimokawa	if (opt && opt->ip6po_tclass >= 0) {
554125238Ssimokawa		int mask = 0;
555103285Sikob
556		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
557			mask |= 0xfc;
558		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
559			mask |= 0x03;
560		if (mask != 0)
561			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
562	}
563
564	/* fill in or override the hop limit field, if necessary. */
565	if (opt && opt->ip6po_hlim != -1)
566		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
567	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
568		if (im6o != NULL)
569			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
570		else
571			ip6->ip6_hlim = ip6_defmcasthlim;
572	}
573
574#if defined(IPSEC) || defined(FAST_IPSEC)
575	if (needipsec && needipsectun) {
576		struct ipsec_output_state state;
577
578		/*
579		 * All the extension headers will become inaccessible
580		 * (since they can be encrypted).
581		 * Don't panic, we need no more updates to extension headers
582		 * on inner IPv6 packet (since they are now encapsulated).
583		 *
584		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
585		 */
586		bzero(&exthdrs, sizeof(exthdrs));
587		exthdrs.ip6e_ip6 = m;
588
589		bzero(&state, sizeof(state));
590		state.m = m;
591		state.ro = (struct route *)ro;
592		state.dst = (struct sockaddr *)dst;
593
594		error = ipsec6_output_tunnel(&state, sp, flags);
595
596		m = state.m;
597		ro = (struct route_in6 *)state.ro;
598		dst = (struct sockaddr_in6 *)state.dst;
599		if (error) {
600			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
601			m0 = m = NULL;
602			m = NULL;
603			switch (error) {
604			case EHOSTUNREACH:
605			case ENETUNREACH:
606			case EMSGSIZE:
607			case ENOBUFS:
608			case ENOMEM:
609				break;
610			default:
611				printf("ip6_output (ipsec): error code %d\n", error);
612				/* FALLTHROUGH */
613			case ENOENT:
614				/* don't show these error codes to the user */
615				error = 0;
616				break;
617			}
618			goto bad;
619		}
620
621		exthdrs.ip6e_ip6 = m;
622	}
623#endif /* IPSEC */
624
625	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
626		/* Unicast */
627
628#define ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
629#define sin6tosa(sin6)	((struct sockaddr *)(sin6))
630		/* xxx
631		 * interface selection comes here
632		 * if an interface is specified from an upper layer,
633		 * ifp must point it.
634		 */
635		if (ro->ro_rt == 0) {
636			/*
637			 * non-bsdi always clone routes, if parent is
638			 * PRF_CLONING.
639			 */
640			rtalloc((struct route *)ro);
641		}
642		if (ro->ro_rt == 0) {
643			ip6stat.ip6s_noroute++;
644			error = EHOSTUNREACH;
645			/* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
646			goto bad;
647		}
648		/* XXX rt not locked */
649		ia = ifatoia6(ro->ro_rt->rt_ifa);
650		ifp = ro->ro_rt->rt_ifp;
651		ro->ro_rt->rt_rmx.rmx_pksent++;
652		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
653			dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
654		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
655
656		in6_ifstat_inc(ifp, ifs6_out_request);
657
658		/*
659		 * Check if the outgoing interface conflicts with
660		 * the interface specified by ifi6_ifindex (if specified).
661		 * Note that loopback interface is always okay.
662		 * (this may happen when we are sending a packet to one of
663		 *  our own addresses.)
664		 */
665		if (opt && opt->ip6po_pktinfo
666		 && opt->ip6po_pktinfo->ipi6_ifindex) {
667			if (!(ifp->if_flags & IFF_LOOPBACK)
668			 && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
669				ip6stat.ip6s_noroute++;
670				in6_ifstat_inc(ifp, ifs6_out_discard);
671				error = EHOSTUNREACH;
672				goto bad;
673			}
674		}
675
676		if (opt && opt->ip6po_hlim != -1)
677			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
678	} else {
679		/* Multicast */
680		struct	in6_multi *in6m;
681
682		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
683
684		/*
685		 * See if the caller provided any multicast options
686		 */
687		ifp = NULL;
688		if (im6o != NULL) {
689			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
690			if (im6o->im6o_multicast_ifp != NULL)
691				ifp = im6o->im6o_multicast_ifp;
692		} else
693			ip6->ip6_hlim = ip6_defmcasthlim;
694
695		/*
696		 * See if the caller provided the outgoing interface
697		 * as an ancillary data.
698		 * Boundary check for ifindex is assumed to be already done.
699		 */
700		if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex)
701			ifp = ifnet_byindex(opt->ip6po_pktinfo->ipi6_ifindex);
702
703		/*
704		 * If the destination is a node-local scope multicast,
705		 * the packet should be loop-backed only.
706		 */
707		if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
708			/*
709			 * If the outgoing interface is already specified,
710			 * it should be a loopback interface.
711			 */
712			if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
713				ip6stat.ip6s_badscope++;
714				error = ENETUNREACH; /* XXX: better error? */
715				/* XXX correct ifp? */
716				in6_ifstat_inc(ifp, ifs6_out_discard);
717				goto bad;
718			} else {
719				ifp = &loif[0];
720			}
721		}
722
723		if (opt && opt->ip6po_hlim != -1)
724			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
725
		/*
		 * If the caller did not provide an interface, look up a
		 * default in the routing table.  This is either a
		 * default for the specified group (i.e. a host
		 * route), or a multicast default (a route for the
		 * ``net'' ff00::/8).
		 */
733		if (ifp == NULL) {
734			if (ro->ro_rt == 0)
735				ro->ro_rt = rtalloc1((struct sockaddr *)
736						&ro->ro_dst, 0, 0UL);
737			else
738				RT_LOCK(ro->ro_rt);
739			if (ro->ro_rt == 0) {
740				ip6stat.ip6s_noroute++;
741				error = EHOSTUNREACH;
742				/* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
743				goto bad;
744			}
745			ia = ifatoia6(ro->ro_rt->rt_ifa);
746			ifp = ro->ro_rt->rt_ifp;
747			ro->ro_rt->rt_rmx.rmx_pksent++;
748			RT_UNLOCK(ro->ro_rt);
749		}
750
751		if ((flags & IPV6_FORWARDING) == 0)
752			in6_ifstat_inc(ifp, ifs6_out_request);
753		in6_ifstat_inc(ifp, ifs6_out_mcast);
754
755		/*
756		 * Confirm that the outgoing interface supports multicast.
757		 */
758		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
759			ip6stat.ip6s_noroute++;
760			in6_ifstat_inc(ifp, ifs6_out_discard);
761			error = ENETUNREACH;
762			goto bad;
763		}
764		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
765		if (in6m != NULL &&
766		   (im6o == NULL || im6o->im6o_multicast_loop)) {
767			/*
768			 * If we belong to the destination multicast group
769			 * on the outgoing interface, and the caller did not
770			 * forbid loopback, loop back a copy.
771			 */
772			ip6_mloopback(ifp, m, dst);
773		} else {
774			/*
775			 * If we are acting as a multicast router, perform
776			 * multicast forwarding as if the packet had just
777			 * arrived on the interface to which we are about
778			 * to send.  The multicast forwarding function
779			 * recursively calls this function, using the
780			 * IPV6_FORWARDING flag to prevent infinite recursion.
781			 *
782			 * Multicasts that are looped back by ip6_mloopback(),
783			 * above, will be forwarded by the ip6_input() routine,
784			 * if necessary.
785			 */
786			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
787				if (ip6_mforward(ip6, ifp, m) != 0) {
788					m_freem(m);
789					goto done;
790				}
791			}
792		}
793		/*
794		 * Multicasts with a hoplimit of zero may be looped back,
795		 * above, but must not be transmitted on a network.
796		 * Also, multicasts addressed to the loopback interface
797		 * are not sent -- the above call to ip6_mloopback() will
798		 * loop back a copy if this host actually belongs to the
799		 * destination group on the loopback interface.
800		 */
801		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
802		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
803			m_freem(m);
804			goto done;
805		}
806	}
807
808	/*
809	 * Fill the outgoing interface to tell the upper layer
810	 * to increment per-interface statistics.
811	 */
812	if (ifpp)
813		*ifpp = ifp;
814
815	/* Determine path MTU. */
816	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
817	    &alwaysfrag)) != 0)
818		goto bad;
819
820	/*
821	 * The caller of this function may specify to use the minimum MTU
822	 * in some cases.
823	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
824	 * setting.  The logic is a bit complicated; by default, unicast
825	 * packets will follow path MTU while multicast packets will be sent at
826	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
827	 * including unicast ones will be sent at the minimum MTU.  Multicast
828	 * packets will always be sent at the minimum MTU unless
829	 * IP6PO_MINMTU_DISABLE is explicitly specified.
830	 * See RFC 3542 for more details.
831	 */
832	if (mtu > IPV6_MMTU) {
833		if ((flags & IPV6_MINMTU))
834			mtu = IPV6_MMTU;
835		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
836			mtu = IPV6_MMTU;
837		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
838			 (opt == NULL ||
839			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
840			mtu = IPV6_MMTU;
841		}
842	}
843
844	/* Fake scoped addresses */
845	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
846		/*
847		 * If source or destination address is a scoped address, and
848		 * the packet is going to be sent to a loopback interface,
849		 * we should keep the original interface.
850		 */
851
852		/*
853		 * XXX: this is a very experimental and temporary solution.
854		 * We eventually have sockaddr_in6 and use the sin6_scope_id
855		 * field of the structure here.
856		 * We rely on the consistency between two scope zone ids
857		 * of source and destination, which should already be assured.
858		 * Larger scopes than link will be supported in the future.
859		 */
860		origifp = NULL;
861		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
862			origifp = ifnet_byindex(ntohs(ip6->ip6_src.s6_addr16[1]));
863		else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
864			origifp = ifnet_byindex(ntohs(ip6->ip6_dst.s6_addr16[1]));
865		/*
866		 * XXX: origifp can be NULL even in those two cases above.
867		 * For example, if we remove the (only) link-local address
868		 * from the loopback interface, and try to send a link-local
869		 * address without link-id information.  Then the source
870		 * address is ::1, and the destination address is the
871		 * link-local address with its s6_addr16[1] being zero.
872		 * What is worse, if the packet goes to the loopback interface
873		 * by a default rejected route, the null pointer would be
874		 * passed to looutput, and the kernel would hang.
875		 * The following last resort would prevent such disaster.
876		 */
877		if (origifp == NULL)
878			origifp = ifp;
879	}
880	else
881		origifp = ifp;
882	/*
883	 * clear embedded scope identifiers if necessary.
884	 * in6_clearscope will touch the addresses only when necessary.
885	 */
886	in6_clearscope(&ip6->ip6_src);
887	in6_clearscope(&ip6->ip6_dst);
888
889	/*
890	 * Check with the firewall...
891	 */
892	if (ip6_fw_enable && ip6_fw_chk_ptr) {
893		u_short port = 0;
894		m->m_pkthdr.rcvif = NULL;	/* XXX */
895		/* If ipfw says divert, we have to just drop packet */
896		if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
897			m_freem(m);
898			goto done;
899		}
900		if (!m) {
901			error = EACCES;
902			goto done;
903		}
904	}
905
906	/*
907	 * If the outgoing packet contains a hop-by-hop options header,
908	 * it must be examined and processed even by the source node.
909	 * (RFC 2460, section 4.)
910	 */
911	if (exthdrs.ip6e_hbh) {
912		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
913		u_int32_t dummy; /* XXX unused */
914		u_int32_t plen = 0; /* XXX: ip6_process will check the value */
915
916#ifdef DIAGNOSTIC
917		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
918			panic("ip6e_hbh is not continuous");
919#endif
920		/*
921		 *  XXX: if we have to send an ICMPv6 error to the sender,
922		 *       we need the M_LOOP flag since icmp6_error() expects
923		 *       the IPv6 and the hop-by-hop options header are
924		 *       continuous unless the flag is set.
925		 */
926		m->m_flags |= M_LOOP;
927		m->m_pkthdr.rcvif = ifp;
928		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
929		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
930		    &dummy, &plen) < 0) {
931			/* m was already freed at this point */
932			error = EINVAL;/* better error? */
933			goto done;
934		}
935		m->m_flags &= ~M_LOOP; /* XXX */
936		m->m_pkthdr.rcvif = NULL;
937	}
938
939	/* Jump over all PFIL processing if hooks are not active. */
940	if (inet6_pfil_hook.ph_busy_count == -1)
941		goto passout;
942
943	odst = ip6->ip6_dst;
944	/* Run through list of hooks for output packets. */
945	error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
946	if (error != 0 || m == NULL)
947		goto done;
948	ip6 = mtod(m, struct ip6_hdr *);
949
950	/* See if destination IP address was changed by packet filter. */
951	if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
952		m->m_flags |= M_SKIP_FIREWALL;
953		/* If destination is now ourself drop to ip6_input(). */
954		if (in6_localaddr(&ip6->ip6_dst)) {
955			if (m->m_pkthdr.rcvif == NULL)
956				m->m_pkthdr.rcvif = loif;
957			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
958				m->m_pkthdr.csum_flags |=
959				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
960				m->m_pkthdr.csum_data = 0xffff;
961			}
962			m->m_pkthdr.csum_flags |=
963			    CSUM_IP_CHECKED | CSUM_IP_VALID;
964			error = netisr_queue(NETISR_IPV6, m);
965			goto done;
966		} else
967			goto again;	/* Redo the routing table lookup. */
968	}
969
970	/* XXX: IPFIREWALL_FORWARD */
971
972passout:
973	/*
974	 * Send the packet to the outgoing interface.
975	 * If necessary, do IPv6 fragmentation before sending.
976	 *
977	 * the logic here is rather complex:
978	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
979	 * 1-a:	send as is if tlen <= path mtu
980	 * 1-b:	fragment if tlen > path mtu
981	 *
982	 * 2: if user asks us not to fragment (dontfrag == 1)
983	 * 2-a:	send as is if tlen <= interface mtu
984	 * 2-b:	error if tlen > interface mtu
985	 *
986	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
987	 *	always fragment
988	 *
989	 * 4: if dontfrag == 1 && alwaysfrag == 1
990	 *	error, as we cannot handle this conflicting request
991	 */
992	tlen = m->m_pkthdr.len;
993
994	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
995		dontfrag = 1;
996	else
997		dontfrag = 0;
998	if (dontfrag && alwaysfrag) {	/* case 4 */
999		/* conflicting request - can't transmit */
1000		error = EMSGSIZE;
1001		goto bad;
1002	}
1003	if (dontfrag && tlen > IN6_LINKMTU(ifp)) {	/* case 2-b */
1004		/*
1005		 * Even if the DONTFRAG option is specified, we cannot send the
1006		 * packet when the data length is larger than the MTU of the
1007		 * outgoing interface.
1008		 * Notify the error by sending IPV6_PATHMTU ancillary data as
1009		 * well as returning an error code (the latter is not described
1010		 * in the API spec.)
1011		 */
1012		u_int32_t mtu32;
1013		struct ip6ctlparam ip6cp;
1014
1015		mtu32 = (u_int32_t)mtu;
1016		bzero(&ip6cp, sizeof(ip6cp));
1017		ip6cp.ip6c_cmdarg = (void *)&mtu32;
1018		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1019		    (void *)&ip6cp);
1020
1021		error = EMSGSIZE;
1022		goto bad;
1023	}
1024
1025	/*
1026	 * transmit packet without fragmentation
1027	 */
1028	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
1029		struct in6_ifaddr *ia6;
1030
1031		ip6 = mtod(m, struct ip6_hdr *);
1032		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
1033		if (ia6) {
1034			/* Record statistics for this interface address. */
1035			ia6->ia_ifa.if_opackets++;
1036			ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
1037		}
1038#ifdef IPSEC
1039		/* clean ipsec history once it goes out of the node */
1040		ipsec_delaux(m);
1041#endif
1042		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1043		goto done;
1044	}
1045
1046	/*
1047	 * try to fragment the packet.  case 1-b and 3
1048	 */
1049	if (mtu < IPV6_MMTU) {
1050		/* path MTU cannot be less than IPV6_MMTU */
1051		error = EMSGSIZE;
1052		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1053		goto bad;
1054	} else if (ip6->ip6_plen == 0) {
1055		/* jumbo payload cannot be fragmented */
1056		error = EMSGSIZE;
1057		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1058		goto bad;
1059	} else {
1060		struct mbuf **mnext, *m_frgpart;
1061		struct ip6_frag *ip6f;
1062		u_int32_t id = htonl(ip6_randomid());
1063		u_char nextproto;
1064#if 0
1065		struct ip6ctlparam ip6cp;
1066		u_int32_t mtu32;
1067#endif
1068		int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
1069
1070		/*
1071		 * Too large for the destination or interface;
1072		 * fragment if possible.
1073		 * Must be able to put at least 8 bytes per fragment.
1074		 */
1075		hlen = unfragpartlen;
1076		if (mtu > IPV6_MAXPACKET)
1077			mtu = IPV6_MAXPACKET;
1078
1079#if 0
1080		/*
1081		 * It is believed this code is a leftover from the
1082		 * development of the IPV6_RECVPATHMTU sockopt and
1083		 * associated work to implement RFC3542.
1084		 * It's not entirely clear what the intent of the API
1085		 * is at this point, so disable this code for now.
1086		 * The IPV6_RECVPATHMTU sockopt and/or IPV6_DONTFRAG
1087		 * will send notifications if the application requests.
1088		 */
1089
1090		/* Notify a proper path MTU to applications. */
1091		mtu32 = (u_int32_t)mtu;
1092		bzero(&ip6cp, sizeof(ip6cp));
1093		ip6cp.ip6c_cmdarg = (void *)&mtu32;
1094		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1095		    (void *)&ip6cp);
1096#endif
1097
1098		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1099		if (len < 8) {
1100			error = EMSGSIZE;
1101			in6_ifstat_inc(ifp, ifs6_out_fragfail);
1102			goto bad;
1103		}
1104
1105		/*
1106		 * Verify that we have any chance at all of being able to queue
1107		 *      the packet or packet fragments
1108		 */
1109		if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
1110		    < tlen  /* - hlen */)) {
1111			error = ENOBUFS;
1112			ip6stat.ip6s_odropped++;
1113			goto bad;
1114		}
1115
1116		mnext = &m->m_nextpkt;
1117
1118		/*
1119		 * Change the next header field of the last header in the
1120		 * unfragmentable part.
1121		 */
1122		if (exthdrs.ip6e_rthdr) {
1123			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1124			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1125		} else if (exthdrs.ip6e_dest1) {
1126			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1127			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1128		} else if (exthdrs.ip6e_hbh) {
1129			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1130			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1131		} else {
1132			nextproto = ip6->ip6_nxt;
1133			ip6->ip6_nxt = IPPROTO_FRAGMENT;
1134		}
1135
1136		/*
1137		 * Loop through length of segment after first fragment,
1138		 * make new header and copy data of each part and link onto
1139		 * chain.
1140		 */
1141		m0 = m;
1142		for (off = hlen; off < tlen; off += len) {
1143			MGETHDR(m, M_DONTWAIT, MT_HEADER);
1144			if (!m) {
1145				error = ENOBUFS;
1146				ip6stat.ip6s_odropped++;
1147				goto sendorfree;
1148			}
1149			m->m_pkthdr.rcvif = NULL;
1150			m->m_flags = m0->m_flags & M_COPYFLAGS;
1151			*mnext = m;
1152			mnext = &m->m_nextpkt;
1153			m->m_data += max_linkhdr;
1154			mhip6 = mtod(m, struct ip6_hdr *);
1155			*mhip6 = *ip6;
1156			m->m_len = sizeof(*mhip6);
1157			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1158			if (error) {
1159				ip6stat.ip6s_odropped++;
1160				goto sendorfree;
1161			}
1162			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1163			if (off + len >= tlen)
1164				len = tlen - off;
1165			else
1166				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1167			mhip6->ip6_plen = htons((u_short)(len + hlen +
1168			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1169			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1170				error = ENOBUFS;
1171				ip6stat.ip6s_odropped++;
1172				goto sendorfree;
1173			}
1174			m_cat(m, m_frgpart);
1175			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1176			m->m_pkthdr.rcvif = (struct ifnet *)0;
1177			ip6f->ip6f_reserved = 0;
1178			ip6f->ip6f_ident = id;
1179			ip6f->ip6f_nxt = nextproto;
1180			ip6stat.ip6s_ofragments++;
1181			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1182		}
1183
1184		in6_ifstat_inc(ifp, ifs6_out_fragok);
1185	}
1186
1187	/*
1188	 * Remove leading garbages.
1189	 */
1190sendorfree:
1191	m = m0->m_nextpkt;
1192	m0->m_nextpkt = 0;
1193	m_freem(m0);
1194	for (m0 = m; m; m = m0) {
1195		m0 = m->m_nextpkt;
1196		m->m_nextpkt = 0;
1197		if (error == 0) {
1198 			/* Record statistics for this interface address. */
1199 			if (ia) {
1200 				ia->ia_ifa.if_opackets++;
1201 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1202 			}
1203#ifdef IPSEC
1204			/* clean ipsec history once it goes out of the node */
1205			ipsec_delaux(m);
1206#endif
1207			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1208		} else
1209			m_freem(m);
1210	}
1211
1212	if (error == 0)
1213		ip6stat.ip6s_fragmented++;
1214
1215done:
1216	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1217		RTFREE(ro->ro_rt);
1218	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1219		RTFREE(ro_pmtu->ro_rt);
1220	}
1221
1222#ifdef IPSEC
1223	if (sp != NULL)
1224		key_freesp(sp);
1225#endif /* IPSEC */
1226#ifdef FAST_IPSEC
1227	if (sp != NULL)
1228		KEY_FREESP(&sp);
1229#endif /* FAST_IPSEC */
1230
1231	return (error);
1232
1233freehdrs:
1234	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1235	m_freem(exthdrs.ip6e_dest1);
1236	m_freem(exthdrs.ip6e_rthdr);
1237	m_freem(exthdrs.ip6e_dest2);
1238	/* FALLTHROUGH */
1239bad:
1240	m_freem(m);
1241	goto done;
1242}
1243
1244static int
1245ip6_copyexthdr(mp, hdr, hlen)
1246	struct mbuf **mp;
1247	caddr_t hdr;
1248	int hlen;
1249{
1250	struct mbuf *m;
1251
1252	if (hlen > MCLBYTES)
1253		return (ENOBUFS); /* XXX */
1254
1255	MGET(m, M_DONTWAIT, MT_DATA);
1256	if (!m)
1257		return (ENOBUFS);
1258
1259	if (hlen > MLEN) {
1260		MCLGET(m, M_DONTWAIT);
1261		if ((m->m_flags & M_EXT) == 0) {
1262			m_free(m);
1263			return (ENOBUFS);
1264		}
1265	}
1266	m->m_len = hlen;
1267	if (hdr)
1268		bcopy(hdr, mtod(m, caddr_t), hlen);
1269
1270	*mp = m;
1271	return (0);
1272}
1273
1274/*
1275 * Insert jumbo payload option.
1276 */
1277static int
1278ip6_insert_jumboopt(exthdrs, plen)
1279	struct ip6_exthdrs *exthdrs;
1280	u_int32_t plen;
1281{
1282	struct mbuf *mopt;
1283	u_char *optbuf;
1284	u_int32_t v;
1285
1286#define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1287
1288	/*
1289	 * If there is no hop-by-hop options header, allocate new one.
1290	 * If there is one but it doesn't have enough space to store the
1291	 * jumbo payload option, allocate a cluster to store the whole options.
1292	 * Otherwise, use it to store the options.
1293	 */
1294	if (exthdrs->ip6e_hbh == 0) {
1295		MGET(mopt, M_DONTWAIT, MT_DATA);
1296		if (mopt == 0)
1297			return (ENOBUFS);
1298		mopt->m_len = JUMBOOPTLEN;
1299		optbuf = mtod(mopt, u_char *);
1300		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1301		exthdrs->ip6e_hbh = mopt;
1302	} else {
1303		struct ip6_hbh *hbh;
1304
1305		mopt = exthdrs->ip6e_hbh;
1306		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1307			/*
1308			 * XXX assumption:
1309			 * - exthdrs->ip6e_hbh is not referenced from places
1310			 *   other than exthdrs.
1311			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1312			 */
1313			int oldoptlen = mopt->m_len;
1314			struct mbuf *n;
1315
1316			/*
1317			 * XXX: give up if the whole (new) hbh header does
1318			 * not fit even in an mbuf cluster.
1319			 */
1320			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1321				return (ENOBUFS);
1322
1323			/*
1324			 * As a consequence, we must always prepare a cluster
1325			 * at this point.
1326			 */
1327			MGET(n, M_DONTWAIT, MT_DATA);
1328			if (n) {
1329				MCLGET(n, M_DONTWAIT);
1330				if ((n->m_flags & M_EXT) == 0) {
1331					m_freem(n);
1332					n = NULL;
1333				}
1334			}
1335			if (!n)
1336				return (ENOBUFS);
1337			n->m_len = oldoptlen + JUMBOOPTLEN;
1338			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1339			    oldoptlen);
1340			optbuf = mtod(n, caddr_t) + oldoptlen;
1341			m_freem(mopt);
1342			mopt = exthdrs->ip6e_hbh = n;
1343		} else {
1344			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1345			mopt->m_len += JUMBOOPTLEN;
1346		}
1347		optbuf[0] = IP6OPT_PADN;
1348		optbuf[1] = 1;
1349
1350		/*
1351		 * Adjust the header length according to the pad and
1352		 * the jumbo payload option.
1353		 */
1354		hbh = mtod(mopt, struct ip6_hbh *);
1355		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1356	}
1357
1358	/* fill in the option. */
1359	optbuf[2] = IP6OPT_JUMBO;
1360	optbuf[3] = 4;
1361	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1362	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1363
1364	/* finally, adjust the packet header length */
1365	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1366
1367	return (0);
1368#undef JUMBOOPTLEN
1369}
1370
1371/*
1372 * Insert fragment header and copy unfragmentable header portions.
1373 */
1374static int
1375ip6_insertfraghdr(m0, m, hlen, frghdrp)
1376	struct mbuf *m0, *m;
1377	int hlen;
1378	struct ip6_frag **frghdrp;
1379{
1380	struct mbuf *n, *mlast;
1381
1382	if (hlen > sizeof(struct ip6_hdr)) {
1383		n = m_copym(m0, sizeof(struct ip6_hdr),
1384		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1385		if (n == 0)
1386			return (ENOBUFS);
1387		m->m_next = n;
1388	} else
1389		n = m;
1390
1391	/* Search for the last mbuf of unfragmentable part. */
1392	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1393		;
1394
1395	if ((mlast->m_flags & M_EXT) == 0 &&
1396	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1397		/* use the trailing space of the last mbuf for the fragment hdr */
1398		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1399		    mlast->m_len);
1400		mlast->m_len += sizeof(struct ip6_frag);
1401		m->m_pkthdr.len += sizeof(struct ip6_frag);
1402	} else {
1403		/* allocate a new mbuf for the fragment header */
1404		struct mbuf *mfrg;
1405
1406		MGET(mfrg, M_DONTWAIT, MT_DATA);
1407		if (mfrg == 0)
1408			return (ENOBUFS);
1409		mfrg->m_len = sizeof(struct ip6_frag);
1410		*frghdrp = mtod(mfrg, struct ip6_frag *);
1411		mlast->m_next = mfrg;
1412	}
1413
1414	return (0);
1415}
1416
1417static int
1418ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
1419	struct route_in6 *ro_pmtu, *ro;
1420	struct ifnet *ifp;
1421	struct in6_addr *dst;
1422	u_long *mtup;
1423	int *alwaysfragp;
1424{
1425	u_int32_t mtu = 0;
1426	int alwaysfrag = 0;
1427	int error = 0;
1428
1429	if (ro_pmtu != ro) {
1430		/* The first hop and the final destination may differ. */
1431		struct sockaddr_in6 *sa6_dst =
1432		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1433		if (ro_pmtu->ro_rt &&
1434		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1435		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1436			RTFREE(ro_pmtu->ro_rt);
1437			ro_pmtu->ro_rt = (struct rtentry *)NULL;
1438		}
1439		if (ro_pmtu->ro_rt == NULL) {
1440			bzero(sa6_dst, sizeof(*sa6_dst));
1441			sa6_dst->sin6_family = AF_INET6;
1442			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1443			sa6_dst->sin6_addr = *dst;
1444
1445			rtalloc((struct route *)ro_pmtu);
1446		}
1447	}
1448	if (ro_pmtu->ro_rt) {
1449		u_int32_t ifmtu;
1450		struct in_conninfo inc;
1451
1452		bzero(&inc, sizeof(inc));
1453		inc.inc_flags = 1; /* IPv6 */
1454		inc.inc6_faddr = *dst;
1455
1456		if (ifp == NULL)
1457			ifp = ro_pmtu->ro_rt->rt_ifp;
1458		ifmtu = IN6_LINKMTU(ifp);
1459		mtu = tcp_hc_getmtu(&inc);
1460		if (mtu)
1461			mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
1462		else
1463			mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1464		if (mtu == 0)
1465			mtu = ifmtu;
1466		else if (mtu < IPV6_MMTU) {
1467			/*
1468			 * RFC2460 section 5, last paragraph:
1469			 * if we record ICMPv6 too big message with
1470			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1471			 * or smaller, with framgent header attached.
1472			 * (fragment header is needed regardless from the
1473			 * packet size, for translators to identify packets)
1474			 */
1475			alwaysfrag = 1;
1476			mtu = IPV6_MMTU;
1477		} else if (mtu > ifmtu) {
1478			/*
1479			 * The MTU on the route is larger than the MTU on
1480			 * the interface!  This shouldn't happen, unless the
1481			 * MTU of the interface has been changed after the
1482			 * interface was brought up.  Change the MTU in the
1483			 * route to match the interface MTU (as long as the
1484			 * field isn't locked).
1485			 */
1486			mtu = ifmtu;
1487			ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1488		}
1489	} else if (ifp) {
1490		mtu = IN6_LINKMTU(ifp);
1491	} else
1492		error = EHOSTUNREACH; /* XXX */
1493
1494	*mtup = mtu;
1495	if (alwaysfragp)
1496		*alwaysfragp = alwaysfrag;
1497	return (error);
1498}
1499
1500/*
1501 * IP6 socket option processing.
1502 */
1503int
1504ip6_ctloutput(so, sopt)
1505	struct socket *so;
1506	struct sockopt *sopt;
1507{
1508	int privileged, optdatalen, uproto;
1509	void *optdata;
1510	struct inpcb *in6p = sotoinpcb(so);
1511	int error, optval;
1512	int level, op, optname;
1513	int optlen;
1514	struct thread *td;
1515
1516	if (sopt) {
1517		level = sopt->sopt_level;
1518		op = sopt->sopt_dir;
1519		optname = sopt->sopt_name;
1520		optlen = sopt->sopt_valsize;
1521		td = sopt->sopt_td;
1522	} else {
1523		panic("ip6_ctloutput: arg soopt is NULL");
1524	}
1525	error = optval = 0;
1526
1527	privileged = (td == 0 || suser(td)) ? 0 : 1;
1528	uproto = (int)so->so_proto->pr_protocol;
1529
1530	if (level == IPPROTO_IPV6) {
1531		switch (op) {
1532
1533		case SOPT_SET:
1534			switch (optname) {
1535			case IPV6_2292PKTOPTIONS:
1536#ifdef IPV6_PKTOPTIONS
1537			case IPV6_PKTOPTIONS:
1538#endif
1539			{
1540				struct mbuf *m;
1541
1542				error = soopt_getm(sopt, &m); /* XXX */
1543				if (error != 0)
1544					break;
1545				error = soopt_mcopyin(sopt, m); /* XXX */
1546				if (error != 0)
1547					break;
1548				error = ip6_pcbopts(&in6p->in6p_outputopts,
1549						    m, so, sopt);
1550				m_freem(m); /* XXX */
1551				break;
1552			}
1553
1554			/*
1555			 * Use of some Hop-by-Hop options or some
1556			 * Destination options, might require special
1557			 * privilege.  That is, normal applications
1558			 * (without special privilege) might be forbidden
1559			 * from setting certain options in outgoing packets,
1560			 * and might never see certain options in received
1561			 * packets. [RFC 2292 Section 6]
1562			 * KAME specific note:
1563			 *  KAME prevents non-privileged users from sending or
1564			 *  receiving ANY hbh/dst options in order to avoid
1565			 *  overhead of parsing options in the kernel.
1566			 */
1567			case IPV6_RECVHOPOPTS:
1568			case IPV6_RECVDSTOPTS:
1569			case IPV6_RECVRTHDRDSTOPTS:
1570				if (!privileged) {
1571					error = EPERM;
1572					break;
1573				}
1574				/* FALLTHROUGH */
1575			case IPV6_UNICAST_HOPS:
1576			case IPV6_HOPLIMIT:
1577			case IPV6_FAITH:
1578
1579			case IPV6_RECVPKTINFO:
1580			case IPV6_RECVHOPLIMIT:
1581			case IPV6_RECVRTHDR:
1582			case IPV6_RECVPATHMTU:
1583			case IPV6_RECVTCLASS:
1584			case IPV6_V6ONLY:
1585			case IPV6_AUTOFLOWLABEL:
1586				if (optlen != sizeof(int)) {
1587					error = EINVAL;
1588					break;
1589				}
1590				error = sooptcopyin(sopt, &optval,
1591					sizeof optval, sizeof optval);
1592				if (error)
1593					break;
1594				switch (optname) {
1595
1596				case IPV6_UNICAST_HOPS:
1597					if (optval < -1 || optval >= 256)
1598						error = EINVAL;
1599					else {
1600						/* -1 = kernel default */
1601						in6p->in6p_hops = optval;
1602						if ((in6p->in6p_vflag &
1603						     INP_IPV4) != 0)
1604							in6p->inp_ip_ttl = optval;
1605					}
1606					break;
1607#define OPTSET(bit) \
1608do { \
1609	if (optval) \
1610		in6p->in6p_flags |= (bit); \
1611	else \
1612		in6p->in6p_flags &= ~(bit); \
1613} while (/*CONSTCOND*/ 0)
1614#define OPTSET2292(bit) \
1615do { \
1616	in6p->in6p_flags |= IN6P_RFC2292; \
1617	if (optval) \
1618		in6p->in6p_flags |= (bit); \
1619	else \
1620		in6p->in6p_flags &= ~(bit); \
1621} while (/*CONSTCOND*/ 0)
1622#define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1623
1624				case IPV6_RECVPKTINFO:
1625					/* cannot mix with RFC2292 */
1626					if (OPTBIT(IN6P_RFC2292)) {
1627						error = EINVAL;
1628						break;
1629					}
1630					OPTSET(IN6P_PKTINFO);
1631					break;
1632
1633				case IPV6_HOPLIMIT:
1634				{
1635					struct ip6_pktopts **optp;
1636
1637					/* cannot mix with RFC2292 */
1638					if (OPTBIT(IN6P_RFC2292)) {
1639						error = EINVAL;
1640						break;
1641					}
1642					optp = &in6p->in6p_outputopts;
1643					error = ip6_pcbopt(IPV6_HOPLIMIT,
1644							   (u_char *)&optval,
1645							   sizeof(optval),
1646							   optp,
1647							   privileged, uproto);
1648					break;
1649				}
1650
1651				case IPV6_RECVHOPLIMIT:
1652					/* cannot mix with RFC2292 */
1653					if (OPTBIT(IN6P_RFC2292)) {
1654						error = EINVAL;
1655						break;
1656					}
1657					OPTSET(IN6P_HOPLIMIT);
1658					break;
1659
1660				case IPV6_RECVHOPOPTS:
1661					/* cannot mix with RFC2292 */
1662					if (OPTBIT(IN6P_RFC2292)) {
1663						error = EINVAL;
1664						break;
1665					}
1666					OPTSET(IN6P_HOPOPTS);
1667					break;
1668
1669				case IPV6_RECVDSTOPTS:
1670					/* cannot mix with RFC2292 */
1671					if (OPTBIT(IN6P_RFC2292)) {
1672						error = EINVAL;
1673						break;
1674					}
1675					OPTSET(IN6P_DSTOPTS);
1676					break;
1677
1678				case IPV6_RECVRTHDRDSTOPTS:
1679					/* cannot mix with RFC2292 */
1680					if (OPTBIT(IN6P_RFC2292)) {
1681						error = EINVAL;
1682						break;
1683					}
1684					OPTSET(IN6P_RTHDRDSTOPTS);
1685					break;
1686
1687				case IPV6_RECVRTHDR:
1688					/* cannot mix with RFC2292 */
1689					if (OPTBIT(IN6P_RFC2292)) {
1690						error = EINVAL;
1691						break;
1692					}
1693					OPTSET(IN6P_RTHDR);
1694					break;
1695
1696				case IPV6_FAITH:
1697					OPTSET(IN6P_FAITH);
1698					break;
1699
1700				case IPV6_RECVPATHMTU:
1701					/*
1702					 * We ignore this option for TCP
1703					 * sockets.
1704					 * (rfc2292bis leaves this case
1705					 * unspecified.)
1706					 */
1707					if (uproto != IPPROTO_TCP)
1708						OPTSET(IN6P_MTU);
1709					break;
1710
1711				case IPV6_V6ONLY:
1712					/*
1713					 * make setsockopt(IPV6_V6ONLY)
1714					 * available only prior to bind(2).
1715					 * see ipng mailing list, Jun 22 2001.
1716					 */
1717					if (in6p->in6p_lport ||
1718					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1719						error = EINVAL;
1720						break;
1721					}
1722					OPTSET(IN6P_IPV6_V6ONLY);
1723					if (optval)
1724						in6p->in6p_vflag &= ~INP_IPV4;
1725					else
1726						in6p->in6p_vflag |= INP_IPV4;
1727					break;
1728				case IPV6_RECVTCLASS:
1729					/* cannot mix with RFC2292 XXX */
1730					if (OPTBIT(IN6P_RFC2292)) {
1731						error = EINVAL;
1732						break;
1733					}
1734					OPTSET(IN6P_TCLASS);
1735					break;
1736				case IPV6_AUTOFLOWLABEL:
1737					OPTSET(IN6P_AUTOFLOWLABEL);
1738					break;
1739
1740				}
1741				break;
1742
1743			case IPV6_TCLASS:
1744			case IPV6_DONTFRAG:
1745			case IPV6_USE_MIN_MTU:
1746			case IPV6_PREFER_TEMPADDR:
1747				if (optlen != sizeof(optval)) {
1748					error = EINVAL;
1749					break;
1750				}
1751				error = sooptcopyin(sopt, &optval,
1752					sizeof optval, sizeof optval);
1753				if (error)
1754					break;
1755				{
1756					struct ip6_pktopts **optp;
1757					optp = &in6p->in6p_outputopts;
1758					error = ip6_pcbopt(optname,
1759							   (u_char *)&optval,
1760							   sizeof(optval),
1761							   optp,
1762							   privileged, uproto);
1763					break;
1764				}
1765
1766			case IPV6_2292PKTINFO:
1767			case IPV6_2292HOPLIMIT:
1768			case IPV6_2292HOPOPTS:
1769			case IPV6_2292DSTOPTS:
1770			case IPV6_2292RTHDR:
1771				/* RFC 2292 */
1772				if (optlen != sizeof(int)) {
1773					error = EINVAL;
1774					break;
1775				}
1776				error = sooptcopyin(sopt, &optval,
1777					sizeof optval, sizeof optval);
1778				if (error)
1779					break;
1780				switch (optname) {
1781				case IPV6_2292PKTINFO:
1782					OPTSET2292(IN6P_PKTINFO);
1783					break;
1784				case IPV6_2292HOPLIMIT:
1785					OPTSET2292(IN6P_HOPLIMIT);
1786					break;
1787				case IPV6_2292HOPOPTS:
1788					/*
1789					 * Check super-user privilege.
1790					 * See comments for IPV6_RECVHOPOPTS.
1791					 */
1792					if (!privileged)
1793						return (EPERM);
1794					OPTSET2292(IN6P_HOPOPTS);
1795					break;
1796				case IPV6_2292DSTOPTS:
1797					if (!privileged)
1798						return (EPERM);
1799					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1800					break;
1801				case IPV6_2292RTHDR:
1802					OPTSET2292(IN6P_RTHDR);
1803					break;
1804				}
1805				break;
1806			case IPV6_PKTINFO:
1807			case IPV6_HOPOPTS:
1808			case IPV6_RTHDR:
1809			case IPV6_DSTOPTS:
1810			case IPV6_RTHDRDSTOPTS:
1811			case IPV6_NEXTHOP:
1812			{
1813				/* new advanced API (2292bis) */
1814				u_char *optbuf;
1815				int optlen;
1816				struct ip6_pktopts **optp;
1817
1818				/* cannot mix with RFC2292 */
1819				if (OPTBIT(IN6P_RFC2292)) {
1820					error = EINVAL;
1821					break;
1822				}
1823
1824				switch (optname) {
1825				case IPV6_HOPOPTS:
1826				case IPV6_DSTOPTS:
1827				case IPV6_RTHDRDSTOPTS:
1828				case IPV6_NEXTHOP:
1829					if (!privileged)
1830						error = EPERM;
1831					break;
1832				}
1833				if (error)
1834					break;
1835
1836				switch (optname) {
1837				case IPV6_PKTINFO:
1838					optlen = sizeof(struct in6_pktinfo);
1839					break;
1840				case IPV6_NEXTHOP:
1841					optlen = SOCK_MAXADDRLEN;
1842					break;
1843				default:
1844					optlen = IPV6_MAXOPTHDR;
1845					break;
1846				}
1847				if (sopt->sopt_valsize > optlen) {
1848					error = EINVAL;
1849					break;
1850				}
1851
1852				optlen = sopt->sopt_valsize;
1853				optbuf = malloc(optlen, M_TEMP, M_WAITOK);
1854				error = sooptcopyin(sopt, optbuf, optlen,
1855				    optlen);
1856				if (error) {
1857					free(optbuf, M_TEMP);
1858					break;
1859				}
1860
1861				optp = &in6p->in6p_outputopts;
1862				error = ip6_pcbopt(optname,
1863						   optbuf, optlen,
1864						   optp, privileged, uproto);
1865				free(optbuf, M_TEMP);
1866				break;
1867			}
1868#undef OPTSET
1869
1870			case IPV6_MULTICAST_IF:
1871			case IPV6_MULTICAST_HOPS:
1872			case IPV6_MULTICAST_LOOP:
1873			case IPV6_JOIN_GROUP:
1874			case IPV6_LEAVE_GROUP:
1875			    {
1876				if (sopt->sopt_valsize > MLEN) {
1877					error = EMSGSIZE;
1878					break;
1879				}
1880				/* XXX */
1881			    }
1882			    /* FALLTHROUGH */
1883			    {
1884				struct mbuf *m;
1885
1886				if (sopt->sopt_valsize > MCLBYTES) {
1887					error = EMSGSIZE;
1888					break;
1889				}
1890				/* XXX */
1891				MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_HEADER);
1892				if (m == 0) {
1893					error = ENOBUFS;
1894					break;
1895				}
1896				if (sopt->sopt_valsize > MLEN) {
1897					MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
1898					if ((m->m_flags & M_EXT) == 0) {
1899						m_free(m);
1900						error = ENOBUFS;
1901						break;
1902					}
1903				}
1904				m->m_len = sopt->sopt_valsize;
1905				error = sooptcopyin(sopt, mtod(m, char *),
1906						    m->m_len, m->m_len);
1907				if (error) {
1908					(void)m_free(m);
1909					break;
1910				}
1911				error =	ip6_setmoptions(sopt->sopt_name,
1912							&in6p->in6p_moptions,
1913							m);
1914				(void)m_free(m);
1915			    }
1916				break;
1917
1918			case IPV6_PORTRANGE:
1919				error = sooptcopyin(sopt, &optval,
1920				    sizeof optval, sizeof optval);
1921				if (error)
1922					break;
1923
1924				switch (optval) {
1925				case IPV6_PORTRANGE_DEFAULT:
1926					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1927					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1928					break;
1929
1930				case IPV6_PORTRANGE_HIGH:
1931					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1932					in6p->in6p_flags |= IN6P_HIGHPORT;
1933					break;
1934
1935				case IPV6_PORTRANGE_LOW:
1936					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1937					in6p->in6p_flags |= IN6P_LOWPORT;
1938					break;
1939
1940				default:
1941					error = EINVAL;
1942					break;
1943				}
1944				break;
1945
1946#if defined(IPSEC) || defined(FAST_IPSEC)
1947			case IPV6_IPSEC_POLICY:
1948			    {
1949				caddr_t req = NULL;
1950				size_t len = 0;
1951				struct mbuf *m;
1952
1953				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1954					break;
1955				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1956					break;
1957				if (m) {
1958					req = mtod(m, caddr_t);
1959					len = m->m_len;
1960				}
1961				error = ipsec6_set_policy(in6p, optname, req,
1962							  len, privileged);
1963				m_freem(m);
1964			    }
1965				break;
1966#endif /* KAME IPSEC */
1967
1968			case IPV6_FW_ADD:
1969			case IPV6_FW_DEL:
1970			case IPV6_FW_FLUSH:
1971			case IPV6_FW_ZERO:
1972			    {
1973				struct mbuf *m;
1974				struct mbuf **mp = &m;
1975
1976				if (ip6_fw_ctl_ptr == NULL)
1977					return EINVAL;
1978				/* XXX */
1979				if ((error = soopt_getm(sopt, &m)) != 0)
1980					break;
1981				/* XXX */
1982				if ((error = soopt_mcopyin(sopt, m)) != 0)
1983					break;
1984				error = (*ip6_fw_ctl_ptr)(optname, mp);
1985				m = *mp;
1986			    }
1987				break;
1988
1989			default:
1990				error = ENOPROTOOPT;
1991				break;
1992			}
1993			break;
1994
1995		case SOPT_GET:
1996			switch (optname) {
1997
1998			case IPV6_2292PKTOPTIONS:
1999#ifdef IPV6_PKTOPTIONS
2000			case IPV6_PKTOPTIONS:
2001#endif
2002				/*
2003				 * RFC3542 (effectively) deprecated the
2004				 * semantics of the 2292-style pktoptions.
2005				 * Since it was not reliable in nature (i.e.,
2006				 * applications had to expect the lack of some
2007				 * information after all), it would make sense
2008				 * to simplify this part by always returning
2009				 * empty data.
2010				 */
2011				sopt->sopt_valsize = 0;
2012				break;
2013
2014			case IPV6_RECVHOPOPTS:
2015			case IPV6_RECVDSTOPTS:
2016			case IPV6_RECVRTHDRDSTOPTS:
2017			case IPV6_UNICAST_HOPS:
2018			case IPV6_RECVPKTINFO:
2019			case IPV6_RECVHOPLIMIT:
2020			case IPV6_RECVRTHDR:
2021			case IPV6_RECVPATHMTU:
2022
2023			case IPV6_FAITH:
2024			case IPV6_V6ONLY:
2025			case IPV6_PORTRANGE:
2026			case IPV6_RECVTCLASS:
2027			case IPV6_AUTOFLOWLABEL:
2028				switch (optname) {
2029
2030				case IPV6_RECVHOPOPTS:
2031					optval = OPTBIT(IN6P_HOPOPTS);
2032					break;
2033
2034				case IPV6_RECVDSTOPTS:
2035					optval = OPTBIT(IN6P_DSTOPTS);
2036					break;
2037
2038				case IPV6_RECVRTHDRDSTOPTS:
2039					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
2040					break;
2041
2042				case IPV6_UNICAST_HOPS:
2043					optval = in6p->in6p_hops;
2044					break;
2045
2046				case IPV6_RECVPKTINFO:
2047					optval = OPTBIT(IN6P_PKTINFO);
2048					break;
2049
2050				case IPV6_RECVHOPLIMIT:
2051					optval = OPTBIT(IN6P_HOPLIMIT);
2052					break;
2053
2054				case IPV6_RECVRTHDR:
2055					optval = OPTBIT(IN6P_RTHDR);
2056					break;
2057
2058				case IPV6_RECVPATHMTU:
2059					optval = OPTBIT(IN6P_MTU);
2060					break;
2061
2062				case IPV6_FAITH:
2063					optval = OPTBIT(IN6P_FAITH);
2064					break;
2065
2066				case IPV6_V6ONLY:
2067					optval = OPTBIT(IN6P_IPV6_V6ONLY);
2068					break;
2069
2070				case IPV6_PORTRANGE:
2071				    {
2072					int flags;
2073					flags = in6p->in6p_flags;
2074					if (flags & IN6P_HIGHPORT)
2075						optval = IPV6_PORTRANGE_HIGH;
2076					else if (flags & IN6P_LOWPORT)
2077						optval = IPV6_PORTRANGE_LOW;
2078					else
2079						optval = 0;
2080					break;
2081				    }
2082				case IPV6_RECVTCLASS:
2083					optval = OPTBIT(IN6P_TCLASS);
2084					break;
2085
2086				case IPV6_AUTOFLOWLABEL:
2087					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2088					break;
2089				}
2090				if (error)
2091					break;
2092				error = sooptcopyout(sopt, &optval,
2093					sizeof optval);
2094				break;
2095
2096			case IPV6_PATHMTU:
2097			{
2098				u_long pmtu = 0;
2099				struct ip6_mtuinfo mtuinfo;
2100				struct route_in6 sro;
2101
2102				bzero(&sro, sizeof(sro));
2103
2104				if (!(so->so_state & SS_ISCONNECTED))
2105					return (ENOTCONN);
2106				/*
2107				 * XXX: we dot not consider the case of source
2108				 * routing, or optional information to specify
2109				 * the outgoing interface.
2110				 */
2111				error = ip6_getpmtu(&sro, NULL, NULL,
2112				    &in6p->in6p_faddr, &pmtu, NULL);
2113				if (sro.ro_rt)
2114					RTFREE(sro.ro_rt);
2115				if (error)
2116					break;
2117				if (pmtu > IPV6_MAXPACKET)
2118					pmtu = IPV6_MAXPACKET;
2119
2120				bzero(&mtuinfo, sizeof(mtuinfo));
2121				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2122				optdata = (void *)&mtuinfo;
2123				optdatalen = sizeof(mtuinfo);
2124				error = sooptcopyout(sopt, optdata,
2125				    optdatalen);
2126				break;
2127			}
2128
2129			case IPV6_2292PKTINFO:
2130			case IPV6_2292HOPLIMIT:
2131			case IPV6_2292HOPOPTS:
2132			case IPV6_2292RTHDR:
2133			case IPV6_2292DSTOPTS:
2134				switch (optname) {
2135				case IPV6_2292PKTINFO:
2136					optval = OPTBIT(IN6P_PKTINFO);
2137					break;
2138				case IPV6_2292HOPLIMIT:
2139					optval = OPTBIT(IN6P_HOPLIMIT);
2140					break;
2141				case IPV6_2292HOPOPTS:
2142					optval = OPTBIT(IN6P_HOPOPTS);
2143					break;
2144				case IPV6_2292RTHDR:
2145					optval = OPTBIT(IN6P_RTHDR);
2146					break;
2147				case IPV6_2292DSTOPTS:
2148					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2149					break;
2150				}
2151				error = sooptcopyout(sopt, &optval,
2152				    sizeof optval);
2153				break;
2154			case IPV6_PKTINFO:
2155			case IPV6_HOPOPTS:
2156			case IPV6_RTHDR:
2157			case IPV6_DSTOPTS:
2158			case IPV6_RTHDRDSTOPTS:
2159			case IPV6_NEXTHOP:
2160			case IPV6_TCLASS:
2161			case IPV6_DONTFRAG:
2162			case IPV6_USE_MIN_MTU:
2163			case IPV6_PREFER_TEMPADDR:
2164				error = ip6_getpcbopt(in6p->in6p_outputopts,
2165				    optname, sopt);
2166				break;
2167
2168			case IPV6_MULTICAST_IF:
2169			case IPV6_MULTICAST_HOPS:
2170			case IPV6_MULTICAST_LOOP:
2171			case IPV6_JOIN_GROUP:
2172			case IPV6_LEAVE_GROUP:
2173			    {
2174				struct mbuf *m;
2175				error = ip6_getmoptions(sopt->sopt_name,
2176				    in6p->in6p_moptions, &m);
2177				if (error == 0)
2178					error = sooptcopyout(sopt,
2179					    mtod(m, char *), m->m_len);
2180				m_freem(m);
2181			    }
2182				break;
2183
2184#if defined(IPSEC) || defined(FAST_IPSEC)
2185			case IPV6_IPSEC_POLICY:
2186			  {
2187				caddr_t req = NULL;
2188				size_t len = 0;
2189				struct mbuf *m = NULL;
2190				struct mbuf **mp = &m;
2191				size_t ovalsize = sopt->sopt_valsize;
2192				caddr_t oval = (caddr_t)sopt->sopt_val;
2193
2194				error = soopt_getm(sopt, &m); /* XXX */
2195				if (error != 0)
2196					break;
2197				error = soopt_mcopyin(sopt, m); /* XXX */
2198				if (error != 0)
2199					break;
2200				sopt->sopt_valsize = ovalsize;
2201				sopt->sopt_val = oval;
2202				if (m) {
2203					req = mtod(m, caddr_t);
2204					len = m->m_len;
2205				}
2206				error = ipsec6_get_policy(in6p, req, len, mp);
2207				if (error == 0)
2208					error = soopt_mcopyout(sopt, m); /* XXX */
2209				if (error == 0 && m)
2210					m_freem(m);
2211				break;
2212			  }
2213#endif /* KAME IPSEC */
2214
2215			case IPV6_FW_GET:
2216			  {
2217				struct mbuf *m;
2218				struct mbuf **mp = &m;
2219
2220				if (ip6_fw_ctl_ptr == NULL)
2221			        {
2222					return EINVAL;
2223				}
2224				error = (*ip6_fw_ctl_ptr)(optname, mp);
2225				if (error == 0)
2226					error = soopt_mcopyout(sopt, m); /* XXX */
2227				if (error == 0 && m)
2228					m_freem(m);
2229			  }
2230				break;
2231
2232			default:
2233				error = ENOPROTOOPT;
2234				break;
2235			}
2236			break;
2237		}
2238	} else {		/* level != IPPROTO_IPV6 */
2239		error = EINVAL;
2240	}
2241	return (error);
2242}
2243
2244int
2245ip6_raw_ctloutput(so, sopt)
2246	struct socket *so;
2247	struct sockopt *sopt;
2248{
2249	int error = 0, optval, optlen;
2250	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2251	struct in6pcb *in6p = sotoin6pcb(so);
2252	int level, op, optname;
2253
2254	if (sopt) {
2255		level = sopt->sopt_level;
2256		op = sopt->sopt_dir;
2257		optname = sopt->sopt_name;
2258		optlen = sopt->sopt_valsize;
2259	} else
2260		panic("ip6_raw_ctloutput: arg soopt is NULL");
2261
2262	if (level != IPPROTO_IPV6) {
2263		return (EINVAL);
2264	}
2265
2266	switch (optname) {
2267	case IPV6_CHECKSUM:
2268		/*
2269		 * For ICMPv6 sockets, no modification allowed for checksum
2270		 * offset, permit "no change" values to help existing apps.
2271		 *
2272		 * XXX 2292bis says: "An attempt to set IPV6_CHECKSUM
2273		 * for an ICMPv6 socket will fail."
2274		 * The current behavior does not meet 2292bis.
2275		 */
2276		switch (op) {
2277		case SOPT_SET:
2278			if (optlen != sizeof(int)) {
2279				error = EINVAL;
2280				break;
2281			}
2282			error = sooptcopyin(sopt, &optval, sizeof(optval),
2283					    sizeof(optval));
2284			if (error)
2285				break;
2286			if ((optval % 2) != 0) {
2287				/* the API assumes even offset values */
2288				error = EINVAL;
2289			} else if (so->so_proto->pr_protocol ==
2290			    IPPROTO_ICMPV6) {
2291				if (optval != icmp6off)
2292					error = EINVAL;
2293			} else
2294				in6p->in6p_cksum = optval;
2295			break;
2296
2297		case SOPT_GET:
2298			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2299				optval = icmp6off;
2300			else
2301				optval = in6p->in6p_cksum;
2302
2303			error = sooptcopyout(sopt, &optval, sizeof(optval));
2304			break;
2305
2306		default:
2307			error = EINVAL;
2308			break;
2309		}
2310		break;
2311
2312	default:
2313		error = ENOPROTOOPT;
2314		break;
2315	}
2316
2317	return (error);
2318}
2319
2320/*
2321 * Set up IP6 options in pcb for insertion in output packets or
2322 * specifying behavior of outgoing packets.
2323 */
2324static int
2325ip6_pcbopts(pktopt, m, so, sopt)
2326	struct ip6_pktopts **pktopt;
2327	struct mbuf *m;
2328	struct socket *so;
2329	struct sockopt *sopt;
2330{
2331	struct ip6_pktopts *opt = *pktopt;
2332	int error = 0;
2333	struct thread *td = sopt->sopt_td;
2334	int priv = 0;
2335
2336	/* turn off any old options. */
2337	if (opt) {
2338#ifdef DIAGNOSTIC
2339		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2340		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2341		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2342			printf("ip6_pcbopts: all specified options are cleared.\n");
2343#endif
2344		ip6_clearpktopts(opt, -1);
2345	} else
2346		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2347	*pktopt = NULL;
2348
2349	if (!m || m->m_len == 0) {
2350		/*
2351		 * Only turning off any previous options, regardless of
2352		 * whether the opt is just created or given.
2353		 */
2354		free(opt, M_IP6OPT);
2355		return (0);
2356	}
2357
2358	/*  set options specified by user. */
2359	if (td && !suser(td))
2360		priv = 1;
2361	if ((error = ip6_setpktoptions(m, opt, NULL, priv, 1,
2362	    so->so_proto->pr_protocol)) != 0) {
2363		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2364		free(opt, M_IP6OPT);
2365		return (error);
2366	}
2367	*pktopt = opt;
2368	return (0);
2369}
2370
2371/*
2372 * initialize ip6_pktopts.  beware that there are non-zero default values in
2373 * the struct.
2374 */
2375void
2376init_ip6pktopts(opt)
2377	struct ip6_pktopts *opt;
2378{
2379
2380	bzero(opt, sizeof(*opt));
2381	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2382	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2383	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2384	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2385}
2386
2387static int
2388ip6_pcbopt(optname, buf, len, pktopt, priv, uproto)
2389	int optname, len, priv;
2390	u_char *buf;
2391	struct ip6_pktopts **pktopt;
2392	int uproto;
2393{
2394	struct ip6_pktopts *opt;
2395
2396	if (*pktopt == NULL) {
2397		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2398		    M_WAITOK);
2399		init_ip6pktopts(*pktopt);
2400		(*pktopt)->needfree = 1;
2401	}
2402	opt = *pktopt;
2403
2404	return (ip6_setpktoption(optname, buf, len, opt, priv, 1, 0, uproto));
2405}
2406
2407static int
2408ip6_getpcbopt(pktopt, optname, sopt)
2409	struct ip6_pktopts *pktopt;
2410	struct sockopt *sopt;
2411	int optname;
2412{
2413	void *optdata = NULL;
2414	int optdatalen = 0;
2415	struct ip6_ext *ip6e;
2416	int error = 0;
2417	struct in6_pktinfo null_pktinfo;
2418	int deftclass = 0, on;
2419	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2420	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2421
2422	switch (optname) {
2423	case IPV6_PKTINFO:
2424		if (pktopt && pktopt->ip6po_pktinfo)
2425			optdata = (void *)pktopt->ip6po_pktinfo;
2426		else {
2427			/* XXX: we don't have to do this every time... */
2428			bzero(&null_pktinfo, sizeof(null_pktinfo));
2429			optdata = (void *)&null_pktinfo;
2430		}
2431		optdatalen = sizeof(struct in6_pktinfo);
2432		break;
2433	case IPV6_TCLASS:
2434		if (pktopt && pktopt->ip6po_tclass >= 0)
2435			optdata = (void *)&pktopt->ip6po_tclass;
2436		else
2437			optdata = (void *)&deftclass;
2438		optdatalen = sizeof(int);
2439		break;
2440	case IPV6_HOPOPTS:
2441		if (pktopt && pktopt->ip6po_hbh) {
2442			optdata = (void *)pktopt->ip6po_hbh;
2443			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2444			optdatalen = (ip6e->ip6e_len + 1) << 3;
2445		}
2446		break;
2447	case IPV6_RTHDR:
2448		if (pktopt && pktopt->ip6po_rthdr) {
2449			optdata = (void *)pktopt->ip6po_rthdr;
2450			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2451			optdatalen = (ip6e->ip6e_len + 1) << 3;
2452		}
2453		break;
2454	case IPV6_RTHDRDSTOPTS:
2455		if (pktopt && pktopt->ip6po_dest1) {
2456			optdata = (void *)pktopt->ip6po_dest1;
2457			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2458			optdatalen = (ip6e->ip6e_len + 1) << 3;
2459		}
2460		break;
2461	case IPV6_DSTOPTS:
2462		if (pktopt && pktopt->ip6po_dest2) {
2463			optdata = (void *)pktopt->ip6po_dest2;
2464			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2465			optdatalen = (ip6e->ip6e_len + 1) << 3;
2466		}
2467		break;
2468	case IPV6_NEXTHOP:
2469		if (pktopt && pktopt->ip6po_nexthop) {
2470			optdata = (void *)pktopt->ip6po_nexthop;
2471			optdatalen = pktopt->ip6po_nexthop->sa_len;
2472		}
2473		break;
2474	case IPV6_USE_MIN_MTU:
2475		if (pktopt)
2476			optdata = (void *)&pktopt->ip6po_minmtu;
2477		else
2478			optdata = (void *)&defminmtu;
2479		optdatalen = sizeof(int);
2480		break;
2481	case IPV6_DONTFRAG:
2482		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2483			on = 1;
2484		else
2485			on = 0;
2486		optdata = (void *)&on;
2487		optdatalen = sizeof(on);
2488		break;
2489	case IPV6_PREFER_TEMPADDR:
2490		if (pktopt)
2491			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2492		else
2493			optdata = (void *)&defpreftemp;
2494		optdatalen = sizeof(int);
2495		break;
2496	default:		/* should not happen */
2497#ifdef DIAGNOSTIC
2498		panic("ip6_getpcbopt: unexpected option\n");
2499#endif
2500		return (ENOPROTOOPT);
2501	}
2502
2503	error = sooptcopyout(sopt, optdata, optdatalen);
2504
2505	return (error);
2506}
2507
2508void
2509ip6_clearpktopts(pktopt, optname)
2510	struct ip6_pktopts *pktopt;
2511	int optname;
2512{
2513	int needfree;
2514
2515	if (pktopt == NULL)
2516		return;
2517
2518	needfree = pktopt->needfree;
2519
2520	if (optname == -1 || optname == IPV6_PKTINFO) {
2521		if (needfree && pktopt->ip6po_pktinfo)
2522			free(pktopt->ip6po_pktinfo, M_IP6OPT);
2523		pktopt->ip6po_pktinfo = NULL;
2524	}
2525	if (optname == -1 || optname == IPV6_HOPLIMIT)
2526		pktopt->ip6po_hlim = -1;
2527	if (optname == -1 || optname == IPV6_TCLASS)
2528		pktopt->ip6po_tclass = -1;
2529	if (optname == -1 || optname == IPV6_NEXTHOP) {
2530		if (pktopt->ip6po_nextroute.ro_rt) {
2531			RTFREE(pktopt->ip6po_nextroute.ro_rt);
2532			pktopt->ip6po_nextroute.ro_rt = NULL;
2533		}
2534		if (needfree && pktopt->ip6po_nexthop)
2535			free(pktopt->ip6po_nexthop, M_IP6OPT);
2536		pktopt->ip6po_nexthop = NULL;
2537	}
2538	if (optname == -1 || optname == IPV6_HOPOPTS) {
2539		if (needfree && pktopt->ip6po_hbh)
2540			free(pktopt->ip6po_hbh, M_IP6OPT);
2541		pktopt->ip6po_hbh = NULL;
2542	}
2543	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2544		if (needfree && pktopt->ip6po_dest1)
2545			free(pktopt->ip6po_dest1, M_IP6OPT);
2546		pktopt->ip6po_dest1 = NULL;
2547	}
2548	if (optname == -1 || optname == IPV6_RTHDR) {
2549		if (needfree && pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2550			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2551		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2552		if (pktopt->ip6po_route.ro_rt) {
2553			RTFREE(pktopt->ip6po_route.ro_rt);
2554			pktopt->ip6po_route.ro_rt = NULL;
2555		}
2556	}
2557	if (optname == -1 || optname == IPV6_DSTOPTS) {
2558		if (needfree && pktopt->ip6po_dest2)
2559			free(pktopt->ip6po_dest2, M_IP6OPT);
2560		pktopt->ip6po_dest2 = NULL;
2561	}
2562}
2563
2564#define PKTOPT_EXTHDRCPY(type) \
2565do {\
2566	if (src->type) {\
2567		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2568		dst->type = malloc(hlen, M_IP6OPT, canwait);\
2569		if (dst->type == NULL && canwait == M_NOWAIT)\
2570			goto bad;\
2571		bcopy(src->type, dst->type, hlen);\
2572	}\
2573} while (/*CONSTCOND*/ 0)
2574
2575struct ip6_pktopts *
2576ip6_copypktopts(src, canwait)
2577	struct ip6_pktopts *src;
2578	int canwait;
2579{
2580	struct ip6_pktopts *dst;
2581
2582	if (src == NULL) {
2583		printf("ip6_clearpktopts: invalid argument\n");
2584		return (NULL);
2585	}
2586
2587	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2588	if (dst == NULL && canwait == M_NOWAIT)
2589		return (NULL);
2590	bzero(dst, sizeof(*dst));
2591	dst->needfree = 1;
2592
2593	dst->ip6po_hlim = src->ip6po_hlim;
2594	dst->ip6po_tclass = src->ip6po_tclass;
2595	dst->ip6po_flags = src->ip6po_flags;
2596	if (src->ip6po_pktinfo) {
2597		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2598		    M_IP6OPT, canwait);
2599		if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2600			goto bad;
2601		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2602	}
2603	if (src->ip6po_nexthop) {
2604		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2605		    M_IP6OPT, canwait);
2606		if (dst->ip6po_nexthop == NULL)
2607			goto bad;
2608		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2609		    src->ip6po_nexthop->sa_len);
2610	}
2611	PKTOPT_EXTHDRCPY(ip6po_hbh);
2612	PKTOPT_EXTHDRCPY(ip6po_dest1);
2613	PKTOPT_EXTHDRCPY(ip6po_dest2);
2614	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2615	return (dst);
2616
2617  bad:
2618	if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2619	if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2620	if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2621	if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2622	if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2623	if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2624	free(dst, M_IP6OPT);
2625	return (NULL);
2626}
2627#undef PKTOPT_EXTHDRCPY
2628
2629void
2630ip6_freepcbopts(pktopt)
2631	struct ip6_pktopts *pktopt;
2632{
2633	if (pktopt == NULL)
2634		return;
2635
2636	ip6_clearpktopts(pktopt, -1);
2637
2638	free(pktopt, M_IP6OPT);
2639}
2640
2641/*
2642 * Set the IP6 multicast options in response to user setsockopt().
2643 */
2644static int
2645ip6_setmoptions(optname, im6op, m)
2646	int optname;
2647	struct ip6_moptions **im6op;
2648	struct mbuf *m;
2649{
2650	int error = 0;
2651	u_int loop, ifindex;
2652	struct ipv6_mreq *mreq;
2653	struct ifnet *ifp;
2654	struct ip6_moptions *im6o = *im6op;
2655	struct route_in6 ro;
2656	struct sockaddr_in6 *dst;
2657	struct in6_multi_mship *imm;
2658	struct thread *td = curthread;
2659
2660	if (im6o == NULL) {
2661		/*
2662		 * No multicast option buffer attached to the pcb;
2663		 * allocate one and initialize to default values.
2664		 */
2665		im6o = (struct ip6_moptions *)
2666			malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
2667
2668		if (im6o == NULL)
2669			return (ENOBUFS);
2670		*im6op = im6o;
2671		im6o->im6o_multicast_ifp = NULL;
2672		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2673		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2674		LIST_INIT(&im6o->im6o_memberships);
2675	}
2676
2677	switch (optname) {
2678
2679	case IPV6_MULTICAST_IF:
2680		/*
2681		 * Select the interface for outgoing multicast packets.
2682		 */
2683		if (m == NULL || m->m_len != sizeof(u_int)) {
2684			error = EINVAL;
2685			break;
2686		}
2687		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2688		if (ifindex < 0 || if_index < ifindex) {
2689			error = ENXIO;	/* XXX EINVAL? */
2690			break;
2691		}
2692		ifp = ifnet_byindex(ifindex);
2693		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2694			error = EADDRNOTAVAIL;
2695			break;
2696		}
2697		im6o->im6o_multicast_ifp = ifp;
2698		break;
2699
2700	case IPV6_MULTICAST_HOPS:
2701	    {
2702		/*
2703		 * Set the IP6 hoplimit for outgoing multicast packets.
2704		 */
2705		int optval;
2706		if (m == NULL || m->m_len != sizeof(int)) {
2707			error = EINVAL;
2708			break;
2709		}
2710		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2711		if (optval < -1 || optval >= 256)
2712			error = EINVAL;
2713		else if (optval == -1)
2714			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2715		else
2716			im6o->im6o_multicast_hlim = optval;
2717		break;
2718	    }
2719
2720	case IPV6_MULTICAST_LOOP:
2721		/*
2722		 * Set the loopback flag for outgoing multicast packets.
2723		 * Must be zero or one.
2724		 */
2725		if (m == NULL || m->m_len != sizeof(u_int)) {
2726			error = EINVAL;
2727			break;
2728		}
2729		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2730		if (loop > 1) {
2731			error = EINVAL;
2732			break;
2733		}
2734		im6o->im6o_multicast_loop = loop;
2735		break;
2736
2737	case IPV6_JOIN_GROUP:
2738		/*
2739		 * Add a multicast group membership.
2740		 * Group must be a valid IP6 multicast address.
2741		 */
2742		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2743			error = EINVAL;
2744			break;
2745		}
2746		mreq = mtod(m, struct ipv6_mreq *);
2747		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2748			/*
2749			 * We use the unspecified address to specify to accept
2750			 * all multicast addresses. Only super user is allowed
2751			 * to do this.
2752			 */
2753			if (suser(td)) {
2754				error = EACCES;
2755				break;
2756			}
2757		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2758			error = EINVAL;
2759			break;
2760		}
2761
2762		/*
2763		 * If the interface is specified, validate it.
2764		 */
2765		if (mreq->ipv6mr_interface < 0 ||
2766		    if_index < mreq->ipv6mr_interface) {
2767			error = ENXIO;	/* XXX EINVAL? */
2768			break;
2769		}
2770		/*
2771		 * If no interface was explicitly specified, choose an
2772		 * appropriate one according to the given multicast address.
2773		 */
2774		if (mreq->ipv6mr_interface == 0) {
2775			/*
2776			 * If the multicast address is in node-local scope,
2777			 * the interface should be a loopback interface.
2778			 * Otherwise, look up the routing table for the
2779			 * address, and choose the outgoing interface.
2780			 *   XXX: is it a good approach?
2781			 */
2782			if (IN6_IS_ADDR_MC_INTFACELOCAL(&mreq->ipv6mr_multiaddr)) {
2783				ifp = &loif[0];
2784			} else {
2785				ro.ro_rt = NULL;
2786				dst = (struct sockaddr_in6 *)&ro.ro_dst;
2787				bzero(dst, sizeof(*dst));
2788				dst->sin6_len = sizeof(struct sockaddr_in6);
2789				dst->sin6_family = AF_INET6;
2790				dst->sin6_addr = mreq->ipv6mr_multiaddr;
2791				rtalloc((struct route *)&ro);
2792				if (ro.ro_rt == NULL) {
2793					error = EADDRNOTAVAIL;
2794					break;
2795				}
2796				ifp = ro.ro_rt->rt_ifp;
2797				RTFREE(ro.ro_rt);
2798			}
2799		} else
2800			ifp = ifnet_byindex(mreq->ipv6mr_interface);
2801
2802		/*
2803		 * See if we found an interface, and confirm that it
2804		 * supports multicast
2805		 */
2806		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2807			error = EADDRNOTAVAIL;
2808			break;
2809		}
2810		/*
2811		 * Put interface index into the multicast address,
2812		 * if the address has link-local scope.
2813		 */
2814		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2815			mreq->ipv6mr_multiaddr.s6_addr16[1] =
2816			    htons(ifp->if_index);
2817		}
2818		/*
2819		 * See if the membership already exists.
2820		 */
2821		for (imm = im6o->im6o_memberships.lh_first;
2822		     imm != NULL; imm = imm->i6mm_chain.le_next)
2823			if (imm->i6mm_maddr->in6m_ifp == ifp &&
2824			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2825					       &mreq->ipv6mr_multiaddr))
2826				break;
2827		if (imm != NULL) {
2828			error = EADDRINUSE;
2829			break;
2830		}
2831		/*
2832		 * Everything looks good; add a new record to the multicast
2833		 * address list for the given interface.
2834		 */
2835		imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK);
2836		if (imm == NULL) {
2837			error = ENOBUFS;
2838			break;
2839		}
2840		if ((imm->i6mm_maddr =
2841		     in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
2842			free(imm, M_IPMADDR);
2843			break;
2844		}
2845		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2846		break;
2847
2848	case IPV6_LEAVE_GROUP:
2849		/*
2850		 * Drop a multicast group membership.
2851		 * Group must be a valid IP6 multicast address.
2852		 */
2853		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2854			error = EINVAL;
2855			break;
2856		}
2857		mreq = mtod(m, struct ipv6_mreq *);
2858		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2859			if (suser(td)) {
2860				error = EACCES;
2861				break;
2862			}
2863		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2864			error = EINVAL;
2865			break;
2866		}
2867		/*
2868		 * If an interface address was specified, get a pointer
2869		 * to its ifnet structure.
2870		 */
2871		if (mreq->ipv6mr_interface < 0
2872		 || if_index < mreq->ipv6mr_interface) {
2873			error = ENXIO;	/* XXX EINVAL? */
2874			break;
2875		}
2876		ifp = ifnet_byindex(mreq->ipv6mr_interface);
2877		/*
2878		 * Put interface index into the multicast address,
2879		 * if the address has link-local scope.
2880		 */
2881		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2882			mreq->ipv6mr_multiaddr.s6_addr16[1]
2883				= htons(mreq->ipv6mr_interface);
2884		}
2885
2886		/*
2887		 * Find the membership in the membership list.
2888		 */
2889		for (imm = im6o->im6o_memberships.lh_first;
2890		     imm != NULL; imm = imm->i6mm_chain.le_next) {
2891			if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2892			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2893			    &mreq->ipv6mr_multiaddr))
2894				break;
2895		}
2896		if (imm == NULL) {
2897			/* Unable to resolve interface */
2898			error = EADDRNOTAVAIL;
2899			break;
2900		}
2901		/*
2902		 * Give up the multicast address record to which the
2903		 * membership points.
2904		 */
2905		LIST_REMOVE(imm, i6mm_chain);
2906		in6_delmulti(imm->i6mm_maddr);
2907		free(imm, M_IPMADDR);
2908		break;
2909
2910	default:
2911		error = EOPNOTSUPP;
2912		break;
2913	}
2914
2915	/*
2916	 * If all options have default values, no need to keep the mbuf.
2917	 */
2918	if (im6o->im6o_multicast_ifp == NULL &&
2919	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2920	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2921	    im6o->im6o_memberships.lh_first == NULL) {
2922		free(*im6op, M_IPMOPTS);
2923		*im6op = NULL;
2924	}
2925
2926	return (error);
2927}
2928
2929/*
2930 * Return the IP6 multicast options in response to user getsockopt().
2931 */
2932static int
2933ip6_getmoptions(optname, im6o, mp)
2934	int optname;
2935	struct ip6_moptions *im6o;
2936	struct mbuf **mp;
2937{
2938	u_int *hlim, *loop, *ifindex;
2939
2940	*mp = m_get(M_TRYWAIT, MT_HEADER);		/* XXX */
2941
2942	switch (optname) {
2943
2944	case IPV6_MULTICAST_IF:
2945		ifindex = mtod(*mp, u_int *);
2946		(*mp)->m_len = sizeof(u_int);
2947		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2948			*ifindex = 0;
2949		else
2950			*ifindex = im6o->im6o_multicast_ifp->if_index;
2951		return (0);
2952
2953	case IPV6_MULTICAST_HOPS:
2954		hlim = mtod(*mp, u_int *);
2955		(*mp)->m_len = sizeof(u_int);
2956		if (im6o == NULL)
2957			*hlim = ip6_defmcasthlim;
2958		else
2959			*hlim = im6o->im6o_multicast_hlim;
2960		return (0);
2961
2962	case IPV6_MULTICAST_LOOP:
2963		loop = mtod(*mp, u_int *);
2964		(*mp)->m_len = sizeof(u_int);
2965		if (im6o == NULL)
2966			*loop = ip6_defmcasthlim;
2967		else
2968			*loop = im6o->im6o_multicast_loop;
2969		return (0);
2970
2971	default:
2972		return (EOPNOTSUPP);
2973	}
2974}
2975
2976/*
2977 * Discard the IP6 multicast options.
2978 */
2979void
2980ip6_freemoptions(im6o)
2981	struct ip6_moptions *im6o;
2982{
2983	struct in6_multi_mship *imm;
2984
2985	if (im6o == NULL)
2986		return;
2987
2988	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2989		LIST_REMOVE(imm, i6mm_chain);
2990		if (imm->i6mm_maddr)
2991			in6_delmulti(imm->i6mm_maddr);
2992		free(imm, M_IPMADDR);
2993	}
2994	free(im6o, M_IPMOPTS);
2995}
2996
2997/*
2998 * Set IPv6 outgoing packet options based on advanced API.
2999 */
3000int
3001ip6_setpktoptions(control, opt, stickyopt, priv, needcopy, uproto)
3002	struct mbuf *control;
3003	struct ip6_pktopts *opt, *stickyopt;
3004	int priv, needcopy, uproto;
3005{
3006	struct cmsghdr *cm = 0;
3007
3008	if (control == 0 || opt == 0)
3009		return (EINVAL);
3010
3011	if (stickyopt) {
3012		/*
3013		 * If stickyopt is provided, make a local copy of the options
3014		 * for this particular packet, then override them by ancillary
3015		 * objects.
3016		 * XXX: need to gain a reference for the cached route of the
3017		 * next hop in case of the overriding.
3018		 */
3019		*opt = *stickyopt;
3020		if (opt->ip6po_nextroute.ro_rt) {
3021			RT_LOCK(opt->ip6po_nextroute.ro_rt);
3022			RT_ADDREF(opt->ip6po_nextroute.ro_rt);
3023			RT_UNLOCK(opt->ip6po_nextroute.ro_rt);
3024		}
3025	} else
3026		init_ip6pktopts(opt);
3027	opt->needfree = needcopy;
3028
3029	/*
3030	 * XXX: Currently, we assume all the optional information is stored
3031	 * in a single mbuf.
3032	 */
3033	if (control->m_next)
3034		return (EINVAL);
3035
3036	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
3037	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
3038		int error;
3039
3040		if (control->m_len < CMSG_LEN(0))
3041			return (EINVAL);
3042
3043		cm = mtod(control, struct cmsghdr *);
3044		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
3045			return (EINVAL);
3046		if (cm->cmsg_level != IPPROTO_IPV6)
3047			continue;
3048
3049		error = ip6_setpktoption(cm->cmsg_type, CMSG_DATA(cm),
3050		    cm->cmsg_len - CMSG_LEN(0), opt, priv, needcopy, 1, uproto);
3051		if (error)
3052			return (error);
3053	}
3054
3055	return (0);
3056}
3057
3058/*
3059 * Set a particular packet option, as a sticky option or an ancillary data
3060 * item.  "len" can be 0 only when it's a sticky option.
3061 * We have 4 cases of combination of "sticky" and "cmsg":
3062 * "sticky=0, cmsg=0": impossible
3063 * "sticky=0, cmsg=1": RFC2292 or rfc2292bis ancillary data
3064 * "sticky=1, cmsg=0": rfc2292bis socket option
3065 * "sticky=1, cmsg=1": RFC2292 socket option
3066 */
3067static int
3068ip6_setpktoption(optname, buf, len, opt, priv, sticky, cmsg, uproto)
3069	int optname, len, priv, sticky, cmsg, uproto;
3070	u_char *buf;
3071	struct ip6_pktopts *opt;
3072{
3073	int minmtupolicy, preftemp;
3074
3075	if (!sticky && !cmsg) {
3076#ifdef DIAGNOSTIC
3077		printf("ip6_setpktoption: impossible case\n");
3078#endif
3079		return (EINVAL);
3080	}
3081
3082	/*
3083	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3084	 * not be specified in the context of rfc2292bis.  Conversely,
3085	 * rfc2292bis types should not be specified in the context of RFC2292.
3086	 */
3087	if (!cmsg) {
3088		switch (optname) {
3089		case IPV6_2292PKTINFO:
3090		case IPV6_2292HOPLIMIT:
3091		case IPV6_2292NEXTHOP:
3092		case IPV6_2292HOPOPTS:
3093		case IPV6_2292DSTOPTS:
3094		case IPV6_2292RTHDR:
3095		case IPV6_2292PKTOPTIONS:
3096			return (ENOPROTOOPT);
3097		}
3098	}
3099	if (sticky && cmsg) {
3100		switch (optname) {
3101		case IPV6_PKTINFO:
3102		case IPV6_HOPLIMIT:
3103		case IPV6_NEXTHOP:
3104		case IPV6_HOPOPTS:
3105		case IPV6_DSTOPTS:
3106		case IPV6_RTHDRDSTOPTS:
3107		case IPV6_RTHDR:
3108		case IPV6_USE_MIN_MTU:
3109		case IPV6_DONTFRAG:
3110		case IPV6_TCLASS:
3111		case IPV6_PREFER_TEMPADDR: /* XXX: not an rfc2292bis option */
3112			return (ENOPROTOOPT);
3113		}
3114	}
3115
3116	switch (optname) {
3117	case IPV6_2292PKTINFO:
3118	case IPV6_PKTINFO:
3119	{
3120		struct ifnet *ifp = NULL;
3121		struct in6_pktinfo *pktinfo;
3122
3123		if (len != sizeof(struct in6_pktinfo))
3124			return (EINVAL);
3125
3126		pktinfo = (struct in6_pktinfo *)buf;
3127
3128		/*
3129		 * An application can clear any sticky IPV6_PKTINFO option by
3130		 * doing a "regular" setsockopt with ipi6_addr being
3131		 * in6addr_any and ipi6_ifindex being zero.
3132		 * [RFC 3542, Section 6]
3133		 */
3134		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3135		    pktinfo->ipi6_ifindex == 0 &&
3136		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3137			ip6_clearpktopts(opt, optname);
3138			break;
3139		}
3140
3141		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3142		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3143			return (EINVAL);
3144		}
3145
3146		/* validate the interface index if specified. */
3147		if (pktinfo->ipi6_ifindex > if_index ||
3148		    pktinfo->ipi6_ifindex < 0) {
3149			 return (ENXIO);
3150		}
3151		if (pktinfo->ipi6_ifindex) {
3152			ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
3153			if (ifp == NULL)
3154				return (ENXIO);
3155		}
3156
3157		/*
3158		 * We store the address anyway, and let in6_selectsrc()
3159		 * validate the specified address.  This is because ipi6_addr
3160		 * may not have enough information about its scope zone, and
3161		 * we may need additional information (such as outgoing
3162		 * interface or the scope zone of a destination address) to
3163		 * disambiguate the scope.
3164		 * XXX: the delay of the validation may confuse the
3165		 * application when it is used as a sticky option.
3166		 */
3167		if (sticky) {
3168			if (opt->ip6po_pktinfo == NULL) {
3169				opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
3170				    M_IP6OPT, M_WAITOK);
3171			}
3172			bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
3173		} else
3174			opt->ip6po_pktinfo = pktinfo;
3175		break;
3176	}
3177
3178	case IPV6_2292HOPLIMIT:
3179	case IPV6_HOPLIMIT:
3180	{
3181		int *hlimp;
3182
3183		/*
3184		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3185		 * to simplify the ordering among hoplimit options.
3186		 */
3187		if (optname == IPV6_HOPLIMIT && sticky)
3188			return (ENOPROTOOPT);
3189
3190		if (len != sizeof(int))
3191			return (EINVAL);
3192		hlimp = (int *)buf;
3193		if (*hlimp < -1 || *hlimp > 255)
3194			return (EINVAL);
3195
3196		opt->ip6po_hlim = *hlimp;
3197		break;
3198	}
3199
3200	case IPV6_TCLASS:
3201	{
3202		int tclass;
3203
3204		if (len != sizeof(int))
3205			return (EINVAL);
3206		tclass = *(int *)buf;
3207		if (tclass < -1 || tclass > 255)
3208			return (EINVAL);
3209
3210		opt->ip6po_tclass = tclass;
3211		break;
3212	}
3213
3214	case IPV6_2292NEXTHOP:
3215	case IPV6_NEXTHOP:
3216		if (!priv)
3217			return (EPERM);
3218
3219		if (len == 0) {	/* just remove the option */
3220			ip6_clearpktopts(opt, IPV6_NEXTHOP);
3221			break;
3222		}
3223
3224		/* check if cmsg_len is large enough for sa_len */
3225		if (len < sizeof(struct sockaddr) || len < *buf)
3226			return (EINVAL);
3227
3228		switch (((struct sockaddr *)buf)->sa_family) {
3229		case AF_INET6:
3230		{
3231			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3232#if 0
3233			int error;
3234#endif
3235
3236			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3237				return (EINVAL);
3238
3239			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3240			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3241				return (EINVAL);
3242			}
3243#if 0
3244			if ((error = scope6_check_id(sa6, ip6_use_defzone))
3245			    != 0) {
3246				return (error);
3247			}
3248#endif
3249			sa6->sin6_scope_id = 0; /* XXX */
3250			break;
3251		}
3252		case AF_LINK:	/* should eventually be supported */
3253		default:
3254			return (EAFNOSUPPORT);
3255		}
3256
3257		/* turn off the previous option, then set the new option. */
3258		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3259		if (sticky) {
3260			opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_WAITOK);
3261			bcopy(buf, opt->ip6po_nexthop, *buf);
3262		} else
3263			opt->ip6po_nexthop = (struct sockaddr *)buf;
3264		break;
3265
3266	case IPV6_2292HOPOPTS:
3267	case IPV6_HOPOPTS:
3268	{
3269		struct ip6_hbh *hbh;
3270		int hbhlen;
3271
3272		/*
3273		 * XXX: We don't allow a non-privileged user to set ANY HbH
3274		 * options, since per-option restriction has too much
3275		 * overhead.
3276		 */
3277		if (!priv)
3278			return (EPERM);
3279
3280		if (len == 0) {
3281			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3282			break;	/* just remove the option */
3283		}
3284
3285		/* message length validation */
3286		if (len < sizeof(struct ip6_hbh))
3287			return (EINVAL);
3288		hbh = (struct ip6_hbh *)buf;
3289		hbhlen = (hbh->ip6h_len + 1) << 3;
3290		if (len != hbhlen)
3291			return (EINVAL);
3292
3293		/* turn off the previous option, then set the new option. */
3294		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3295		if (sticky) {
3296			opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_WAITOK);
3297			bcopy(hbh, opt->ip6po_hbh, hbhlen);
3298		} else
3299			opt->ip6po_hbh = hbh;
3300
3301		break;
3302	}
3303
3304	case IPV6_2292DSTOPTS:
3305	case IPV6_DSTOPTS:
3306	case IPV6_RTHDRDSTOPTS:
3307	{
3308		struct ip6_dest *dest, **newdest = NULL;
3309		int destlen;
3310
3311		if (!priv)	/* XXX: see the comment for IPV6_HOPOPTS */
3312			return (EPERM);
3313
3314		if (len == 0) {
3315			ip6_clearpktopts(opt, optname);
3316			break;	/* just remove the option */
3317		}
3318
3319		/* message length validation */
3320		if (len < sizeof(struct ip6_dest))
3321			return (EINVAL);
3322		dest = (struct ip6_dest *)buf;
3323		destlen = (dest->ip6d_len + 1) << 3;
3324		if (len != destlen)
3325			return (EINVAL);
3326
3327		/*
3328		 * Determine the position that the destination options header
3329		 * should be inserted; before or after the routing header.
3330		 */
3331		switch (optname) {
3332		case IPV6_2292DSTOPTS:
3333			/*
3334			 * The old advacned API is ambiguous on this point.
3335			 * Our approach is to determine the position based
3336			 * according to the existence of a routing header.
3337			 * Note, however, that this depends on the order of the
3338			 * extension headers in the ancillary data; the 1st
3339			 * part of the destination options header must appear
3340			 * before the routing header in the ancillary data,
3341			 * too.
3342			 * RFC2292bis solved the ambiguity by introducing
3343			 * separate ancillary data or option types.
3344			 */
3345			if (opt->ip6po_rthdr == NULL)
3346				newdest = &opt->ip6po_dest1;
3347			else
3348				newdest = &opt->ip6po_dest2;
3349			break;
3350		case IPV6_RTHDRDSTOPTS:
3351			newdest = &opt->ip6po_dest1;
3352			break;
3353		case IPV6_DSTOPTS:
3354			newdest = &opt->ip6po_dest2;
3355			break;
3356		}
3357
3358		/* turn off the previous option, then set the new option. */
3359		ip6_clearpktopts(opt, optname);
3360		if (sticky) {
3361			*newdest = malloc(destlen, M_IP6OPT, M_WAITOK);
3362			bcopy(dest, *newdest, destlen);
3363		} else
3364			*newdest = dest;
3365
3366		break;
3367	}
3368
3369	case IPV6_2292RTHDR:
3370	case IPV6_RTHDR:
3371	{
3372		struct ip6_rthdr *rth;
3373		int rthlen;
3374
3375		if (len == 0) {
3376			ip6_clearpktopts(opt, IPV6_RTHDR);
3377			break;	/* just remove the option */
3378		}
3379
3380		/* message length validation */
3381		if (len < sizeof(struct ip6_rthdr))
3382			return (EINVAL);
3383		rth = (struct ip6_rthdr *)buf;
3384		rthlen = (rth->ip6r_len + 1) << 3;
3385		if (len != rthlen)
3386			return (EINVAL);
3387
3388		switch (rth->ip6r_type) {
3389		case IPV6_RTHDR_TYPE_0:
3390			if (rth->ip6r_len == 0)	/* must contain one addr */
3391				return (EINVAL);
3392			if (rth->ip6r_len % 2) /* length must be even */
3393				return (EINVAL);
3394			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3395				return (EINVAL);
3396			break;
3397		default:
3398			return (EINVAL);	/* not supported */
3399		}
3400
3401		/* turn off the previous option */
3402		ip6_clearpktopts(opt, IPV6_RTHDR);
3403		if (sticky) {
3404			opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_WAITOK);
3405			bcopy(rth, opt->ip6po_rthdr, rthlen);
3406		} else
3407			opt->ip6po_rthdr = rth;
3408
3409		break;
3410	}
3411
3412	case IPV6_USE_MIN_MTU:
3413		if (len != sizeof(int))
3414			return (EINVAL);
3415		minmtupolicy = *(int *)buf;
3416		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3417		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3418		    minmtupolicy != IP6PO_MINMTU_ALL) {
3419			return (EINVAL);
3420		}
3421		opt->ip6po_minmtu = minmtupolicy;
3422		break;
3423
3424	case IPV6_DONTFRAG:
3425		if (len != sizeof(int))
3426			return (EINVAL);
3427
3428		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3429			/*
3430			 * we ignore this option for TCP sockets.
3431			 * (rfc2292bis leaves this case unspecified.)
3432			 */
3433			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3434		} else
3435			opt->ip6po_flags |= IP6PO_DONTFRAG;
3436		break;
3437
3438	case IPV6_PREFER_TEMPADDR:
3439		if (len != sizeof(int))
3440			return (EINVAL);
3441		preftemp = *(int *)buf;
3442		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3443		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3444		    preftemp != IP6PO_TEMPADDR_PREFER) {
3445			return (EINVAL);
3446		}
3447		opt->ip6po_prefer_tempaddr = preftemp;
3448		break;
3449
3450	default:
3451		return (ENOPROTOOPT);
3452	} /* end of switch */
3453
3454	return (0);
3455}
3456
3457/*
3458 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3459 * packet to the input queue of a specified interface.  Note that this
3460 * calls the output routine of the loopback "driver", but with an interface
3461 * pointer that might NOT be &loif -- easier than replicating that code here.
3462 */
3463void
3464ip6_mloopback(ifp, m, dst)
3465	struct ifnet *ifp;
3466	struct mbuf *m;
3467	struct sockaddr_in6 *dst;
3468{
3469	struct mbuf *copym;
3470	struct ip6_hdr *ip6;
3471
3472	copym = m_copy(m, 0, M_COPYALL);
3473	if (copym == NULL)
3474		return;
3475
3476	/*
3477	 * Make sure to deep-copy IPv6 header portion in case the data
3478	 * is in an mbuf cluster, so that we can safely override the IPv6
3479	 * header portion later.
3480	 */
3481	if ((copym->m_flags & M_EXT) != 0 ||
3482	    copym->m_len < sizeof(struct ip6_hdr)) {
3483		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3484		if (copym == NULL)
3485			return;
3486	}
3487
3488#ifdef DIAGNOSTIC
3489	if (copym->m_len < sizeof(*ip6)) {
3490		m_freem(copym);
3491		return;
3492	}
3493#endif
3494
3495	ip6 = mtod(copym, struct ip6_hdr *);
3496	/*
3497	 * clear embedded scope identifiers if necessary.
3498	 * in6_clearscope will touch the addresses only when necessary.
3499	 */
3500	in6_clearscope(&ip6->ip6_src);
3501	in6_clearscope(&ip6->ip6_dst);
3502
3503	(void)if_simloop(ifp, copym, dst->sin6_family, 0);
3504}
3505
3506/*
3507 * Chop IPv6 header off from the payload.
3508 */
3509static int
3510ip6_splithdr(m, exthdrs)
3511	struct mbuf *m;
3512	struct ip6_exthdrs *exthdrs;
3513{
3514	struct mbuf *mh;
3515	struct ip6_hdr *ip6;
3516
3517	ip6 = mtod(m, struct ip6_hdr *);
3518	if (m->m_len > sizeof(*ip6)) {
3519		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3520		if (mh == 0) {
3521			m_freem(m);
3522			return ENOBUFS;
3523		}
3524		M_MOVE_PKTHDR(mh, m);
3525		MH_ALIGN(mh, sizeof(*ip6));
3526		m->m_len -= sizeof(*ip6);
3527		m->m_data += sizeof(*ip6);
3528		mh->m_next = m;
3529		m = mh;
3530		m->m_len = sizeof(*ip6);
3531		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3532	}
3533	exthdrs->ip6e_ip6 = m;
3534	return 0;
3535}
3536
3537/*
3538 * Compute IPv6 extension header length.
3539 */
3540int
3541ip6_optlen(in6p)
3542	struct in6pcb *in6p;
3543{
3544	int len;
3545
3546	if (!in6p->in6p_outputopts)
3547		return 0;
3548
3549	len = 0;
3550#define elen(x) \
3551    (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3552
3553	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3554	if (in6p->in6p_outputopts->ip6po_rthdr)
3555		/* dest1 is valid with rthdr only */
3556		len += elen(in6p->in6p_outputopts->ip6po_dest1);
3557	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3558	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3559	return len;
3560#undef elen
3561}
3562