ip6_output.c revision 122334
1/*	$FreeBSD: head/sys/netinet6/ip6_output.c 122334 2003-11-08 23:36:32Z sam $	*/
2/*	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1988, 1990, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 *    must display the following acknowledgement:
47 *	This product includes software developed by the University of
48 *	California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 *    may be used to endorse or promote products derived from this software
51 *    without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
66 */
67
68#include "opt_ip6fw.h"
69#include "opt_inet.h"
70#include "opt_inet6.h"
71#include "opt_ipsec.h"
72#include "opt_pfil_hooks.h"
73#include "opt_random_ip_id.h"
74
75#include <sys/param.h>
76#include <sys/malloc.h>
77#include <sys/mbuf.h>
78#include <sys/proc.h>
79#include <sys/errno.h>
80#include <sys/protosw.h>
81#include <sys/socket.h>
82#include <sys/socketvar.h>
83#include <sys/systm.h>
84#include <sys/kernel.h>
85
86#include <net/if.h>
87#include <net/route.h>
88#ifdef PFIL_HOOKS
89#include <net/pfil.h>
90#endif
91
92#include <netinet/in.h>
93#include <netinet/in_var.h>
94#include <netinet6/in6_var.h>
95#include <netinet/ip6.h>
96#include <netinet/icmp6.h>
97#include <netinet6/ip6_var.h>
98#include <netinet/in_pcb.h>
99#include <netinet6/nd6.h>
100
101#ifdef IPSEC
102#include <netinet6/ipsec.h>
103#ifdef INET6
104#include <netinet6/ipsec6.h>
105#endif
106#include <netkey/key.h>
107#endif /* IPSEC */
108
109#ifdef FAST_IPSEC
110#include <netipsec/ipsec.h>
111#include <netipsec/ipsec6.h>
112#include <netipsec/key.h>
113#endif /* FAST_IPSEC */
114
115#include <netinet6/ip6_fw.h>
116
117#include <net/net_osdep.h>
118
119#include <netinet6/ip6protosw.h>
120
/*
 * File-local malloc type M_IPMOPTS, registered under the short name
 * "ip6_moptions" with the description "internet multicast options".
 * NOTE(review): presumably the tag used when allocating
 * struct ip6_moptions; the allocation sites are not in this section.
 */
121static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
122
/*
 * Working set of mbufs used by ip6_output() while it assembles a packet:
 * one mbuf for the IPv6 header plus one per optional extension header.
 * ip6_output() zeroes the whole structure first and treats a NULL member
 * as "this header is not present".
 */
123struct ip6_exthdrs {
124	struct mbuf *ip6e_ip6;	/* IPv6 header, once split from the payload */
125	struct mbuf *ip6e_hbh;	/* Hop-by-Hop options header */
126	struct mbuf *ip6e_dest1;	/* Destination options, 1st part (before rthdr) */
127	struct mbuf *ip6e_rthdr;	/* Routing header */
128	struct mbuf *ip6e_dest2;	/* Destination options, 2nd part (after rthdr) */
129};
130
/*
 * Forward declarations of the helpers private to this file.
 */

/*
 * Packet-option helpers.
 * NOTE(review): definitions and callers are outside this section; roles
 * are inferred from the names only.
 */
131static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
132			   int, int));
133static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
134	struct socket *, struct sockopt *));
135static int ip6_getpcbopt __P((struct ip6_pktopts *, int, struct sockopt *));
136static int ip6_setpktoption __P((int, u_char *, int, struct ip6_pktopts *, int,
137	int, int, int));
138
/* Multicast option set/get helpers (definitions not in this section). */
139static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
140static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
/*
 * ip6_copyexthdr: called from MAKE_EXTHDR in ip6_output() with the target
 * mbuf pointer, the source header and its byte length; a non-zero return
 * is treated as an error.
 */
141static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));
142static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
143	struct ip6_frag **));
/*
 * ip6_insert_jumboopt: called by ip6_output() with the extension header
 * set and the payload length when that length exceeds IPV6_MAXPACKET.
 */
144static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));
/*
 * ip6_splithdr: called by ip6_output() to separate the IPv6 header from
 * the payload; on success the caller continues with exthdrs->ip6e_ip6.
 */
145static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));
/*
 * ip6_getpmtu: called by ip6_output() to determine the path MTU; the
 * u_long and int out-parameters receive the MTU and the "always
 * fragment" flag, and a non-zero return is treated as an error.
 */
146static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
147	struct ifnet *, struct in6_addr *, u_long *, int *));
148
149
150/*
151 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
152 * header (with pri, len, nxt, hlim, src, dst).
153 * This function may modify ver and hlim only.
154 * The mbuf chain containing the packet will be freed.
155 * The mbuf opt, if present, will not be freed.
156 *
157 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
158 * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
159 * which is rt_rmx.rmx_mtu.
160 */
161int
162ip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
163	struct mbuf *m0;
164	struct ip6_pktopts *opt;
165	struct route_in6 *ro;
166	int flags;
167	struct ip6_moptions *im6o;
168	struct ifnet **ifpp;		/* XXX: just for statistics */
169	struct inpcb *inp;
170{
171	struct ip6_hdr *ip6, *mhip6;
172	struct ifnet *ifp, *origifp;
173	struct mbuf *m = m0;
174	int hlen, tlen, len, off;
175	struct route_in6 ip6route;
176	struct sockaddr_in6 *dst;
177	int error = 0;
178	struct in6_ifaddr *ia = NULL;
179	u_long mtu;
180	int alwaysfrag, dontfrag;
181	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
182	struct ip6_exthdrs exthdrs;
183	struct in6_addr finaldst;
184	struct route_in6 *ro_pmtu = NULL;
185	int hdrsplit = 0;
186	int needipsec = 0;
187#ifdef FAST_IPSEC
188	int needipsectun = 0;
189	struct secpolicy *sp = NULL;
190#endif /* FAST_IPSEC */
191#ifdef IPSEC
192	int needipsectun = 0;
193	struct socket *so;
194	struct secpolicy *sp = NULL;
195
196	/* for AH processing. stupid to have "socket" variable in IP layer... */
197	so = ipsec_getsocket(m);
198	(void)ipsec_setsocket(m, NULL);
199#endif /* IPSEC */
200
201	ip6 = mtod(m, struct ip6_hdr *);
202
203#define MAKE_EXTHDR(hp, mp)						\
204    do {								\
205	if (hp) {							\
206		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
207		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
208		    ((eh)->ip6e_len + 1) << 3);				\
209		if (error)						\
210			goto freehdrs;					\
211	}								\
212    } while (/*CONSTCOND*/ 0)
213
214	bzero(&exthdrs, sizeof(exthdrs));
215
216	if (opt) {
217		/* Hop-by-Hop options header */
218		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
219		/* Destination options header(1st part) */
220		if (opt->ip6po_rthdr) {
221			/*
222			 * Destination options header(1st part)
223			 * This only makes sense with a routing header.
224			 * See Section 9.2 of RFC 3542.
225			 * Disabling this part just for MIP6 convenience is
226			 * a bad idea.  We need to think carefully about a
227			 * way to make the advanced API coexist with MIP6
228			 * options, which might automatically be inserted in
229			 * the kernel.
230			 */
231			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
232		}
233		/* Routing header */
234		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
235		/* Destination options header(2nd part) */
236		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
237	}
238
239#ifdef IPSEC
240	/* get a security policy for this packet */
241	if (so == NULL)
242		sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
243	else
244		sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
245
246	if (sp == NULL) {
247		ipsec6stat.out_inval++;
248		goto freehdrs;
249	}
250
251	error = 0;
252
253	/* check policy */
254	switch (sp->policy) {
255	case IPSEC_POLICY_DISCARD:
256		/*
257		 * This packet is just discarded.
258		 */
259		ipsec6stat.out_polvio++;
260		goto freehdrs;
261
262	case IPSEC_POLICY_BYPASS:
263	case IPSEC_POLICY_NONE:
264		/* no need to do IPsec. */
265		needipsec = 0;
266		break;
267
268	case IPSEC_POLICY_IPSEC:
269		if (sp->req == NULL) {
270			/* acquire a policy */
271			error = key_spdacquire(sp);
272			goto freehdrs;
273		}
274		needipsec = 1;
275		break;
276
277	case IPSEC_POLICY_ENTRUST:
278	default:
279		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
280	}
281#endif /* IPSEC */
282#ifdef FAST_IPSEC
283	/* get a security policy for this packet */
284	if (inp == NULL)
285		sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
286	else
287		sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
288
289	if (sp == NULL) {
290		newipsecstat.ips_out_inval++;
291		goto freehdrs;
292	}
293
294	error = 0;
295
296	/* check policy */
297	switch (sp->policy) {
298	case IPSEC_POLICY_DISCARD:
299		/*
300		 * This packet is just discarded.
301		 */
302		newipsecstat.ips_out_polvio++;
303		goto freehdrs;
304
305	case IPSEC_POLICY_BYPASS:
306	case IPSEC_POLICY_NONE:
307		/* no need to do IPsec. */
308		needipsec = 0;
309		break;
310
311	case IPSEC_POLICY_IPSEC:
312		if (sp->req == NULL) {
313			/* acquire a policy */
314			error = key_spdacquire(sp);
315			goto freehdrs;
316		}
317		needipsec = 1;
318		break;
319
320	case IPSEC_POLICY_ENTRUST:
321	default:
322		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
323	}
324#endif /* FAST_IPSEC */
325
326	/*
327	 * Calculate the total length of the extension header chain.
328	 * Keep the length of the unfragmentable part for fragmentation.
329	 */
330	optlen = 0;
331	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
332	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
333	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
334	unfragpartlen = optlen + sizeof(struct ip6_hdr);
335	/* NOTE: we don't add AH/ESP length here. do that later. */
336	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
337
338	/*
339	 * If we need IPsec, or there is at least one extension header,
340	 * separate IP6 header from the payload.
341	 */
342	if ((needipsec || optlen) && !hdrsplit) {
343		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
344			m = NULL;
345			goto freehdrs;
346		}
347		m = exthdrs.ip6e_ip6;
348		hdrsplit++;
349	}
350
351	/* adjust pointer */
352	ip6 = mtod(m, struct ip6_hdr *);
353
354	/* adjust mbuf packet header length */
355	m->m_pkthdr.len += optlen;
356	plen = m->m_pkthdr.len - sizeof(*ip6);
357
358	/* If this is a jumbo payload, insert a jumbo payload option. */
359	if (plen > IPV6_MAXPACKET) {
360		if (!hdrsplit) {
361			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
362				m = NULL;
363				goto freehdrs;
364			}
365			m = exthdrs.ip6e_ip6;
366			hdrsplit++;
367		}
368		/* adjust pointer */
369		ip6 = mtod(m, struct ip6_hdr *);
370		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
371			goto freehdrs;
372		ip6->ip6_plen = 0;
373	} else
374		ip6->ip6_plen = htons(plen);
375
376	/*
377	 * Concatenate headers and fill in next header fields.
378	 * Here we have, on "m"
379	 *	IPv6 payload
380	 * and we insert headers accordingly.  Finally, we should be getting:
381	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
382	 *
383	 * during the header composing process, "m" points to IPv6 header.
384	 * "mprev" points to an extension header prior to esp.
385	 */
386	{
387		u_char *nexthdrp = &ip6->ip6_nxt;
388		struct mbuf *mprev = m;
389
390		/*
391		 * we treat dest2 specially.  this makes IPsec processing
392		 * much easier.  the goal here is to make mprev point the
393		 * mbuf prior to dest2.
394		 *
395		 * result: IPv6 dest2 payload
396		 * m and mprev will point to IPv6 header.
397		 */
398		if (exthdrs.ip6e_dest2) {
399			if (!hdrsplit)
400				panic("assumption failed: hdr not split");
401			exthdrs.ip6e_dest2->m_next = m->m_next;
402			m->m_next = exthdrs.ip6e_dest2;
403			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
404			ip6->ip6_nxt = IPPROTO_DSTOPTS;
405		}
406
407#define MAKE_CHAIN(m, mp, p, i)\
408    do {\
409	if (m) {\
410		if (!hdrsplit) \
411			panic("assumption failed: hdr not split"); \
412		*mtod((m), u_char *) = *(p);\
413		*(p) = (i);\
414		p = mtod((m), u_char *);\
415		(m)->m_next = (mp)->m_next;\
416		(mp)->m_next = (m);\
417		(mp) = (m);\
418	}\
419    } while (/*CONSTCOND*/ 0)
420		/*
421		 * result: IPv6 hbh dest1 rthdr dest2 payload
422		 * m will point to IPv6 header.  mprev will point to the
423		 * extension header prior to dest2 (rthdr in the above case).
424		 */
425		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
426		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
427		    IPPROTO_DSTOPTS);
428		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
429		    IPPROTO_ROUTING);
430
431#if defined(IPSEC) || defined(FAST_IPSEC)
432		if (!needipsec)
433			goto skip_ipsec2;
434
435		/*
436		 * pointers after IPsec headers are not valid any more.
437		 * other pointers need a great care too.
438		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
439		 */
440		exthdrs.ip6e_dest2 = NULL;
441
442	    {
443		struct ip6_rthdr *rh = NULL;
444		int segleft_org = 0;
445		struct ipsec_output_state state;
446
447		if (exthdrs.ip6e_rthdr) {
448			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
449			segleft_org = rh->ip6r_segleft;
450			rh->ip6r_segleft = 0;
451		}
452
453		bzero(&state, sizeof(state));
454		state.m = m;
455		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
456		    &needipsectun);
457		m = state.m;
458		if (error) {
459			/* mbuf is already reclaimed in ipsec6_output_trans. */
460			m = NULL;
461			switch (error) {
462			case EHOSTUNREACH:
463			case ENETUNREACH:
464			case EMSGSIZE:
465			case ENOBUFS:
466			case ENOMEM:
467				break;
468			default:
469				printf("ip6_output (ipsec): error code %d\n", error);
470				/* FALLTHROUGH */
471			case ENOENT:
472				/* don't show these error codes to the user */
473				error = 0;
474				break;
475			}
476			goto bad;
477		}
478		if (exthdrs.ip6e_rthdr) {
479			/* ah6_output doesn't modify mbuf chain */
480			rh->ip6r_segleft = segleft_org;
481		}
482	    }
483skip_ipsec2:;
484#endif
485	}
486
487	/*
488	 * If there is a routing header, replace the destination address field
489	 * with the first hop of the routing header.
490	 */
491	if (exthdrs.ip6e_rthdr) {
492		struct ip6_rthdr *rh =
493			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
494						  struct ip6_rthdr *));
495		struct ip6_rthdr0 *rh0;
496		struct in6_addr *addrs;
497
498		finaldst = ip6->ip6_dst;
499		switch (rh->ip6r_type) {
500		case IPV6_RTHDR_TYPE_0:
501			 rh0 = (struct ip6_rthdr0 *)rh;
502			 addrs = (struct in6_addr *)(rh + 1);
503
504			 ip6->ip6_dst = *addrs;
505			 bcopy((caddr_t)(addrs + 1), (caddr_t)addrs,
506			       sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1)
507				 );
508			 *(addrs + rh0->ip6r0_segleft - 1) = finaldst;
509			 break;
510		default:	/* is it possible? */
511			 error = EINVAL;
512			 goto bad;
513		}
514	}
515
516	/* Source address validation */
517	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
518	    (flags & IPV6_DADOUTPUT) == 0) {
519		error = EOPNOTSUPP;
520		ip6stat.ip6s_badscope++;
521		goto bad;
522	}
523	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
524		error = EOPNOTSUPP;
525		ip6stat.ip6s_badscope++;
526		goto bad;
527	}
528
529	ip6stat.ip6s_localout++;
530
531	/*
532	 * Route packet.
533	 */
534	if (ro == 0) {
535		ro = &ip6route;
536		bzero((caddr_t)ro, sizeof(*ro));
537	}
538	ro_pmtu = ro;
539	if (opt && opt->ip6po_rthdr)
540		ro = &opt->ip6po_route;
541	dst = (struct sockaddr_in6 *)&ro->ro_dst;
542
543	/*
544	 * If there is a cached route,
545	 * check that it is to the same destination
546	 * and is still up. If not, free it and try again.
547	 */
548	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
549			 dst->sin6_family != AF_INET6 ||
550			 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
551		RTFREE(ro->ro_rt);
552		ro->ro_rt = (struct rtentry *)0;
553	}
554	if (ro->ro_rt == 0) {
555		bzero(dst, sizeof(*dst));
556		dst->sin6_family = AF_INET6;
557		dst->sin6_len = sizeof(struct sockaddr_in6);
558		dst->sin6_addr = ip6->ip6_dst;
559	}
560
561 	/*
562	 * if specified, try to fill in the traffic class field.
563	 * do not override if a non-zero value is already set.
564	 * we check the diffserv field and the ecn field separately.
565	 */
566	if (opt && opt->ip6po_tclass >= 0) {
567		int mask = 0;
568
569		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
570			mask |= 0xfc;
571		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
572			mask |= 0x03;
573		if (mask != 0)
574			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
575	}
576
577	/* fill in or override the hop limit field, if necessary. */
578	if (opt && opt->ip6po_hlim != -1)
579		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
580	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
581		if (im6o != NULL)
582			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
583		else
584			ip6->ip6_hlim = ip6_defmcasthlim;
585	}
586
587#if defined(IPSEC) || defined(FAST_IPSEC)
588	if (needipsec && needipsectun) {
589		struct ipsec_output_state state;
590
591		/*
592		 * All the extension headers will become inaccessible
593		 * (since they can be encrypted).
594		 * Don't panic, we need no more updates to extension headers
595		 * on inner IPv6 packet (since they are now encapsulated).
596		 *
597		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
598		 */
599		bzero(&exthdrs, sizeof(exthdrs));
600		exthdrs.ip6e_ip6 = m;
601
602		bzero(&state, sizeof(state));
603		state.m = m;
604		state.ro = (struct route *)ro;
605		state.dst = (struct sockaddr *)dst;
606
607		error = ipsec6_output_tunnel(&state, sp, flags);
608
609		m = state.m;
610		ro = (struct route_in6 *)state.ro;
611		dst = (struct sockaddr_in6 *)state.dst;
612		if (error) {
613			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
614			m0 = m = NULL;
615			m = NULL;
616			switch (error) {
617			case EHOSTUNREACH:
618			case ENETUNREACH:
619			case EMSGSIZE:
620			case ENOBUFS:
621			case ENOMEM:
622				break;
623			default:
624				printf("ip6_output (ipsec): error code %d\n", error);
625				/* FALLTHROUGH */
626			case ENOENT:
627				/* don't show these error codes to the user */
628				error = 0;
629				break;
630			}
631			goto bad;
632		}
633
634		exthdrs.ip6e_ip6 = m;
635	}
636#endif /* IPSEC */
637
638	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
639		/* Unicast */
640
641#define ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
642#define sin6tosa(sin6)	((struct sockaddr *)(sin6))
643		/* xxx
644		 * interface selection comes here
645		 * if an interface is specified from an upper layer,
646		 * ifp must point it.
647		 */
648		if (ro->ro_rt == 0) {
649			/*
650			 * non-bsdi always clone routes, if parent is
651			 * PRF_CLONING.
652			 */
653			rtalloc((struct route *)ro);
654		}
655		if (ro->ro_rt == 0) {
656			ip6stat.ip6s_noroute++;
657			error = EHOSTUNREACH;
658			/* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
659			goto bad;
660		}
661		/* XXX rt not locked */
662		ia = ifatoia6(ro->ro_rt->rt_ifa);
663		ifp = ro->ro_rt->rt_ifp;
664		ro->ro_rt->rt_use++;
665		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
666			dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
667		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
668
669		in6_ifstat_inc(ifp, ifs6_out_request);
670
671		/*
672		 * Check if the outgoing interface conflicts with
673		 * the interface specified by ifi6_ifindex (if specified).
674		 * Note that loopback interface is always okay.
675		 * (this may happen when we are sending a packet to one of
676		 *  our own addresses.)
677		 */
678		if (opt && opt->ip6po_pktinfo
679		 && opt->ip6po_pktinfo->ipi6_ifindex) {
680			if (!(ifp->if_flags & IFF_LOOPBACK)
681			 && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
682				ip6stat.ip6s_noroute++;
683				in6_ifstat_inc(ifp, ifs6_out_discard);
684				error = EHOSTUNREACH;
685				goto bad;
686			}
687		}
688
689		if (opt && opt->ip6po_hlim != -1)
690			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
691	} else {
692		/* Multicast */
693		struct	in6_multi *in6m;
694
695		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
696
697		/*
698		 * See if the caller provided any multicast options
699		 */
700		ifp = NULL;
701		if (im6o != NULL) {
702			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
703			if (im6o->im6o_multicast_ifp != NULL)
704				ifp = im6o->im6o_multicast_ifp;
705		} else
706			ip6->ip6_hlim = ip6_defmcasthlim;
707
708		/*
709		 * See if the caller provided the outgoing interface
710		 * as an ancillary data.
711		 * Boundary check for ifindex is assumed to be already done.
712		 */
713		if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex)
714			ifp = ifnet_byindex(opt->ip6po_pktinfo->ipi6_ifindex);
715
716		/*
717		 * If the destination is a node-local scope multicast,
718		 * the packet should be loop-backed only.
719		 */
720		if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
721			/*
722			 * If the outgoing interface is already specified,
723			 * it should be a loopback interface.
724			 */
725			if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
726				ip6stat.ip6s_badscope++;
727				error = ENETUNREACH; /* XXX: better error? */
728				/* XXX correct ifp? */
729				in6_ifstat_inc(ifp, ifs6_out_discard);
730				goto bad;
731			} else {
732				ifp = &loif[0];
733			}
734		}
735
736		if (opt && opt->ip6po_hlim != -1)
737			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
738
739		/*
740		 * If caller did not provide an interface lookup a
741		 * default in the routing table.  This is either a
742		 * default for the specified group (i.e. a host
743		 * route), or a multicast default (a route for the
744		 * ``net'' ff00::/8).
745		 */
746		if (ifp == NULL) {
747			if (ro->ro_rt == 0)
748				ro->ro_rt = rtalloc1((struct sockaddr *)
749						&ro->ro_dst, 0, 0UL);
750			else
751				RT_LOCK(ro->ro_rt);
752			if (ro->ro_rt == 0) {
753				ip6stat.ip6s_noroute++;
754				error = EHOSTUNREACH;
755				/* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
756				goto bad;
757			}
758			ia = ifatoia6(ro->ro_rt->rt_ifa);
759			ifp = ro->ro_rt->rt_ifp;
760			ro->ro_rt->rt_use++;
761			RT_UNLOCK(ro->ro_rt);
762		}
763
764		if ((flags & IPV6_FORWARDING) == 0)
765			in6_ifstat_inc(ifp, ifs6_out_request);
766		in6_ifstat_inc(ifp, ifs6_out_mcast);
767
768		/*
769		 * Confirm that the outgoing interface supports multicast.
770		 */
771		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
772			ip6stat.ip6s_noroute++;
773			in6_ifstat_inc(ifp, ifs6_out_discard);
774			error = ENETUNREACH;
775			goto bad;
776		}
777		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
778		if (in6m != NULL &&
779		   (im6o == NULL || im6o->im6o_multicast_loop)) {
780			/*
781			 * If we belong to the destination multicast group
782			 * on the outgoing interface, and the caller did not
783			 * forbid loopback, loop back a copy.
784			 */
785			ip6_mloopback(ifp, m, dst);
786		} else {
787			/*
788			 * If we are acting as a multicast router, perform
789			 * multicast forwarding as if the packet had just
790			 * arrived on the interface to which we are about
791			 * to send.  The multicast forwarding function
792			 * recursively calls this function, using the
793			 * IPV6_FORWARDING flag to prevent infinite recursion.
794			 *
795			 * Multicasts that are looped back by ip6_mloopback(),
796			 * above, will be forwarded by the ip6_input() routine,
797			 * if necessary.
798			 */
799			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
800				if (ip6_mforward(ip6, ifp, m) != 0) {
801					m_freem(m);
802					goto done;
803				}
804			}
805		}
806		/*
807		 * Multicasts with a hoplimit of zero may be looped back,
808		 * above, but must not be transmitted on a network.
809		 * Also, multicasts addressed to the loopback interface
810		 * are not sent -- the above call to ip6_mloopback() will
811		 * loop back a copy if this host actually belongs to the
812		 * destination group on the loopback interface.
813		 */
814		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
815		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
816			m_freem(m);
817			goto done;
818		}
819	}
820
821	/*
822	 * Fill the outgoing interface to tell the upper layer
823	 * to increment per-interface statistics.
824	 */
825	if (ifpp)
826		*ifpp = ifp;
827
828	/* Determine path MTU. */
829	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
830	    &alwaysfrag)) != 0)
831		goto bad;
832
833	/*
834	 * advanced API (IPV6_USE_MIN_MTU) overrides mtu setting
835	 */
836	if ((flags & IPV6_MINMTU) != 0 && mtu > IPV6_MMTU)
837		mtu = IPV6_MMTU;
838
839	/* Fake scoped addresses */
840	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
841		/*
842		 * If source or destination address is a scoped address, and
843		 * the packet is going to be sent to a loopback interface,
844		 * we should keep the original interface.
845		 */
846
847		/*
848		 * XXX: this is a very experimental and temporary solution.
849		 * We eventually have sockaddr_in6 and use the sin6_scope_id
850		 * field of the structure here.
851		 * We rely on the consistency between two scope zone ids
852		 * of source and destination, which should already be assured.
853		 * Larger scopes than link will be supported in the future.
854		 */
855		origifp = NULL;
856		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
857			origifp = ifnet_byindex(ntohs(ip6->ip6_src.s6_addr16[1]));
858		else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
859			origifp = ifnet_byindex(ntohs(ip6->ip6_dst.s6_addr16[1]));
860		/*
861		 * XXX: origifp can be NULL even in those two cases above.
862		 * For example, if we remove the (only) link-local address
863		 * from the loopback interface, and try to send a link-local
864		 * address without link-id information.  Then the source
865		 * address is ::1, and the destination address is the
866		 * link-local address with its s6_addr16[1] being zero.
867		 * What is worse, if the packet goes to the loopback interface
868		 * by a default rejected route, the null pointer would be
869		 * passed to looutput, and the kernel would hang.
870		 * The following last resort would prevent such disaster.
871		 */
872		if (origifp == NULL)
873			origifp = ifp;
874	}
875	else
876		origifp = ifp;
877	/*
878	 * clear embedded scope identifiers if necessary.
879	 * in6_clearscope will touch the addresses only when necessary.
880	 */
881	in6_clearscope(&ip6->ip6_src);
882	in6_clearscope(&ip6->ip6_dst);
883
884	/*
885	 * Check with the firewall...
886	 */
887	if (ip6_fw_enable && ip6_fw_chk_ptr) {
888		u_short port = 0;
889		m->m_pkthdr.rcvif = NULL;	/* XXX */
890		/* If ipfw says divert, we have to just drop packet */
891		if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
892			m_freem(m);
893			goto done;
894		}
895		if (!m) {
896			error = EACCES;
897			goto done;
898		}
899	}
900
901	/*
902	 * If the outgoing packet contains a hop-by-hop options header,
903	 * it must be examined and processed even by the source node.
904	 * (RFC 2460, section 4.)
905	 */
906	if (exthdrs.ip6e_hbh) {
907		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
908		u_int32_t dummy1; /* XXX unused */
909		u_int32_t dummy2; /* XXX unused */
910
911#ifdef DIAGNOSTIC
912		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
913			panic("ip6e_hbh is not continuous");
914#endif
915		/*
916		 *  XXX: if we have to send an ICMPv6 error to the sender,
917		 *       we need the M_LOOP flag since icmp6_error() expects
918		 *       the IPv6 and the hop-by-hop options header are
919		 *       continuous unless the flag is set.
920		 */
921		m->m_flags |= M_LOOP;
922		m->m_pkthdr.rcvif = ifp;
923		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
924		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
925		    &dummy1, &dummy2) < 0) {
926			/* m was already freed at this point */
927			error = EINVAL;/* better error? */
928			goto done;
929		}
930		m->m_flags &= ~M_LOOP; /* XXX */
931		m->m_pkthdr.rcvif = NULL;
932	}
933
934#ifdef PFIL_HOOKS
935	/*
936	 * Run through list of hooks for output packets.
937	 */
938	error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT);
939	if (error != 0 || m == NULL)
940		goto done;
941	ip6 = mtod(m, struct ip6_hdr *);
942#endif /* PFIL_HOOKS */
943
944	/*
945	 * Send the packet to the outgoing interface.
946	 * If necessary, do IPv6 fragmentation before sending.
947	 *
948	 * the logic here is rather complex:
949	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
950	 * 1-a:	send as is if tlen <= path mtu
951	 * 1-b:	fragment if tlen > path mtu
952	 *
953	 * 2: if user asks us not to fragment (dontfrag == 1)
954	 * 2-a:	send as is if tlen <= interface mtu
955	 * 2-b:	error if tlen > interface mtu
956	 *
957	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
958	 *	always fragment
959	 *
960	 * 4: if dontfrag == 1 && alwaysfrag == 1
961	 *	error, as we cannot handle this conflicting request
962	 */
963	tlen = m->m_pkthdr.len;
964
965	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
966		dontfrag = 1;
967	else
968		dontfrag = 0;
969	if (dontfrag && alwaysfrag) {	/* case 4 */
970		/* conflicting request - can't transmit */
971		error = EMSGSIZE;
972		goto bad;
973	}
974	if (dontfrag && tlen > IN6_LINKMTU(ifp)) {	/* case 2-b */
975		/*
976		 * Even if the DONTFRAG option is specified, we cannot send the
977		 * packet when the data length is larger than the MTU of the
978		 * outgoing interface.
979		 * Notify the error by sending IPV6_PATHMTU ancillary data as
980		 * well as returning an error code (the latter is not described
981		 * in the API spec.)
982		 */
983		u_int32_t mtu32;
984		struct ip6ctlparam ip6cp;
985
986		mtu32 = (u_int32_t)mtu;
987		bzero(&ip6cp, sizeof(ip6cp));
988		ip6cp.ip6c_cmdarg = (void *)&mtu32;
989		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
990		    (void *)&ip6cp);
991
992		error = EMSGSIZE;
993		goto bad;
994	}
995
996	/*
997	 * transmit packet without fragmentation
998	 */
999	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
1000		struct in6_ifaddr *ia6;
1001
1002		ip6 = mtod(m, struct ip6_hdr *);
1003		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
1004		if (ia6) {
1005			/* Record statistics for this interface address. */
1006			ia6->ia_ifa.if_opackets++;
1007			ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
1008		}
1009#ifdef IPSEC
1010		/* clean ipsec history once it goes out of the node */
1011		ipsec_delaux(m);
1012#endif
1013		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1014		goto done;
1015	}
1016
1017	/*
1018	 * try to fragment the packet.  case 1-b and 3
1019	 */
1020	if (mtu < IPV6_MMTU) {
1021		/* path MTU cannot be less than IPV6_MMTU */
1022		error = EMSGSIZE;
1023		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1024		goto bad;
1025	} else if (ip6->ip6_plen == 0) {
1026		/* jumbo payload cannot be fragmented */
1027		error = EMSGSIZE;
1028		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1029		goto bad;
1030	} else {
1031		struct mbuf **mnext, *m_frgpart;
1032		struct ip6_frag *ip6f;
1033#ifdef RANDOM_IP_ID
1034		u_int32_t id = htonl(ip6_randomid());
1035#else
1036		u_int32_t id = htonl(ip6_id++);
1037#endif
1038		u_char nextproto;
1039
1040		/*
1041		 * Too large for the destination or interface;
1042		 * fragment if possible.
1043		 * Must be able to put at least 8 bytes per fragment.
1044		 */
1045		hlen = unfragpartlen;
1046		if (mtu > IPV6_MAXPACKET)
1047			mtu = IPV6_MAXPACKET;
1048
1049		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1050		if (len < 8) {
1051			error = EMSGSIZE;
1052			in6_ifstat_inc(ifp, ifs6_out_fragfail);
1053			goto bad;
1054		}
1055
1056		mnext = &m->m_nextpkt;
1057
1058		/*
1059		 * Change the next header field of the last header in the
1060		 * unfragmentable part.
1061		 */
1062		if (exthdrs.ip6e_rthdr) {
1063			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1064			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1065		} else if (exthdrs.ip6e_dest1) {
1066			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1067			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1068		} else if (exthdrs.ip6e_hbh) {
1069			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1070			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1071		} else {
1072			nextproto = ip6->ip6_nxt;
1073			ip6->ip6_nxt = IPPROTO_FRAGMENT;
1074		}
1075
1076		/*
1077		 * Loop through length of segment after first fragment,
1078		 * make new header and copy data of each part and link onto
1079		 * chain.
1080		 */
1081		m0 = m;
1082		for (off = hlen; off < tlen; off += len) {
1083			MGETHDR(m, M_DONTWAIT, MT_HEADER);
1084			if (!m) {
1085				error = ENOBUFS;
1086				ip6stat.ip6s_odropped++;
1087				goto sendorfree;
1088			}
1089			m->m_pkthdr.rcvif = NULL;
1090			m->m_flags = m0->m_flags & M_COPYFLAGS;
1091			*mnext = m;
1092			mnext = &m->m_nextpkt;
1093			m->m_data += max_linkhdr;
1094			mhip6 = mtod(m, struct ip6_hdr *);
1095			*mhip6 = *ip6;
1096			m->m_len = sizeof(*mhip6);
1097			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1098			if (error) {
1099				ip6stat.ip6s_odropped++;
1100				goto sendorfree;
1101			}
1102			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1103			if (off + len >= tlen)
1104				len = tlen - off;
1105			else
1106				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1107			mhip6->ip6_plen = htons((u_short)(len + hlen +
1108			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1109			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1110				error = ENOBUFS;
1111				ip6stat.ip6s_odropped++;
1112				goto sendorfree;
1113			}
1114			m_cat(m, m_frgpart);
1115			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1116			m->m_pkthdr.rcvif = (struct ifnet *)0;
1117			ip6f->ip6f_reserved = 0;
1118			ip6f->ip6f_ident = id;
1119			ip6f->ip6f_nxt = nextproto;
1120			ip6stat.ip6s_ofragments++;
1121			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1122		}
1123
1124		in6_ifstat_inc(ifp, ifs6_out_fragok);
1125	}
1126
1127	/*
1128	 * Remove leading garbages.
1129	 */
1130sendorfree:
1131	m = m0->m_nextpkt;
1132	m0->m_nextpkt = 0;
1133	m_freem(m0);
1134	for (m0 = m; m; m = m0) {
1135		m0 = m->m_nextpkt;
1136		m->m_nextpkt = 0;
1137		if (error == 0) {
1138 			/* Record statistics for this interface address. */
1139 			if (ia) {
1140 				ia->ia_ifa.if_opackets++;
1141 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1142 			}
1143#ifdef IPSEC
1144			/* clean ipsec history once it goes out of the node */
1145			ipsec_delaux(m);
1146#endif
1147			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1148		} else
1149			m_freem(m);
1150	}
1151
1152	if (error == 0)
1153		ip6stat.ip6s_fragmented++;
1154
1155done:
1156	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1157		RTFREE(ro->ro_rt);
1158	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1159		RTFREE(ro_pmtu->ro_rt);
1160	}
1161
1162#ifdef IPSEC
1163	if (sp != NULL)
1164		key_freesp(sp);
1165#endif /* IPSEC */
1166#ifdef FAST_IPSEC
1167	if (sp != NULL)
1168		KEY_FREESP(&sp);
1169#endif /* FAST_IPSEC */
1170
1171	return (error);
1172
1173freehdrs:
1174	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1175	m_freem(exthdrs.ip6e_dest1);
1176	m_freem(exthdrs.ip6e_rthdr);
1177	m_freem(exthdrs.ip6e_dest2);
1178	/* FALLTHROUGH */
1179bad:
1180	m_freem(m);
1181	goto done;
1182}
1183
1184static int
1185ip6_copyexthdr(mp, hdr, hlen)
1186	struct mbuf **mp;
1187	caddr_t hdr;
1188	int hlen;
1189{
1190	struct mbuf *m;
1191
1192	if (hlen > MCLBYTES)
1193		return (ENOBUFS); /* XXX */
1194
1195	MGET(m, M_DONTWAIT, MT_DATA);
1196	if (!m)
1197		return (ENOBUFS);
1198
1199	if (hlen > MLEN) {
1200		MCLGET(m, M_DONTWAIT);
1201		if ((m->m_flags & M_EXT) == 0) {
1202			m_free(m);
1203			return (ENOBUFS);
1204		}
1205	}
1206	m->m_len = hlen;
1207	if (hdr)
1208		bcopy(hdr, mtod(m, caddr_t), hlen);
1209
1210	*mp = m;
1211	return (0);
1212}
1213
1214/*
1215 * Insert jumbo payload option.
1216 */
1217static int
1218ip6_insert_jumboopt(exthdrs, plen)
1219	struct ip6_exthdrs *exthdrs;
1220	u_int32_t plen;
1221{
1222	struct mbuf *mopt;
1223	u_char *optbuf;
1224	u_int32_t v;
1225
1226#define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1227
1228	/*
1229	 * If there is no hop-by-hop options header, allocate new one.
1230	 * If there is one but it doesn't have enough space to store the
1231	 * jumbo payload option, allocate a cluster to store the whole options.
1232	 * Otherwise, use it to store the options.
1233	 */
1234	if (exthdrs->ip6e_hbh == 0) {
1235		MGET(mopt, M_DONTWAIT, MT_DATA);
1236		if (mopt == 0)
1237			return (ENOBUFS);
1238		mopt->m_len = JUMBOOPTLEN;
1239		optbuf = mtod(mopt, u_char *);
1240		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1241		exthdrs->ip6e_hbh = mopt;
1242	} else {
1243		struct ip6_hbh *hbh;
1244
1245		mopt = exthdrs->ip6e_hbh;
1246		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1247			/*
1248			 * XXX assumption:
1249			 * - exthdrs->ip6e_hbh is not referenced from places
1250			 *   other than exthdrs.
1251			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1252			 */
1253			int oldoptlen = mopt->m_len;
1254			struct mbuf *n;
1255
1256			/*
1257			 * XXX: give up if the whole (new) hbh header does
1258			 * not fit even in an mbuf cluster.
1259			 */
1260			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1261				return (ENOBUFS);
1262
1263			/*
1264			 * As a consequence, we must always prepare a cluster
1265			 * at this point.
1266			 */
1267			MGET(n, M_DONTWAIT, MT_DATA);
1268			if (n) {
1269				MCLGET(n, M_DONTWAIT);
1270				if ((n->m_flags & M_EXT) == 0) {
1271					m_freem(n);
1272					n = NULL;
1273				}
1274			}
1275			if (!n)
1276				return (ENOBUFS);
1277			n->m_len = oldoptlen + JUMBOOPTLEN;
1278			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1279			    oldoptlen);
1280			optbuf = mtod(n, caddr_t) + oldoptlen;
1281			m_freem(mopt);
1282			mopt = exthdrs->ip6e_hbh = n;
1283		} else {
1284			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1285			mopt->m_len += JUMBOOPTLEN;
1286		}
1287		optbuf[0] = IP6OPT_PADN;
1288		optbuf[1] = 1;
1289
1290		/*
1291		 * Adjust the header length according to the pad and
1292		 * the jumbo payload option.
1293		 */
1294		hbh = mtod(mopt, struct ip6_hbh *);
1295		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1296	}
1297
1298	/* fill in the option. */
1299	optbuf[2] = IP6OPT_JUMBO;
1300	optbuf[3] = 4;
1301	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1302	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1303
1304	/* finally, adjust the packet header length */
1305	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1306
1307	return (0);
1308#undef JUMBOOPTLEN
1309}
1310
1311/*
1312 * Insert fragment header and copy unfragmentable header portions.
1313 */
1314static int
1315ip6_insertfraghdr(m0, m, hlen, frghdrp)
1316	struct mbuf *m0, *m;
1317	int hlen;
1318	struct ip6_frag **frghdrp;
1319{
1320	struct mbuf *n, *mlast;
1321
1322	if (hlen > sizeof(struct ip6_hdr)) {
1323		n = m_copym(m0, sizeof(struct ip6_hdr),
1324		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1325		if (n == 0)
1326			return (ENOBUFS);
1327		m->m_next = n;
1328	} else
1329		n = m;
1330
1331	/* Search for the last mbuf of unfragmentable part. */
1332	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1333		;
1334
1335	if ((mlast->m_flags & M_EXT) == 0 &&
1336	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1337		/* use the trailing space of the last mbuf for the fragment hdr */
1338		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1339		    mlast->m_len);
1340		mlast->m_len += sizeof(struct ip6_frag);
1341		m->m_pkthdr.len += sizeof(struct ip6_frag);
1342	} else {
1343		/* allocate a new mbuf for the fragment header */
1344		struct mbuf *mfrg;
1345
1346		MGET(mfrg, M_DONTWAIT, MT_DATA);
1347		if (mfrg == 0)
1348			return (ENOBUFS);
1349		mfrg->m_len = sizeof(struct ip6_frag);
1350		*frghdrp = mtod(mfrg, struct ip6_frag *);
1351		mlast->m_next = mfrg;
1352	}
1353
1354	return (0);
1355}
1356
1357static int
1358ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
1359	struct route_in6 *ro_pmtu, *ro;
1360	struct ifnet *ifp;
1361	struct in6_addr *dst;
1362	u_long *mtup;
1363	int *alwaysfragp;
1364{
1365	u_int32_t mtu = 0;
1366	int alwaysfrag = 0;
1367	int error = 0;
1368
1369	if (ro_pmtu != ro) {
1370		/* The first hop and the final destination may differ. */
1371		struct sockaddr_in6 *sa6_dst =
1372		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1373		if (ro_pmtu->ro_rt &&
1374		    ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
1375		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1376			RTFREE(ro_pmtu->ro_rt);
1377			ro_pmtu->ro_rt = (struct rtentry *)NULL;
1378		}
1379		if (ro_pmtu->ro_rt == NULL) {
1380			bzero(sa6_dst, sizeof(*sa6_dst));
1381			sa6_dst->sin6_family = AF_INET6;
1382			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1383			sa6_dst->sin6_addr = *dst;
1384
1385			rtalloc((struct route *)ro_pmtu);
1386		}
1387	}
1388	if (ro_pmtu->ro_rt) {
1389		u_int32_t ifmtu;
1390
1391		if (ifp == NULL)
1392			ifp = ro_pmtu->ro_rt->rt_ifp;
1393		ifmtu = IN6_LINKMTU(ifp);
1394		mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1395		if (mtu == 0)
1396			mtu = ifmtu;
1397		else if (mtu < IPV6_MMTU) {
1398			/*
1399			 * RFC2460 section 5, last paragraph:
1400			 * if we record ICMPv6 too big message with
1401			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1402			 * or smaller, with framgent header attached.
1403			 * (fragment header is needed regardless from the
1404			 * packet size, for translators to identify packets)
1405			 */
1406			alwaysfrag = 1;
1407			mtu = IPV6_MMTU;
1408		} else if (mtu > ifmtu) {
1409			/*
1410			 * The MTU on the route is larger than the MTU on
1411			 * the interface!  This shouldn't happen, unless the
1412			 * MTU of the interface has been changed after the
1413			 * interface was brought up.  Change the MTU in the
1414			 * route to match the interface MTU (as long as the
1415			 * field isn't locked).
1416			 */
1417			mtu = ifmtu;
1418			if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU))
1419				ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1420		}
1421	} else if (ifp) {
1422		mtu = IN6_LINKMTU(ifp);
1423	} else
1424		error = EHOSTUNREACH; /* XXX */
1425
1426	*mtup = mtu;
1427	if (alwaysfragp)
1428		*alwaysfragp = alwaysfrag;
1429	return (error);
1430}
1431
1432/*
1433 * IP6 socket option processing.
1434 */
1435int
1436ip6_ctloutput(so, sopt)
1437	struct socket *so;
1438	struct sockopt *sopt;
1439{
1440	int privileged, optdatalen, uproto;
1441	void *optdata;
1442	struct inpcb *in6p = sotoinpcb(so);
1443	int error, optval;
1444	int level, op, optname;
1445	int optlen;
1446	struct thread *td;
1447
1448	if (sopt) {
1449		level = sopt->sopt_level;
1450		op = sopt->sopt_dir;
1451		optname = sopt->sopt_name;
1452		optlen = sopt->sopt_valsize;
1453		td = sopt->sopt_td;
1454	} else {
1455		panic("ip6_ctloutput: arg soopt is NULL");
1456	}
1457	error = optval = 0;
1458
1459	privileged = (td == 0 || suser(td)) ? 0 : 1;
1460	uproto = (int)so->so_proto->pr_protocol;
1461
1462	if (level == IPPROTO_IPV6) {
1463		switch (op) {
1464
1465		case SOPT_SET:
1466			switch (optname) {
1467			case IPV6_2292PKTOPTIONS:
1468#ifdef IPV6_PKTOPTIONS
1469			case IPV6_PKTOPTIONS:
1470#endif
1471			{
1472				struct mbuf *m;
1473
1474				error = soopt_getm(sopt, &m); /* XXX */
1475				if (error != NULL)
1476					break;
1477				error = soopt_mcopyin(sopt, m); /* XXX */
1478				if (error != NULL)
1479					break;
1480				error = ip6_pcbopts(&in6p->in6p_outputopts,
1481						    m, so, sopt);
1482				m_freem(m); /* XXX */
1483				break;
1484			}
1485
1486			/*
1487			 * Use of some Hop-by-Hop options or some
1488			 * Destination options, might require special
1489			 * privilege.  That is, normal applications
1490			 * (without special privilege) might be forbidden
1491			 * from setting certain options in outgoing packets,
1492			 * and might never see certain options in received
1493			 * packets. [RFC 2292 Section 6]
1494			 * KAME specific note:
1495			 *  KAME prevents non-privileged users from sending or
1496			 *  receiving ANY hbh/dst options in order to avoid
1497			 *  overhead of parsing options in the kernel.
1498			 */
1499			case IPV6_RECVHOPOPTS:
1500			case IPV6_RECVDSTOPTS:
1501			case IPV6_RECVRTHDRDSTOPTS:
1502				if (!privileged) {
1503					error = EPERM;
1504					break;
1505				}
1506				/* FALLTHROUGH */
1507			case IPV6_UNICAST_HOPS:
1508			case IPV6_HOPLIMIT:
1509			case IPV6_FAITH:
1510
1511			case IPV6_RECVPKTINFO:
1512			case IPV6_RECVHOPLIMIT:
1513			case IPV6_RECVRTHDR:
1514			case IPV6_RECVPATHMTU:
1515			case IPV6_RECVTCLASS:
1516			case IPV6_V6ONLY:
1517			case IPV6_AUTOFLOWLABEL:
1518				if (optlen != sizeof(int)) {
1519					error = EINVAL;
1520					break;
1521				}
1522				error = sooptcopyin(sopt, &optval,
1523					sizeof optval, sizeof optval);
1524				if (error)
1525					break;
1526				switch (optname) {
1527
1528				case IPV6_UNICAST_HOPS:
1529					if (optval < -1 || optval >= 256)
1530						error = EINVAL;
1531					else {
1532						/* -1 = kernel default */
1533						in6p->in6p_hops = optval;
1534						if ((in6p->in6p_vflag &
1535						     INP_IPV4) != 0)
1536							in6p->inp_ip_ttl = optval;
1537					}
1538					break;
1539#define OPTSET(bit) \
1540do { \
1541	if (optval) \
1542		in6p->in6p_flags |= (bit); \
1543	else \
1544		in6p->in6p_flags &= ~(bit); \
1545} while (/*CONSTCOND*/ 0)
1546#define OPTSET2292(bit) \
1547do { \
1548	in6p->in6p_flags |= IN6P_RFC2292; \
1549	if (optval) \
1550		in6p->in6p_flags |= (bit); \
1551	else \
1552		in6p->in6p_flags &= ~(bit); \
1553} while (/*CONSTCOND*/ 0)
1554#define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1555
1556				case IPV6_RECVPKTINFO:
1557					/* cannot mix with RFC2292 */
1558					if (OPTBIT(IN6P_RFC2292)) {
1559						error = EINVAL;
1560						break;
1561					}
1562					OPTSET(IN6P_PKTINFO);
1563					break;
1564
1565				case IPV6_HOPLIMIT:
1566				{
1567					struct ip6_pktopts **optp;
1568
1569					/* cannot mix with RFC2292 */
1570					if (OPTBIT(IN6P_RFC2292)) {
1571						error = EINVAL;
1572						break;
1573					}
1574					optp = &in6p->in6p_outputopts;
1575					error = ip6_pcbopt(IPV6_HOPLIMIT,
1576							   (u_char *)&optval,
1577							   sizeof(optval),
1578							   optp,
1579							   privileged, uproto);
1580					break;
1581				}
1582
1583				case IPV6_RECVHOPLIMIT:
1584					/* cannot mix with RFC2292 */
1585					if (OPTBIT(IN6P_RFC2292)) {
1586						error = EINVAL;
1587						break;
1588					}
1589					OPTSET(IN6P_HOPLIMIT);
1590					break;
1591
1592				case IPV6_RECVHOPOPTS:
1593					/* cannot mix with RFC2292 */
1594					if (OPTBIT(IN6P_RFC2292)) {
1595						error = EINVAL;
1596						break;
1597					}
1598					OPTSET(IN6P_HOPOPTS);
1599					break;
1600
1601				case IPV6_RECVDSTOPTS:
1602					/* cannot mix with RFC2292 */
1603					if (OPTBIT(IN6P_RFC2292)) {
1604						error = EINVAL;
1605						break;
1606					}
1607					OPTSET(IN6P_DSTOPTS);
1608					break;
1609
1610				case IPV6_RECVRTHDRDSTOPTS:
1611					/* cannot mix with RFC2292 */
1612					if (OPTBIT(IN6P_RFC2292)) {
1613						error = EINVAL;
1614						break;
1615					}
1616					OPTSET(IN6P_RTHDRDSTOPTS);
1617					break;
1618
1619				case IPV6_RECVRTHDR:
1620					/* cannot mix with RFC2292 */
1621					if (OPTBIT(IN6P_RFC2292)) {
1622						error = EINVAL;
1623						break;
1624					}
1625					OPTSET(IN6P_RTHDR);
1626					break;
1627
1628				case IPV6_FAITH:
1629					OPTSET(IN6P_FAITH);
1630					break;
1631
1632				case IPV6_RECVPATHMTU:
1633					/*
1634					 * We ignore this option for TCP
1635					 * sockets.
1636					 * (rfc2292bis leaves this case
1637					 * unspecified.)
1638					 */
1639					if (uproto != IPPROTO_TCP)
1640						OPTSET(IN6P_MTU);
1641					break;
1642
1643				case IPV6_V6ONLY:
1644					/*
1645					 * make setsockopt(IPV6_V6ONLY)
1646					 * available only prior to bind(2).
1647					 * see ipng mailing list, Jun 22 2001.
1648					 */
1649					if (in6p->in6p_lport ||
1650					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1651						error = EINVAL;
1652						break;
1653					}
1654					OPTSET(IN6P_IPV6_V6ONLY);
1655					if (optval)
1656						in6p->in6p_vflag &= ~INP_IPV4;
1657					else
1658						in6p->in6p_vflag |= INP_IPV4;
1659					break;
1660				case IPV6_RECVTCLASS:
1661					/* cannot mix with RFC2292 XXX */
1662					if (OPTBIT(IN6P_RFC2292)) {
1663						error = EINVAL;
1664						break;
1665					}
1666					OPTSET(IN6P_TCLASS);
1667					break;
1668				case IPV6_AUTOFLOWLABEL:
1669					OPTSET(IN6P_AUTOFLOWLABEL);
1670					break;
1671
1672				}
1673				break;
1674
1675			case IPV6_TCLASS:
1676			case IPV6_DONTFRAG:
1677			case IPV6_USE_MIN_MTU:
1678			case IPV6_PREFER_TEMPADDR:
1679				if (optlen != sizeof(optval)) {
1680					error = EINVAL;
1681					break;
1682				}
1683				error = sooptcopyin(sopt, &optval,
1684					sizeof optval, sizeof optval);
1685				if (error)
1686					break;
1687				{
1688					struct ip6_pktopts **optp;
1689					optp = &in6p->in6p_outputopts;
1690					error = ip6_pcbopt(optname,
1691							   (u_char *)&optval,
1692							   sizeof(optval),
1693							   optp,
1694							   privileged, uproto);
1695					break;
1696				}
1697
1698			case IPV6_2292PKTINFO:
1699			case IPV6_2292HOPLIMIT:
1700			case IPV6_2292HOPOPTS:
1701			case IPV6_2292DSTOPTS:
1702			case IPV6_2292RTHDR:
1703				/* RFC 2292 */
1704				if (optlen != sizeof(int)) {
1705					error = EINVAL;
1706					break;
1707				}
1708				error = sooptcopyin(sopt, &optval,
1709					sizeof optval, sizeof optval);
1710				if (error)
1711					break;
1712				switch (optname) {
1713				case IPV6_2292PKTINFO:
1714					OPTSET2292(IN6P_PKTINFO);
1715					break;
1716				case IPV6_2292HOPLIMIT:
1717					OPTSET2292(IN6P_HOPLIMIT);
1718					break;
1719				case IPV6_2292HOPOPTS:
1720					/*
1721					 * Check super-user privilege.
1722					 * See comments for IPV6_RECVHOPOPTS.
1723					 */
1724					if (!privileged)
1725						return (EPERM);
1726					OPTSET2292(IN6P_HOPOPTS);
1727					break;
1728				case IPV6_2292DSTOPTS:
1729					if (!privileged)
1730						return (EPERM);
1731					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1732					break;
1733				case IPV6_2292RTHDR:
1734					OPTSET2292(IN6P_RTHDR);
1735					break;
1736				}
1737				break;
1738			case IPV6_PKTINFO:
1739			case IPV6_HOPOPTS:
1740			case IPV6_RTHDR:
1741			case IPV6_DSTOPTS:
1742			case IPV6_RTHDRDSTOPTS:
1743			case IPV6_NEXTHOP:
1744			{
1745				/* new advanced API (2292bis) */
1746				u_char *optbuf;
1747				int optlen;
1748				struct ip6_pktopts **optp;
1749
1750				/* cannot mix with RFC2292 */
1751				if (OPTBIT(IN6P_RFC2292)) {
1752					error = EINVAL;
1753					break;
1754				}
1755
1756				optbuf = sopt->sopt_val;
1757				optlen = sopt->sopt_valsize;
1758				optp = &in6p->in6p_outputopts;
1759				error = ip6_pcbopt(optname,
1760						   optbuf, optlen,
1761						   optp, privileged, uproto);
1762				break;
1763			}
1764#undef OPTSET
1765
1766			case IPV6_MULTICAST_IF:
1767			case IPV6_MULTICAST_HOPS:
1768			case IPV6_MULTICAST_LOOP:
1769			case IPV6_JOIN_GROUP:
1770			case IPV6_LEAVE_GROUP:
1771			    {
1772				if (sopt->sopt_valsize > MLEN) {
1773					error = EMSGSIZE;
1774					break;
1775				}
1776				/* XXX */
1777			    }
1778			    /* FALLTHROUGH */
1779			    {
1780				struct mbuf *m;
1781
1782				if (sopt->sopt_valsize > MCLBYTES) {
1783					error = EMSGSIZE;
1784					break;
1785				}
1786				/* XXX */
1787				MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_HEADER);
1788				if (m == 0) {
1789					error = ENOBUFS;
1790					break;
1791				}
1792				if (sopt->sopt_valsize > MLEN) {
1793					MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
1794					if ((m->m_flags & M_EXT) == 0) {
1795						m_free(m);
1796						error = ENOBUFS;
1797						break;
1798					}
1799				}
1800				m->m_len = sopt->sopt_valsize;
1801				error = sooptcopyin(sopt, mtod(m, char *),
1802						    m->m_len, m->m_len);
1803				if (error) {
1804					(void)m_free(m);
1805					break;
1806				}
1807				error =	ip6_setmoptions(sopt->sopt_name,
1808							&in6p->in6p_moptions,
1809							m);
1810				(void)m_free(m);
1811			    }
1812				break;
1813
1814			case IPV6_PORTRANGE:
1815				error = sooptcopyin(sopt, &optval,
1816				    sizeof optval, sizeof optval);
1817				if (error)
1818					break;
1819
1820				switch (optval) {
1821				case IPV6_PORTRANGE_DEFAULT:
1822					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1823					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1824					break;
1825
1826				case IPV6_PORTRANGE_HIGH:
1827					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1828					in6p->in6p_flags |= IN6P_HIGHPORT;
1829					break;
1830
1831				case IPV6_PORTRANGE_LOW:
1832					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1833					in6p->in6p_flags |= IN6P_LOWPORT;
1834					break;
1835
1836				default:
1837					error = EINVAL;
1838					break;
1839				}
1840				break;
1841
1842#if defined(IPSEC) || defined(FAST_IPSEC)
1843			case IPV6_IPSEC_POLICY:
1844			    {
1845				caddr_t req = NULL;
1846				size_t len = 0;
1847				struct mbuf *m;
1848
1849				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1850					break;
1851				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1852					break;
1853				if (m) {
1854					req = mtod(m, caddr_t);
1855					len = m->m_len;
1856				}
1857				error = ipsec6_set_policy(in6p, optname, req,
1858							  len, privileged);
1859				m_freem(m);
1860			    }
1861				break;
1862#endif /* KAME IPSEC */
1863
1864			case IPV6_FW_ADD:
1865			case IPV6_FW_DEL:
1866			case IPV6_FW_FLUSH:
1867			case IPV6_FW_ZERO:
1868			    {
1869				struct mbuf *m;
1870				struct mbuf **mp = &m;
1871
1872				if (ip6_fw_ctl_ptr == NULL)
1873					return EINVAL;
1874				/* XXX */
1875				if ((error = soopt_getm(sopt, &m)) != 0)
1876					break;
1877				/* XXX */
1878				if ((error = soopt_mcopyin(sopt, m)) != 0)
1879					break;
1880				error = (*ip6_fw_ctl_ptr)(optname, mp);
1881				m = *mp;
1882			    }
1883				break;
1884
1885			default:
1886				error = ENOPROTOOPT;
1887				break;
1888			}
1889			break;
1890
1891		case SOPT_GET:
1892			switch (optname) {
1893
1894			case IPV6_2292PKTOPTIONS:
1895#ifdef IPV6_PKTOPTIONS
1896			case IPV6_PKTOPTIONS:
1897#endif
1898				/*
1899				 * RFC3542 (effectively) deprecated the
1900				 * semantics of the 2292-style pktoptions.
1901				 * Since it was not reliable in nature (i.e.,
1902				 * applications had to expect the lack of some
1903				 * information after all), it would make sense
1904				 * to simplify this part by always returning
1905				 * empty data.
1906				 */
1907				sopt->sopt_valsize = 0;
1908				break;
1909
1910			case IPV6_RECVHOPOPTS:
1911			case IPV6_RECVDSTOPTS:
1912			case IPV6_RECVRTHDRDSTOPTS:
1913			case IPV6_UNICAST_HOPS:
1914			case IPV6_RECVPKTINFO:
1915			case IPV6_RECVHOPLIMIT:
1916			case IPV6_RECVRTHDR:
1917			case IPV6_RECVPATHMTU:
1918
1919			case IPV6_FAITH:
1920			case IPV6_V6ONLY:
1921			case IPV6_PORTRANGE:
1922			case IPV6_RECVTCLASS:
1923			case IPV6_AUTOFLOWLABEL:
1924				switch (optname) {
1925
1926				case IPV6_RECVHOPOPTS:
1927					optval = OPTBIT(IN6P_HOPOPTS);
1928					break;
1929
1930				case IPV6_RECVDSTOPTS:
1931					optval = OPTBIT(IN6P_DSTOPTS);
1932					break;
1933
1934				case IPV6_RECVRTHDRDSTOPTS:
1935					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1936					break;
1937
1938				case IPV6_UNICAST_HOPS:
1939					optval = in6p->in6p_hops;
1940					break;
1941
1942				case IPV6_RECVPKTINFO:
1943					optval = OPTBIT(IN6P_PKTINFO);
1944					break;
1945
1946				case IPV6_RECVHOPLIMIT:
1947					optval = OPTBIT(IN6P_HOPLIMIT);
1948					break;
1949
1950				case IPV6_RECVRTHDR:
1951					optval = OPTBIT(IN6P_RTHDR);
1952					break;
1953
1954				case IPV6_RECVPATHMTU:
1955					optval = OPTBIT(IN6P_MTU);
1956					break;
1957
1958				case IPV6_FAITH:
1959					optval = OPTBIT(IN6P_FAITH);
1960					break;
1961
1962				case IPV6_V6ONLY:
1963					optval = OPTBIT(IN6P_IPV6_V6ONLY);
1964					break;
1965
1966				case IPV6_PORTRANGE:
1967				    {
1968					int flags;
1969					flags = in6p->in6p_flags;
1970					if (flags & IN6P_HIGHPORT)
1971						optval = IPV6_PORTRANGE_HIGH;
1972					else if (flags & IN6P_LOWPORT)
1973						optval = IPV6_PORTRANGE_LOW;
1974					else
1975						optval = 0;
1976					break;
1977				    }
1978				case IPV6_RECVTCLASS:
1979					optval = OPTBIT(IN6P_TCLASS);
1980					break;
1981
1982				case IPV6_AUTOFLOWLABEL:
1983					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
1984					break;
1985				}
1986				if (error)
1987					break;
1988				error = sooptcopyout(sopt, &optval,
1989					sizeof optval);
1990				break;
1991
1992			case IPV6_PATHMTU:
1993			{
1994				u_long pmtu = 0;
1995				struct ip6_mtuinfo mtuinfo;
1996				struct route_in6 *ro = (struct route_in6 *)&in6p->in6p_route;
1997
1998				if (!(so->so_state & SS_ISCONNECTED))
1999					return (ENOTCONN);
2000				/*
2001				 * XXX: we dot not consider the case of source
2002				 * routing, or optional information to specify
2003				 * the outgoing interface.
2004				 */
2005				error = ip6_getpmtu(ro, NULL, NULL,
2006				    &in6p->in6p_faddr, &pmtu, NULL);
2007				if (error)
2008					break;
2009				if (pmtu > IPV6_MAXPACKET)
2010					pmtu = IPV6_MAXPACKET;
2011
2012				bzero(&mtuinfo, sizeof(mtuinfo));
2013				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2014				optdata = (void *)&mtuinfo;
2015				optdatalen = sizeof(mtuinfo);
2016				error = sooptcopyout(sopt, optdata,
2017				    optdatalen);
2018				break;
2019			}
2020
2021			case IPV6_2292PKTINFO:
2022			case IPV6_2292HOPLIMIT:
2023			case IPV6_2292HOPOPTS:
2024			case IPV6_2292RTHDR:
2025			case IPV6_2292DSTOPTS:
2026				switch (optname) {
2027				case IPV6_2292PKTINFO:
2028					optval = OPTBIT(IN6P_PKTINFO);
2029					break;
2030				case IPV6_2292HOPLIMIT:
2031					optval = OPTBIT(IN6P_HOPLIMIT);
2032					break;
2033				case IPV6_2292HOPOPTS:
2034					optval = OPTBIT(IN6P_HOPOPTS);
2035					break;
2036				case IPV6_2292RTHDR:
2037					optval = OPTBIT(IN6P_RTHDR);
2038					break;
2039				case IPV6_2292DSTOPTS:
2040					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2041					break;
2042				}
2043				error = sooptcopyout(sopt, &optval,
2044				    sizeof optval);
2045				break;
2046			case IPV6_PKTINFO:
2047			case IPV6_HOPOPTS:
2048			case IPV6_RTHDR:
2049			case IPV6_DSTOPTS:
2050			case IPV6_RTHDRDSTOPTS:
2051			case IPV6_NEXTHOP:
2052			case IPV6_TCLASS:
2053			case IPV6_DONTFRAG:
2054			case IPV6_USE_MIN_MTU:
2055			case IPV6_PREFER_TEMPADDR:
2056				error = ip6_getpcbopt(in6p->in6p_outputopts,
2057				    optname, sopt);
2058				break;
2059
2060			case IPV6_MULTICAST_IF:
2061			case IPV6_MULTICAST_HOPS:
2062			case IPV6_MULTICAST_LOOP:
2063			case IPV6_JOIN_GROUP:
2064			case IPV6_LEAVE_GROUP:
2065			    {
2066				struct mbuf *m;
2067				error = ip6_getmoptions(sopt->sopt_name,
2068				    in6p->in6p_moptions, &m);
2069				if (error == 0)
2070					error = sooptcopyout(sopt,
2071					    mtod(m, char *), m->m_len);
2072				m_freem(m);
2073			    }
2074				break;
2075
2076#if defined(IPSEC) || defined(FAST_IPSEC)
2077			case IPV6_IPSEC_POLICY:
2078			  {
2079				caddr_t req = NULL;
2080				size_t len = 0;
2081				struct mbuf *m = NULL;
2082				struct mbuf **mp = &m;
2083				size_t ovalsize = sopt->sopt_valsize;
2084				caddr_t oval = (caddr_t)sopt->sopt_val;
2085
2086				error = soopt_getm(sopt, &m); /* XXX */
2087				if (error != NULL)
2088					break;
2089				error = soopt_mcopyin(sopt, m); /* XXX */
2090				if (error != NULL)
2091					break;
2092				sopt->sopt_valsize = ovalsize;
2093				sopt->sopt_val = oval;
2094				if (m) {
2095					req = mtod(m, caddr_t);
2096					len = m->m_len;
2097				}
2098				error = ipsec6_get_policy(in6p, req, len, mp);
2099				if (error == 0)
2100					error = soopt_mcopyout(sopt, m); /* XXX */
2101				if (error == 0 && m)
2102					m_freem(m);
2103				break;
2104			  }
2105#endif /* KAME IPSEC */
2106
2107			case IPV6_FW_GET:
2108			  {
2109				struct mbuf *m;
2110				struct mbuf **mp = &m;
2111
2112				if (ip6_fw_ctl_ptr == NULL)
2113			        {
2114					return EINVAL;
2115				}
2116				error = (*ip6_fw_ctl_ptr)(optname, mp);
2117				if (error == 0)
2118					error = soopt_mcopyout(sopt, m); /* XXX */
2119				if (error == 0 && m)
2120					m_freem(m);
2121			  }
2122				break;
2123
2124			default:
2125				error = ENOPROTOOPT;
2126				break;
2127			}
2128			break;
2129		}
2130	} else {		/* level != IPPROTO_IPV6 */
2131		error = EINVAL;
2132	}
2133	return (error);
2134}
2135
2136int
2137ip6_raw_ctloutput(so, sopt)
2138	struct socket *so;
2139	struct sockopt *sopt;
2140{
2141	int error = 0, optval, optlen;
2142	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2143	struct in6pcb *in6p = sotoin6pcb(so);
2144	int level, op, optname;
2145
2146	if (sopt) {
2147		level = sopt->sopt_level;
2148		op = sopt->sopt_dir;
2149		optname = sopt->sopt_name;
2150		optlen = sopt->sopt_valsize;
2151	} else
2152		panic("ip6_raw_ctloutput: arg soopt is NULL");
2153
2154	if (level != IPPROTO_IPV6) {
2155		return (EINVAL);
2156	}
2157
2158	switch (optname) {
2159	case IPV6_CHECKSUM:
2160		/*
2161		 * For ICMPv6 sockets, no modification allowed for checksum
2162		 * offset, permit "no change" values to help existing apps.
2163		 *
2164		 * XXX 2292bis says: "An attempt to set IPV6_CHECKSUM
2165		 * for an ICMPv6 socket will fail."
2166		 * The current behavior does not meet 2292bis.
2167		 */
2168		switch (op) {
2169		case SOPT_SET:
2170			if (optlen != sizeof(int)) {
2171				error = EINVAL;
2172				break;
2173			}
2174			error = sooptcopyin(sopt, &optval, sizeof(optval),
2175					    sizeof(optval));
2176			if (error)
2177				break;
2178			if ((optval % 2) != 0) {
2179				/* the API assumes even offset values */
2180				error = EINVAL;
2181			} else if (so->so_proto->pr_protocol ==
2182			    IPPROTO_ICMPV6) {
2183				if (optval != icmp6off)
2184					error = EINVAL;
2185			} else
2186				in6p->in6p_cksum = optval;
2187			break;
2188
2189		case SOPT_GET:
2190			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2191				optval = icmp6off;
2192			else
2193				optval = in6p->in6p_cksum;
2194
2195			error = sooptcopyout(sopt, &optval, sizeof(optval));
2196			break;
2197
2198		default:
2199			error = EINVAL;
2200			break;
2201		}
2202		break;
2203
2204	default:
2205		error = ENOPROTOOPT;
2206		break;
2207	}
2208
2209	return (error);
2210}
2211
2212/*
2213 * Set up IP6 options in pcb for insertion in output packets or
2214 * specifying behavior of outgoing packets.
2215 */
2216static int
2217ip6_pcbopts(pktopt, m, so, sopt)
2218	struct ip6_pktopts **pktopt;
2219	struct mbuf *m;
2220	struct socket *so;
2221	struct sockopt *sopt;
2222{
2223	struct ip6_pktopts *opt = *pktopt;
2224	int error = 0;
2225	struct thread *td = sopt->sopt_td;
2226	int priv = 0;
2227
2228	/* turn off any old options. */
2229	if (opt) {
2230#ifdef DIAGNOSTIC
2231		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2232		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2233		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2234			printf("ip6_pcbopts: all specified options are cleared.\n");
2235#endif
2236		ip6_clearpktopts(opt, -1);
2237	} else
2238		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2239	*pktopt = NULL;
2240
2241	if (!m || m->m_len == 0) {
2242		/*
2243		 * Only turning off any previous options, regardless of
2244		 * whether the opt is just created or given.
2245		 */
2246		free(opt, M_IP6OPT);
2247		return (0);
2248	}
2249
2250	/*  set options specified by user. */
2251	if (td && !suser(td))
2252		priv = 1;
2253	if ((error = ip6_setpktoptions(m, opt, NULL, priv, 1,
2254	    so->so_proto->pr_protocol)) != 0) {
2255		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2256		free(opt, M_IP6OPT);
2257		return (error);
2258	}
2259	*pktopt = opt;
2260	return (0);
2261}
2262
2263/*
2264 * initialize ip6_pktopts.  beware that there are non-zero default values in
2265 * the struct.
2266 */
2267void
2268init_ip6pktopts(opt)
2269	struct ip6_pktopts *opt;
2270{
2271
2272	bzero(opt, sizeof(*opt));
2273	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2274	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2275	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2276	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2277}
2278
2279static int
2280ip6_pcbopt(optname, buf, len, pktopt, priv, uproto)
2281	int optname, len, priv;
2282	u_char *buf;
2283	struct ip6_pktopts **pktopt;
2284	int uproto;
2285{
2286	struct ip6_pktopts *opt;
2287
2288	if (*pktopt == NULL) {
2289		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2290		    M_WAITOK);
2291		init_ip6pktopts(*pktopt);
2292		(*pktopt)->needfree = 1;
2293	}
2294	opt = *pktopt;
2295
2296	return (ip6_setpktoption(optname, buf, len, opt, priv, 1, 0, uproto));
2297}
2298
2299static int
2300ip6_getpcbopt(pktopt, optname, sopt)
2301	struct ip6_pktopts *pktopt;
2302	struct sockopt *sopt;
2303	int optname;
2304{
2305	void *optdata = NULL;
2306	int optdatalen = 0;
2307	struct ip6_ext *ip6e;
2308	int error = 0;
2309	struct in6_pktinfo null_pktinfo;
2310	int deftclass = 0, on;
2311	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2312	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2313
2314	switch (optname) {
2315	case IPV6_PKTINFO:
2316		if (pktopt && pktopt->ip6po_pktinfo)
2317			optdata = (void *)pktopt->ip6po_pktinfo;
2318		else {
2319			/* XXX: we don't have to do this every time... */
2320			bzero(&null_pktinfo, sizeof(null_pktinfo));
2321			optdata = (void *)&null_pktinfo;
2322		}
2323		optdatalen = sizeof(struct in6_pktinfo);
2324		break;
2325	case IPV6_TCLASS:
2326		if (pktopt && pktopt->ip6po_tclass >= 0)
2327			optdata = (void *)&pktopt->ip6po_tclass;
2328		else
2329			optdata = (void *)&deftclass;
2330		optdatalen = sizeof(int);
2331		break;
2332	case IPV6_HOPOPTS:
2333		if (pktopt && pktopt->ip6po_hbh) {
2334			optdata = (void *)pktopt->ip6po_hbh;
2335			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2336			optdatalen = (ip6e->ip6e_len + 1) << 3;
2337		}
2338		break;
2339	case IPV6_RTHDR:
2340		if (pktopt && pktopt->ip6po_rthdr) {
2341			optdata = (void *)pktopt->ip6po_rthdr;
2342			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2343			optdatalen = (ip6e->ip6e_len + 1) << 3;
2344		}
2345		break;
2346	case IPV6_RTHDRDSTOPTS:
2347		if (pktopt && pktopt->ip6po_dest1) {
2348			optdata = (void *)pktopt->ip6po_dest1;
2349			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2350			optdatalen = (ip6e->ip6e_len + 1) << 3;
2351		}
2352		break;
2353	case IPV6_DSTOPTS:
2354		if (pktopt && pktopt->ip6po_dest2) {
2355			optdata = (void *)pktopt->ip6po_dest2;
2356			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2357			optdatalen = (ip6e->ip6e_len + 1) << 3;
2358		}
2359		break;
2360	case IPV6_NEXTHOP:
2361		if (pktopt && pktopt->ip6po_nexthop) {
2362			optdata = (void *)pktopt->ip6po_nexthop;
2363			optdatalen = pktopt->ip6po_nexthop->sa_len;
2364		}
2365		break;
2366	case IPV6_USE_MIN_MTU:
2367		if (pktopt)
2368			optdata = (void *)&pktopt->ip6po_minmtu;
2369		else
2370			optdata = (void *)&defminmtu;
2371		optdatalen = sizeof(int);
2372		break;
2373	case IPV6_DONTFRAG:
2374		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2375			on = 1;
2376		else
2377			on = 0;
2378		optdata = (void *)&on;
2379		optdatalen = sizeof(on);
2380		break;
2381	case IPV6_PREFER_TEMPADDR:
2382		if (pktopt)
2383			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2384		else
2385			optdata = (void *)&defpreftemp;
2386		optdatalen = sizeof(int);
2387		break;
2388	default:		/* should not happen */
2389#ifdef DIAGNOSTIC
2390		panic("ip6_getpcbopt: unexpected option\n");
2391#endif
2392		return (ENOPROTOOPT);
2393	}
2394
2395	error = sooptcopyout(sopt, optdata, optdatalen);
2396
2397	return (error);
2398}
2399
2400void
2401ip6_clearpktopts(pktopt, optname)
2402	struct ip6_pktopts *pktopt;
2403	int optname;
2404{
2405	int needfree;
2406
2407	needfree = pktopt->needfree;
2408
2409	if (optname == -1 || optname == IPV6_PKTINFO) {
2410		if (needfree && pktopt->ip6po_pktinfo)
2411			free(pktopt->ip6po_pktinfo, M_IP6OPT);
2412		pktopt->ip6po_pktinfo = NULL;
2413	}
2414	if (optname == -1 || optname == IPV6_HOPLIMIT)
2415		pktopt->ip6po_hlim = -1;
2416	if (optname == -1 || optname == IPV6_TCLASS)
2417		pktopt->ip6po_tclass = -1;
2418	if (optname == -1 || optname == IPV6_NEXTHOP) {
2419		if (pktopt->ip6po_nextroute.ro_rt) {
2420			RTFREE(pktopt->ip6po_nextroute.ro_rt);
2421			pktopt->ip6po_nextroute.ro_rt = NULL;
2422		}
2423		if (needfree && pktopt->ip6po_nexthop)
2424			free(pktopt->ip6po_nexthop, M_IP6OPT);
2425		pktopt->ip6po_nexthop = NULL;
2426	}
2427	if (optname == -1 || optname == IPV6_HOPOPTS) {
2428		if (needfree && pktopt->ip6po_hbh)
2429			free(pktopt->ip6po_hbh, M_IP6OPT);
2430		pktopt->ip6po_hbh = NULL;
2431	}
2432	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2433		if (needfree && pktopt->ip6po_dest1)
2434			free(pktopt->ip6po_dest1, M_IP6OPT);
2435		pktopt->ip6po_dest1 = NULL;
2436	}
2437	if (optname == -1 || optname == IPV6_RTHDR) {
2438		if (needfree && pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2439			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2440		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2441		if (pktopt->ip6po_route.ro_rt) {
2442			RTFREE(pktopt->ip6po_route.ro_rt);
2443			pktopt->ip6po_route.ro_rt = NULL;
2444		}
2445	}
2446	if (optname == -1 || optname == IPV6_DSTOPTS) {
2447		if (needfree && pktopt->ip6po_dest2)
2448			free(pktopt->ip6po_dest2, M_IP6OPT);
2449		pktopt->ip6po_dest2 = NULL;
2450	}
2451}
2452
2453#define PKTOPT_EXTHDRCPY(type) \
2454do {\
2455	if (src->type) {\
2456		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2457		dst->type = malloc(hlen, M_IP6OPT, canwait);\
2458		if (dst->type == NULL && canwait == M_NOWAIT)\
2459			goto bad;\
2460		bcopy(src->type, dst->type, hlen);\
2461	}\
2462} while (/*CONSTCOND*/ 0)
2463
2464struct ip6_pktopts *
2465ip6_copypktopts(src, canwait)
2466	struct ip6_pktopts *src;
2467	int canwait;
2468{
2469	struct ip6_pktopts *dst;
2470
2471	if (src == NULL) {
2472		printf("ip6_clearpktopts: invalid argument\n");
2473		return (NULL);
2474	}
2475
2476	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2477	if (dst == NULL && canwait == M_NOWAIT)
2478		return (NULL);
2479	bzero(dst, sizeof(*dst));
2480	dst->needfree = 1;
2481
2482	dst->ip6po_hlim = src->ip6po_hlim;
2483	dst->ip6po_tclass = src->ip6po_tclass;
2484	dst->ip6po_flags = src->ip6po_flags;
2485	if (src->ip6po_pktinfo) {
2486		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2487		    M_IP6OPT, canwait);
2488		if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2489			goto bad;
2490		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2491	}
2492	if (src->ip6po_nexthop) {
2493		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2494		    M_IP6OPT, canwait);
2495		if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
2496			goto bad;
2497		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2498		    src->ip6po_nexthop->sa_len);
2499	}
2500	PKTOPT_EXTHDRCPY(ip6po_hbh);
2501	PKTOPT_EXTHDRCPY(ip6po_dest1);
2502	PKTOPT_EXTHDRCPY(ip6po_dest2);
2503	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2504	return (dst);
2505
2506  bad:
2507	if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2508	if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2509	if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2510	if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2511	if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2512	if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2513	free(dst, M_IP6OPT);
2514	return (NULL);
2515}
2516#undef PKTOPT_EXTHDRCPY
2517
2518void
2519ip6_freepcbopts(pktopt)
2520	struct ip6_pktopts *pktopt;
2521{
2522	if (pktopt == NULL)
2523		return;
2524
2525	ip6_clearpktopts(pktopt, -1);
2526
2527	free(pktopt, M_IP6OPT);
2528}
2529
2530/*
2531 * Set the IP6 multicast options in response to user setsockopt().
2532 */
2533static int
2534ip6_setmoptions(optname, im6op, m)
2535	int optname;
2536	struct ip6_moptions **im6op;
2537	struct mbuf *m;
2538{
2539	int error = 0;
2540	u_int loop, ifindex;
2541	struct ipv6_mreq *mreq;
2542	struct ifnet *ifp;
2543	struct ip6_moptions *im6o = *im6op;
2544	struct route_in6 ro;
2545	struct sockaddr_in6 *dst;
2546	struct in6_multi_mship *imm;
2547	struct thread *td = curthread;
2548
2549	if (im6o == NULL) {
2550		/*
2551		 * No multicast option buffer attached to the pcb;
2552		 * allocate one and initialize to default values.
2553		 */
2554		im6o = (struct ip6_moptions *)
2555			malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
2556
2557		if (im6o == NULL)
2558			return (ENOBUFS);
2559		*im6op = im6o;
2560		im6o->im6o_multicast_ifp = NULL;
2561		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2562		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2563		LIST_INIT(&im6o->im6o_memberships);
2564	}
2565
2566	switch (optname) {
2567
2568	case IPV6_MULTICAST_IF:
2569		/*
2570		 * Select the interface for outgoing multicast packets.
2571		 */
2572		if (m == NULL || m->m_len != sizeof(u_int)) {
2573			error = EINVAL;
2574			break;
2575		}
2576		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2577		if (ifindex < 0 || if_index < ifindex) {
2578			error = ENXIO;	/* XXX EINVAL? */
2579			break;
2580		}
2581		ifp = ifnet_byindex(ifindex);
2582		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2583			error = EADDRNOTAVAIL;
2584			break;
2585		}
2586		im6o->im6o_multicast_ifp = ifp;
2587		break;
2588
2589	case IPV6_MULTICAST_HOPS:
2590	    {
2591		/*
2592		 * Set the IP6 hoplimit for outgoing multicast packets.
2593		 */
2594		int optval;
2595		if (m == NULL || m->m_len != sizeof(int)) {
2596			error = EINVAL;
2597			break;
2598		}
2599		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2600		if (optval < -1 || optval >= 256)
2601			error = EINVAL;
2602		else if (optval == -1)
2603			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2604		else
2605			im6o->im6o_multicast_hlim = optval;
2606		break;
2607	    }
2608
2609	case IPV6_MULTICAST_LOOP:
2610		/*
2611		 * Set the loopback flag for outgoing multicast packets.
2612		 * Must be zero or one.
2613		 */
2614		if (m == NULL || m->m_len != sizeof(u_int)) {
2615			error = EINVAL;
2616			break;
2617		}
2618		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2619		if (loop > 1) {
2620			error = EINVAL;
2621			break;
2622		}
2623		im6o->im6o_multicast_loop = loop;
2624		break;
2625
2626	case IPV6_JOIN_GROUP:
2627		/*
2628		 * Add a multicast group membership.
2629		 * Group must be a valid IP6 multicast address.
2630		 */
2631		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2632			error = EINVAL;
2633			break;
2634		}
2635		mreq = mtod(m, struct ipv6_mreq *);
2636		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2637			/*
2638			 * We use the unspecified address to specify to accept
2639			 * all multicast addresses. Only super user is allowed
2640			 * to do this.
2641			 */
2642			if (suser(td)) {
2643				error = EACCES;
2644				break;
2645			}
2646		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2647			error = EINVAL;
2648			break;
2649		}
2650
2651		/*
2652		 * If the interface is specified, validate it.
2653		 */
2654		if (mreq->ipv6mr_interface < 0 ||
2655		    if_index < mreq->ipv6mr_interface) {
2656			error = ENXIO;	/* XXX EINVAL? */
2657			break;
2658		}
2659		/*
2660		 * If no interface was explicitly specified, choose an
2661		 * appropriate one according to the given multicast address.
2662		 */
2663		if (mreq->ipv6mr_interface == 0) {
2664			/*
2665			 * If the multicast address is in node-local scope,
2666			 * the interface should be a loopback interface.
2667			 * Otherwise, look up the routing table for the
2668			 * address, and choose the outgoing interface.
2669			 *   XXX: is it a good approach?
2670			 */
2671			if (IN6_IS_ADDR_MC_INTFACELOCAL(&mreq->ipv6mr_multiaddr)) {
2672				ifp = &loif[0];
2673			} else {
2674				ro.ro_rt = NULL;
2675				dst = (struct sockaddr_in6 *)&ro.ro_dst;
2676				bzero(dst, sizeof(*dst));
2677				dst->sin6_len = sizeof(struct sockaddr_in6);
2678				dst->sin6_family = AF_INET6;
2679				dst->sin6_addr = mreq->ipv6mr_multiaddr;
2680				rtalloc((struct route *)&ro);
2681				if (ro.ro_rt == NULL) {
2682					error = EADDRNOTAVAIL;
2683					break;
2684				}
2685				ifp = ro.ro_rt->rt_ifp;
2686				RTFREE(ro.ro_rt);
2687			}
2688		} else
2689			ifp = ifnet_byindex(mreq->ipv6mr_interface);
2690
2691		/*
2692		 * See if we found an interface, and confirm that it
2693		 * supports multicast
2694		 */
2695		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2696			error = EADDRNOTAVAIL;
2697			break;
2698		}
2699		/*
2700		 * Put interface index into the multicast address,
2701		 * if the address has link-local scope.
2702		 */
2703		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2704			mreq->ipv6mr_multiaddr.s6_addr16[1] =
2705			    htons(ifp->if_index);
2706		}
2707		/*
2708		 * See if the membership already exists.
2709		 */
2710		for (imm = im6o->im6o_memberships.lh_first;
2711		     imm != NULL; imm = imm->i6mm_chain.le_next)
2712			if (imm->i6mm_maddr->in6m_ifp == ifp &&
2713			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2714					       &mreq->ipv6mr_multiaddr))
2715				break;
2716		if (imm != NULL) {
2717			error = EADDRINUSE;
2718			break;
2719		}
2720		/*
2721		 * Everything looks good; add a new record to the multicast
2722		 * address list for the given interface.
2723		 */
2724		imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK);
2725		if (imm == NULL) {
2726			error = ENOBUFS;
2727			break;
2728		}
2729		if ((imm->i6mm_maddr =
2730		     in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
2731			free(imm, M_IPMADDR);
2732			break;
2733		}
2734		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2735		break;
2736
2737	case IPV6_LEAVE_GROUP:
2738		/*
2739		 * Drop a multicast group membership.
2740		 * Group must be a valid IP6 multicast address.
2741		 */
2742		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2743			error = EINVAL;
2744			break;
2745		}
2746		mreq = mtod(m, struct ipv6_mreq *);
2747		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2748			if (suser(td)) {
2749				error = EACCES;
2750				break;
2751			}
2752		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2753			error = EINVAL;
2754			break;
2755		}
2756		/*
2757		 * If an interface address was specified, get a pointer
2758		 * to its ifnet structure.
2759		 */
2760		if (mreq->ipv6mr_interface < 0
2761		 || if_index < mreq->ipv6mr_interface) {
2762			error = ENXIO;	/* XXX EINVAL? */
2763			break;
2764		}
2765		ifp = ifnet_byindex(mreq->ipv6mr_interface);
2766		/*
2767		 * Put interface index into the multicast address,
2768		 * if the address has link-local scope.
2769		 */
2770		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2771			mreq->ipv6mr_multiaddr.s6_addr16[1]
2772				= htons(mreq->ipv6mr_interface);
2773		}
2774
2775		/*
2776		 * Find the membership in the membership list.
2777		 */
2778		for (imm = im6o->im6o_memberships.lh_first;
2779		     imm != NULL; imm = imm->i6mm_chain.le_next) {
2780			if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2781			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2782			    &mreq->ipv6mr_multiaddr))
2783				break;
2784		}
2785		if (imm == NULL) {
2786			/* Unable to resolve interface */
2787			error = EADDRNOTAVAIL;
2788			break;
2789		}
2790		/*
2791		 * Give up the multicast address record to which the
2792		 * membership points.
2793		 */
2794		LIST_REMOVE(imm, i6mm_chain);
2795		in6_delmulti(imm->i6mm_maddr);
2796		free(imm, M_IPMADDR);
2797		break;
2798
2799	default:
2800		error = EOPNOTSUPP;
2801		break;
2802	}
2803
2804	/*
2805	 * If all options have default values, no need to keep the mbuf.
2806	 */
2807	if (im6o->im6o_multicast_ifp == NULL &&
2808	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2809	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2810	    im6o->im6o_memberships.lh_first == NULL) {
2811		free(*im6op, M_IPMOPTS);
2812		*im6op = NULL;
2813	}
2814
2815	return (error);
2816}
2817
2818/*
2819 * Return the IP6 multicast options in response to user getsockopt().
2820 */
2821static int
2822ip6_getmoptions(optname, im6o, mp)
2823	int optname;
2824	struct ip6_moptions *im6o;
2825	struct mbuf **mp;
2826{
2827	u_int *hlim, *loop, *ifindex;
2828
2829	*mp = m_get(M_TRYWAIT, MT_HEADER);		/* XXX */
2830
2831	switch (optname) {
2832
2833	case IPV6_MULTICAST_IF:
2834		ifindex = mtod(*mp, u_int *);
2835		(*mp)->m_len = sizeof(u_int);
2836		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2837			*ifindex = 0;
2838		else
2839			*ifindex = im6o->im6o_multicast_ifp->if_index;
2840		return (0);
2841
2842	case IPV6_MULTICAST_HOPS:
2843		hlim = mtod(*mp, u_int *);
2844		(*mp)->m_len = sizeof(u_int);
2845		if (im6o == NULL)
2846			*hlim = ip6_defmcasthlim;
2847		else
2848			*hlim = im6o->im6o_multicast_hlim;
2849		return (0);
2850
2851	case IPV6_MULTICAST_LOOP:
2852		loop = mtod(*mp, u_int *);
2853		(*mp)->m_len = sizeof(u_int);
2854		if (im6o == NULL)
2855			*loop = ip6_defmcasthlim;
2856		else
2857			*loop = im6o->im6o_multicast_loop;
2858		return (0);
2859
2860	default:
2861		return (EOPNOTSUPP);
2862	}
2863}
2864
2865/*
2866 * Discard the IP6 multicast options.
2867 */
2868void
2869ip6_freemoptions(im6o)
2870	struct ip6_moptions *im6o;
2871{
2872	struct in6_multi_mship *imm;
2873
2874	if (im6o == NULL)
2875		return;
2876
2877	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2878		LIST_REMOVE(imm, i6mm_chain);
2879		if (imm->i6mm_maddr)
2880			in6_delmulti(imm->i6mm_maddr);
2881		free(imm, M_IPMADDR);
2882	}
2883	free(im6o, M_IPMOPTS);
2884}
2885
2886/*
2887 * Set IPv6 outgoing packet options based on advanced API.
2888 */
2889int
2890ip6_setpktoptions(control, opt, stickyopt, priv, needcopy, uproto)
2891	struct mbuf *control;
2892	struct ip6_pktopts *opt, *stickyopt;
2893	int priv, needcopy, uproto;
2894{
2895	struct cmsghdr *cm = 0;
2896
2897	if (control == 0 || opt == 0)
2898		return (EINVAL);
2899
2900	if (stickyopt) {
2901		/*
2902		 * If stickyopt is provided, make a local copy of the options
2903		 * for this particular packet, then override them by ancillary
2904		 * objects.
2905		 * XXX: need to gain a reference for the cached route of the
2906		 * next hop in case of the overriding.
2907		 */
2908		*opt = *stickyopt;
2909		if (opt->ip6po_nextroute.ro_rt) {
2910			RT_LOCK(opt->ip6po_nextroute.ro_rt);
2911			RT_ADDREF(opt->ip6po_nextroute.ro_rt);
2912			RT_UNLOCK(opt->ip6po_nextroute.ro_rt);
2913		}
2914	} else
2915		init_ip6pktopts(opt);
2916	opt->needfree = needcopy;
2917
2918	/*
2919	 * XXX: Currently, we assume all the optional information is stored
2920	 * in a single mbuf.
2921	 */
2922	if (control->m_next)
2923		return (EINVAL);
2924
2925	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2926	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2927		int error;
2928
2929		if (control->m_len < CMSG_LEN(0))
2930			return (EINVAL);
2931
2932		cm = mtod(control, struct cmsghdr *);
2933		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2934			return (EINVAL);
2935		if (cm->cmsg_level != IPPROTO_IPV6)
2936			continue;
2937
2938		error = ip6_setpktoption(cm->cmsg_type, CMSG_DATA(cm),
2939		    cm->cmsg_len - CMSG_LEN(0), opt, priv, needcopy, 1, uproto);
2940		if (error)
2941			return (error);
2942	}
2943
2944	return (0);
2945}
2946
2947/*
2948 * Set a particular packet option, as a sticky option or an ancillary data
2949 * item.  "len" can be 0 only when it's a sticky option.
2950 * We have 4 cases of combination of "sticky" and "cmsg":
2951 * "sticky=0, cmsg=0": impossible
2952 * "sticky=0, cmsg=1": RFC2292 or rfc2292bis ancillary data
2953 * "sticky=1, cmsg=0": rfc2292bis socket option
2954 * "sticky=1, cmsg=1": RFC2292 socket option
2955 */
2956static int
2957ip6_setpktoption(optname, buf, len, opt, priv, sticky, cmsg, uproto)
2958	int optname, len, priv, sticky, cmsg, uproto;
2959	u_char *buf;
2960	struct ip6_pktopts *opt;
2961{
2962	int minmtupolicy, preftemp;
2963
2964	if (!sticky && !cmsg) {
2965#ifdef DIAGNOSTIC
2966		printf("ip6_setpktoption: impossible case\n");
2967#endif
2968		return (EINVAL);
2969	}
2970
2971	/*
2972	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2973	 * not be specified in the context of rfc2292bis.  Conversely,
2974	 * rfc2292bis types should not be specified in the context of RFC2292.
2975	 */
2976	if (!cmsg) {
2977		switch (optname) {
2978		case IPV6_2292PKTINFO:
2979		case IPV6_2292HOPLIMIT:
2980		case IPV6_2292NEXTHOP:
2981		case IPV6_2292HOPOPTS:
2982		case IPV6_2292DSTOPTS:
2983		case IPV6_2292RTHDR:
2984		case IPV6_2292PKTOPTIONS:
2985			return (ENOPROTOOPT);
2986		}
2987	}
2988	if (sticky && cmsg) {
2989		switch (optname) {
2990		case IPV6_PKTINFO:
2991		case IPV6_HOPLIMIT:
2992		case IPV6_NEXTHOP:
2993		case IPV6_HOPOPTS:
2994		case IPV6_DSTOPTS:
2995		case IPV6_RTHDRDSTOPTS:
2996		case IPV6_RTHDR:
2997		case IPV6_USE_MIN_MTU:
2998		case IPV6_DONTFRAG:
2999		case IPV6_TCLASS:
3000		case IPV6_PREFER_TEMPADDR: /* XXX: not an rfc2292bis option */
3001			return (ENOPROTOOPT);
3002		}
3003	}
3004
3005	switch (optname) {
3006	case IPV6_2292PKTINFO:
3007	case IPV6_PKTINFO:
3008	{
3009		struct ifnet *ifp = NULL;
3010		struct in6_pktinfo *pktinfo;
3011
3012		if (len != sizeof(struct in6_pktinfo))
3013			return (EINVAL);
3014
3015		pktinfo = (struct in6_pktinfo *)buf;
3016
3017		/*
3018		 * An application can clear any sticky IPV6_PKTINFO option by
3019		 * doing a "regular" setsockopt with ipi6_addr being
3020		 * in6addr_any and ipi6_ifindex being zero.
3021		 * [RFC 3542, Section 6]
3022		 */
3023		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3024		    pktinfo->ipi6_ifindex == 0 &&
3025		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3026			ip6_clearpktopts(opt, optname);
3027			break;
3028		}
3029
3030		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3031		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3032			return (EINVAL);
3033		}
3034
3035		/* validate the interface index if specified. */
3036		if (pktinfo->ipi6_ifindex > if_index ||
3037		    pktinfo->ipi6_ifindex < 0) {
3038			 return (ENXIO);
3039		}
3040		if (pktinfo->ipi6_ifindex) {
3041			ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
3042			if (ifp == NULL)
3043				return (ENXIO);
3044		}
3045
3046		/*
3047		 * We store the address anyway, and let in6_selectsrc()
3048		 * validate the specified address.  This is because ipi6_addr
3049		 * may not have enough information about its scope zone, and
3050		 * we may need additional information (such as outgoing
3051		 * interface or the scope zone of a destination address) to
3052		 * disambiguate the scope.
3053		 * XXX: the delay of the validation may confuse the
3054		 * application when it is used as a sticky option.
3055		 */
3056		if (sticky) {
3057			if (opt->ip6po_pktinfo == NULL) {
3058				opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
3059				    M_IP6OPT, M_WAITOK);
3060			}
3061			bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
3062		} else
3063			opt->ip6po_pktinfo = pktinfo;
3064		break;
3065	}
3066
3067	case IPV6_2292HOPLIMIT:
3068	case IPV6_HOPLIMIT:
3069	{
3070		int *hlimp;
3071
3072		/*
3073		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3074		 * to simplify the ordering among hoplimit options.
3075		 */
3076		if (optname == IPV6_HOPLIMIT && sticky)
3077			return (ENOPROTOOPT);
3078
3079		if (len != sizeof(int))
3080			return (EINVAL);
3081		hlimp = (int *)buf;
3082		if (*hlimp < -1 || *hlimp > 255)
3083			return (EINVAL);
3084
3085		opt->ip6po_hlim = *hlimp;
3086		break;
3087	}
3088
3089	case IPV6_TCLASS:
3090	{
3091		int tclass;
3092
3093		if (len != sizeof(int))
3094			return (EINVAL);
3095		tclass = *(int *)buf;
3096		if (tclass < -1 || tclass > 255)
3097			return (EINVAL);
3098
3099		opt->ip6po_tclass = tclass;
3100		break;
3101	}
3102
3103	case IPV6_2292NEXTHOP:
3104	case IPV6_NEXTHOP:
3105		if (!priv)
3106			return (EPERM);
3107
3108		if (len == 0) {	/* just remove the option */
3109			ip6_clearpktopts(opt, IPV6_NEXTHOP);
3110			break;
3111		}
3112
3113		/* check if cmsg_len is large enough for sa_len */
3114		if (len < sizeof(struct sockaddr) || len < *buf)
3115			return (EINVAL);
3116
3117		switch (((struct sockaddr *)buf)->sa_family) {
3118		case AF_INET6:
3119		{
3120			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3121#if 0
3122			int error;
3123#endif
3124
3125			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3126				return (EINVAL);
3127
3128			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3129			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3130				return (EINVAL);
3131			}
3132#if 0
3133			if ((error = scope6_check_id(sa6, ip6_use_defzone))
3134			    != 0) {
3135				return (error);
3136			}
3137#endif
3138			sa6->sin6_scope_id = 0; /* XXX */
3139			break;
3140		}
3141		case AF_LINK:	/* should eventually be supported */
3142		default:
3143			return (EAFNOSUPPORT);
3144		}
3145
3146		/* turn off the previous option, then set the new option. */
3147		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3148		if (sticky) {
3149			opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_WAITOK);
3150			bcopy(buf, opt->ip6po_nexthop, *buf);
3151		} else
3152			opt->ip6po_nexthop = (struct sockaddr *)buf;
3153		break;
3154
3155	case IPV6_2292HOPOPTS:
3156	case IPV6_HOPOPTS:
3157	{
3158		struct ip6_hbh *hbh;
3159		int hbhlen;
3160
3161		/*
3162		 * XXX: We don't allow a non-privileged user to set ANY HbH
3163		 * options, since per-option restriction has too much
3164		 * overhead.
3165		 */
3166		if (!priv)
3167			return (EPERM);
3168
3169		if (len == 0) {
3170			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3171			break;	/* just remove the option */
3172		}
3173
3174		/* message length validation */
3175		if (len < sizeof(struct ip6_hbh))
3176			return (EINVAL);
3177		hbh = (struct ip6_hbh *)buf;
3178		hbhlen = (hbh->ip6h_len + 1) << 3;
3179		if (len != hbhlen)
3180			return (EINVAL);
3181
3182		/* turn off the previous option, then set the new option. */
3183		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3184		if (sticky) {
3185			opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_WAITOK);
3186			bcopy(hbh, opt->ip6po_hbh, hbhlen);
3187		} else
3188			opt->ip6po_hbh = hbh;
3189
3190		break;
3191	}
3192
3193	case IPV6_2292DSTOPTS:
3194	case IPV6_DSTOPTS:
3195	case IPV6_RTHDRDSTOPTS:
3196	{
3197		struct ip6_dest *dest, **newdest = NULL;
3198		int destlen;
3199
3200		if (!priv)	/* XXX: see the comment for IPV6_HOPOPTS */
3201			return (EPERM);
3202
3203		if (len == 0) {
3204			ip6_clearpktopts(opt, optname);
3205			break;	/* just remove the option */
3206		}
3207
3208		/* message length validation */
3209		if (len < sizeof(struct ip6_dest))
3210			return (EINVAL);
3211		dest = (struct ip6_dest *)buf;
3212		destlen = (dest->ip6d_len + 1) << 3;
3213		if (len != destlen)
3214			return (EINVAL);
3215
3216		/*
3217		 * Determine the position that the destination options header
3218		 * should be inserted; before or after the routing header.
3219		 */
3220		switch (optname) {
3221		case IPV6_2292DSTOPTS:
3222			/*
3223			 * The old advacned API is ambiguous on this point.
3224			 * Our approach is to determine the position based
3225			 * according to the existence of a routing header.
3226			 * Note, however, that this depends on the order of the
3227			 * extension headers in the ancillary data; the 1st
3228			 * part of the destination options header must appear
3229			 * before the routing header in the ancillary data,
3230			 * too.
3231			 * RFC2292bis solved the ambiguity by introducing
3232			 * separate ancillary data or option types.
3233			 */
3234			if (opt->ip6po_rthdr == NULL)
3235				newdest = &opt->ip6po_dest1;
3236			else
3237				newdest = &opt->ip6po_dest2;
3238			break;
3239		case IPV6_RTHDRDSTOPTS:
3240			newdest = &opt->ip6po_dest1;
3241			break;
3242		case IPV6_DSTOPTS:
3243			newdest = &opt->ip6po_dest2;
3244			break;
3245		}
3246
3247		/* turn off the previous option, then set the new option. */
3248		ip6_clearpktopts(opt, optname);
3249		if (sticky) {
3250			*newdest = malloc(destlen, M_IP6OPT, M_WAITOK);
3251			bcopy(dest, *newdest, destlen);
3252		} else
3253			*newdest = dest;
3254
3255		break;
3256	}
3257
3258	case IPV6_2292RTHDR:
3259	case IPV6_RTHDR:
3260	{
3261		struct ip6_rthdr *rth;
3262		int rthlen;
3263
3264		if (len == 0) {
3265			ip6_clearpktopts(opt, IPV6_RTHDR);
3266			break;	/* just remove the option */
3267		}
3268
3269		/* message length validation */
3270		if (len < sizeof(struct ip6_rthdr))
3271			return (EINVAL);
3272		rth = (struct ip6_rthdr *)buf;
3273		rthlen = (rth->ip6r_len + 1) << 3;
3274		if (len != rthlen)
3275			return (EINVAL);
3276
3277		switch (rth->ip6r_type) {
3278		case IPV6_RTHDR_TYPE_0:
3279			if (rth->ip6r_len == 0)	/* must contain one addr */
3280				return (EINVAL);
3281			if (rth->ip6r_len % 2) /* length must be even */
3282				return (EINVAL);
3283			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3284				return (EINVAL);
3285			break;
3286		default:
3287			return (EINVAL);	/* not supported */
3288		}
3289
3290		/* turn off the previous option */
3291		ip6_clearpktopts(opt, IPV6_RTHDR);
3292		if (sticky) {
3293			opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_WAITOK);
3294			bcopy(rth, opt->ip6po_rthdr, rthlen);
3295		} else
3296			opt->ip6po_rthdr = rth;
3297
3298		break;
3299	}
3300
3301	case IPV6_USE_MIN_MTU:
3302		if (len != sizeof(int))
3303			return (EINVAL);
3304		minmtupolicy = *(int *)buf;
3305		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3306		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3307		    minmtupolicy != IP6PO_MINMTU_ALL) {
3308			return (EINVAL);
3309		}
3310		opt->ip6po_minmtu = minmtupolicy;
3311		break;
3312
3313	case IPV6_DONTFRAG:
3314		if (len != sizeof(int))
3315			return (EINVAL);
3316
3317		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3318			/*
3319			 * we ignore this option for TCP sockets.
3320			 * (rfc2292bis leaves this case unspecified.)
3321			 */
3322			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3323		} else
3324			opt->ip6po_flags |= IP6PO_DONTFRAG;
3325		break;
3326
3327	case IPV6_PREFER_TEMPADDR:
3328		if (len != sizeof(int))
3329			return (EINVAL);
3330		preftemp = *(int *)buf;
3331		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3332		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3333		    preftemp != IP6PO_TEMPADDR_PREFER) {
3334			return (EINVAL);
3335		}
3336		opt->ip6po_prefer_tempaddr = preftemp;
3337		break;
3338
3339	default:
3340		return (ENOPROTOOPT);
3341	} /* end of switch */
3342
3343	return (0);
3344}
3345
3346/*
3347 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3348 * packet to the input queue of a specified interface.  Note that this
3349 * calls the output routine of the loopback "driver", but with an interface
3350 * pointer that might NOT be &loif -- easier than replicating that code here.
3351 */
3352void
3353ip6_mloopback(ifp, m, dst)
3354	struct ifnet *ifp;
3355	struct mbuf *m;
3356	struct sockaddr_in6 *dst;
3357{
3358	struct mbuf *copym;
3359	struct ip6_hdr *ip6;
3360
3361	copym = m_copy(m, 0, M_COPYALL);
3362	if (copym == NULL)
3363		return;
3364
3365	/*
3366	 * Make sure to deep-copy IPv6 header portion in case the data
3367	 * is in an mbuf cluster, so that we can safely override the IPv6
3368	 * header portion later.
3369	 */
3370	if ((copym->m_flags & M_EXT) != 0 ||
3371	    copym->m_len < sizeof(struct ip6_hdr)) {
3372		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3373		if (copym == NULL)
3374			return;
3375	}
3376
3377#ifdef DIAGNOSTIC
3378	if (copym->m_len < sizeof(*ip6)) {
3379		m_freem(copym);
3380		return;
3381	}
3382#endif
3383
3384	ip6 = mtod(copym, struct ip6_hdr *);
3385	/*
3386	 * clear embedded scope identifiers if necessary.
3387	 * in6_clearscope will touch the addresses only when necessary.
3388	 */
3389	in6_clearscope(&ip6->ip6_src);
3390	in6_clearscope(&ip6->ip6_dst);
3391
3392	(void)if_simloop(ifp, copym, dst->sin6_family, NULL);
3393}
3394
3395/*
3396 * Chop IPv6 header off from the payload.
3397 */
3398static int
3399ip6_splithdr(m, exthdrs)
3400	struct mbuf *m;
3401	struct ip6_exthdrs *exthdrs;
3402{
3403	struct mbuf *mh;
3404	struct ip6_hdr *ip6;
3405
3406	ip6 = mtod(m, struct ip6_hdr *);
3407	if (m->m_len > sizeof(*ip6)) {
3408		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3409		if (mh == 0) {
3410			m_freem(m);
3411			return ENOBUFS;
3412		}
3413		M_MOVE_PKTHDR(mh, m);
3414		MH_ALIGN(mh, sizeof(*ip6));
3415		m->m_len -= sizeof(*ip6);
3416		m->m_data += sizeof(*ip6);
3417		mh->m_next = m;
3418		m = mh;
3419		m->m_len = sizeof(*ip6);
3420		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3421	}
3422	exthdrs->ip6e_ip6 = m;
3423	return 0;
3424}
3425
3426/*
3427 * Compute IPv6 extension header length.
3428 */
3429int
3430ip6_optlen(in6p)
3431	struct in6pcb *in6p;
3432{
3433	int len;
3434
3435	if (!in6p->in6p_outputopts)
3436		return 0;
3437
3438	len = 0;
3439#define elen(x) \
3440    (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3441
3442	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3443	if (in6p->in6p_outputopts->ip6po_rthdr)
3444		/* dest1 is valid with rthdr only */
3445		len += elen(in6p->in6p_outputopts->ip6po_dest1);
3446	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3447	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3448	return len;
3449#undef elen
3450}
3451