ip6_output.c revision 121478
1/*	$FreeBSD: head/sys/netinet6/ip6_output.c 121478 2003-10-24 20:37:05Z ume $	*/
2/*	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1988, 1990, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 *    must display the following acknowledgement:
47 *	This product includes software developed by the University of
48 *	California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 *    may be used to endorse or promote products derived from this software
51 *    without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
66 */
67
68#include "opt_ip6fw.h"
69#include "opt_inet.h"
70#include "opt_inet6.h"
71#include "opt_ipsec.h"
72#include "opt_pfil_hooks.h"
73#include "opt_random_ip_id.h"
74
75#include <sys/param.h>
76#include <sys/malloc.h>
77#include <sys/mbuf.h>
78#include <sys/proc.h>
79#include <sys/errno.h>
80#include <sys/protosw.h>
81#include <sys/socket.h>
82#include <sys/socketvar.h>
83#include <sys/systm.h>
84#include <sys/kernel.h>
85
86#include <net/if.h>
87#include <net/route.h>
88#ifdef PFIL_HOOKS
89#include <net/pfil.h>
90#endif
91
92#include <netinet/in.h>
93#include <netinet/in_var.h>
94#include <netinet6/in6_var.h>
95#include <netinet/ip6.h>
96#include <netinet/icmp6.h>
97#include <netinet6/ip6_var.h>
98#include <netinet/in_pcb.h>
99#include <netinet6/nd6.h>
100
101#ifdef IPSEC
102#include <netinet6/ipsec.h>
103#ifdef INET6
104#include <netinet6/ipsec6.h>
105#endif
106#include <netkey/key.h>
107#endif /* IPSEC */
108
109#ifdef FAST_IPSEC
110#include <netipsec/ipsec.h>
111#include <netipsec/ipsec6.h>
112#include <netipsec/key.h>
113#endif /* FAST_IPSEC */
114
115#include <netinet6/ip6_fw.h>
116
117#include <net/net_osdep.h>
118
119#include <netinet6/ip6protosw.h>
120
/*
 * Define the file-local malloc type M_IPMOPTS (short name "ip6_moptions",
 * description "internet multicast options").
 * NOTE(review): presumably used by the multicast option code
 * (ip6_setmoptions/ip6_getmoptions), which is not visible here -- confirm.
 */
121static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
122
/*
 * Per-packet bookkeeping used by ip6_output() while it assembles the
 * outgoing header chain.  Each member is the mbuf holding one header;
 * ip6_output() zeroes the structure first, and a NULL member means that
 * header is not present.  The headers are chained onto the packet in the
 * order: IPv6, hbh, dest1, rthdr, [IPsec headers], dest2, payload.
 */
123struct ip6_exthdrs {
124	struct mbuf *ip6e_ip6;	/* IPv6 header; ip6_output() takes it as "m" after ip6_splithdr() */
125	struct mbuf *ip6e_hbh;	/* Hop-by-Hop options header (copied from ip6po_hbh) */
126	struct mbuf *ip6e_dest1;	/* Destination options header, 1st part (placed before rthdr) */
127	struct mbuf *ip6e_rthdr;	/* Routing header (copied from ip6po_rthdr) */
128	struct mbuf *ip6e_dest2;	/* Destination options header, 2nd part (placed just before payload) */
129};
130
/*
 * Prototypes for the file-local (static) helpers.  All of them return int;
 * the ones called from ip6_output() are treated there as returning 0 on
 * success and a non-zero error code on failure.
 */

/*
 * Packet/socket option helpers.
 * NOTE(review): descriptions inferred from names only; their bodies are
 * outside the part of the file reviewed here -- confirm before relying on it.
 */
131static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
132			   int, int));
133static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
134	struct socket *, struct sockopt *));
135static int ip6_getpcbopt __P((struct ip6_pktopts *, int, struct sockopt *));
136static int ip6_setpktoption __P((int, u_char *, int, struct ip6_pktopts *, int,
137	int, int, int));
138
/* Multicast option set/get helpers (NOTE(review): inferred from names -- confirm). */
139static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
140static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
/*
 * ip6_copyexthdr(mp, hdr, hlen): called from MAKE_EXTHDR in ip6_output() with
 * the address of an ip6_exthdrs member, the option header to copy and its
 * length in bytes ((ip6e_len + 1) << 3).
 */
141static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));
/* NOTE(review): presumably inserts a fragment header; no call site visible here -- confirm. */
142static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
143	struct ip6_frag **));
/* Called by ip6_output() with the payload length when it exceeds IPV6_MAXPACKET. */
144static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));
/*
 * Called by ip6_output() before extension or IPsec headers are inserted;
 * on success the caller continues with exthdrs->ip6e_ip6 as the packet head.
 */
145static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));
/*
 * Called by ip6_output() as (ro_pmtu, ro, ifp, &finaldst, &mtu, &alwaysfrag):
 * the last two arguments are outputs for the path MTU and the
 * "always attach a fragment header" flag.
 */
146static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
147	struct ifnet *, struct in6_addr *, u_long *, int *));
148
149
150/*
151 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
152 * header (with pri, len, nxt, hlim, src, dst).
153 * This function may modify ver and hlim only.
154 * The mbuf chain containing the packet will be freed.
155 * The mbuf opt, if present, will not be freed.
156 *
157 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
158 * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
159 * which is rt_rmx.rmx_mtu.
160 */
161int
162ip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
163	struct mbuf *m0;
164	struct ip6_pktopts *opt;
165	struct route_in6 *ro;
166	int flags;
167	struct ip6_moptions *im6o;
168	struct ifnet **ifpp;		/* XXX: just for statistics */
169	struct inpcb *inp;
170{
171	struct ip6_hdr *ip6, *mhip6;
172	struct ifnet *ifp, *origifp;
173	struct mbuf *m = m0;
174	int hlen, tlen, len, off;
175	struct route_in6 ip6route;
176	struct sockaddr_in6 *dst;
177	int error = 0;
178	struct in6_ifaddr *ia = NULL;
179	u_long mtu;
180	int alwaysfrag, dontfrag;
181	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
182	struct ip6_exthdrs exthdrs;
183	struct in6_addr finaldst;
184	struct route_in6 *ro_pmtu = NULL;
185	int hdrsplit = 0;
186	int needipsec = 0;
187#ifdef IPSEC
188	int needipsectun = 0;
189	struct secpolicy *sp = NULL;
190
191	ip6 = mtod(m, struct ip6_hdr *);
192#endif /* IPSEC */
193#ifdef FAST_IPSEC
194	int needipsectun = 0;
195	struct secpolicy *sp = NULL;
196
197	ip6 = mtod(m, struct ip6_hdr *);
198#endif /* FAST_IPSEC */
199
200#define MAKE_EXTHDR(hp, mp)						\
201    do {								\
202	if (hp) {							\
203		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
204		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
205		    ((eh)->ip6e_len + 1) << 3);				\
206		if (error)						\
207			goto freehdrs;					\
208	}								\
209    } while (/*CONSTCOND*/ 0)
210
211	bzero(&exthdrs, sizeof(exthdrs));
212
213	if (opt) {
214		/* Hop-by-Hop options header */
215		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
216		/* Destination options header(1st part) */
217		MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
218		/* Routing header */
219		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
220		/* Destination options header(2nd part) */
221		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
222	}
223
224#ifdef IPSEC
225	/* get a security policy for this packet */
226	if (inp == NULL)
227		sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
228	else
229		sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
230
231	if (sp == NULL) {
232		ipsec6stat.out_inval++;
233		goto freehdrs;
234	}
235
236	error = 0;
237
238	/* check policy */
239	switch (sp->policy) {
240	case IPSEC_POLICY_DISCARD:
241		/*
242		 * This packet is just discarded.
243		 */
244		ipsec6stat.out_polvio++;
245		goto freehdrs;
246
247	case IPSEC_POLICY_BYPASS:
248	case IPSEC_POLICY_NONE:
249		/* no need to do IPsec. */
250		needipsec = 0;
251		break;
252
253	case IPSEC_POLICY_IPSEC:
254		if (sp->req == NULL) {
255			/* acquire a policy */
256			error = key_spdacquire(sp);
257			goto freehdrs;
258		}
259		needipsec = 1;
260		break;
261
262	case IPSEC_POLICY_ENTRUST:
263	default:
264		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
265	}
266#endif /* IPSEC */
267#ifdef FAST_IPSEC
268	/* get a security policy for this packet */
269	if (inp == NULL)
270		sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
271	else
272		sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
273
274	if (sp == NULL) {
275		newipsecstat.ips_out_inval++;
276		goto freehdrs;
277	}
278
279	error = 0;
280
281	/* check policy */
282	switch (sp->policy) {
283	case IPSEC_POLICY_DISCARD:
284		/*
285		 * This packet is just discarded.
286		 */
287		newipsecstat.ips_out_polvio++;
288		goto freehdrs;
289
290	case IPSEC_POLICY_BYPASS:
291	case IPSEC_POLICY_NONE:
292		/* no need to do IPsec. */
293		needipsec = 0;
294		break;
295
296	case IPSEC_POLICY_IPSEC:
297		if (sp->req == NULL) {
298			/* acquire a policy */
299			error = key_spdacquire(sp);
300			goto freehdrs;
301		}
302		needipsec = 1;
303		break;
304
305	case IPSEC_POLICY_ENTRUST:
306	default:
307		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
308	}
309#endif /* FAST_IPSEC */
310
311	/*
312	 * Calculate the total length of the extension header chain.
313	 * Keep the length of the unfragmentable part for fragmentation.
314	 */
315	optlen = 0;
316	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
317	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
318	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
319	unfragpartlen = optlen + sizeof(struct ip6_hdr);
320	/* NOTE: we don't add AH/ESP length here. do that later. */
321	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
322
323	/*
324	 * If we need IPsec, or there is at least one extension header,
325	 * separate IP6 header from the payload.
326	 */
327	if ((needipsec || optlen) && !hdrsplit) {
328		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
329			m = NULL;
330			goto freehdrs;
331		}
332		m = exthdrs.ip6e_ip6;
333		hdrsplit++;
334	}
335
336	/* adjust pointer */
337	ip6 = mtod(m, struct ip6_hdr *);
338
339	/* adjust mbuf packet header length */
340	m->m_pkthdr.len += optlen;
341	plen = m->m_pkthdr.len - sizeof(*ip6);
342
343	/* If this is a jumbo payload, insert a jumbo payload option. */
344	if (plen > IPV6_MAXPACKET) {
345		if (!hdrsplit) {
346			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
347				m = NULL;
348				goto freehdrs;
349			}
350			m = exthdrs.ip6e_ip6;
351			hdrsplit++;
352		}
353		/* adjust pointer */
354		ip6 = mtod(m, struct ip6_hdr *);
355		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
356			goto freehdrs;
357		ip6->ip6_plen = 0;
358	} else
359		ip6->ip6_plen = htons(plen);
360
361	/*
362	 * Concatenate headers and fill in next header fields.
363	 * Here we have, on "m"
364	 *	IPv6 payload
365	 * and we insert headers accordingly.  Finally, we should be getting:
366	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
367	 *
368	 * during the header composing process, "m" points to IPv6 header.
369	 * "mprev" points to an extension header prior to esp.
370	 */
371	{
372		u_char *nexthdrp = &ip6->ip6_nxt;
373		struct mbuf *mprev = m;
374
375		/*
376		 * we treat dest2 specially.  this makes IPsec processing
377		 * much easier.  the goal here is to make mprev point the
378		 * mbuf prior to dest2.
379		 *
380		 * result: IPv6 dest2 payload
381		 * m and mprev will point to IPv6 header.
382		 */
383		if (exthdrs.ip6e_dest2) {
384			if (!hdrsplit)
385				panic("assumption failed: hdr not split");
386			exthdrs.ip6e_dest2->m_next = m->m_next;
387			m->m_next = exthdrs.ip6e_dest2;
388			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
389			ip6->ip6_nxt = IPPROTO_DSTOPTS;
390		}
391
392#define MAKE_CHAIN(m, mp, p, i)\
393    do {\
394	if (m) {\
395		if (!hdrsplit) \
396			panic("assumption failed: hdr not split"); \
397		*mtod((m), u_char *) = *(p);\
398		*(p) = (i);\
399		p = mtod((m), u_char *);\
400		(m)->m_next = (mp)->m_next;\
401		(mp)->m_next = (m);\
402		(mp) = (m);\
403	}\
404    } while (/*CONSTCOND*/ 0)
405		/*
406		 * result: IPv6 hbh dest1 rthdr dest2 payload
407		 * m will point to IPv6 header.  mprev will point to the
408		 * extension header prior to dest2 (rthdr in the above case).
409		 */
410		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
411		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
412		    IPPROTO_DSTOPTS);
413		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
414		    IPPROTO_ROUTING);
415
416#if defined(IPSEC) || defined(FAST_IPSEC)
417		if (!needipsec)
418			goto skip_ipsec2;
419
420		/*
421		 * pointers after IPsec headers are not valid any more.
422		 * other pointers need a great care too.
423		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
424		 */
425		exthdrs.ip6e_dest2 = NULL;
426
427	    {
428		struct ip6_rthdr *rh = NULL;
429		int segleft_org = 0;
430		struct ipsec_output_state state;
431
432		if (exthdrs.ip6e_rthdr) {
433			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
434			segleft_org = rh->ip6r_segleft;
435			rh->ip6r_segleft = 0;
436		}
437
438		bzero(&state, sizeof(state));
439		state.m = m;
440		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
441		    &needipsectun);
442		m = state.m;
443		if (error) {
444			/* mbuf is already reclaimed in ipsec6_output_trans. */
445			m = NULL;
446			switch (error) {
447			case EHOSTUNREACH:
448			case ENETUNREACH:
449			case EMSGSIZE:
450			case ENOBUFS:
451			case ENOMEM:
452				break;
453			default:
454				printf("ip6_output (ipsec): error code %d\n", error);
455				/* FALLTHROUGH */
456			case ENOENT:
457				/* don't show these error codes to the user */
458				error = 0;
459				break;
460			}
461			goto bad;
462		}
463		if (exthdrs.ip6e_rthdr) {
464			/* ah6_output doesn't modify mbuf chain */
465			rh->ip6r_segleft = segleft_org;
466		}
467	    }
468skip_ipsec2:;
469#endif
470	}
471
472	/*
473	 * If there is a routing header, replace the destination address field
474	 * with the first hop of the routing header.
475	 */
476	if (exthdrs.ip6e_rthdr) {
477		struct ip6_rthdr *rh =
478			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
479						  struct ip6_rthdr *));
480		struct ip6_rthdr0 *rh0;
481		struct in6_addr *addrs;
482
483		finaldst = ip6->ip6_dst;
484		switch (rh->ip6r_type) {
485		case IPV6_RTHDR_TYPE_0:
486			 rh0 = (struct ip6_rthdr0 *)rh;
487			 addrs = (struct in6_addr *)(rh + 1);
488
489			 ip6->ip6_dst = *addrs;
490			 bcopy((caddr_t)(addrs + 1), (caddr_t)addrs,
491			       sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1)
492				 );
493			 *(addrs + rh0->ip6r0_segleft - 1) = finaldst;
494			 break;
495		default:	/* is it possible? */
496			 error = EINVAL;
497			 goto bad;
498		}
499	}
500
501	/* Source address validation */
502	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
503	    (flags & IPV6_DADOUTPUT) == 0) {
504		error = EOPNOTSUPP;
505		ip6stat.ip6s_badscope++;
506		goto bad;
507	}
508	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
509		error = EOPNOTSUPP;
510		ip6stat.ip6s_badscope++;
511		goto bad;
512	}
513
514	ip6stat.ip6s_localout++;
515
516	/*
517	 * Route packet.
518	 */
519	if (ro == 0) {
520		ro = &ip6route;
521		bzero((caddr_t)ro, sizeof(*ro));
522	}
523	ro_pmtu = ro;
524	if (opt && opt->ip6po_rthdr)
525		ro = &opt->ip6po_route;
526	dst = (struct sockaddr_in6 *)&ro->ro_dst;
527
528	/*
529	 * If there is a cached route,
530	 * check that it is to the same destination
531	 * and is still up. If not, free it and try again.
532	 */
533	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
534			 dst->sin6_family != AF_INET6 ||
535			 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
536		RTFREE(ro->ro_rt);
537		ro->ro_rt = (struct rtentry *)0;
538	}
539	if (ro->ro_rt == 0) {
540		bzero(dst, sizeof(*dst));
541		dst->sin6_family = AF_INET6;
542		dst->sin6_len = sizeof(struct sockaddr_in6);
543		dst->sin6_addr = ip6->ip6_dst;
544	}
545
546 	/*
547	 * if specified, try to fill in the traffic class field.
548	 * do not override if a non-zero value is already set.
549	 * we check the diffserv field and the ecn field separately.
550	 */
551	if (opt && opt->ip6po_tclass >= 0) {
552		int mask = 0;
553
554		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
555			mask |= 0xfc;
556		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
557			mask |= 0x03;
558		if (mask != 0)
559			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
560	}
561
562	/* fill in or override the hop limit field, if necessary. */
563	if (opt && opt->ip6po_hlim != -1)
564		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
565	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
566		if (im6o != NULL)
567			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
568		else
569			ip6->ip6_hlim = ip6_defmcasthlim;
570	}
571
572#if defined(IPSEC) || defined(FAST_IPSEC)
573	if (needipsec && needipsectun) {
574		struct ipsec_output_state state;
575
576		/*
577		 * All the extension headers will become inaccessible
578		 * (since they can be encrypted).
579		 * Don't panic, we need no more updates to extension headers
580		 * on inner IPv6 packet (since they are now encapsulated).
581		 *
582		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
583		 */
584		bzero(&exthdrs, sizeof(exthdrs));
585		exthdrs.ip6e_ip6 = m;
586
587		bzero(&state, sizeof(state));
588		state.m = m;
589		state.ro = (struct route *)ro;
590		state.dst = (struct sockaddr *)dst;
591
592		error = ipsec6_output_tunnel(&state, sp, flags);
593
594		m = state.m;
595		ro = (struct route_in6 *)state.ro;
596		dst = (struct sockaddr_in6 *)state.dst;
597		if (error) {
598			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
599			m0 = m = NULL;
600			m = NULL;
601			switch (error) {
602			case EHOSTUNREACH:
603			case ENETUNREACH:
604			case EMSGSIZE:
605			case ENOBUFS:
606			case ENOMEM:
607				break;
608			default:
609				printf("ip6_output (ipsec): error code %d\n", error);
610				/* FALLTHROUGH */
611			case ENOENT:
612				/* don't show these error codes to the user */
613				error = 0;
614				break;
615			}
616			goto bad;
617		}
618
619		exthdrs.ip6e_ip6 = m;
620	}
621#endif /* IPSEC */
622
623	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
624		/* Unicast */
625
626#define ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
627#define sin6tosa(sin6)	((struct sockaddr *)(sin6))
628		/* xxx
629		 * interface selection comes here
630		 * if an interface is specified from an upper layer,
631		 * ifp must point it.
632		 */
633		if (ro->ro_rt == 0) {
634			/*
635			 * non-bsdi always clone routes, if parent is
636			 * PRF_CLONING.
637			 */
638			rtalloc((struct route *)ro);
639		}
640		if (ro->ro_rt == 0) {
641			ip6stat.ip6s_noroute++;
642			error = EHOSTUNREACH;
643			/* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
644			goto bad;
645		}
646		/* XXX rt not locked */
647		ia = ifatoia6(ro->ro_rt->rt_ifa);
648		ifp = ro->ro_rt->rt_ifp;
649		ro->ro_rt->rt_use++;
650		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
651			dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
652		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
653
654		in6_ifstat_inc(ifp, ifs6_out_request);
655
656		/*
657		 * Check if the outgoing interface conflicts with
658		 * the interface specified by ifi6_ifindex (if specified).
659		 * Note that loopback interface is always okay.
660		 * (this may happen when we are sending a packet to one of
661		 *  our own addresses.)
662		 */
663		if (opt && opt->ip6po_pktinfo
664		 && opt->ip6po_pktinfo->ipi6_ifindex) {
665			if (!(ifp->if_flags & IFF_LOOPBACK)
666			 && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
667				ip6stat.ip6s_noroute++;
668				in6_ifstat_inc(ifp, ifs6_out_discard);
669				error = EHOSTUNREACH;
670				goto bad;
671			}
672		}
673
674		if (opt && opt->ip6po_hlim != -1)
675			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
676	} else {
677		/* Multicast */
678		struct	in6_multi *in6m;
679
680		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
681
682		/*
683		 * See if the caller provided any multicast options
684		 */
685		ifp = NULL;
686		if (im6o != NULL) {
687			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
688			if (im6o->im6o_multicast_ifp != NULL)
689				ifp = im6o->im6o_multicast_ifp;
690		} else
691			ip6->ip6_hlim = ip6_defmcasthlim;
692
693		/*
694		 * See if the caller provided the outgoing interface
695		 * as an ancillary data.
696		 * Boundary check for ifindex is assumed to be already done.
697		 */
698		if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex)
699			ifp = ifnet_byindex(opt->ip6po_pktinfo->ipi6_ifindex);
700
701		/*
702		 * If the destination is a node-local scope multicast,
703		 * the packet should be loop-backed only.
704		 */
705		if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
706			/*
707			 * If the outgoing interface is already specified,
708			 * it should be a loopback interface.
709			 */
710			if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
711				ip6stat.ip6s_badscope++;
712				error = ENETUNREACH; /* XXX: better error? */
713				/* XXX correct ifp? */
714				in6_ifstat_inc(ifp, ifs6_out_discard);
715				goto bad;
716			} else {
717				ifp = &loif[0];
718			}
719		}
720
721		if (opt && opt->ip6po_hlim != -1)
722			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
723
724		/*
725		 * If caller did not provide an interface lookup a
726		 * default in the routing table.  This is either a
727		 * default for the specified group (i.e. a host
728		 * route), or a multicast default (a route for the
729		 * ``net'' ff00::/8).
730		 */
731		if (ifp == NULL) {
732			if (ro->ro_rt == 0)
733				ro->ro_rt = rtalloc1((struct sockaddr *)
734						&ro->ro_dst, 0, 0UL);
735			else
736				RT_LOCK(ro->ro_rt);
737			if (ro->ro_rt == 0) {
738				ip6stat.ip6s_noroute++;
739				error = EHOSTUNREACH;
740				/* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
741				goto bad;
742			}
743			ia = ifatoia6(ro->ro_rt->rt_ifa);
744			ifp = ro->ro_rt->rt_ifp;
745			ro->ro_rt->rt_use++;
746			RT_UNLOCK(ro->ro_rt);
747		}
748
749		if ((flags & IPV6_FORWARDING) == 0)
750			in6_ifstat_inc(ifp, ifs6_out_request);
751		in6_ifstat_inc(ifp, ifs6_out_mcast);
752
753		/*
754		 * Confirm that the outgoing interface supports multicast.
755		 */
756		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
757			ip6stat.ip6s_noroute++;
758			in6_ifstat_inc(ifp, ifs6_out_discard);
759			error = ENETUNREACH;
760			goto bad;
761		}
762		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
763		if (in6m != NULL &&
764		   (im6o == NULL || im6o->im6o_multicast_loop)) {
765			/*
766			 * If we belong to the destination multicast group
767			 * on the outgoing interface, and the caller did not
768			 * forbid loopback, loop back a copy.
769			 */
770			ip6_mloopback(ifp, m, dst);
771		} else {
772			/*
773			 * If we are acting as a multicast router, perform
774			 * multicast forwarding as if the packet had just
775			 * arrived on the interface to which we are about
776			 * to send.  The multicast forwarding function
777			 * recursively calls this function, using the
778			 * IPV6_FORWARDING flag to prevent infinite recursion.
779			 *
780			 * Multicasts that are looped back by ip6_mloopback(),
781			 * above, will be forwarded by the ip6_input() routine,
782			 * if necessary.
783			 */
784			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
785				if (ip6_mforward(ip6, ifp, m) != 0) {
786					m_freem(m);
787					goto done;
788				}
789			}
790		}
791		/*
792		 * Multicasts with a hoplimit of zero may be looped back,
793		 * above, but must not be transmitted on a network.
794		 * Also, multicasts addressed to the loopback interface
795		 * are not sent -- the above call to ip6_mloopback() will
796		 * loop back a copy if this host actually belongs to the
797		 * destination group on the loopback interface.
798		 */
799		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
800		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
801			m_freem(m);
802			goto done;
803		}
804	}
805
806	/*
807	 * Fill the outgoing interface to tell the upper layer
808	 * to increment per-interface statistics.
809	 */
810	if (ifpp)
811		*ifpp = ifp;
812
813	/* Determine path MTU. */
814	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
815	    &alwaysfrag)) != 0)
816		goto bad;
817
818	/*
819	 * advanced API (IPV6_USE_MIN_MTU) overrides mtu setting
820	 */
821	if ((flags & IPV6_MINMTU) != 0 && mtu > IPV6_MMTU)
822		mtu = IPV6_MMTU;
823
824	/* Fake scoped addresses */
825	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
826		/*
827		 * If source or destination address is a scoped address, and
828		 * the packet is going to be sent to a loopback interface,
829		 * we should keep the original interface.
830		 */
831
832		/*
833		 * XXX: this is a very experimental and temporary solution.
834		 * We eventually have sockaddr_in6 and use the sin6_scope_id
835		 * field of the structure here.
836		 * We rely on the consistency between two scope zone ids
837		 * of source and destination, which should already be assured.
838		 * Larger scopes than link will be supported in the future.
839		 */
840		origifp = NULL;
841		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
842			origifp = ifnet_byindex(ntohs(ip6->ip6_src.s6_addr16[1]));
843		else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
844			origifp = ifnet_byindex(ntohs(ip6->ip6_dst.s6_addr16[1]));
845		/*
846		 * XXX: origifp can be NULL even in those two cases above.
847		 * For example, if we remove the (only) link-local address
848		 * from the loopback interface, and try to send a link-local
849		 * address without link-id information.  Then the source
850		 * address is ::1, and the destination address is the
851		 * link-local address with its s6_addr16[1] being zero.
852		 * What is worse, if the packet goes to the loopback interface
853		 * by a default rejected route, the null pointer would be
854		 * passed to looutput, and the kernel would hang.
855		 * The following last resort would prevent such disaster.
856		 */
857		if (origifp == NULL)
858			origifp = ifp;
859	}
860	else
861		origifp = ifp;
862	/*
863	 * clear embedded scope identifiers if necessary.
864	 * in6_clearscope will touch the addresses only when necessary.
865	 */
866	in6_clearscope(&ip6->ip6_src);
867	in6_clearscope(&ip6->ip6_dst);
868
869	/*
870	 * Check with the firewall...
871	 */
872	if (ip6_fw_enable && ip6_fw_chk_ptr) {
873		u_short port = 0;
874		m->m_pkthdr.rcvif = NULL;	/* XXX */
875		/* If ipfw says divert, we have to just drop packet */
876		if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
877			m_freem(m);
878			goto done;
879		}
880		if (!m) {
881			error = EACCES;
882			goto done;
883		}
884	}
885
886	/*
887	 * If the outgoing packet contains a hop-by-hop options header,
888	 * it must be examined and processed even by the source node.
889	 * (RFC 2460, section 4.)
890	 */
891	if (exthdrs.ip6e_hbh) {
892		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
893		u_int32_t dummy1; /* XXX unused */
894		u_int32_t dummy2; /* XXX unused */
895
896#ifdef DIAGNOSTIC
897		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
898			panic("ip6e_hbh is not continuous");
899#endif
900		/*
901		 *  XXX: if we have to send an ICMPv6 error to the sender,
902		 *       we need the M_LOOP flag since icmp6_error() expects
903		 *       the IPv6 and the hop-by-hop options header are
904		 *       continuous unless the flag is set.
905		 */
906		m->m_flags |= M_LOOP;
907		m->m_pkthdr.rcvif = ifp;
908		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
909		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
910		    &dummy1, &dummy2) < 0) {
911			/* m was already freed at this point */
912			error = EINVAL;/* better error? */
913			goto done;
914		}
915		m->m_flags &= ~M_LOOP; /* XXX */
916		m->m_pkthdr.rcvif = NULL;
917	}
918
919#ifdef PFIL_HOOKS
920	/*
921	 * Run through list of hooks for output packets.
922	 */
923	error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT);
924	if (error != 0 || m == NULL)
925		goto done;
926	ip6 = mtod(m, struct ip6_hdr *);
927#endif /* PFIL_HOOKS */
928
929	/*
930	 * Send the packet to the outgoing interface.
931	 * If necessary, do IPv6 fragmentation before sending.
932	 *
933	 * the logic here is rather complex:
934	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
935	 * 1-a:	send as is if tlen <= path mtu
936	 * 1-b:	fragment if tlen > path mtu
937	 *
938	 * 2: if user asks us not to fragment (dontfrag == 1)
939	 * 2-a:	send as is if tlen <= interface mtu
940	 * 2-b:	error if tlen > interface mtu
941	 *
942	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
943	 *	always fragment
944	 *
945	 * 4: if dontfrag == 1 && alwaysfrag == 1
946	 *	error, as we cannot handle this conflicting request
947	 */
948	tlen = m->m_pkthdr.len;
949
950	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
951		dontfrag = 1;
952	else
953		dontfrag = 0;
954	if (dontfrag && alwaysfrag) {	/* case 4 */
955		/* conflicting request - can't transmit */
956		error = EMSGSIZE;
957		goto bad;
958	}
959	if (dontfrag && tlen > IN6_LINKMTU(ifp)) {	/* case 2-b */
960		/*
961		 * Even if the DONTFRAG option is specified, we cannot send the
962		 * packet when the data length is larger than the MTU of the
963		 * outgoing interface.
964		 * Notify the error by sending IPV6_PATHMTU ancillary data as
965		 * well as returning an error code (the latter is not described
966		 * in the API spec.)
967		 */
968		u_int32_t mtu32;
969		struct ip6ctlparam ip6cp;
970
971		mtu32 = (u_int32_t)mtu;
972		bzero(&ip6cp, sizeof(ip6cp));
973		ip6cp.ip6c_cmdarg = (void *)&mtu32;
974		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
975		    (void *)&ip6cp);
976
977		error = EMSGSIZE;
978		goto bad;
979	}
980
981	/*
982	 * transmit packet without fragmentation
983	 */
984	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
985		struct in6_ifaddr *ia6;
986
987		ip6 = mtod(m, struct ip6_hdr *);
988		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
989		if (ia6) {
990			/* Record statistics for this interface address. */
991			ia6->ia_ifa.if_opackets++;
992			ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
993		}
994#ifdef IPSEC
995		/* clean ipsec history once it goes out of the node */
996		ipsec_delaux(m);
997#endif
998		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
999		goto done;
1000	}
1001
1002	/*
1003	 * try to fragment the packet.  case 1-b and 3
1004	 */
1005	if (mtu < IPV6_MMTU) {
1006		/* path MTU cannot be less than IPV6_MMTU */
1007		error = EMSGSIZE;
1008		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1009		goto bad;
1010	} else if (ip6->ip6_plen == 0) {
1011		/* jumbo payload cannot be fragmented */
1012		error = EMSGSIZE;
1013		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1014		goto bad;
1015	} else {
1016		struct mbuf **mnext, *m_frgpart;
1017		struct ip6_frag *ip6f;
1018#ifdef RANDOM_IP_ID
1019		u_int32_t id = htonl(ip6_randomid());
1020#else
1021		u_int32_t id = htonl(ip6_id++);
1022#endif
1023		u_char nextproto;
1024
1025		/*
1026		 * Too large for the destination or interface;
1027		 * fragment if possible.
1028		 * Must be able to put at least 8 bytes per fragment.
1029		 */
1030		hlen = unfragpartlen;
1031		if (mtu > IPV6_MAXPACKET)
1032			mtu = IPV6_MAXPACKET;
1033
1034		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1035		if (len < 8) {
1036			error = EMSGSIZE;
1037			in6_ifstat_inc(ifp, ifs6_out_fragfail);
1038			goto bad;
1039		}
1040
1041		mnext = &m->m_nextpkt;
1042
1043		/*
1044		 * Change the next header field of the last header in the
1045		 * unfragmentable part.
1046		 */
1047		if (exthdrs.ip6e_rthdr) {
1048			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1049			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1050		} else if (exthdrs.ip6e_dest1) {
1051			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1052			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1053		} else if (exthdrs.ip6e_hbh) {
1054			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1055			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1056		} else {
1057			nextproto = ip6->ip6_nxt;
1058			ip6->ip6_nxt = IPPROTO_FRAGMENT;
1059		}
1060
1061		/*
1062		 * Loop through length of segment after first fragment,
1063		 * make new header and copy data of each part and link onto
1064		 * chain.
1065		 */
1066		m0 = m;
1067		for (off = hlen; off < tlen; off += len) {
1068			MGETHDR(m, M_DONTWAIT, MT_HEADER);
1069			if (!m) {
1070				error = ENOBUFS;
1071				ip6stat.ip6s_odropped++;
1072				goto sendorfree;
1073			}
1074			m->m_pkthdr.rcvif = NULL;
1075			m->m_flags = m0->m_flags & M_COPYFLAGS;
1076			*mnext = m;
1077			mnext = &m->m_nextpkt;
1078			m->m_data += max_linkhdr;
1079			mhip6 = mtod(m, struct ip6_hdr *);
1080			*mhip6 = *ip6;
1081			m->m_len = sizeof(*mhip6);
1082			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1083			if (error) {
1084				ip6stat.ip6s_odropped++;
1085				goto sendorfree;
1086			}
1087			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1088			if (off + len >= tlen)
1089				len = tlen - off;
1090			else
1091				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1092			mhip6->ip6_plen = htons((u_short)(len + hlen +
1093			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1094			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1095				error = ENOBUFS;
1096				ip6stat.ip6s_odropped++;
1097				goto sendorfree;
1098			}
1099			m_cat(m, m_frgpart);
1100			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1101			m->m_pkthdr.rcvif = (struct ifnet *)0;
1102			ip6f->ip6f_reserved = 0;
1103			ip6f->ip6f_ident = id;
1104			ip6f->ip6f_nxt = nextproto;
1105			ip6stat.ip6s_ofragments++;
1106			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1107		}
1108
1109		in6_ifstat_inc(ifp, ifs6_out_fragok);
1110	}
1111
1112	/*
1113	 * Remove leading garbages.
1114	 */
1115sendorfree:
1116	m = m0->m_nextpkt;
1117	m0->m_nextpkt = 0;
1118	m_freem(m0);
1119	for (m0 = m; m; m = m0) {
1120		m0 = m->m_nextpkt;
1121		m->m_nextpkt = 0;
1122		if (error == 0) {
1123 			/* Record statistics for this interface address. */
1124 			if (ia) {
1125 				ia->ia_ifa.if_opackets++;
1126 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1127 			}
1128#ifdef IPSEC
1129			/* clean ipsec history once it goes out of the node */
1130			ipsec_delaux(m);
1131#endif
1132			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1133		} else
1134			m_freem(m);
1135	}
1136
1137	if (error == 0)
1138		ip6stat.ip6s_fragmented++;
1139
1140done:
1141	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1142		RTFREE(ro->ro_rt);
1143	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1144		RTFREE(ro_pmtu->ro_rt);
1145	}
1146
1147#ifdef IPSEC
1148	if (sp != NULL)
1149		key_freesp(sp);
1150#endif /* IPSEC */
1151#ifdef FAST_IPSEC
1152	if (sp != NULL)
1153		KEY_FREESP(&sp);
1154#endif /* FAST_IPSEC */
1155
1156	return (error);
1157
1158freehdrs:
1159	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1160	m_freem(exthdrs.ip6e_dest1);
1161	m_freem(exthdrs.ip6e_rthdr);
1162	m_freem(exthdrs.ip6e_dest2);
1163	/* FALLTHROUGH */
1164bad:
1165	m_freem(m);
1166	goto done;
1167}
1168
1169static int
1170ip6_copyexthdr(mp, hdr, hlen)
1171	struct mbuf **mp;
1172	caddr_t hdr;
1173	int hlen;
1174{
1175	struct mbuf *m;
1176
1177	if (hlen > MCLBYTES)
1178		return (ENOBUFS); /* XXX */
1179
1180	MGET(m, M_DONTWAIT, MT_DATA);
1181	if (!m)
1182		return (ENOBUFS);
1183
1184	if (hlen > MLEN) {
1185		MCLGET(m, M_DONTWAIT);
1186		if ((m->m_flags & M_EXT) == 0) {
1187			m_free(m);
1188			return (ENOBUFS);
1189		}
1190	}
1191	m->m_len = hlen;
1192	if (hdr)
1193		bcopy(hdr, mtod(m, caddr_t), hlen);
1194
1195	*mp = m;
1196	return (0);
1197}
1198
1199/*
1200 * Insert jumbo payload option.
1201 */
1202static int
1203ip6_insert_jumboopt(exthdrs, plen)
1204	struct ip6_exthdrs *exthdrs;
1205	u_int32_t plen;
1206{
1207	struct mbuf *mopt;
1208	u_char *optbuf;
1209	u_int32_t v;
1210
1211#define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1212
1213	/*
1214	 * If there is no hop-by-hop options header, allocate new one.
1215	 * If there is one but it doesn't have enough space to store the
1216	 * jumbo payload option, allocate a cluster to store the whole options.
1217	 * Otherwise, use it to store the options.
1218	 */
1219	if (exthdrs->ip6e_hbh == 0) {
1220		MGET(mopt, M_DONTWAIT, MT_DATA);
1221		if (mopt == 0)
1222			return (ENOBUFS);
1223		mopt->m_len = JUMBOOPTLEN;
1224		optbuf = mtod(mopt, u_char *);
1225		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1226		exthdrs->ip6e_hbh = mopt;
1227	} else {
1228		struct ip6_hbh *hbh;
1229
1230		mopt = exthdrs->ip6e_hbh;
1231		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1232			/*
1233			 * XXX assumption:
1234			 * - exthdrs->ip6e_hbh is not referenced from places
1235			 *   other than exthdrs.
1236			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1237			 */
1238			int oldoptlen = mopt->m_len;
1239			struct mbuf *n;
1240
1241			/*
1242			 * XXX: give up if the whole (new) hbh header does
1243			 * not fit even in an mbuf cluster.
1244			 */
1245			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1246				return (ENOBUFS);
1247
1248			/*
1249			 * As a consequence, we must always prepare a cluster
1250			 * at this point.
1251			 */
1252			MGET(n, M_DONTWAIT, MT_DATA);
1253			if (n) {
1254				MCLGET(n, M_DONTWAIT);
1255				if ((n->m_flags & M_EXT) == 0) {
1256					m_freem(n);
1257					n = NULL;
1258				}
1259			}
1260			if (!n)
1261				return (ENOBUFS);
1262			n->m_len = oldoptlen + JUMBOOPTLEN;
1263			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1264			    oldoptlen);
1265			optbuf = mtod(n, caddr_t) + oldoptlen;
1266			m_freem(mopt);
1267			mopt = exthdrs->ip6e_hbh = n;
1268		} else {
1269			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1270			mopt->m_len += JUMBOOPTLEN;
1271		}
1272		optbuf[0] = IP6OPT_PADN;
1273		optbuf[1] = 1;
1274
1275		/*
1276		 * Adjust the header length according to the pad and
1277		 * the jumbo payload option.
1278		 */
1279		hbh = mtod(mopt, struct ip6_hbh *);
1280		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1281	}
1282
1283	/* fill in the option. */
1284	optbuf[2] = IP6OPT_JUMBO;
1285	optbuf[3] = 4;
1286	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1287	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1288
1289	/* finally, adjust the packet header length */
1290	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1291
1292	return (0);
1293#undef JUMBOOPTLEN
1294}
1295
1296/*
1297 * Insert fragment header and copy unfragmentable header portions.
1298 */
1299static int
1300ip6_insertfraghdr(m0, m, hlen, frghdrp)
1301	struct mbuf *m0, *m;
1302	int hlen;
1303	struct ip6_frag **frghdrp;
1304{
1305	struct mbuf *n, *mlast;
1306
1307	if (hlen > sizeof(struct ip6_hdr)) {
1308		n = m_copym(m0, sizeof(struct ip6_hdr),
1309		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1310		if (n == 0)
1311			return (ENOBUFS);
1312		m->m_next = n;
1313	} else
1314		n = m;
1315
1316	/* Search for the last mbuf of unfragmentable part. */
1317	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1318		;
1319
1320	if ((mlast->m_flags & M_EXT) == 0 &&
1321	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1322		/* use the trailing space of the last mbuf for the fragment hdr */
1323		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1324		    mlast->m_len);
1325		mlast->m_len += sizeof(struct ip6_frag);
1326		m->m_pkthdr.len += sizeof(struct ip6_frag);
1327	} else {
1328		/* allocate a new mbuf for the fragment header */
1329		struct mbuf *mfrg;
1330
1331		MGET(mfrg, M_DONTWAIT, MT_DATA);
1332		if (mfrg == 0)
1333			return (ENOBUFS);
1334		mfrg->m_len = sizeof(struct ip6_frag);
1335		*frghdrp = mtod(mfrg, struct ip6_frag *);
1336		mlast->m_next = mfrg;
1337	}
1338
1339	return (0);
1340}
1341
1342static int
1343ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
1344	struct route_in6 *ro_pmtu, *ro;
1345	struct ifnet *ifp;
1346	struct in6_addr *dst;
1347	u_long *mtup;
1348	int *alwaysfragp;
1349{
1350	u_int32_t mtu = 0;
1351	int alwaysfrag = 0;
1352	int error = 0;
1353
1354	if (ro_pmtu != ro) {
1355		/* The first hop and the final destination may differ. */
1356		struct sockaddr_in6 *sa6_dst =
1357		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1358		if (ro_pmtu->ro_rt &&
1359		    ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
1360		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1361			RTFREE(ro_pmtu->ro_rt);
1362			ro_pmtu->ro_rt = (struct rtentry *)NULL;
1363		}
1364		if (ro_pmtu->ro_rt == NULL) {
1365			bzero(sa6_dst, sizeof(*sa6_dst));
1366			sa6_dst->sin6_family = AF_INET6;
1367			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1368			sa6_dst->sin6_addr = *dst;
1369
1370			rtalloc((struct route *)ro_pmtu);
1371		}
1372	}
1373	if (ro_pmtu->ro_rt) {
1374		u_int32_t ifmtu;
1375
1376		if (ifp == NULL)
1377			ifp = ro_pmtu->ro_rt->rt_ifp;
1378		ifmtu = IN6_LINKMTU(ifp);
1379		mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1380		if (mtu == 0)
1381			mtu = ifmtu;
1382		else if (mtu < IPV6_MMTU) {
1383			/*
1384			 * RFC2460 section 5, last paragraph:
1385			 * if we record ICMPv6 too big message with
1386			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1387			 * or smaller, with framgent header attached.
1388			 * (fragment header is needed regardless from the
1389			 * packet size, for translators to identify packets)
1390			 */
1391			alwaysfrag = 1;
1392			mtu = IPV6_MMTU;
1393		} else if (mtu > ifmtu) {
1394			/*
1395			 * The MTU on the route is larger than the MTU on
1396			 * the interface!  This shouldn't happen, unless the
1397			 * MTU of the interface has been changed after the
1398			 * interface was brought up.  Change the MTU in the
1399			 * route to match the interface MTU (as long as the
1400			 * field isn't locked).
1401			 */
1402			mtu = ifmtu;
1403			if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU))
1404				ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1405		}
1406	} else if (ifp) {
1407		mtu = IN6_LINKMTU(ifp);
1408	} else
1409		error = EHOSTUNREACH; /* XXX */
1410
1411	*mtup = mtu;
1412	if (alwaysfragp)
1413		*alwaysfragp = alwaysfrag;
1414	return (error);
1415}
1416
1417/*
1418 * IP6 socket option processing.
1419 */
1420int
1421ip6_ctloutput(so, sopt)
1422	struct socket *so;
1423	struct sockopt *sopt;
1424{
1425	int privileged, optdatalen, uproto;
1426	void *optdata;
1427	struct inpcb *in6p = sotoinpcb(so);
1428	int error, optval;
1429	int level, op, optname;
1430	int optlen;
1431	struct thread *td;
1432
1433	if (sopt) {
1434		level = sopt->sopt_level;
1435		op = sopt->sopt_dir;
1436		optname = sopt->sopt_name;
1437		optlen = sopt->sopt_valsize;
1438		td = sopt->sopt_td;
1439	} else {
1440		panic("ip6_ctloutput: arg soopt is NULL");
1441	}
1442	error = optval = 0;
1443
1444	privileged = (td == 0 || suser(td)) ? 0 : 1;
1445	uproto = (int)so->so_proto->pr_protocol;
1446
1447	if (level == IPPROTO_IPV6) {
1448		switch (op) {
1449
1450		case SOPT_SET:
1451			switch (optname) {
1452			case IPV6_2292PKTOPTIONS:
1453#ifdef IPV6_PKTOPTIONS
1454			case IPV6_PKTOPTIONS:
1455#endif
1456			{
1457				struct mbuf *m;
1458
1459				error = soopt_getm(sopt, &m); /* XXX */
1460				if (error != NULL)
1461					break;
1462				error = soopt_mcopyin(sopt, m); /* XXX */
1463				if (error != NULL)
1464					break;
1465				error = ip6_pcbopts(&in6p->in6p_outputopts,
1466						    m, so, sopt);
1467				m_freem(m); /* XXX */
1468				break;
1469			}
1470
1471			/*
1472			 * Use of some Hop-by-Hop options or some
1473			 * Destination options, might require special
1474			 * privilege.  That is, normal applications
1475			 * (without special privilege) might be forbidden
1476			 * from setting certain options in outgoing packets,
1477			 * and might never see certain options in received
1478			 * packets. [RFC 2292 Section 6]
1479			 * KAME specific note:
1480			 *  KAME prevents non-privileged users from sending or
1481			 *  receiving ANY hbh/dst options in order to avoid
1482			 *  overhead of parsing options in the kernel.
1483			 */
1484			case IPV6_RECVHOPOPTS:
1485			case IPV6_RECVDSTOPTS:
1486			case IPV6_RECVRTHDRDSTOPTS:
1487				if (!privileged) {
1488					error = EPERM;
1489					break;
1490				}
1491				/* FALLTHROUGH */
1492			case IPV6_UNICAST_HOPS:
1493			case IPV6_HOPLIMIT:
1494			case IPV6_FAITH:
1495
1496			case IPV6_RECVPKTINFO:
1497			case IPV6_RECVHOPLIMIT:
1498			case IPV6_RECVRTHDR:
1499			case IPV6_RECVPATHMTU:
1500			case IPV6_RECVTCLASS:
1501			case IPV6_V6ONLY:
1502			case IPV6_AUTOFLOWLABEL:
1503				if (optlen != sizeof(int)) {
1504					error = EINVAL;
1505					break;
1506				}
1507				error = sooptcopyin(sopt, &optval,
1508					sizeof optval, sizeof optval);
1509				if (error)
1510					break;
1511				switch (optname) {
1512
1513				case IPV6_UNICAST_HOPS:
1514					if (optval < -1 || optval >= 256)
1515						error = EINVAL;
1516					else {
1517						/* -1 = kernel default */
1518						in6p->in6p_hops = optval;
1519						if ((in6p->in6p_vflag &
1520						     INP_IPV4) != 0)
1521							in6p->inp_ip_ttl = optval;
1522					}
1523					break;
1524#define OPTSET(bit) \
1525do { \
1526	if (optval) \
1527		in6p->in6p_flags |= (bit); \
1528	else \
1529		in6p->in6p_flags &= ~(bit); \
1530} while (/*CONSTCOND*/ 0)
1531#define OPTSET2292(bit) \
1532do { \
1533	in6p->in6p_flags |= IN6P_RFC2292; \
1534	if (optval) \
1535		in6p->in6p_flags |= (bit); \
1536	else \
1537		in6p->in6p_flags &= ~(bit); \
1538} while (/*CONSTCOND*/ 0)
1539#define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1540
1541				case IPV6_RECVPKTINFO:
1542					/* cannot mix with RFC2292 */
1543					if (OPTBIT(IN6P_RFC2292)) {
1544						error = EINVAL;
1545						break;
1546					}
1547					OPTSET(IN6P_PKTINFO);
1548					break;
1549
1550				case IPV6_HOPLIMIT:
1551				{
1552					struct ip6_pktopts **optp;
1553
1554					/* cannot mix with RFC2292 */
1555					if (OPTBIT(IN6P_RFC2292)) {
1556						error = EINVAL;
1557						break;
1558					}
1559					optp = &in6p->in6p_outputopts;
1560					error = ip6_pcbopt(IPV6_HOPLIMIT,
1561							   (u_char *)&optval,
1562							   sizeof(optval),
1563							   optp,
1564							   privileged, uproto);
1565					break;
1566				}
1567
1568				case IPV6_RECVHOPLIMIT:
1569					/* cannot mix with RFC2292 */
1570					if (OPTBIT(IN6P_RFC2292)) {
1571						error = EINVAL;
1572						break;
1573					}
1574					OPTSET(IN6P_HOPLIMIT);
1575					break;
1576
1577				case IPV6_RECVHOPOPTS:
1578					/* cannot mix with RFC2292 */
1579					if (OPTBIT(IN6P_RFC2292)) {
1580						error = EINVAL;
1581						break;
1582					}
1583					OPTSET(IN6P_HOPOPTS);
1584					break;
1585
1586				case IPV6_RECVDSTOPTS:
1587					/* cannot mix with RFC2292 */
1588					if (OPTBIT(IN6P_RFC2292)) {
1589						error = EINVAL;
1590						break;
1591					}
1592					OPTSET(IN6P_DSTOPTS);
1593					break;
1594
1595				case IPV6_RECVRTHDRDSTOPTS:
1596					/* cannot mix with RFC2292 */
1597					if (OPTBIT(IN6P_RFC2292)) {
1598						error = EINVAL;
1599						break;
1600					}
1601					OPTSET(IN6P_RTHDRDSTOPTS);
1602					break;
1603
1604				case IPV6_RECVRTHDR:
1605					/* cannot mix with RFC2292 */
1606					if (OPTBIT(IN6P_RFC2292)) {
1607						error = EINVAL;
1608						break;
1609					}
1610					OPTSET(IN6P_RTHDR);
1611					break;
1612
1613				case IPV6_FAITH:
1614					OPTSET(IN6P_FAITH);
1615					break;
1616
1617				case IPV6_RECVPATHMTU:
1618					/*
1619					 * We ignore this option for TCP
1620					 * sockets.
1621					 * (rfc2292bis leaves this case
1622					 * unspecified.)
1623					 */
1624					if (uproto != IPPROTO_TCP)
1625						OPTSET(IN6P_MTU);
1626					break;
1627
1628				case IPV6_V6ONLY:
1629					/*
1630					 * make setsockopt(IPV6_V6ONLY)
1631					 * available only prior to bind(2).
1632					 * see ipng mailing list, Jun 22 2001.
1633					 */
1634					if (in6p->in6p_lport ||
1635					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1636						error = EINVAL;
1637						break;
1638					}
1639					OPTSET(IN6P_IPV6_V6ONLY);
1640					if (optval)
1641						in6p->in6p_vflag &= ~INP_IPV4;
1642					else
1643						in6p->in6p_vflag |= INP_IPV4;
1644					break;
1645				case IPV6_RECVTCLASS:
1646					/* cannot mix with RFC2292 XXX */
1647					if (OPTBIT(IN6P_RFC2292)) {
1648						error = EINVAL;
1649						break;
1650					}
1651					OPTSET(IN6P_TCLASS);
1652					break;
1653				case IPV6_AUTOFLOWLABEL:
1654					OPTSET(IN6P_AUTOFLOWLABEL);
1655					break;
1656
1657				}
1658				break;
1659
1660			case IPV6_TCLASS:
1661			case IPV6_DONTFRAG:
1662			case IPV6_USE_MIN_MTU:
1663			case IPV6_PREFER_TEMPADDR:
1664				if (optlen != sizeof(optval)) {
1665					error = EINVAL;
1666					break;
1667				}
1668				error = sooptcopyin(sopt, &optval,
1669					sizeof optval, sizeof optval);
1670				if (error)
1671					break;
1672				{
1673					struct ip6_pktopts **optp;
1674					optp = &in6p->in6p_outputopts;
1675					error = ip6_pcbopt(optname,
1676							   (u_char *)&optval,
1677							   sizeof(optval),
1678							   optp,
1679							   privileged, uproto);
1680					break;
1681				}
1682
1683			case IPV6_2292PKTINFO:
1684			case IPV6_2292HOPLIMIT:
1685			case IPV6_2292HOPOPTS:
1686			case IPV6_2292DSTOPTS:
1687			case IPV6_2292RTHDR:
1688				/* RFC 2292 */
1689				if (optlen != sizeof(int)) {
1690					error = EINVAL;
1691					break;
1692				}
1693				error = sooptcopyin(sopt, &optval,
1694					sizeof optval, sizeof optval);
1695				if (error)
1696					break;
1697				switch (optname) {
1698				case IPV6_2292PKTINFO:
1699					OPTSET2292(IN6P_PKTINFO);
1700					break;
1701				case IPV6_2292HOPLIMIT:
1702					OPTSET2292(IN6P_HOPLIMIT);
1703					break;
1704				case IPV6_2292HOPOPTS:
1705					/*
1706					 * Check super-user privilege.
1707					 * See comments for IPV6_RECVHOPOPTS.
1708					 */
1709					if (!privileged)
1710						return (EPERM);
1711					OPTSET2292(IN6P_HOPOPTS);
1712					break;
1713				case IPV6_2292DSTOPTS:
1714					if (!privileged)
1715						return (EPERM);
1716					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1717					break;
1718				case IPV6_2292RTHDR:
1719					OPTSET2292(IN6P_RTHDR);
1720					break;
1721				}
1722				break;
1723			case IPV6_PKTINFO:
1724			case IPV6_HOPOPTS:
1725			case IPV6_RTHDR:
1726			case IPV6_DSTOPTS:
1727			case IPV6_RTHDRDSTOPTS:
1728			case IPV6_NEXTHOP:
1729			{
1730				/* new advanced API (2292bis) */
1731				u_char *optbuf;
1732				int optlen;
1733				struct ip6_pktopts **optp;
1734
1735				/* cannot mix with RFC2292 */
1736				if (OPTBIT(IN6P_RFC2292)) {
1737					error = EINVAL;
1738					break;
1739				}
1740
1741				optbuf = sopt->sopt_val;
1742				optlen = sopt->sopt_valsize;
1743				optp = &in6p->in6p_outputopts;
1744				error = ip6_pcbopt(optname,
1745						   optbuf, optlen,
1746						   optp, privileged, uproto);
1747				break;
1748			}
1749#undef OPTSET
1750
1751			case IPV6_MULTICAST_IF:
1752			case IPV6_MULTICAST_HOPS:
1753			case IPV6_MULTICAST_LOOP:
1754			case IPV6_JOIN_GROUP:
1755			case IPV6_LEAVE_GROUP:
1756			    {
1757				if (sopt->sopt_valsize > MLEN) {
1758					error = EMSGSIZE;
1759					break;
1760				}
1761				/* XXX */
1762			    }
1763			    /* FALLTHROUGH */
1764			    {
1765				struct mbuf *m;
1766
1767				if (sopt->sopt_valsize > MCLBYTES) {
1768					error = EMSGSIZE;
1769					break;
1770				}
1771				/* XXX */
1772				MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_HEADER);
1773				if (m == 0) {
1774					error = ENOBUFS;
1775					break;
1776				}
1777				if (sopt->sopt_valsize > MLEN) {
1778					MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
1779					if ((m->m_flags & M_EXT) == 0) {
1780						m_free(m);
1781						error = ENOBUFS;
1782						break;
1783					}
1784				}
1785				m->m_len = sopt->sopt_valsize;
1786				error = sooptcopyin(sopt, mtod(m, char *),
1787						    m->m_len, m->m_len);
1788				if (error) {
1789					(void)m_free(m);
1790					break;
1791				}
1792				error =	ip6_setmoptions(sopt->sopt_name,
1793							&in6p->in6p_moptions,
1794							m);
1795				(void)m_free(m);
1796			    }
1797				break;
1798
1799			case IPV6_PORTRANGE:
1800				error = sooptcopyin(sopt, &optval,
1801				    sizeof optval, sizeof optval);
1802				if (error)
1803					break;
1804
1805				switch (optval) {
1806				case IPV6_PORTRANGE_DEFAULT:
1807					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1808					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1809					break;
1810
1811				case IPV6_PORTRANGE_HIGH:
1812					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1813					in6p->in6p_flags |= IN6P_HIGHPORT;
1814					break;
1815
1816				case IPV6_PORTRANGE_LOW:
1817					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1818					in6p->in6p_flags |= IN6P_LOWPORT;
1819					break;
1820
1821				default:
1822					error = EINVAL;
1823					break;
1824				}
1825				break;
1826
1827#if defined(IPSEC) || defined(FAST_IPSEC)
1828			case IPV6_IPSEC_POLICY:
1829			    {
1830				caddr_t req = NULL;
1831				size_t len = 0;
1832				struct mbuf *m;
1833
1834				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1835					break;
1836				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1837					break;
1838				if (m) {
1839					req = mtod(m, caddr_t);
1840					len = m->m_len;
1841				}
1842				error = ipsec6_set_policy(in6p, optname, req,
1843							  len, privileged);
1844				m_freem(m);
1845			    }
1846				break;
1847#endif /* KAME IPSEC */
1848
1849			case IPV6_FW_ADD:
1850			case IPV6_FW_DEL:
1851			case IPV6_FW_FLUSH:
1852			case IPV6_FW_ZERO:
1853			    {
1854				struct mbuf *m;
1855				struct mbuf **mp = &m;
1856
1857				if (ip6_fw_ctl_ptr == NULL)
1858					return EINVAL;
1859				/* XXX */
1860				if ((error = soopt_getm(sopt, &m)) != 0)
1861					break;
1862				/* XXX */
1863				if ((error = soopt_mcopyin(sopt, m)) != 0)
1864					break;
1865				error = (*ip6_fw_ctl_ptr)(optname, mp);
1866				m = *mp;
1867			    }
1868				break;
1869
1870			default:
1871				error = ENOPROTOOPT;
1872				break;
1873			}
1874			break;
1875
1876		case SOPT_GET:
1877			switch (optname) {
1878
1879			case IPV6_2292PKTOPTIONS:
1880#ifdef IPV6_PKTOPTIONS
1881			case IPV6_PKTOPTIONS:
1882#endif
1883				/*
1884				 * RFC3542 (effectively) deprecated the
1885				 * semantics of the 2292-style pktoptions.
1886				 * Since it was not reliable in nature (i.e.,
1887				 * applications had to expect the lack of some
1888				 * information after all), it would make sense
1889				 * to simplify this part by always returning
1890				 * empty data.
1891				 */
1892				sopt->sopt_valsize = 0;
1893				break;
1894
1895			case IPV6_RECVHOPOPTS:
1896			case IPV6_RECVDSTOPTS:
1897			case IPV6_RECVRTHDRDSTOPTS:
1898			case IPV6_UNICAST_HOPS:
1899			case IPV6_RECVPKTINFO:
1900			case IPV6_RECVHOPLIMIT:
1901			case IPV6_RECVRTHDR:
1902			case IPV6_RECVPATHMTU:
1903
1904			case IPV6_FAITH:
1905			case IPV6_V6ONLY:
1906			case IPV6_PORTRANGE:
1907			case IPV6_RECVTCLASS:
1908			case IPV6_AUTOFLOWLABEL:
1909				switch (optname) {
1910
1911				case IPV6_RECVHOPOPTS:
1912					optval = OPTBIT(IN6P_HOPOPTS);
1913					break;
1914
1915				case IPV6_RECVDSTOPTS:
1916					optval = OPTBIT(IN6P_DSTOPTS);
1917					break;
1918
1919				case IPV6_RECVRTHDRDSTOPTS:
1920					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1921					break;
1922
1923				case IPV6_UNICAST_HOPS:
1924					optval = in6p->in6p_hops;
1925					break;
1926
1927				case IPV6_RECVPKTINFO:
1928					optval = OPTBIT(IN6P_PKTINFO);
1929					break;
1930
1931				case IPV6_RECVHOPLIMIT:
1932					optval = OPTBIT(IN6P_HOPLIMIT);
1933					break;
1934
1935				case IPV6_RECVRTHDR:
1936					optval = OPTBIT(IN6P_RTHDR);
1937					break;
1938
1939				case IPV6_RECVPATHMTU:
1940					optval = OPTBIT(IN6P_MTU);
1941					break;
1942
1943				case IPV6_FAITH:
1944					optval = OPTBIT(IN6P_FAITH);
1945					break;
1946
1947				case IPV6_V6ONLY:
1948					optval = OPTBIT(IN6P_IPV6_V6ONLY);
1949					break;
1950
1951				case IPV6_PORTRANGE:
1952				    {
1953					int flags;
1954					flags = in6p->in6p_flags;
1955					if (flags & IN6P_HIGHPORT)
1956						optval = IPV6_PORTRANGE_HIGH;
1957					else if (flags & IN6P_LOWPORT)
1958						optval = IPV6_PORTRANGE_LOW;
1959					else
1960						optval = 0;
1961					break;
1962				    }
1963				case IPV6_RECVTCLASS:
1964					optval = OPTBIT(IN6P_TCLASS);
1965					break;
1966
1967				case IPV6_AUTOFLOWLABEL:
1968					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
1969					break;
1970				}
1971				if (error)
1972					break;
1973				error = sooptcopyout(sopt, &optval,
1974					sizeof optval);
1975				break;
1976
1977			case IPV6_PATHMTU:
1978			{
1979				u_long pmtu = 0;
1980				struct ip6_mtuinfo mtuinfo;
1981				struct route_in6 *ro = (struct route_in6 *)&in6p->in6p_route;
1982
1983				if (!(so->so_state & SS_ISCONNECTED))
1984					return (ENOTCONN);
1985				/*
1986				 * XXX: we dot not consider the case of source
1987				 * routing, or optional information to specify
1988				 * the outgoing interface.
1989				 */
1990				error = ip6_getpmtu(ro, NULL, NULL,
1991				    &in6p->in6p_faddr, &pmtu, NULL);
1992				if (error)
1993					break;
1994				if (pmtu > IPV6_MAXPACKET)
1995					pmtu = IPV6_MAXPACKET;
1996
1997				bzero(&mtuinfo, sizeof(mtuinfo));
1998				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
1999				optdata = (void *)&mtuinfo;
2000				optdatalen = sizeof(mtuinfo);
2001				error = sooptcopyout(sopt, optdata,
2002				    optdatalen);
2003				break;
2004			}
2005
2006			case IPV6_2292PKTINFO:
2007			case IPV6_2292HOPLIMIT:
2008			case IPV6_2292HOPOPTS:
2009			case IPV6_2292RTHDR:
2010			case IPV6_2292DSTOPTS:
2011				switch (optname) {
2012				case IPV6_2292PKTINFO:
2013					optval = OPTBIT(IN6P_PKTINFO);
2014					break;
2015				case IPV6_2292HOPLIMIT:
2016					optval = OPTBIT(IN6P_HOPLIMIT);
2017					break;
2018				case IPV6_2292HOPOPTS:
2019					optval = OPTBIT(IN6P_HOPOPTS);
2020					break;
2021				case IPV6_2292RTHDR:
2022					optval = OPTBIT(IN6P_RTHDR);
2023					break;
2024				case IPV6_2292DSTOPTS:
2025					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2026					break;
2027				}
2028				error = sooptcopyout(sopt, &optval,
2029				    sizeof optval);
2030				break;
2031			case IPV6_PKTINFO:
2032			case IPV6_HOPOPTS:
2033			case IPV6_RTHDR:
2034			case IPV6_DSTOPTS:
2035			case IPV6_RTHDRDSTOPTS:
2036			case IPV6_NEXTHOP:
2037			case IPV6_TCLASS:
2038			case IPV6_DONTFRAG:
2039			case IPV6_USE_MIN_MTU:
2040			case IPV6_PREFER_TEMPADDR:
2041				error = ip6_getpcbopt(in6p->in6p_outputopts,
2042				    optname, sopt);
2043				break;
2044
2045			case IPV6_MULTICAST_IF:
2046			case IPV6_MULTICAST_HOPS:
2047			case IPV6_MULTICAST_LOOP:
2048			case IPV6_JOIN_GROUP:
2049			case IPV6_LEAVE_GROUP:
2050			    {
2051				struct mbuf *m;
2052				error = ip6_getmoptions(sopt->sopt_name,
2053				    in6p->in6p_moptions, &m);
2054				if (error == 0)
2055					error = sooptcopyout(sopt,
2056					    mtod(m, char *), m->m_len);
2057				m_freem(m);
2058			    }
2059				break;
2060
2061#if defined(IPSEC) || defined(FAST_IPSEC)
2062			case IPV6_IPSEC_POLICY:
2063			  {
2064				caddr_t req = NULL;
2065				size_t len = 0;
2066				struct mbuf *m = NULL;
2067				struct mbuf **mp = &m;
2068				size_t ovalsize = sopt->sopt_valsize;
2069				caddr_t oval = (caddr_t)sopt->sopt_val;
2070
2071				error = soopt_getm(sopt, &m); /* XXX */
2072				if (error != NULL)
2073					break;
2074				error = soopt_mcopyin(sopt, m); /* XXX */
2075				if (error != NULL)
2076					break;
2077				sopt->sopt_valsize = ovalsize;
2078				sopt->sopt_val = oval;
2079				if (m) {
2080					req = mtod(m, caddr_t);
2081					len = m->m_len;
2082				}
2083				error = ipsec6_get_policy(in6p, req, len, mp);
2084				if (error == 0)
2085					error = soopt_mcopyout(sopt, m); /* XXX */
2086				if (error == 0 && m)
2087					m_freem(m);
2088				break;
2089			  }
2090#endif /* KAME IPSEC */
2091
2092			case IPV6_FW_GET:
2093			  {
2094				struct mbuf *m;
2095				struct mbuf **mp = &m;
2096
2097				if (ip6_fw_ctl_ptr == NULL)
2098			        {
2099					return EINVAL;
2100				}
2101				error = (*ip6_fw_ctl_ptr)(optname, mp);
2102				if (error == 0)
2103					error = soopt_mcopyout(sopt, m); /* XXX */
2104				if (error == 0 && m)
2105					m_freem(m);
2106			  }
2107				break;
2108
2109			default:
2110				error = ENOPROTOOPT;
2111				break;
2112			}
2113			break;
2114		}
2115	} else {		/* level != IPPROTO_IPV6 */
2116		error = EINVAL;
2117	}
2118	return (error);
2119}
2120
2121/*
2122 * Set up IP6 options in pcb for insertion in output packets or
2123 * specifying behavior of outgoing packets.
2124 */
2125static int
2126ip6_pcbopts(pktopt, m, so, sopt)
2127	struct ip6_pktopts **pktopt;
2128	struct mbuf *m;
2129	struct socket *so;
2130	struct sockopt *sopt;
2131{
2132	struct ip6_pktopts *opt = *pktopt;
2133	int error = 0;
2134	struct thread *td = sopt->sopt_td;
2135	int priv = 0;
2136
2137	/* turn off any old options. */
2138	if (opt) {
2139#ifdef DIAGNOSTIC
2140		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2141		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2142		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2143			printf("ip6_pcbopts: all specified options are cleared.\n");
2144#endif
2145		ip6_clearpktopts(opt, -1);
2146	} else
2147		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2148	*pktopt = NULL;
2149
2150	if (!m || m->m_len == 0) {
2151		/*
2152		 * Only turning off any previous options, regardless of
2153		 * whether the opt is just created or given.
2154		 */
2155		free(opt, M_IP6OPT);
2156		return (0);
2157	}
2158
2159	/*  set options specified by user. */
2160	if (td && !suser(td))
2161		priv = 1;
2162	if ((error = ip6_setpktoptions(m, opt, NULL, priv, 1,
2163	    so->so_proto->pr_protocol)) != 0) {
2164		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2165		free(opt, M_IP6OPT);
2166		return (error);
2167	}
2168	*pktopt = opt;
2169	return (0);
2170}
2171
2172/*
2173 * initialize ip6_pktopts.  beware that there are non-zero default values in
2174 * the struct.
2175 */
2176void
2177init_ip6pktopts(opt)
2178	struct ip6_pktopts *opt;
2179{
2180
2181	bzero(opt, sizeof(*opt));
2182	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2183	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2184	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2185	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2186}
2187
2188static int
2189ip6_pcbopt(optname, buf, len, pktopt, priv, uproto)
2190	int optname, len, priv;
2191	u_char *buf;
2192	struct ip6_pktopts **pktopt;
2193	int uproto;
2194{
2195	struct ip6_pktopts *opt;
2196
2197	if (*pktopt == NULL) {
2198		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2199		    M_WAITOK);
2200		init_ip6pktopts(*pktopt);
2201		(*pktopt)->needfree = 1;
2202	}
2203	opt = *pktopt;
2204
2205	return (ip6_setpktoption(optname, buf, len, opt, priv, 1, 0, uproto));
2206}
2207
2208static int
2209ip6_getpcbopt(pktopt, optname, sopt)
2210	struct ip6_pktopts *pktopt;
2211	struct sockopt *sopt;
2212	int optname;
2213{
2214	void *optdata = NULL;
2215	int optdatalen = 0;
2216	struct ip6_ext *ip6e;
2217	int error = 0;
2218	struct in6_pktinfo null_pktinfo;
2219	int deftclass = 0, on;
2220	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2221	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2222
2223	switch (optname) {
2224	case IPV6_PKTINFO:
2225		if (pktopt && pktopt->ip6po_pktinfo)
2226			optdata = (void *)pktopt->ip6po_pktinfo;
2227		else {
2228			/* XXX: we don't have to do this every time... */
2229			bzero(&null_pktinfo, sizeof(null_pktinfo));
2230			optdata = (void *)&null_pktinfo;
2231		}
2232		optdatalen = sizeof(struct in6_pktinfo);
2233		break;
2234	case IPV6_TCLASS:
2235		if (pktopt && pktopt->ip6po_tclass >= 0)
2236			optdata = (void *)&pktopt->ip6po_tclass;
2237		else
2238			optdata = (void *)&deftclass;
2239		optdatalen = sizeof(int);
2240		break;
2241	case IPV6_HOPOPTS:
2242		if (pktopt && pktopt->ip6po_hbh) {
2243			optdata = (void *)pktopt->ip6po_hbh;
2244			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2245			optdatalen = (ip6e->ip6e_len + 1) << 3;
2246		}
2247		break;
2248	case IPV6_RTHDR:
2249		if (pktopt && pktopt->ip6po_rthdr) {
2250			optdata = (void *)pktopt->ip6po_rthdr;
2251			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2252			optdatalen = (ip6e->ip6e_len + 1) << 3;
2253		}
2254		break;
2255	case IPV6_RTHDRDSTOPTS:
2256		if (pktopt && pktopt->ip6po_dest1) {
2257			optdata = (void *)pktopt->ip6po_dest1;
2258			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2259			optdatalen = (ip6e->ip6e_len + 1) << 3;
2260		}
2261		break;
2262	case IPV6_DSTOPTS:
2263		if (pktopt && pktopt->ip6po_dest2) {
2264			optdata = (void *)pktopt->ip6po_dest2;
2265			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2266			optdatalen = (ip6e->ip6e_len + 1) << 3;
2267		}
2268		break;
2269	case IPV6_NEXTHOP:
2270		if (pktopt && pktopt->ip6po_nexthop) {
2271			optdata = (void *)pktopt->ip6po_nexthop;
2272			optdatalen = pktopt->ip6po_nexthop->sa_len;
2273		}
2274		break;
2275	case IPV6_USE_MIN_MTU:
2276		if (pktopt)
2277			optdata = (void *)&pktopt->ip6po_minmtu;
2278		else
2279			optdata = (void *)&defminmtu;
2280		optdatalen = sizeof(int);
2281		break;
2282	case IPV6_DONTFRAG:
2283		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2284			on = 1;
2285		else
2286			on = 0;
2287		optdata = (void *)&on;
2288		optdatalen = sizeof(on);
2289		break;
2290	case IPV6_PREFER_TEMPADDR:
2291		if (pktopt)
2292			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2293		else
2294			optdata = (void *)&defpreftemp;
2295		optdatalen = sizeof(int);
2296		break;
2297	default:		/* should not happen */
2298#ifdef DIAGNOSTIC
2299		panic("ip6_getpcbopt: unexpected option\n");
2300#endif
2301		return (ENOPROTOOPT);
2302	}
2303
2304	error = sooptcopyout(sopt, optdata, optdatalen);
2305
2306	return (error);
2307}
2308
2309void
2310ip6_clearpktopts(pktopt, optname)
2311	struct ip6_pktopts *pktopt;
2312	int optname;
2313{
2314	int needfree;
2315
2316	needfree = pktopt->needfree;
2317
2318	if (optname == -1 || optname == IPV6_PKTINFO) {
2319		if (needfree && pktopt->ip6po_pktinfo)
2320			free(pktopt->ip6po_pktinfo, M_IP6OPT);
2321		pktopt->ip6po_pktinfo = NULL;
2322	}
2323	if (optname == -1 || optname == IPV6_HOPLIMIT)
2324		pktopt->ip6po_hlim = -1;
2325	if (optname == -1 || optname == IPV6_TCLASS)
2326		pktopt->ip6po_tclass = -1;
2327	if (optname == -1 || optname == IPV6_NEXTHOP) {
2328		if (pktopt->ip6po_nextroute.ro_rt) {
2329			RTFREE(pktopt->ip6po_nextroute.ro_rt);
2330			pktopt->ip6po_nextroute.ro_rt = NULL;
2331		}
2332		if (needfree && pktopt->ip6po_nexthop)
2333			free(pktopt->ip6po_nexthop, M_IP6OPT);
2334		pktopt->ip6po_nexthop = NULL;
2335	}
2336	if (optname == -1 || optname == IPV6_HOPOPTS) {
2337		if (needfree && pktopt->ip6po_hbh)
2338			free(pktopt->ip6po_hbh, M_IP6OPT);
2339		pktopt->ip6po_hbh = NULL;
2340	}
2341	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2342		if (needfree && pktopt->ip6po_dest1)
2343			free(pktopt->ip6po_dest1, M_IP6OPT);
2344		pktopt->ip6po_dest1 = NULL;
2345	}
2346	if (optname == -1 || optname == IPV6_RTHDR) {
2347		if (needfree && pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2348			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2349		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2350		if (pktopt->ip6po_route.ro_rt) {
2351			RTFREE(pktopt->ip6po_route.ro_rt);
2352			pktopt->ip6po_route.ro_rt = NULL;
2353		}
2354	}
2355	if (optname == -1 || optname == IPV6_DSTOPTS) {
2356		if (needfree && pktopt->ip6po_dest2)
2357			free(pktopt->ip6po_dest2, M_IP6OPT);
2358		pktopt->ip6po_dest2 = NULL;
2359	}
2360}
2361
2362#define PKTOPT_EXTHDRCPY(type) \
2363do {\
2364	if (src->type) {\
2365		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2366		dst->type = malloc(hlen, M_IP6OPT, canwait);\
2367		if (dst->type == NULL && canwait == M_NOWAIT)\
2368			goto bad;\
2369		bcopy(src->type, dst->type, hlen);\
2370	}\
2371} while (/*CONSTCOND*/ 0)
2372
2373struct ip6_pktopts *
2374ip6_copypktopts(src, canwait)
2375	struct ip6_pktopts *src;
2376	int canwait;
2377{
2378	struct ip6_pktopts *dst;
2379
2380	if (src == NULL) {
2381		printf("ip6_clearpktopts: invalid argument\n");
2382		return (NULL);
2383	}
2384
2385	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2386	if (dst == NULL && canwait == M_NOWAIT)
2387		return (NULL);
2388	bzero(dst, sizeof(*dst));
2389	dst->needfree = 1;
2390
2391	dst->ip6po_hlim = src->ip6po_hlim;
2392	dst->ip6po_tclass = src->ip6po_tclass;
2393	dst->ip6po_flags = src->ip6po_flags;
2394	if (src->ip6po_pktinfo) {
2395		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2396		    M_IP6OPT, canwait);
2397		if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2398			goto bad;
2399		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2400	}
2401	if (src->ip6po_nexthop) {
2402		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2403		    M_IP6OPT, canwait);
2404		if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
2405			goto bad;
2406		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2407		    src->ip6po_nexthop->sa_len);
2408	}
2409	PKTOPT_EXTHDRCPY(ip6po_hbh);
2410	PKTOPT_EXTHDRCPY(ip6po_dest1);
2411	PKTOPT_EXTHDRCPY(ip6po_dest2);
2412	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2413	return (dst);
2414
2415  bad:
2416	if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2417	if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2418	if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2419	if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2420	if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2421	if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2422	free(dst, M_IP6OPT);
2423	return (NULL);
2424}
2425#undef PKTOPT_EXTHDRCPY
2426
2427void
2428ip6_freepcbopts(pktopt)
2429	struct ip6_pktopts *pktopt;
2430{
2431	if (pktopt == NULL)
2432		return;
2433
2434	ip6_clearpktopts(pktopt, -1);
2435
2436	free(pktopt, M_IP6OPT);
2437}
2438
2439/*
2440 * Set the IP6 multicast options in response to user setsockopt().
2441 */
2442static int
2443ip6_setmoptions(optname, im6op, m)
2444	int optname;
2445	struct ip6_moptions **im6op;
2446	struct mbuf *m;
2447{
2448	int error = 0;
2449	u_int loop, ifindex;
2450	struct ipv6_mreq *mreq;
2451	struct ifnet *ifp;
2452	struct ip6_moptions *im6o = *im6op;
2453	struct route_in6 ro;
2454	struct sockaddr_in6 *dst;
2455	struct in6_multi_mship *imm;
2456	struct thread *td = curthread;
2457
2458	if (im6o == NULL) {
2459		/*
2460		 * No multicast option buffer attached to the pcb;
2461		 * allocate one and initialize to default values.
2462		 */
2463		im6o = (struct ip6_moptions *)
2464			malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
2465
2466		if (im6o == NULL)
2467			return (ENOBUFS);
2468		*im6op = im6o;
2469		im6o->im6o_multicast_ifp = NULL;
2470		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2471		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2472		LIST_INIT(&im6o->im6o_memberships);
2473	}
2474
2475	switch (optname) {
2476
2477	case IPV6_MULTICAST_IF:
2478		/*
2479		 * Select the interface for outgoing multicast packets.
2480		 */
2481		if (m == NULL || m->m_len != sizeof(u_int)) {
2482			error = EINVAL;
2483			break;
2484		}
2485		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2486		if (ifindex < 0 || if_index < ifindex) {
2487			error = ENXIO;	/* XXX EINVAL? */
2488			break;
2489		}
2490		ifp = ifnet_byindex(ifindex);
2491		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2492			error = EADDRNOTAVAIL;
2493			break;
2494		}
2495		im6o->im6o_multicast_ifp = ifp;
2496		break;
2497
2498	case IPV6_MULTICAST_HOPS:
2499	    {
2500		/*
2501		 * Set the IP6 hoplimit for outgoing multicast packets.
2502		 */
2503		int optval;
2504		if (m == NULL || m->m_len != sizeof(int)) {
2505			error = EINVAL;
2506			break;
2507		}
2508		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2509		if (optval < -1 || optval >= 256)
2510			error = EINVAL;
2511		else if (optval == -1)
2512			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2513		else
2514			im6o->im6o_multicast_hlim = optval;
2515		break;
2516	    }
2517
2518	case IPV6_MULTICAST_LOOP:
2519		/*
2520		 * Set the loopback flag for outgoing multicast packets.
2521		 * Must be zero or one.
2522		 */
2523		if (m == NULL || m->m_len != sizeof(u_int)) {
2524			error = EINVAL;
2525			break;
2526		}
2527		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2528		if (loop > 1) {
2529			error = EINVAL;
2530			break;
2531		}
2532		im6o->im6o_multicast_loop = loop;
2533		break;
2534
2535	case IPV6_JOIN_GROUP:
2536		/*
2537		 * Add a multicast group membership.
2538		 * Group must be a valid IP6 multicast address.
2539		 */
2540		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2541			error = EINVAL;
2542			break;
2543		}
2544		mreq = mtod(m, struct ipv6_mreq *);
2545		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2546			/*
2547			 * We use the unspecified address to specify to accept
2548			 * all multicast addresses. Only super user is allowed
2549			 * to do this.
2550			 */
2551			if (suser(td)) {
2552				error = EACCES;
2553				break;
2554			}
2555		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2556			error = EINVAL;
2557			break;
2558		}
2559
2560		/*
2561		 * If the interface is specified, validate it.
2562		 */
2563		if (mreq->ipv6mr_interface < 0
2564		 || if_index < mreq->ipv6mr_interface) {
2565			error = ENXIO;	/* XXX EINVAL? */
2566			break;
2567		}
2568		/*
2569		 * If no interface was explicitly specified, choose an
2570		 * appropriate one according to the given multicast address.
2571		 */
2572		if (mreq->ipv6mr_interface == 0) {
2573			/*
2574			 * If the multicast address is in node-local scope,
2575			 * the interface should be a loopback interface.
2576			 * Otherwise, look up the routing table for the
2577			 * address, and choose the outgoing interface.
2578			 *   XXX: is it a good approach?
2579			 */
2580			if (IN6_IS_ADDR_MC_INTFACELOCAL(&mreq->ipv6mr_multiaddr)) {
2581				ifp = &loif[0];
2582			} else {
2583				ro.ro_rt = NULL;
2584				dst = (struct sockaddr_in6 *)&ro.ro_dst;
2585				bzero(dst, sizeof(*dst));
2586				dst->sin6_len = sizeof(struct sockaddr_in6);
2587				dst->sin6_family = AF_INET6;
2588				dst->sin6_addr = mreq->ipv6mr_multiaddr;
2589				rtalloc((struct route *)&ro);
2590				if (ro.ro_rt == NULL) {
2591					error = EADDRNOTAVAIL;
2592					break;
2593				}
2594				ifp = ro.ro_rt->rt_ifp;
2595				RTFREE(ro.ro_rt);
2596			}
2597		} else
2598			ifp = ifnet_byindex(mreq->ipv6mr_interface);
2599
2600		/*
2601		 * See if we found an interface, and confirm that it
2602		 * supports multicast
2603		 */
2604		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2605			error = EADDRNOTAVAIL;
2606			break;
2607		}
2608		/*
2609		 * Put interface index into the multicast address,
2610		 * if the address has link-local scope.
2611		 */
2612		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2613			mreq->ipv6mr_multiaddr.s6_addr16[1]
2614				= htons(mreq->ipv6mr_interface);
2615		}
2616		/*
2617		 * See if the membership already exists.
2618		 */
2619		for (imm = im6o->im6o_memberships.lh_first;
2620		     imm != NULL; imm = imm->i6mm_chain.le_next)
2621			if (imm->i6mm_maddr->in6m_ifp == ifp &&
2622			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2623					       &mreq->ipv6mr_multiaddr))
2624				break;
2625		if (imm != NULL) {
2626			error = EADDRINUSE;
2627			break;
2628		}
2629		/*
2630		 * Everything looks good; add a new record to the multicast
2631		 * address list for the given interface.
2632		 */
2633		imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK);
2634		if (imm == NULL) {
2635			error = ENOBUFS;
2636			break;
2637		}
2638		if ((imm->i6mm_maddr =
2639		     in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
2640			free(imm, M_IPMADDR);
2641			break;
2642		}
2643		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2644		break;
2645
2646	case IPV6_LEAVE_GROUP:
2647		/*
2648		 * Drop a multicast group membership.
2649		 * Group must be a valid IP6 multicast address.
2650		 */
2651		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2652			error = EINVAL;
2653			break;
2654		}
2655		mreq = mtod(m, struct ipv6_mreq *);
2656		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2657			if (suser(td)) {
2658				error = EACCES;
2659				break;
2660			}
2661		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2662			error = EINVAL;
2663			break;
2664		}
2665		/*
2666		 * If an interface address was specified, get a pointer
2667		 * to its ifnet structure.
2668		 */
2669		if (mreq->ipv6mr_interface < 0
2670		 || if_index < mreq->ipv6mr_interface) {
2671			error = ENXIO;	/* XXX EINVAL? */
2672			break;
2673		}
2674		ifp = ifnet_byindex(mreq->ipv6mr_interface);
2675		/*
2676		 * Put interface index into the multicast address,
2677		 * if the address has link-local scope.
2678		 */
2679		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2680			mreq->ipv6mr_multiaddr.s6_addr16[1]
2681				= htons(mreq->ipv6mr_interface);
2682		}
2683
2684		/*
2685		 * Find the membership in the membership list.
2686		 */
2687		for (imm = im6o->im6o_memberships.lh_first;
2688		     imm != NULL; imm = imm->i6mm_chain.le_next) {
2689			if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2690			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2691			    &mreq->ipv6mr_multiaddr))
2692				break;
2693		}
2694		if (imm == NULL) {
2695			/* Unable to resolve interface */
2696			error = EADDRNOTAVAIL;
2697			break;
2698		}
2699		/*
2700		 * Give up the multicast address record to which the
2701		 * membership points.
2702		 */
2703		LIST_REMOVE(imm, i6mm_chain);
2704		in6_delmulti(imm->i6mm_maddr);
2705		free(imm, M_IPMADDR);
2706		break;
2707
2708	default:
2709		error = EOPNOTSUPP;
2710		break;
2711	}
2712
2713	/*
2714	 * If all options have default values, no need to keep the mbuf.
2715	 */
2716	if (im6o->im6o_multicast_ifp == NULL &&
2717	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2718	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2719	    im6o->im6o_memberships.lh_first == NULL) {
2720		free(*im6op, M_IPMOPTS);
2721		*im6op = NULL;
2722	}
2723
2724	return (error);
2725}
2726
2727/*
2728 * Return the IP6 multicast options in response to user getsockopt().
2729 */
2730static int
2731ip6_getmoptions(optname, im6o, mp)
2732	int optname;
2733	struct ip6_moptions *im6o;
2734	struct mbuf **mp;
2735{
2736	u_int *hlim, *loop, *ifindex;
2737
2738	*mp = m_get(M_TRYWAIT, MT_HEADER);		/* XXX */
2739
2740	switch (optname) {
2741
2742	case IPV6_MULTICAST_IF:
2743		ifindex = mtod(*mp, u_int *);
2744		(*mp)->m_len = sizeof(u_int);
2745		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2746			*ifindex = 0;
2747		else
2748			*ifindex = im6o->im6o_multicast_ifp->if_index;
2749		return (0);
2750
2751	case IPV6_MULTICAST_HOPS:
2752		hlim = mtod(*mp, u_int *);
2753		(*mp)->m_len = sizeof(u_int);
2754		if (im6o == NULL)
2755			*hlim = ip6_defmcasthlim;
2756		else
2757			*hlim = im6o->im6o_multicast_hlim;
2758		return (0);
2759
2760	case IPV6_MULTICAST_LOOP:
2761		loop = mtod(*mp, u_int *);
2762		(*mp)->m_len = sizeof(u_int);
2763		if (im6o == NULL)
2764			*loop = ip6_defmcasthlim;
2765		else
2766			*loop = im6o->im6o_multicast_loop;
2767		return (0);
2768
2769	default:
2770		return (EOPNOTSUPP);
2771	}
2772}
2773
2774/*
2775 * Discard the IP6 multicast options.
2776 */
2777void
2778ip6_freemoptions(im6o)
2779	struct ip6_moptions *im6o;
2780{
2781	struct in6_multi_mship *imm;
2782
2783	if (im6o == NULL)
2784		return;
2785
2786	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2787		LIST_REMOVE(imm, i6mm_chain);
2788		if (imm->i6mm_maddr)
2789			in6_delmulti(imm->i6mm_maddr);
2790		free(imm, M_IPMADDR);
2791	}
2792	free(im6o, M_IPMOPTS);
2793}
2794
2795/*
2796 * Set IPv6 outgoing packet options based on advanced API.
2797 */
2798int
2799ip6_setpktoptions(control, opt, stickyopt, priv, needcopy, uproto)
2800	struct mbuf *control;
2801	struct ip6_pktopts *opt, *stickyopt;
2802	int priv, needcopy, uproto;
2803{
2804	struct cmsghdr *cm = 0;
2805
2806	if (control == 0 || opt == 0)
2807		return (EINVAL);
2808
2809	if (stickyopt) {
2810		/*
2811		 * If stickyopt is provided, make a local copy of the options
2812		 * for this particular packet, then override them by ancillary
2813		 * objects.
2814		 * XXX: need to gain a reference for the cached route of the
2815		 * next hop in case of the overriding.
2816		 */
2817		*opt = *stickyopt;
2818		if (opt->ip6po_nextroute.ro_rt) {
2819			RT_LOCK(opt->ip6po_nextroute.ro_rt);
2820			opt->ip6po_nextroute.ro_rt->rt_refcnt++;
2821			RT_UNLOCK(opt->ip6po_nextroute.ro_rt);
2822		}
2823	} else
2824		init_ip6pktopts(opt);
2825	opt->needfree = needcopy;
2826
2827	/*
2828	 * XXX: Currently, we assume all the optional information is stored
2829	 * in a single mbuf.
2830	 */
2831	if (control->m_next)
2832		return (EINVAL);
2833
2834	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2835	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2836		int error;
2837
2838		if (control->m_len < CMSG_LEN(0))
2839			return (EINVAL);
2840
2841		cm = mtod(control, struct cmsghdr *);
2842		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2843			return (EINVAL);
2844		if (cm->cmsg_level != IPPROTO_IPV6)
2845			continue;
2846
2847		error = ip6_setpktoption(cm->cmsg_type, CMSG_DATA(cm),
2848		    cm->cmsg_len - CMSG_LEN(0), opt, priv, needcopy, 1, uproto);
2849		if (error)
2850			return (error);
2851	}
2852
2853	return (0);
2854}
2855
2856/*
2857 * Set a particular packet option, as a sticky option or an ancillary data
2858 * item.  "len" can be 0 only when it's a sticky option.
2859 * We have 4 cases of combination of "sticky" and "cmsg":
2860 * "sticky=0, cmsg=0": impossible
2861 * "sticky=0, cmsg=1": RFC2292 or rfc2292bis ancillary data
2862 * "sticky=1, cmsg=0": rfc2292bis socket option
2863 * "sticky=1, cmsg=1": RFC2292 socket option
2864 */
2865static int
2866ip6_setpktoption(optname, buf, len, opt, priv, sticky, cmsg, uproto)
2867	int optname, len, priv, sticky, cmsg, uproto;
2868	u_char *buf;
2869	struct ip6_pktopts *opt;
2870{
2871	int minmtupolicy, preftemp;
2872
2873	if (!sticky && !cmsg) {
2874#ifdef DIAGNOSTIC
2875		printf("ip6_setpktoption: impossible case\n");
2876#endif
2877		return (EINVAL);
2878	}
2879
2880	/*
2881	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2882	 * not be specified in the context of rfc2292bis.  Conversely,
2883	 * rfc2292bis types should not be specified in the context of RFC2292.
2884	 */
2885	if (!cmsg) {
2886		switch (optname) {
2887		case IPV6_2292PKTINFO:
2888		case IPV6_2292HOPLIMIT:
2889		case IPV6_2292NEXTHOP:
2890		case IPV6_2292HOPOPTS:
2891		case IPV6_2292DSTOPTS:
2892		case IPV6_2292RTHDR:
2893		case IPV6_2292PKTOPTIONS:
2894			return (ENOPROTOOPT);
2895		}
2896	}
2897	if (sticky && cmsg) {
2898		switch (optname) {
2899		case IPV6_PKTINFO:
2900		case IPV6_HOPLIMIT:
2901		case IPV6_NEXTHOP:
2902		case IPV6_HOPOPTS:
2903		case IPV6_DSTOPTS:
2904		case IPV6_RTHDRDSTOPTS:
2905		case IPV6_RTHDR:
2906		case IPV6_USE_MIN_MTU:
2907		case IPV6_DONTFRAG:
2908		case IPV6_TCLASS:
2909		case IPV6_PREFER_TEMPADDR: /* XXX: not an rfc2292bis option */
2910			return (ENOPROTOOPT);
2911		}
2912	}
2913
2914	switch (optname) {
2915	case IPV6_2292PKTINFO:
2916	case IPV6_PKTINFO:
2917	{
2918		struct ifnet *ifp = NULL;
2919		struct in6_pktinfo *pktinfo;
2920
2921		if (len != sizeof(struct in6_pktinfo))
2922			return (EINVAL);
2923
2924		pktinfo = (struct in6_pktinfo *)buf;
2925
2926		/*
2927		 * An application can clear any sticky IPV6_PKTINFO option by
2928		 * doing a "regular" setsockopt with ipi6_addr being
2929		 * in6addr_any and ipi6_ifindex being zero.
2930		 * [RFC 3542, Section 6]
2931		 */
2932		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2933		    pktinfo->ipi6_ifindex == 0 &&
2934		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2935			ip6_clearpktopts(opt, optname);
2936			break;
2937		}
2938
2939		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2940		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2941			return (EINVAL);
2942		}
2943
2944		/* validate the interface index if specified. */
2945		if (pktinfo->ipi6_ifindex > if_index ||
2946		    pktinfo->ipi6_ifindex < 0) {
2947			 return (ENXIO);
2948		}
2949		if (pktinfo->ipi6_ifindex) {
2950			ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
2951			if (ifp == NULL)
2952				return (ENXIO);
2953		}
2954
2955		/*
2956		 * We store the address anyway, and let in6_selectsrc()
2957		 * validate the specified address.  This is because ipi6_addr
2958		 * may not have enough information about its scope zone, and
2959		 * we may need additional information (such as outgoing
2960		 * interface or the scope zone of a destination address) to
2961		 * disambiguate the scope.
2962		 * XXX: the delay of the validation may confuse the
2963		 * application when it is used as a sticky option.
2964		 */
2965		if (sticky) {
2966			if (opt->ip6po_pktinfo == NULL) {
2967				opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2968				    M_IP6OPT, M_WAITOK);
2969			}
2970			bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
2971		} else
2972			opt->ip6po_pktinfo = pktinfo;
2973		break;
2974	}
2975
2976	case IPV6_2292HOPLIMIT:
2977	case IPV6_HOPLIMIT:
2978	{
2979		int *hlimp;
2980
2981		/*
2982		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2983		 * to simplify the ordering among hoplimit options.
2984		 */
2985		if (optname == IPV6_HOPLIMIT && sticky)
2986			return (ENOPROTOOPT);
2987
2988		if (len != sizeof(int))
2989			return (EINVAL);
2990		hlimp = (int *)buf;
2991		if (*hlimp < -1 || *hlimp > 255)
2992			return (EINVAL);
2993
2994		opt->ip6po_hlim = *hlimp;
2995		break;
2996	}
2997
2998	case IPV6_TCLASS:
2999	{
3000		int tclass;
3001
3002		if (len != sizeof(int))
3003			return (EINVAL);
3004		tclass = *(int *)buf;
3005		if (tclass < -1 || tclass > 255)
3006			return (EINVAL);
3007
3008		opt->ip6po_tclass = tclass;
3009		break;
3010	}
3011
3012	case IPV6_2292NEXTHOP:
3013	case IPV6_NEXTHOP:
3014		if (!priv)
3015			return (EPERM);
3016
3017		if (len == 0) {	/* just remove the option */
3018			ip6_clearpktopts(opt, IPV6_NEXTHOP);
3019			break;
3020		}
3021
3022		/* check if cmsg_len is large enough for sa_len */
3023		if (len < sizeof(struct sockaddr) || len < *buf)
3024			return (EINVAL);
3025
3026		switch (((struct sockaddr *)buf)->sa_family) {
3027		case AF_INET6:
3028		{
3029			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3030#if 0
3031			int error;
3032#endif
3033
3034			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3035				return (EINVAL);
3036
3037			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3038			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3039				return (EINVAL);
3040			}
3041#if 0
3042			if ((error = scope6_check_id(sa6, ip6_use_defzone))
3043			    != 0) {
3044				return (error);
3045			}
3046#endif
3047			sa6->sin6_scope_id = 0; /* XXX */
3048			break;
3049		}
3050		case AF_LINK:	/* should eventually be supported */
3051		default:
3052			return (EAFNOSUPPORT);
3053		}
3054
3055		/* turn off the previous option, then set the new option. */
3056		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3057		if (sticky) {
3058			opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_WAITOK);
3059			bcopy(buf, opt->ip6po_nexthop, *buf);
3060		} else
3061			opt->ip6po_nexthop = (struct sockaddr *)buf;
3062		break;
3063
3064	case IPV6_2292HOPOPTS:
3065	case IPV6_HOPOPTS:
3066	{
3067		struct ip6_hbh *hbh;
3068		int hbhlen;
3069
3070		/*
3071		 * XXX: We don't allow a non-privileged user to set ANY HbH
3072		 * options, since per-option restriction has too much
3073		 * overhead.
3074		 */
3075		if (!priv)
3076			return (EPERM);
3077
3078		if (len == 0) {
3079			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3080			break;	/* just remove the option */
3081		}
3082
3083		/* message length validation */
3084		if (len < sizeof(struct ip6_hbh))
3085			return (EINVAL);
3086		hbh = (struct ip6_hbh *)buf;
3087		hbhlen = (hbh->ip6h_len + 1) << 3;
3088		if (len != hbhlen)
3089			return (EINVAL);
3090
3091		/* turn off the previous option, then set the new option. */
3092		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3093		if (sticky) {
3094			opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_WAITOK);
3095			bcopy(hbh, opt->ip6po_hbh, hbhlen);
3096		} else
3097			opt->ip6po_hbh = hbh;
3098
3099		break;
3100	}
3101
3102	case IPV6_2292DSTOPTS:
3103	case IPV6_DSTOPTS:
3104	case IPV6_RTHDRDSTOPTS:
3105	{
3106		struct ip6_dest *dest, **newdest = NULL;
3107		int destlen;
3108
3109		if (!priv)	/* XXX: see the comment for IPV6_HOPOPTS */
3110			return (EPERM);
3111
3112		if (len == 0) {
3113			ip6_clearpktopts(opt, optname);
3114			break;	/* just remove the option */
3115		}
3116
3117		/* message length validation */
3118		if (len < sizeof(struct ip6_dest))
3119			return (EINVAL);
3120		dest = (struct ip6_dest *)buf;
3121		destlen = (dest->ip6d_len + 1) << 3;
3122		if (len != destlen)
3123			return (EINVAL);
3124
3125		/*
3126		 * Determine the position that the destination options header
3127		 * should be inserted; before or after the routing header.
3128		 */
3129		switch (optname) {
3130		case IPV6_2292DSTOPTS:
3131			/*
3132			 * The old advacned API is ambiguous on this point.
3133			 * Our approach is to determine the position based
3134			 * according to the existence of a routing header.
3135			 * Note, however, that this depends on the order of the
3136			 * extension headers in the ancillary data; the 1st
3137			 * part of the destination options header must appear
3138			 * before the routing header in the ancillary data,
3139			 * too.
3140			 * RFC2292bis solved the ambiguity by introducing
3141			 * separate ancillary data or option types.
3142			 */
3143			if (opt->ip6po_rthdr == NULL)
3144				newdest = &opt->ip6po_dest1;
3145			else
3146				newdest = &opt->ip6po_dest2;
3147			break;
3148		case IPV6_RTHDRDSTOPTS:
3149			newdest = &opt->ip6po_dest1;
3150			break;
3151		case IPV6_DSTOPTS:
3152			newdest = &opt->ip6po_dest2;
3153			break;
3154		}
3155
3156		/* turn off the previous option, then set the new option. */
3157		ip6_clearpktopts(opt, optname);
3158		if (sticky) {
3159			*newdest = malloc(destlen, M_IP6OPT, M_WAITOK);
3160			bcopy(dest, *newdest, destlen);
3161		} else
3162			*newdest = dest;
3163
3164		break;
3165	}
3166
3167	case IPV6_2292RTHDR:
3168	case IPV6_RTHDR:
3169	{
3170		struct ip6_rthdr *rth;
3171		int rthlen;
3172
3173		if (len == 0) {
3174			ip6_clearpktopts(opt, IPV6_RTHDR);
3175			break;	/* just remove the option */
3176		}
3177
3178		/* message length validation */
3179		if (len < sizeof(struct ip6_rthdr))
3180			return (EINVAL);
3181		rth = (struct ip6_rthdr *)buf;
3182		rthlen = (rth->ip6r_len + 1) << 3;
3183		if (len != rthlen)
3184			return (EINVAL);
3185
3186		switch (rth->ip6r_type) {
3187		case IPV6_RTHDR_TYPE_0:
3188			if (rth->ip6r_len == 0)	/* must contain one addr */
3189				return (EINVAL);
3190			if (rth->ip6r_len % 2) /* length must be even */
3191				return (EINVAL);
3192			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3193				return (EINVAL);
3194			break;
3195		default:
3196			return (EINVAL);	/* not supported */
3197		}
3198
3199		/* turn off the previous option */
3200		ip6_clearpktopts(opt, IPV6_RTHDR);
3201		if (sticky) {
3202			opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_WAITOK);
3203			bcopy(rth, opt->ip6po_rthdr, rthlen);
3204		} else
3205			opt->ip6po_rthdr = rth;
3206
3207		break;
3208	}
3209
3210	case IPV6_USE_MIN_MTU:
3211		if (len != sizeof(int))
3212			return (EINVAL);
3213		minmtupolicy = *(int *)buf;
3214		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3215		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3216		    minmtupolicy != IP6PO_MINMTU_ALL) {
3217			return (EINVAL);
3218		}
3219		opt->ip6po_minmtu = minmtupolicy;
3220		break;
3221
3222	case IPV6_DONTFRAG:
3223		if (len != sizeof(int))
3224			return (EINVAL);
3225
3226		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3227			/*
3228			 * we ignore this option for TCP sockets.
3229			 * (rfc2292bis leaves this case unspecified.)
3230			 */
3231			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3232		} else
3233			opt->ip6po_flags |= IP6PO_DONTFRAG;
3234		break;
3235
3236	case IPV6_PREFER_TEMPADDR:
3237		if (len != sizeof(int))
3238			return (EINVAL);
3239		preftemp = *(int *)buf;
3240		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3241		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3242		    preftemp != IP6PO_TEMPADDR_PREFER) {
3243			return (EINVAL);
3244		}
3245		opt->ip6po_prefer_tempaddr = preftemp;
3246		break;
3247
3248	default:
3249		return (ENOPROTOOPT);
3250	} /* end of switch */
3251
3252	return (0);
3253}
3254
3255/*
3256 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3257 * packet to the input queue of a specified interface.  Note that this
3258 * calls the output routine of the loopback "driver", but with an interface
3259 * pointer that might NOT be &loif -- easier than replicating that code here.
3260 */
3261void
3262ip6_mloopback(ifp, m, dst)
3263	struct ifnet *ifp;
3264	struct mbuf *m;
3265	struct sockaddr_in6 *dst;
3266{
3267	struct mbuf *copym;
3268	struct ip6_hdr *ip6;
3269
3270	copym = m_copy(m, 0, M_COPYALL);
3271	if (copym == NULL)
3272		return;
3273
3274	/*
3275	 * Make sure to deep-copy IPv6 header portion in case the data
3276	 * is in an mbuf cluster, so that we can safely override the IPv6
3277	 * header portion later.
3278	 */
3279	if ((copym->m_flags & M_EXT) != 0 ||
3280	    copym->m_len < sizeof(struct ip6_hdr)) {
3281		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3282		if (copym == NULL)
3283			return;
3284	}
3285
3286#ifdef DIAGNOSTIC
3287	if (copym->m_len < sizeof(*ip6)) {
3288		m_freem(copym);
3289		return;
3290	}
3291#endif
3292
3293	ip6 = mtod(copym, struct ip6_hdr *);
3294	/*
3295	 * clear embedded scope identifiers if necessary.
3296	 * in6_clearscope will touch the addresses only when necessary.
3297	 */
3298	in6_clearscope(&ip6->ip6_src);
3299	in6_clearscope(&ip6->ip6_dst);
3300
3301	(void)if_simloop(ifp, copym, dst->sin6_family, NULL);
3302}
3303
3304/*
3305 * Chop IPv6 header off from the payload.
3306 */
3307static int
3308ip6_splithdr(m, exthdrs)
3309	struct mbuf *m;
3310	struct ip6_exthdrs *exthdrs;
3311{
3312	struct mbuf *mh;
3313	struct ip6_hdr *ip6;
3314
3315	ip6 = mtod(m, struct ip6_hdr *);
3316	if (m->m_len > sizeof(*ip6)) {
3317		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3318		if (mh == 0) {
3319			m_freem(m);
3320			return ENOBUFS;
3321		}
3322		M_MOVE_PKTHDR(mh, m);
3323		MH_ALIGN(mh, sizeof(*ip6));
3324		m->m_len -= sizeof(*ip6);
3325		m->m_data += sizeof(*ip6);
3326		mh->m_next = m;
3327		m = mh;
3328		m->m_len = sizeof(*ip6);
3329		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3330	}
3331	exthdrs->ip6e_ip6 = m;
3332	return 0;
3333}
3334
3335/*
3336 * Compute IPv6 extension header length.
3337 */
3338int
3339ip6_optlen(in6p)
3340	struct in6pcb *in6p;
3341{
3342	int len;
3343
3344	if (!in6p->in6p_outputopts)
3345		return 0;
3346
3347	len = 0;
3348#define elen(x) \
3349    (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3350
3351	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3352	if (in6p->in6p_outputopts->ip6po_rthdr)
3353		/* dest1 is valid with rthdr only */
3354		len += elen(in6p->in6p_outputopts->ip6po_dest1);
3355	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3356	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3357	return len;
3358#undef elen
3359}
3360