ip6_output.c revision 189106
1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $
30 */
31
32/*-
33 * Copyright (c) 1982, 1986, 1988, 1990, 1993
34 *	The Regents of the University of California.  All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 4. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
61 */
62
63#include <sys/cdefs.h>
64__FBSDID("$FreeBSD: head/sys/netinet6/ip6_output.c 189106 2009-02-27 14:12:05Z bz $");
65
66#include "opt_inet.h"
67#include "opt_inet6.h"
68#include "opt_ipsec.h"
69#include "opt_route.h"
70
71#include <sys/param.h>
72#include <sys/kernel.h>
73#include <sys/malloc.h>
74#include <sys/mbuf.h>
75#include <sys/errno.h>
76#include <sys/priv.h>
77#include <sys/proc.h>
78#include <sys/protosw.h>
79#include <sys/socket.h>
80#include <sys/socketvar.h>
81#include <sys/ucred.h>
82#include <sys/vimage.h>
83
84#include <net/if.h>
85#include <net/netisr.h>
86#include <net/route.h>
87#include <net/pfil.h>
88#include <net/vnet.h>
89
90#include <netinet/in.h>
91#include <netinet/in_var.h>
92#include <netinet6/in6_var.h>
93#include <netinet/ip6.h>
94#include <netinet/icmp6.h>
95#include <netinet6/ip6_var.h>
96#include <netinet/in_pcb.h>
97#include <netinet/tcp_var.h>
98#include <netinet6/nd6.h>
99#include <netinet/vinet.h>
100
101#ifdef IPSEC
102#include <netipsec/ipsec.h>
103#include <netipsec/ipsec6.h>
104#include <netipsec/key.h>
105#include <netinet6/ip6_ipsec.h>
106#endif /* IPSEC */
107
108#include <netinet6/ip6protosw.h>
109#include <netinet6/scope6_var.h>
110#include <netinet6/vinet6.h>
111
112static MALLOC_DEFINE(M_IP6MOPTS, "ip6_moptions", "internet multicast options");
113
/*
 * Set of mbufs used while composing an outgoing packet: one slot for the
 * IPv6 header itself and one slot per extension header that may be
 * inserted.  A NULL slot means that header is not present.
 */
struct ip6_exthdrs {
	struct mbuf *ip6e_ip6;		/* IPv6 header */
	struct mbuf *ip6e_hbh;		/* hop-by-hop options header */
	struct mbuf *ip6e_dest1;	/* destination options header, 1st part */
	struct mbuf *ip6e_rthdr;	/* routing header */
	struct mbuf *ip6e_dest2;	/* destination options header, 2nd part */
};
121
122static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
123			   struct ucred *, int));
124static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
125	struct socket *, struct sockopt *));
126static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
127static int ip6_setpktopt __P((int, u_char *, int, struct ip6_pktopts *,
128	struct ucred *, int, int, int));
129
130static int ip6_setmoptions(int, struct ip6_moptions **, struct mbuf *);
131static int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf **);
132static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
133static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
134	struct ip6_frag **));
135static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
136static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
137static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
138	struct ifnet *, struct in6_addr *, u_long *, int *));
139static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
140
141
/*
 * Make an extension header from option data.  hp is the source (a pointer
 * to option data that starts with a struct ip6_ext), and mp is the
 * destination, handed unchanged to ip6_copyexthdr().  Nothing is done when
 * hp is NULL.
 *
 * The number of bytes copied is (ip6e_len + 1) << 3, computed from the
 * length field of the source header.
 *
 * The macro must be expanded inside a function that has an "error"
 * variable and a "freehdrs" label in scope: a non-zero result from
 * ip6_copyexthdr() is stored in error and control jumps to freehdrs.
 */
#define MAKE_EXTHDR(hp, mp)						\
    do {								\
	if ((hp) != NULL) {						\
		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
		    ((eh)->ip6e_len + 1) << 3);				\
		if (error != 0)						\
			goto freehdrs;					\
	}								\
    } while (/*CONSTCOND*/ 0)
156
/*
 * Form a chain of extension headers.
 * m is the extension header mbuf to insert (nothing is done if it is NULL)
 * mp is the previous mbuf in the chain; m is linked right after it and
 *    mp is then updated to m
 * p points at the "next header" byte of the preceding header; its old
 *    value is moved into the first byte of m, and p is then updated to
 *    point at that first byte
 * i is the type of option, stored through p as the new next-header value.
 *
 * mp and p must be modifiable lvalues.  The macro reads the "hdrsplit"
 * variable of the enclosing function and panics if it is zero.
 */
#define MAKE_CHAIN(m, mp, p, i)\
    do {\
	if ((m) != NULL) {\
		if (!hdrsplit) \
			panic("assumption failed: hdr not split"); \
		*mtod((m), u_char *) = *(p);\
		*(p) = (i);\
		(p) = mtod((m), u_char *);\
		(m)->m_next = (mp)->m_next;\
		(mp)->m_next = (m);\
		(mp) = (m);\
	}\
    } while (/*CONSTCOND*/ 0)
177
178/*
179 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
180 * header (with pri, len, nxt, hlim, src, dst).
181 * This function may modify ver and hlim only.
182 * The mbuf chain containing the packet will be freed.
183 * The mbuf opt, if present, will not be freed.
184 *
185 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
186 * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
187 * which is rt_rmx.rmx_mtu.
188 *
189 * ifpp - XXX: just for statistics
190 */
191int
192ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
193    struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
194    struct ifnet **ifpp, struct inpcb *inp)
195{
196	INIT_VNET_NET(curvnet);
197	INIT_VNET_INET6(curvnet);
198	struct ip6_hdr *ip6, *mhip6;
199	struct ifnet *ifp, *origifp;
200	struct mbuf *m = m0;
201	struct mbuf *mprev = NULL;
202	int hlen, tlen, len, off;
203	struct route_in6 ip6route;
204	struct rtentry *rt = NULL;
205	struct sockaddr_in6 *dst, src_sa, dst_sa;
206	struct in6_addr odst;
207	int error = 0;
208	struct in6_ifaddr *ia = NULL;
209	u_long mtu;
210	int alwaysfrag, dontfrag;
211	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
212	struct ip6_exthdrs exthdrs;
213	struct in6_addr finaldst, src0, dst0;
214	u_int32_t zone;
215	struct route_in6 *ro_pmtu = NULL;
216	int hdrsplit = 0;
217	int needipsec = 0;
218#ifdef IPSEC
219	struct ipsec_output_state state;
220	struct ip6_rthdr *rh = NULL;
221	int needipsectun = 0;
222	int segleft_org = 0;
223	struct secpolicy *sp = NULL;
224#endif /* IPSEC */
225
226	ip6 = mtod(m, struct ip6_hdr *);
227	if (ip6 == NULL) {
228		printf ("ip6 is NULL");
229		goto bad;
230	}
231
232	finaldst = ip6->ip6_dst;
233
234	bzero(&exthdrs, sizeof(exthdrs));
235
236	if (opt) {
237		/* Hop-by-Hop options header */
238		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
239		/* Destination options header(1st part) */
240		if (opt->ip6po_rthdr) {
241			/*
242			 * Destination options header(1st part)
243			 * This only makes sense with a routing header.
244			 * See Section 9.2 of RFC 3542.
245			 * Disabling this part just for MIP6 convenience is
246			 * a bad idea.  We need to think carefully about a
247			 * way to make the advanced API coexist with MIP6
248			 * options, which might automatically be inserted in
249			 * the kernel.
250			 */
251			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
252		}
253		/* Routing header */
254		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
255		/* Destination options header(2nd part) */
256		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
257	}
258
259	/*
260	 * IPSec checking which handles several cases.
261	 * FAST IPSEC: We re-injected the packet.
262	 */
263#ifdef IPSEC
264	switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
265	{
266	case 1:                 /* Bad packet */
267		goto freehdrs;
268	case -1:                /* Do IPSec */
269		needipsec = 1;
270	case 0:                 /* No IPSec */
271	default:
272		break;
273	}
274#endif /* IPSEC */
275
276	/*
277	 * Calculate the total length of the extension header chain.
278	 * Keep the length of the unfragmentable part for fragmentation.
279	 */
280	optlen = 0;
281	if (exthdrs.ip6e_hbh)
282		optlen += exthdrs.ip6e_hbh->m_len;
283	if (exthdrs.ip6e_dest1)
284		optlen += exthdrs.ip6e_dest1->m_len;
285	if (exthdrs.ip6e_rthdr)
286		optlen += exthdrs.ip6e_rthdr->m_len;
287	unfragpartlen = optlen + sizeof(struct ip6_hdr);
288
289	/* NOTE: we don't add AH/ESP length here. do that later. */
290	if (exthdrs.ip6e_dest2)
291		optlen += exthdrs.ip6e_dest2->m_len;
292
293	/*
294	 * If we need IPsec, or there is at least one extension header,
295	 * separate IP6 header from the payload.
296	 */
297	if ((needipsec || optlen) && !hdrsplit) {
298		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
299			m = NULL;
300			goto freehdrs;
301		}
302		m = exthdrs.ip6e_ip6;
303		hdrsplit++;
304	}
305
306	/* adjust pointer */
307	ip6 = mtod(m, struct ip6_hdr *);
308
309	/* adjust mbuf packet header length */
310	m->m_pkthdr.len += optlen;
311	plen = m->m_pkthdr.len - sizeof(*ip6);
312
313	/* If this is a jumbo payload, insert a jumbo payload option. */
314	if (plen > IPV6_MAXPACKET) {
315		if (!hdrsplit) {
316			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
317				m = NULL;
318				goto freehdrs;
319			}
320			m = exthdrs.ip6e_ip6;
321			hdrsplit++;
322		}
323		/* adjust pointer */
324		ip6 = mtod(m, struct ip6_hdr *);
325		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
326			goto freehdrs;
327		ip6->ip6_plen = 0;
328	} else
329		ip6->ip6_plen = htons(plen);
330
331	/*
332	 * Concatenate headers and fill in next header fields.
333	 * Here we have, on "m"
334	 *	IPv6 payload
335	 * and we insert headers accordingly.  Finally, we should be getting:
336	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
337	 *
338	 * during the header composing process, "m" points to IPv6 header.
339	 * "mprev" points to an extension header prior to esp.
340	 */
341	u_char *nexthdrp = &ip6->ip6_nxt;
342	mprev = m;
343
344	/*
345	 * we treat dest2 specially.  this makes IPsec processing
346	 * much easier.  the goal here is to make mprev point the
347	 * mbuf prior to dest2.
348	 *
349	 * result: IPv6 dest2 payload
350	 * m and mprev will point to IPv6 header.
351	 */
352	if (exthdrs.ip6e_dest2) {
353		if (!hdrsplit)
354			panic("assumption failed: hdr not split");
355		exthdrs.ip6e_dest2->m_next = m->m_next;
356		m->m_next = exthdrs.ip6e_dest2;
357		*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
358		ip6->ip6_nxt = IPPROTO_DSTOPTS;
359	}
360
361	/*
362	 * result: IPv6 hbh dest1 rthdr dest2 payload
363	 * m will point to IPv6 header.  mprev will point to the
364	 * extension header prior to dest2 (rthdr in the above case).
365	 */
366	MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
367	MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
368		   IPPROTO_DSTOPTS);
369	MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
370		   IPPROTO_ROUTING);
371
372#ifdef IPSEC
373	if (!needipsec)
374		goto skip_ipsec2;
375
376	/*
377	 * pointers after IPsec headers are not valid any more.
378	 * other pointers need a great care too.
379	 * (IPsec routines should not mangle mbufs prior to AH/ESP)
380	 */
381	exthdrs.ip6e_dest2 = NULL;
382
383	if (exthdrs.ip6e_rthdr) {
384		rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
385		segleft_org = rh->ip6r_segleft;
386		rh->ip6r_segleft = 0;
387	}
388
389	bzero(&state, sizeof(state));
390	state.m = m;
391	error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
392				    &needipsectun);
393	m = state.m;
394	if (error == EJUSTRETURN) {
395		/*
396		 * We had a SP with a level of 'use' and no SA. We
397		 * will just continue to process the packet without
398		 * IPsec processing.
399		 */
400		;
401	} else if (error) {
402		/* mbuf is already reclaimed in ipsec6_output_trans. */
403		m = NULL;
404		switch (error) {
405		case EHOSTUNREACH:
406		case ENETUNREACH:
407		case EMSGSIZE:
408		case ENOBUFS:
409		case ENOMEM:
410			break;
411		default:
412			printf("[%s:%d] (ipsec): error code %d\n",
413			    __func__, __LINE__, error);
414			/* FALLTHROUGH */
415		case ENOENT:
416			/* don't show these error codes to the user */
417			error = 0;
418			break;
419		}
420		goto bad;
421	} else if (!needipsectun) {
422		/*
423		 * In the FAST IPSec case we have already
424		 * re-injected the packet and it has been freed
425		 * by the ipsec_done() function.  So, just clean
426		 * up after ourselves.
427		 */
428		m = NULL;
429		goto done;
430	}
431	if (exthdrs.ip6e_rthdr) {
432		/* ah6_output doesn't modify mbuf chain */
433		rh->ip6r_segleft = segleft_org;
434	}
435skip_ipsec2:;
436#endif /* IPSEC */
437
438	/*
439	 * If there is a routing header, replace the destination address field
440	 * with the first hop of the routing header.
441	 */
442	if (exthdrs.ip6e_rthdr) {
443		struct ip6_rthdr *rh =
444			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
445						  struct ip6_rthdr *));
446		struct ip6_rthdr0 *rh0;
447		struct in6_addr *addr;
448		struct sockaddr_in6 sa;
449
450		switch (rh->ip6r_type) {
451		case IPV6_RTHDR_TYPE_0:
452			 rh0 = (struct ip6_rthdr0 *)rh;
453			 addr = (struct in6_addr *)(rh0 + 1);
454
455			 /*
456			  * construct a sockaddr_in6 form of
457			  * the first hop.
458			  *
459			  * XXX: we may not have enough
460			  * information about its scope zone;
461			  * there is no standard API to pass
462			  * the information from the
463			  * application.
464			  */
465			 bzero(&sa, sizeof(sa));
466			 sa.sin6_family = AF_INET6;
467			 sa.sin6_len = sizeof(sa);
468			 sa.sin6_addr = addr[0];
469			 if ((error = sa6_embedscope(&sa,
470			     V_ip6_use_defzone)) != 0) {
471				 goto bad;
472			 }
473			 ip6->ip6_dst = sa.sin6_addr;
474			 bcopy(&addr[1], &addr[0], sizeof(struct in6_addr)
475			     * (rh0->ip6r0_segleft - 1));
476			 addr[rh0->ip6r0_segleft - 1] = finaldst;
477			 /* XXX */
478			 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
479			 break;
480		default:	/* is it possible? */
481			 error = EINVAL;
482			 goto bad;
483		}
484	}
485
486	/* Source address validation */
487	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
488	    (flags & IPV6_UNSPECSRC) == 0) {
489		error = EOPNOTSUPP;
490		V_ip6stat.ip6s_badscope++;
491		goto bad;
492	}
493	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
494		error = EOPNOTSUPP;
495		V_ip6stat.ip6s_badscope++;
496		goto bad;
497	}
498
499	V_ip6stat.ip6s_localout++;
500
501	/*
502	 * Route packet.
503	 */
504	if (ro == 0) {
505		ro = &ip6route;
506		bzero((caddr_t)ro, sizeof(*ro));
507	}
508	ro_pmtu = ro;
509	if (opt && opt->ip6po_rthdr)
510		ro = &opt->ip6po_route;
511	dst = (struct sockaddr_in6 *)&ro->ro_dst;
512
513again:
514	/*
515	 * if specified, try to fill in the traffic class field.
516	 * do not override if a non-zero value is already set.
517	 * we check the diffserv field and the ecn field separately.
518	 */
519	if (opt && opt->ip6po_tclass >= 0) {
520		int mask = 0;
521
522		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
523			mask |= 0xfc;
524		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
525			mask |= 0x03;
526		if (mask != 0)
527			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
528	}
529
530	/* fill in or override the hop limit field, if necessary. */
531	if (opt && opt->ip6po_hlim != -1)
532		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
533	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
534		if (im6o != NULL)
535			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
536		else
537			ip6->ip6_hlim = V_ip6_defmcasthlim;
538	}
539
540#ifdef IPSEC
541	/*
542	 * We may re-inject packets into the stack here.
543	 */
544	if (needipsec && needipsectun) {
545		struct ipsec_output_state state;
546
547		/*
548		 * All the extension headers will become inaccessible
549		 * (since they can be encrypted).
550		 * Don't panic, we need no more updates to extension headers
551		 * on inner IPv6 packet (since they are now encapsulated).
552		 *
553		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
554		 */
555		bzero(&exthdrs, sizeof(exthdrs));
556		exthdrs.ip6e_ip6 = m;
557
558		bzero(&state, sizeof(state));
559		state.m = m;
560		state.ro = (struct route *)ro;
561		state.dst = (struct sockaddr *)dst;
562
563		error = ipsec6_output_tunnel(&state, sp, flags);
564
565		m = state.m;
566		ro = (struct route_in6 *)state.ro;
567		dst = (struct sockaddr_in6 *)state.dst;
568		if (error == EJUSTRETURN) {
569			/*
570			 * We had a SP with a level of 'use' and no SA. We
571			 * will just continue to process the packet without
572			 * IPsec processing.
573			 */
574			;
575		} else if (error) {
576			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
577			m0 = m = NULL;
578			m = NULL;
579			switch (error) {
580			case EHOSTUNREACH:
581			case ENETUNREACH:
582			case EMSGSIZE:
583			case ENOBUFS:
584			case ENOMEM:
585				break;
586			default:
587				printf("[%s:%d] (ipsec): error code %d\n",
588				    __func__, __LINE__, error);
589				/* FALLTHROUGH */
590			case ENOENT:
591				/* don't show these error codes to the user */
592				error = 0;
593				break;
594			}
595			goto bad;
596		} else {
597			/*
598			 * In the FAST IPSec case we have already
599			 * re-injected the packet and it has been freed
600			 * by the ipsec_done() function.  So, just clean
601			 * up after ourselves.
602			 */
603			m = NULL;
604			goto done;
605		}
606
607		exthdrs.ip6e_ip6 = m;
608	}
609#endif /* IPSEC */
610
611	/* adjust pointer */
612	ip6 = mtod(m, struct ip6_hdr *);
613
614	bzero(&dst_sa, sizeof(dst_sa));
615	dst_sa.sin6_family = AF_INET6;
616	dst_sa.sin6_len = sizeof(dst_sa);
617	dst_sa.sin6_addr = ip6->ip6_dst;
618	if ((error = in6_selectroute(&dst_sa, opt, im6o, ro,
619	    &ifp, &rt)) != 0) {
620		switch (error) {
621		case EHOSTUNREACH:
622			V_ip6stat.ip6s_noroute++;
623			break;
624		case EADDRNOTAVAIL:
625		default:
626			break; /* XXX statistics? */
627		}
628		if (ifp != NULL)
629			in6_ifstat_inc(ifp, ifs6_out_discard);
630		goto bad;
631	}
632	if (rt == NULL) {
633		/*
634		 * If in6_selectroute() does not return a route entry,
635		 * dst may not have been updated.
636		 */
637		*dst = dst_sa;	/* XXX */
638	}
639
640	/*
641	 * then rt (for unicast) and ifp must be non-NULL valid values.
642	 */
643	if ((flags & IPV6_FORWARDING) == 0) {
644		/* XXX: the FORWARDING flag can be set for mrouting. */
645		in6_ifstat_inc(ifp, ifs6_out_request);
646	}
647	if (rt != NULL) {
648		ia = (struct in6_ifaddr *)(rt->rt_ifa);
649		rt->rt_use++;
650	}
651
652	/*
653	 * The outgoing interface must be in the zone of source and
654	 * destination addresses.  We should use ia_ifp to support the
655	 * case of sending packets to an address of our own.
656	 */
657	if (ia != NULL && ia->ia_ifp)
658		origifp = ia->ia_ifp;
659	else
660		origifp = ifp;
661
662	src0 = ip6->ip6_src;
663	if (in6_setscope(&src0, origifp, &zone))
664		goto badscope;
665	bzero(&src_sa, sizeof(src_sa));
666	src_sa.sin6_family = AF_INET6;
667	src_sa.sin6_len = sizeof(src_sa);
668	src_sa.sin6_addr = ip6->ip6_src;
669	if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
670		goto badscope;
671
672	dst0 = ip6->ip6_dst;
673	if (in6_setscope(&dst0, origifp, &zone))
674		goto badscope;
675	/* re-initialize to be sure */
676	bzero(&dst_sa, sizeof(dst_sa));
677	dst_sa.sin6_family = AF_INET6;
678	dst_sa.sin6_len = sizeof(dst_sa);
679	dst_sa.sin6_addr = ip6->ip6_dst;
680	if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
681		goto badscope;
682	}
683
684	/* scope check is done. */
685	goto routefound;
686
687  badscope:
688	V_ip6stat.ip6s_badscope++;
689	in6_ifstat_inc(origifp, ifs6_out_discard);
690	if (error == 0)
691		error = EHOSTUNREACH; /* XXX */
692	goto bad;
693
694  routefound:
695	if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
696		if (opt && opt->ip6po_nextroute.ro_rt) {
697			/*
698			 * The nexthop is explicitly specified by the
699			 * application.  We assume the next hop is an IPv6
700			 * address.
701			 */
702			dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
703		}
704		else if ((rt->rt_flags & RTF_GATEWAY))
705			dst = (struct sockaddr_in6 *)rt->rt_gateway;
706	}
707
708	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
709		m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
710	} else {
711		struct	in6_multi *in6m;
712
713		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
714
715		in6_ifstat_inc(ifp, ifs6_out_mcast);
716
717		/*
718		 * Confirm that the outgoing interface supports multicast.
719		 */
720		if (!(ifp->if_flags & IFF_MULTICAST)) {
721			V_ip6stat.ip6s_noroute++;
722			in6_ifstat_inc(ifp, ifs6_out_discard);
723			error = ENETUNREACH;
724			goto bad;
725		}
726		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
727		if (in6m != NULL &&
728		   (im6o == NULL || im6o->im6o_multicast_loop)) {
729			/*
730			 * If we belong to the destination multicast group
731			 * on the outgoing interface, and the caller did not
732			 * forbid loopback, loop back a copy.
733			 */
734			ip6_mloopback(ifp, m, dst);
735		} else {
736			/*
737			 * If we are acting as a multicast router, perform
738			 * multicast forwarding as if the packet had just
739			 * arrived on the interface to which we are about
740			 * to send.  The multicast forwarding function
741			 * recursively calls this function, using the
742			 * IPV6_FORWARDING flag to prevent infinite recursion.
743			 *
744			 * Multicasts that are looped back by ip6_mloopback(),
745			 * above, will be forwarded by the ip6_input() routine,
746			 * if necessary.
747			 */
748			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
749				/*
750				 * XXX: ip6_mforward expects that rcvif is NULL
751				 * when it is called from the originating path.
752				 * However, it is not always the case, since
753				 * some versions of MGETHDR() does not
754				 * initialize the field.
755				 */
756				m->m_pkthdr.rcvif = NULL;
757				if (ip6_mforward(ip6, ifp, m) != 0) {
758					m_freem(m);
759					goto done;
760				}
761			}
762		}
763		/*
764		 * Multicasts with a hoplimit of zero may be looped back,
765		 * above, but must not be transmitted on a network.
766		 * Also, multicasts addressed to the loopback interface
767		 * are not sent -- the above call to ip6_mloopback() will
768		 * loop back a copy if this host actually belongs to the
769		 * destination group on the loopback interface.
770		 */
771		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
772		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
773			m_freem(m);
774			goto done;
775		}
776	}
777
778	/*
779	 * Fill the outgoing inteface to tell the upper layer
780	 * to increment per-interface statistics.
781	 */
782	if (ifpp)
783		*ifpp = ifp;
784
785	/* Determine path MTU. */
786	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
787	    &alwaysfrag)) != 0)
788		goto bad;
789
790	/*
791	 * The caller of this function may specify to use the minimum MTU
792	 * in some cases.
793	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
794	 * setting.  The logic is a bit complicated; by default, unicast
795	 * packets will follow path MTU while multicast packets will be sent at
796	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
797	 * including unicast ones will be sent at the minimum MTU.  Multicast
798	 * packets will always be sent at the minimum MTU unless
799	 * IP6PO_MINMTU_DISABLE is explicitly specified.
800	 * See RFC 3542 for more details.
801	 */
802	if (mtu > IPV6_MMTU) {
803		if ((flags & IPV6_MINMTU))
804			mtu = IPV6_MMTU;
805		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
806			mtu = IPV6_MMTU;
807		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
808			 (opt == NULL ||
809			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
810			mtu = IPV6_MMTU;
811		}
812	}
813
814	/*
815	 * clear embedded scope identifiers if necessary.
816	 * in6_clearscope will touch the addresses only when necessary.
817	 */
818	in6_clearscope(&ip6->ip6_src);
819	in6_clearscope(&ip6->ip6_dst);
820
821	/*
822	 * If the outgoing packet contains a hop-by-hop options header,
823	 * it must be examined and processed even by the source node.
824	 * (RFC 2460, section 4.)
825	 */
826	if (exthdrs.ip6e_hbh) {
827		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
828		u_int32_t dummy; /* XXX unused */
829		u_int32_t plen = 0; /* XXX: ip6_process will check the value */
830
831#ifdef DIAGNOSTIC
832		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
833			panic("ip6e_hbh is not continuous");
834#endif
835		/*
836		 *  XXX: if we have to send an ICMPv6 error to the sender,
837		 *       we need the M_LOOP flag since icmp6_error() expects
838		 *       the IPv6 and the hop-by-hop options header are
839		 *       continuous unless the flag is set.
840		 */
841		m->m_flags |= M_LOOP;
842		m->m_pkthdr.rcvif = ifp;
843		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
844		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
845		    &dummy, &plen) < 0) {
846			/* m was already freed at this point */
847			error = EINVAL;/* better error? */
848			goto done;
849		}
850		m->m_flags &= ~M_LOOP; /* XXX */
851		m->m_pkthdr.rcvif = NULL;
852	}
853
854	/* Jump over all PFIL processing if hooks are not active. */
855	if (!PFIL_HOOKED(&inet6_pfil_hook))
856		goto passout;
857
858	odst = ip6->ip6_dst;
859	/* Run through list of hooks for output packets. */
860	error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
861	if (error != 0 || m == NULL)
862		goto done;
863	ip6 = mtod(m, struct ip6_hdr *);
864
865	/* See if destination IP address was changed by packet filter. */
866	if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
867		m->m_flags |= M_SKIP_FIREWALL;
868		/* If destination is now ourself drop to ip6_input(). */
869		if (in6_localaddr(&ip6->ip6_dst)) {
870			if (m->m_pkthdr.rcvif == NULL)
871				m->m_pkthdr.rcvif = V_loif;
872			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
873				m->m_pkthdr.csum_flags |=
874				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
875				m->m_pkthdr.csum_data = 0xffff;
876			}
877			m->m_pkthdr.csum_flags |=
878			    CSUM_IP_CHECKED | CSUM_IP_VALID;
879			error = netisr_queue(NETISR_IPV6, m);
880			goto done;
881		} else
882			goto again;	/* Redo the routing table lookup. */
883	}
884
885	/* XXX: IPFIREWALL_FORWARD */
886
887passout:
888	/*
889	 * Send the packet to the outgoing interface.
890	 * If necessary, do IPv6 fragmentation before sending.
891	 *
892	 * the logic here is rather complex:
893	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
894	 * 1-a:	send as is if tlen <= path mtu
895	 * 1-b:	fragment if tlen > path mtu
896	 *
897	 * 2: if user asks us not to fragment (dontfrag == 1)
898	 * 2-a:	send as is if tlen <= interface mtu
899	 * 2-b:	error if tlen > interface mtu
900	 *
901	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
902	 *	always fragment
903	 *
904	 * 4: if dontfrag == 1 && alwaysfrag == 1
905	 *	error, as we cannot handle this conflicting request
906	 */
907	tlen = m->m_pkthdr.len;
908
909	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
910		dontfrag = 1;
911	else
912		dontfrag = 0;
913	if (dontfrag && alwaysfrag) {	/* case 4 */
914		/* conflicting request - can't transmit */
915		error = EMSGSIZE;
916		goto bad;
917	}
918	if (dontfrag && tlen > IN6_LINKMTU(ifp)) {	/* case 2-b */
919		/*
920		 * Even if the DONTFRAG option is specified, we cannot send the
921		 * packet when the data length is larger than the MTU of the
922		 * outgoing interface.
923		 * Notify the error by sending IPV6_PATHMTU ancillary data as
924		 * well as returning an error code (the latter is not described
925		 * in the API spec.)
926		 */
927		u_int32_t mtu32;
928		struct ip6ctlparam ip6cp;
929
930		mtu32 = (u_int32_t)mtu;
931		bzero(&ip6cp, sizeof(ip6cp));
932		ip6cp.ip6c_cmdarg = (void *)&mtu32;
933		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
934		    (void *)&ip6cp);
935
936		error = EMSGSIZE;
937		goto bad;
938	}
939
940	/*
941	 * transmit packet without fragmentation
942	 */
943	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
944		struct in6_ifaddr *ia6;
945
946		ip6 = mtod(m, struct ip6_hdr *);
947		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
948		if (ia6) {
949			/* Record statistics for this interface address. */
950			ia6->ia_ifa.if_opackets++;
951			ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
952		}
953		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
954		goto done;
955	}
956
957	/*
958	 * try to fragment the packet.  case 1-b and 3
959	 */
960	if (mtu < IPV6_MMTU) {
961		/* path MTU cannot be less than IPV6_MMTU */
962		error = EMSGSIZE;
963		in6_ifstat_inc(ifp, ifs6_out_fragfail);
964		goto bad;
965	} else if (ip6->ip6_plen == 0) {
966		/* jumbo payload cannot be fragmented */
967		error = EMSGSIZE;
968		in6_ifstat_inc(ifp, ifs6_out_fragfail);
969		goto bad;
970	} else {
971		struct mbuf **mnext, *m_frgpart;
972		struct ip6_frag *ip6f;
973		u_int32_t id = htonl(ip6_randomid());
974		u_char nextproto;
975
976		int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
977
978		/*
979		 * Too large for the destination or interface;
980		 * fragment if possible.
981		 * Must be able to put at least 8 bytes per fragment.
982		 */
983		hlen = unfragpartlen;
984		if (mtu > IPV6_MAXPACKET)
985			mtu = IPV6_MAXPACKET;
986
987		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
988		if (len < 8) {
989			error = EMSGSIZE;
990			in6_ifstat_inc(ifp, ifs6_out_fragfail);
991			goto bad;
992		}
993
994		/*
995		 * Verify that we have any chance at all of being able to queue
996		 *      the packet or packet fragments
997		 */
998		if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
999		    < tlen  /* - hlen */)) {
1000			error = ENOBUFS;
1001			V_ip6stat.ip6s_odropped++;
1002			goto bad;
1003		}
1004
1005		mnext = &m->m_nextpkt;
1006
1007		/*
1008		 * Change the next header field of the last header in the
1009		 * unfragmentable part.
1010		 */
1011		if (exthdrs.ip6e_rthdr) {
1012			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1013			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1014		} else if (exthdrs.ip6e_dest1) {
1015			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1016			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1017		} else if (exthdrs.ip6e_hbh) {
1018			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1019			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1020		} else {
1021			nextproto = ip6->ip6_nxt;
1022			ip6->ip6_nxt = IPPROTO_FRAGMENT;
1023		}
1024
1025		/*
1026		 * Loop through length of segment after first fragment,
1027		 * make new header and copy data of each part and link onto
1028		 * chain.
1029		 */
1030		m0 = m;
1031		for (off = hlen; off < tlen; off += len) {
1032			MGETHDR(m, M_DONTWAIT, MT_HEADER);
1033			if (!m) {
1034				error = ENOBUFS;
1035				V_ip6stat.ip6s_odropped++;
1036				goto sendorfree;
1037			}
1038			m->m_pkthdr.rcvif = NULL;
1039			m->m_flags = m0->m_flags & M_COPYFLAGS;
1040			*mnext = m;
1041			mnext = &m->m_nextpkt;
1042			m->m_data += max_linkhdr;
1043			mhip6 = mtod(m, struct ip6_hdr *);
1044			*mhip6 = *ip6;
1045			m->m_len = sizeof(*mhip6);
1046			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1047			if (error) {
1048				V_ip6stat.ip6s_odropped++;
1049				goto sendorfree;
1050			}
1051			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1052			if (off + len >= tlen)
1053				len = tlen - off;
1054			else
1055				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1056			mhip6->ip6_plen = htons((u_short)(len + hlen +
1057			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1058			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1059				error = ENOBUFS;
1060				V_ip6stat.ip6s_odropped++;
1061				goto sendorfree;
1062			}
1063			m_cat(m, m_frgpart);
1064			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1065			m->m_pkthdr.rcvif = NULL;
1066			ip6f->ip6f_reserved = 0;
1067			ip6f->ip6f_ident = id;
1068			ip6f->ip6f_nxt = nextproto;
1069			V_ip6stat.ip6s_ofragments++;
1070			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1071		}
1072
1073		in6_ifstat_inc(ifp, ifs6_out_fragok);
1074	}
1075
1076	/*
1077	 * Remove leading garbages.
1078	 */
1079sendorfree:
1080	m = m0->m_nextpkt;
1081	m0->m_nextpkt = 0;
1082	m_freem(m0);
1083	for (m0 = m; m; m = m0) {
1084		m0 = m->m_nextpkt;
1085		m->m_nextpkt = 0;
1086		if (error == 0) {
1087			/* Record statistics for this interface address. */
1088			if (ia) {
1089				ia->ia_ifa.if_opackets++;
1090				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1091			}
1092			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1093		} else
1094			m_freem(m);
1095	}
1096
1097	if (error == 0)
1098		V_ip6stat.ip6s_fragmented++;
1099
1100done:
1101	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1102		RTFREE(ro->ro_rt);
1103	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1104		RTFREE(ro_pmtu->ro_rt);
1105	}
1106#ifdef IPSEC
1107	if (sp != NULL)
1108		KEY_FREESP(&sp);
1109#endif
1110
1111	return (error);
1112
1113freehdrs:
1114	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1115	m_freem(exthdrs.ip6e_dest1);
1116	m_freem(exthdrs.ip6e_rthdr);
1117	m_freem(exthdrs.ip6e_dest2);
1118	/* FALLTHROUGH */
1119bad:
1120	if (m)
1121		m_freem(m);
1122	goto done;
1123}
1124
1125static int
1126ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1127{
1128	struct mbuf *m;
1129
1130	if (hlen > MCLBYTES)
1131		return (ENOBUFS); /* XXX */
1132
1133	MGET(m, M_DONTWAIT, MT_DATA);
1134	if (!m)
1135		return (ENOBUFS);
1136
1137	if (hlen > MLEN) {
1138		MCLGET(m, M_DONTWAIT);
1139		if ((m->m_flags & M_EXT) == 0) {
1140			m_free(m);
1141			return (ENOBUFS);
1142		}
1143	}
1144	m->m_len = hlen;
1145	if (hdr)
1146		bcopy(hdr, mtod(m, caddr_t), hlen);
1147
1148	*mp = m;
1149	return (0);
1150}
1151
1152/*
1153 * Insert jumbo payload option.
1154 */
1155static int
1156ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1157{
1158	struct mbuf *mopt;
1159	u_char *optbuf;
1160	u_int32_t v;
1161
1162#define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1163
1164	/*
1165	 * If there is no hop-by-hop options header, allocate new one.
1166	 * If there is one but it doesn't have enough space to store the
1167	 * jumbo payload option, allocate a cluster to store the whole options.
1168	 * Otherwise, use it to store the options.
1169	 */
1170	if (exthdrs->ip6e_hbh == 0) {
1171		MGET(mopt, M_DONTWAIT, MT_DATA);
1172		if (mopt == 0)
1173			return (ENOBUFS);
1174		mopt->m_len = JUMBOOPTLEN;
1175		optbuf = mtod(mopt, u_char *);
1176		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1177		exthdrs->ip6e_hbh = mopt;
1178	} else {
1179		struct ip6_hbh *hbh;
1180
1181		mopt = exthdrs->ip6e_hbh;
1182		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1183			/*
1184			 * XXX assumption:
1185			 * - exthdrs->ip6e_hbh is not referenced from places
1186			 *   other than exthdrs.
1187			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1188			 */
1189			int oldoptlen = mopt->m_len;
1190			struct mbuf *n;
1191
1192			/*
1193			 * XXX: give up if the whole (new) hbh header does
1194			 * not fit even in an mbuf cluster.
1195			 */
1196			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1197				return (ENOBUFS);
1198
1199			/*
1200			 * As a consequence, we must always prepare a cluster
1201			 * at this point.
1202			 */
1203			MGET(n, M_DONTWAIT, MT_DATA);
1204			if (n) {
1205				MCLGET(n, M_DONTWAIT);
1206				if ((n->m_flags & M_EXT) == 0) {
1207					m_freem(n);
1208					n = NULL;
1209				}
1210			}
1211			if (!n)
1212				return (ENOBUFS);
1213			n->m_len = oldoptlen + JUMBOOPTLEN;
1214			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1215			    oldoptlen);
1216			optbuf = mtod(n, caddr_t) + oldoptlen;
1217			m_freem(mopt);
1218			mopt = exthdrs->ip6e_hbh = n;
1219		} else {
1220			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1221			mopt->m_len += JUMBOOPTLEN;
1222		}
1223		optbuf[0] = IP6OPT_PADN;
1224		optbuf[1] = 1;
1225
1226		/*
1227		 * Adjust the header length according to the pad and
1228		 * the jumbo payload option.
1229		 */
1230		hbh = mtod(mopt, struct ip6_hbh *);
1231		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1232	}
1233
1234	/* fill in the option. */
1235	optbuf[2] = IP6OPT_JUMBO;
1236	optbuf[3] = 4;
1237	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1238	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1239
1240	/* finally, adjust the packet header length */
1241	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1242
1243	return (0);
1244#undef JUMBOOPTLEN
1245}
1246
1247/*
1248 * Insert fragment header and copy unfragmentable header portions.
1249 */
1250static int
1251ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1252    struct ip6_frag **frghdrp)
1253{
1254	struct mbuf *n, *mlast;
1255
1256	if (hlen > sizeof(struct ip6_hdr)) {
1257		n = m_copym(m0, sizeof(struct ip6_hdr),
1258		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1259		if (n == 0)
1260			return (ENOBUFS);
1261		m->m_next = n;
1262	} else
1263		n = m;
1264
1265	/* Search for the last mbuf of unfragmentable part. */
1266	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1267		;
1268
1269	if ((mlast->m_flags & M_EXT) == 0 &&
1270	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1271		/* use the trailing space of the last mbuf for the fragment hdr */
1272		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1273		    mlast->m_len);
1274		mlast->m_len += sizeof(struct ip6_frag);
1275		m->m_pkthdr.len += sizeof(struct ip6_frag);
1276	} else {
1277		/* allocate a new mbuf for the fragment header */
1278		struct mbuf *mfrg;
1279
1280		MGET(mfrg, M_DONTWAIT, MT_DATA);
1281		if (mfrg == 0)
1282			return (ENOBUFS);
1283		mfrg->m_len = sizeof(struct ip6_frag);
1284		*frghdrp = mtod(mfrg, struct ip6_frag *);
1285		mlast->m_next = mfrg;
1286	}
1287
1288	return (0);
1289}
1290
1291static int
1292ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
1293    struct ifnet *ifp, struct in6_addr *dst, u_long *mtup,
1294    int *alwaysfragp)
1295{
1296	u_int32_t mtu = 0;
1297	int alwaysfrag = 0;
1298	int error = 0;
1299
1300	if (ro_pmtu != ro) {
1301		/* The first hop and the final destination may differ. */
1302		struct sockaddr_in6 *sa6_dst =
1303		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1304		if (ro_pmtu->ro_rt &&
1305		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1306		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1307			RTFREE(ro_pmtu->ro_rt);
1308			ro_pmtu->ro_rt = (struct rtentry *)NULL;
1309		}
1310		if (ro_pmtu->ro_rt == NULL) {
1311			bzero(sa6_dst, sizeof(*sa6_dst));
1312			sa6_dst->sin6_family = AF_INET6;
1313			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1314			sa6_dst->sin6_addr = *dst;
1315
1316			rtalloc((struct route *)ro_pmtu);
1317		}
1318	}
1319	if (ro_pmtu->ro_rt) {
1320		u_int32_t ifmtu;
1321		struct in_conninfo inc;
1322
1323		bzero(&inc, sizeof(inc));
1324		inc.inc_flags |= INC_ISIPV6;
1325		inc.inc6_faddr = *dst;
1326
1327		if (ifp == NULL)
1328			ifp = ro_pmtu->ro_rt->rt_ifp;
1329		ifmtu = IN6_LINKMTU(ifp);
1330		mtu = tcp_hc_getmtu(&inc);
1331		if (mtu)
1332			mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
1333		else
1334			mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1335		if (mtu == 0)
1336			mtu = ifmtu;
1337		else if (mtu < IPV6_MMTU) {
1338			/*
1339			 * RFC2460 section 5, last paragraph:
1340			 * if we record ICMPv6 too big message with
1341			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1342			 * or smaller, with framgent header attached.
1343			 * (fragment header is needed regardless from the
1344			 * packet size, for translators to identify packets)
1345			 */
1346			alwaysfrag = 1;
1347			mtu = IPV6_MMTU;
1348		} else if (mtu > ifmtu) {
1349			/*
1350			 * The MTU on the route is larger than the MTU on
1351			 * the interface!  This shouldn't happen, unless the
1352			 * MTU of the interface has been changed after the
1353			 * interface was brought up.  Change the MTU in the
1354			 * route to match the interface MTU (as long as the
1355			 * field isn't locked).
1356			 */
1357			mtu = ifmtu;
1358			ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1359		}
1360	} else if (ifp) {
1361		mtu = IN6_LINKMTU(ifp);
1362	} else
1363		error = EHOSTUNREACH; /* XXX */
1364
1365	*mtup = mtu;
1366	if (alwaysfragp)
1367		*alwaysfragp = alwaysfrag;
1368	return (error);
1369}
1370
1371/*
1372 * IP6 socket option processing.
1373 */
1374int
1375ip6_ctloutput(struct socket *so, struct sockopt *sopt)
1376{
1377	int optdatalen, uproto;
1378	void *optdata;
1379	struct inpcb *in6p = sotoinpcb(so);
1380	int error, optval;
1381	int level, op, optname;
1382	int optlen;
1383	struct thread *td;
1384
1385	level = sopt->sopt_level;
1386	op = sopt->sopt_dir;
1387	optname = sopt->sopt_name;
1388	optlen = sopt->sopt_valsize;
1389	td = sopt->sopt_td;
1390	error = 0;
1391	optval = 0;
1392	uproto = (int)so->so_proto->pr_protocol;
1393
1394	if (level == IPPROTO_IPV6) {
1395		switch (op) {
1396
1397		case SOPT_SET:
1398			switch (optname) {
1399			case IPV6_2292PKTOPTIONS:
1400#ifdef IPV6_PKTOPTIONS
1401			case IPV6_PKTOPTIONS:
1402#endif
1403			{
1404				struct mbuf *m;
1405
1406				error = soopt_getm(sopt, &m); /* XXX */
1407				if (error != 0)
1408					break;
1409				error = soopt_mcopyin(sopt, m); /* XXX */
1410				if (error != 0)
1411					break;
1412				error = ip6_pcbopts(&in6p->in6p_outputopts,
1413						    m, so, sopt);
1414				m_freem(m); /* XXX */
1415				break;
1416			}
1417
1418			/*
1419			 * Use of some Hop-by-Hop options or some
1420			 * Destination options, might require special
1421			 * privilege.  That is, normal applications
1422			 * (without special privilege) might be forbidden
1423			 * from setting certain options in outgoing packets,
1424			 * and might never see certain options in received
1425			 * packets. [RFC 2292 Section 6]
1426			 * KAME specific note:
1427			 *  KAME prevents non-privileged users from sending or
1428			 *  receiving ANY hbh/dst options in order to avoid
1429			 *  overhead of parsing options in the kernel.
1430			 */
1431			case IPV6_RECVHOPOPTS:
1432			case IPV6_RECVDSTOPTS:
1433			case IPV6_RECVRTHDRDSTOPTS:
1434				if (td != NULL) {
1435					error = priv_check(td,
1436					    PRIV_NETINET_SETHDROPTS);
1437					if (error)
1438						break;
1439				}
1440				/* FALLTHROUGH */
1441			case IPV6_UNICAST_HOPS:
1442			case IPV6_HOPLIMIT:
1443			case IPV6_FAITH:
1444
1445			case IPV6_RECVPKTINFO:
1446			case IPV6_RECVHOPLIMIT:
1447			case IPV6_RECVRTHDR:
1448			case IPV6_RECVPATHMTU:
1449			case IPV6_RECVTCLASS:
1450			case IPV6_V6ONLY:
1451			case IPV6_AUTOFLOWLABEL:
1452				if (optlen != sizeof(int)) {
1453					error = EINVAL;
1454					break;
1455				}
1456				error = sooptcopyin(sopt, &optval,
1457					sizeof optval, sizeof optval);
1458				if (error)
1459					break;
1460				switch (optname) {
1461
1462				case IPV6_UNICAST_HOPS:
1463					if (optval < -1 || optval >= 256)
1464						error = EINVAL;
1465					else {
1466						/* -1 = kernel default */
1467						in6p->in6p_hops = optval;
1468						if ((in6p->inp_vflag &
1469						     INP_IPV4) != 0)
1470							in6p->inp_ip_ttl = optval;
1471					}
1472					break;
1473#define OPTSET(bit) \
1474do { \
1475	if (optval) \
1476		in6p->inp_flags |= (bit); \
1477	else \
1478		in6p->inp_flags &= ~(bit); \
1479} while (/*CONSTCOND*/ 0)
1480#define OPTSET2292(bit) \
1481do { \
1482	in6p->inp_flags |= IN6P_RFC2292; \
1483	if (optval) \
1484		in6p->inp_flags |= (bit); \
1485	else \
1486		in6p->inp_flags &= ~(bit); \
1487} while (/*CONSTCOND*/ 0)
1488#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
1489
1490				case IPV6_RECVPKTINFO:
1491					/* cannot mix with RFC2292 */
1492					if (OPTBIT(IN6P_RFC2292)) {
1493						error = EINVAL;
1494						break;
1495					}
1496					OPTSET(IN6P_PKTINFO);
1497					break;
1498
1499				case IPV6_HOPLIMIT:
1500				{
1501					struct ip6_pktopts **optp;
1502
1503					/* cannot mix with RFC2292 */
1504					if (OPTBIT(IN6P_RFC2292)) {
1505						error = EINVAL;
1506						break;
1507					}
1508					optp = &in6p->in6p_outputopts;
1509					error = ip6_pcbopt(IPV6_HOPLIMIT,
1510					    (u_char *)&optval, sizeof(optval),
1511					    optp, (td != NULL) ? td->td_ucred :
1512					    NULL, uproto);
1513					break;
1514				}
1515
1516				case IPV6_RECVHOPLIMIT:
1517					/* cannot mix with RFC2292 */
1518					if (OPTBIT(IN6P_RFC2292)) {
1519						error = EINVAL;
1520						break;
1521					}
1522					OPTSET(IN6P_HOPLIMIT);
1523					break;
1524
1525				case IPV6_RECVHOPOPTS:
1526					/* cannot mix with RFC2292 */
1527					if (OPTBIT(IN6P_RFC2292)) {
1528						error = EINVAL;
1529						break;
1530					}
1531					OPTSET(IN6P_HOPOPTS);
1532					break;
1533
1534				case IPV6_RECVDSTOPTS:
1535					/* cannot mix with RFC2292 */
1536					if (OPTBIT(IN6P_RFC2292)) {
1537						error = EINVAL;
1538						break;
1539					}
1540					OPTSET(IN6P_DSTOPTS);
1541					break;
1542
1543				case IPV6_RECVRTHDRDSTOPTS:
1544					/* cannot mix with RFC2292 */
1545					if (OPTBIT(IN6P_RFC2292)) {
1546						error = EINVAL;
1547						break;
1548					}
1549					OPTSET(IN6P_RTHDRDSTOPTS);
1550					break;
1551
1552				case IPV6_RECVRTHDR:
1553					/* cannot mix with RFC2292 */
1554					if (OPTBIT(IN6P_RFC2292)) {
1555						error = EINVAL;
1556						break;
1557					}
1558					OPTSET(IN6P_RTHDR);
1559					break;
1560
1561				case IPV6_FAITH:
1562					OPTSET(INP_FAITH);
1563					break;
1564
1565				case IPV6_RECVPATHMTU:
1566					/*
1567					 * We ignore this option for TCP
1568					 * sockets.
1569					 * (RFC3542 leaves this case
1570					 * unspecified.)
1571					 */
1572					if (uproto != IPPROTO_TCP)
1573						OPTSET(IN6P_MTU);
1574					break;
1575
1576				case IPV6_V6ONLY:
1577					/*
1578					 * make setsockopt(IPV6_V6ONLY)
1579					 * available only prior to bind(2).
1580					 * see ipng mailing list, Jun 22 2001.
1581					 */
1582					if (in6p->inp_lport ||
1583					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1584						error = EINVAL;
1585						break;
1586					}
1587					OPTSET(IN6P_IPV6_V6ONLY);
1588					if (optval)
1589						in6p->inp_vflag &= ~INP_IPV4;
1590					else
1591						in6p->inp_vflag |= INP_IPV4;
1592					break;
1593				case IPV6_RECVTCLASS:
1594					/* cannot mix with RFC2292 XXX */
1595					if (OPTBIT(IN6P_RFC2292)) {
1596						error = EINVAL;
1597						break;
1598					}
1599					OPTSET(IN6P_TCLASS);
1600					break;
1601				case IPV6_AUTOFLOWLABEL:
1602					OPTSET(IN6P_AUTOFLOWLABEL);
1603					break;
1604
1605				}
1606				break;
1607
1608			case IPV6_TCLASS:
1609			case IPV6_DONTFRAG:
1610			case IPV6_USE_MIN_MTU:
1611			case IPV6_PREFER_TEMPADDR:
1612				if (optlen != sizeof(optval)) {
1613					error = EINVAL;
1614					break;
1615				}
1616				error = sooptcopyin(sopt, &optval,
1617					sizeof optval, sizeof optval);
1618				if (error)
1619					break;
1620				{
1621					struct ip6_pktopts **optp;
1622					optp = &in6p->in6p_outputopts;
1623					error = ip6_pcbopt(optname,
1624					    (u_char *)&optval, sizeof(optval),
1625					    optp, (td != NULL) ? td->td_ucred :
1626					    NULL, uproto);
1627					break;
1628				}
1629
1630			case IPV6_2292PKTINFO:
1631			case IPV6_2292HOPLIMIT:
1632			case IPV6_2292HOPOPTS:
1633			case IPV6_2292DSTOPTS:
1634			case IPV6_2292RTHDR:
1635				/* RFC 2292 */
1636				if (optlen != sizeof(int)) {
1637					error = EINVAL;
1638					break;
1639				}
1640				error = sooptcopyin(sopt, &optval,
1641					sizeof optval, sizeof optval);
1642				if (error)
1643					break;
1644				switch (optname) {
1645				case IPV6_2292PKTINFO:
1646					OPTSET2292(IN6P_PKTINFO);
1647					break;
1648				case IPV6_2292HOPLIMIT:
1649					OPTSET2292(IN6P_HOPLIMIT);
1650					break;
1651				case IPV6_2292HOPOPTS:
1652					/*
1653					 * Check super-user privilege.
1654					 * See comments for IPV6_RECVHOPOPTS.
1655					 */
1656					if (td != NULL) {
1657						error = priv_check(td,
1658						    PRIV_NETINET_SETHDROPTS);
1659						if (error)
1660							return (error);
1661					}
1662					OPTSET2292(IN6P_HOPOPTS);
1663					break;
1664				case IPV6_2292DSTOPTS:
1665					if (td != NULL) {
1666						error = priv_check(td,
1667						    PRIV_NETINET_SETHDROPTS);
1668						if (error)
1669							return (error);
1670					}
1671					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1672					break;
1673				case IPV6_2292RTHDR:
1674					OPTSET2292(IN6P_RTHDR);
1675					break;
1676				}
1677				break;
1678			case IPV6_PKTINFO:
1679			case IPV6_HOPOPTS:
1680			case IPV6_RTHDR:
1681			case IPV6_DSTOPTS:
1682			case IPV6_RTHDRDSTOPTS:
1683			case IPV6_NEXTHOP:
1684			{
1685				/* new advanced API (RFC3542) */
1686				u_char *optbuf;
1687				u_char optbuf_storage[MCLBYTES];
1688				int optlen;
1689				struct ip6_pktopts **optp;
1690
1691				/* cannot mix with RFC2292 */
1692				if (OPTBIT(IN6P_RFC2292)) {
1693					error = EINVAL;
1694					break;
1695				}
1696
1697				/*
1698				 * We only ensure valsize is not too large
1699				 * here.  Further validation will be done
1700				 * later.
1701				 */
1702				error = sooptcopyin(sopt, optbuf_storage,
1703				    sizeof(optbuf_storage), 0);
1704				if (error)
1705					break;
1706				optlen = sopt->sopt_valsize;
1707				optbuf = optbuf_storage;
1708				optp = &in6p->in6p_outputopts;
1709				error = ip6_pcbopt(optname, optbuf, optlen,
1710				    optp, (td != NULL) ? td->td_ucred : NULL,
1711				    uproto);
1712				break;
1713			}
1714#undef OPTSET
1715
1716			case IPV6_MULTICAST_IF:
1717			case IPV6_MULTICAST_HOPS:
1718			case IPV6_MULTICAST_LOOP:
1719			case IPV6_JOIN_GROUP:
1720			case IPV6_LEAVE_GROUP:
1721			    {
1722				if (sopt->sopt_valsize > MLEN) {
1723					error = EMSGSIZE;
1724					break;
1725				}
1726				/* XXX */
1727			    }
1728			    /* FALLTHROUGH */
1729			    {
1730				struct mbuf *m;
1731
1732				if (sopt->sopt_valsize > MCLBYTES) {
1733					error = EMSGSIZE;
1734					break;
1735				}
1736				/* XXX */
1737				MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
1738				if (m == 0) {
1739					error = ENOBUFS;
1740					break;
1741				}
1742				if (sopt->sopt_valsize > MLEN) {
1743					MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
1744					if ((m->m_flags & M_EXT) == 0) {
1745						m_free(m);
1746						error = ENOBUFS;
1747						break;
1748					}
1749				}
1750				m->m_len = sopt->sopt_valsize;
1751				error = sooptcopyin(sopt, mtod(m, char *),
1752						    m->m_len, m->m_len);
1753				if (error) {
1754					(void)m_free(m);
1755					break;
1756				}
1757				error =	ip6_setmoptions(sopt->sopt_name,
1758							&in6p->in6p_moptions,
1759							m);
1760				(void)m_free(m);
1761			    }
1762				break;
1763
1764			case IPV6_PORTRANGE:
1765				error = sooptcopyin(sopt, &optval,
1766				    sizeof optval, sizeof optval);
1767				if (error)
1768					break;
1769
1770				switch (optval) {
1771				case IPV6_PORTRANGE_DEFAULT:
1772					in6p->inp_flags &= ~(INP_LOWPORT);
1773					in6p->inp_flags &= ~(INP_HIGHPORT);
1774					break;
1775
1776				case IPV6_PORTRANGE_HIGH:
1777					in6p->inp_flags &= ~(INP_LOWPORT);
1778					in6p->inp_flags |= INP_HIGHPORT;
1779					break;
1780
1781				case IPV6_PORTRANGE_LOW:
1782					in6p->inp_flags &= ~(INP_HIGHPORT);
1783					in6p->inp_flags |= INP_LOWPORT;
1784					break;
1785
1786				default:
1787					error = EINVAL;
1788					break;
1789				}
1790				break;
1791
1792#ifdef IPSEC
1793			case IPV6_IPSEC_POLICY:
1794			{
1795				caddr_t req;
1796				struct mbuf *m;
1797
1798				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1799					break;
1800				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1801					break;
1802				req = mtod(m, caddr_t);
1803				error = ipsec_set_policy(in6p, optname, req,
1804				    m->m_len, (sopt->sopt_td != NULL) ?
1805				    sopt->sopt_td->td_ucred : NULL);
1806				m_freem(m);
1807				break;
1808			}
1809#endif /* IPSEC */
1810
1811			default:
1812				error = ENOPROTOOPT;
1813				break;
1814			}
1815			break;
1816
1817		case SOPT_GET:
1818			switch (optname) {
1819
1820			case IPV6_2292PKTOPTIONS:
1821#ifdef IPV6_PKTOPTIONS
1822			case IPV6_PKTOPTIONS:
1823#endif
1824				/*
1825				 * RFC3542 (effectively) deprecated the
1826				 * semantics of the 2292-style pktoptions.
1827				 * Since it was not reliable in nature (i.e.,
1828				 * applications had to expect the lack of some
1829				 * information after all), it would make sense
1830				 * to simplify this part by always returning
1831				 * empty data.
1832				 */
1833				sopt->sopt_valsize = 0;
1834				break;
1835
1836			case IPV6_RECVHOPOPTS:
1837			case IPV6_RECVDSTOPTS:
1838			case IPV6_RECVRTHDRDSTOPTS:
1839			case IPV6_UNICAST_HOPS:
1840			case IPV6_RECVPKTINFO:
1841			case IPV6_RECVHOPLIMIT:
1842			case IPV6_RECVRTHDR:
1843			case IPV6_RECVPATHMTU:
1844
1845			case IPV6_FAITH:
1846			case IPV6_V6ONLY:
1847			case IPV6_PORTRANGE:
1848			case IPV6_RECVTCLASS:
1849			case IPV6_AUTOFLOWLABEL:
1850				switch (optname) {
1851
1852				case IPV6_RECVHOPOPTS:
1853					optval = OPTBIT(IN6P_HOPOPTS);
1854					break;
1855
1856				case IPV6_RECVDSTOPTS:
1857					optval = OPTBIT(IN6P_DSTOPTS);
1858					break;
1859
1860				case IPV6_RECVRTHDRDSTOPTS:
1861					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1862					break;
1863
1864				case IPV6_UNICAST_HOPS:
1865					optval = in6p->in6p_hops;
1866					break;
1867
1868				case IPV6_RECVPKTINFO:
1869					optval = OPTBIT(IN6P_PKTINFO);
1870					break;
1871
1872				case IPV6_RECVHOPLIMIT:
1873					optval = OPTBIT(IN6P_HOPLIMIT);
1874					break;
1875
1876				case IPV6_RECVRTHDR:
1877					optval = OPTBIT(IN6P_RTHDR);
1878					break;
1879
1880				case IPV6_RECVPATHMTU:
1881					optval = OPTBIT(IN6P_MTU);
1882					break;
1883
1884				case IPV6_FAITH:
1885					optval = OPTBIT(INP_FAITH);
1886					break;
1887
1888				case IPV6_V6ONLY:
1889					optval = OPTBIT(IN6P_IPV6_V6ONLY);
1890					break;
1891
1892				case IPV6_PORTRANGE:
1893				    {
1894					int flags;
1895					flags = in6p->inp_flags;
1896					if (flags & INP_HIGHPORT)
1897						optval = IPV6_PORTRANGE_HIGH;
1898					else if (flags & INP_LOWPORT)
1899						optval = IPV6_PORTRANGE_LOW;
1900					else
1901						optval = 0;
1902					break;
1903				    }
1904				case IPV6_RECVTCLASS:
1905					optval = OPTBIT(IN6P_TCLASS);
1906					break;
1907
1908				case IPV6_AUTOFLOWLABEL:
1909					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
1910					break;
1911				}
1912				if (error)
1913					break;
1914				error = sooptcopyout(sopt, &optval,
1915					sizeof optval);
1916				break;
1917
1918			case IPV6_PATHMTU:
1919			{
1920				u_long pmtu = 0;
1921				struct ip6_mtuinfo mtuinfo;
1922				struct route_in6 sro;
1923
1924				bzero(&sro, sizeof(sro));
1925
1926				if (!(so->so_state & SS_ISCONNECTED))
1927					return (ENOTCONN);
1928				/*
1929				 * XXX: we dot not consider the case of source
1930				 * routing, or optional information to specify
1931				 * the outgoing interface.
1932				 */
1933				error = ip6_getpmtu(&sro, NULL, NULL,
1934				    &in6p->in6p_faddr, &pmtu, NULL);
1935				if (sro.ro_rt)
1936					RTFREE(sro.ro_rt);
1937				if (error)
1938					break;
1939				if (pmtu > IPV6_MAXPACKET)
1940					pmtu = IPV6_MAXPACKET;
1941
1942				bzero(&mtuinfo, sizeof(mtuinfo));
1943				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
1944				optdata = (void *)&mtuinfo;
1945				optdatalen = sizeof(mtuinfo);
1946				error = sooptcopyout(sopt, optdata,
1947				    optdatalen);
1948				break;
1949			}
1950
1951			case IPV6_2292PKTINFO:
1952			case IPV6_2292HOPLIMIT:
1953			case IPV6_2292HOPOPTS:
1954			case IPV6_2292RTHDR:
1955			case IPV6_2292DSTOPTS:
1956				switch (optname) {
1957				case IPV6_2292PKTINFO:
1958					optval = OPTBIT(IN6P_PKTINFO);
1959					break;
1960				case IPV6_2292HOPLIMIT:
1961					optval = OPTBIT(IN6P_HOPLIMIT);
1962					break;
1963				case IPV6_2292HOPOPTS:
1964					optval = OPTBIT(IN6P_HOPOPTS);
1965					break;
1966				case IPV6_2292RTHDR:
1967					optval = OPTBIT(IN6P_RTHDR);
1968					break;
1969				case IPV6_2292DSTOPTS:
1970					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
1971					break;
1972				}
1973				error = sooptcopyout(sopt, &optval,
1974				    sizeof optval);
1975				break;
1976			case IPV6_PKTINFO:
1977			case IPV6_HOPOPTS:
1978			case IPV6_RTHDR:
1979			case IPV6_DSTOPTS:
1980			case IPV6_RTHDRDSTOPTS:
1981			case IPV6_NEXTHOP:
1982			case IPV6_TCLASS:
1983			case IPV6_DONTFRAG:
1984			case IPV6_USE_MIN_MTU:
1985			case IPV6_PREFER_TEMPADDR:
1986				error = ip6_getpcbopt(in6p->in6p_outputopts,
1987				    optname, sopt);
1988				break;
1989
1990			case IPV6_MULTICAST_IF:
1991			case IPV6_MULTICAST_HOPS:
1992			case IPV6_MULTICAST_LOOP:
1993			case IPV6_JOIN_GROUP:
1994			case IPV6_LEAVE_GROUP:
1995			    {
1996				struct mbuf *m;
1997				error = ip6_getmoptions(sopt->sopt_name,
1998				    in6p->in6p_moptions, &m);
1999				if (error == 0)
2000					error = sooptcopyout(sopt,
2001					    mtod(m, char *), m->m_len);
2002				m_freem(m);
2003			    }
2004				break;
2005
2006#ifdef IPSEC
2007			case IPV6_IPSEC_POLICY:
2008			  {
2009				caddr_t req = NULL;
2010				size_t len = 0;
2011				struct mbuf *m = NULL;
2012				struct mbuf **mp = &m;
2013				size_t ovalsize = sopt->sopt_valsize;
2014				caddr_t oval = (caddr_t)sopt->sopt_val;
2015
2016				error = soopt_getm(sopt, &m); /* XXX */
2017				if (error != 0)
2018					break;
2019				error = soopt_mcopyin(sopt, m); /* XXX */
2020				if (error != 0)
2021					break;
2022				sopt->sopt_valsize = ovalsize;
2023				sopt->sopt_val = oval;
2024				if (m) {
2025					req = mtod(m, caddr_t);
2026					len = m->m_len;
2027				}
2028				error = ipsec_get_policy(in6p, req, len, mp);
2029				if (error == 0)
2030					error = soopt_mcopyout(sopt, m); /* XXX */
2031				if (error == 0 && m)
2032					m_freem(m);
2033				break;
2034			  }
2035#endif /* IPSEC */
2036
2037			default:
2038				error = ENOPROTOOPT;
2039				break;
2040			}
2041			break;
2042		}
2043	} else {		/* level != IPPROTO_IPV6 */
2044		error = EINVAL;
2045	}
2046	return (error);
2047}
2048
2049int
2050ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
2051{
2052	int error = 0, optval, optlen;
2053	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2054	struct inpcb *in6p = sotoinpcb(so);
2055	int level, op, optname;
2056
2057	level = sopt->sopt_level;
2058	op = sopt->sopt_dir;
2059	optname = sopt->sopt_name;
2060	optlen = sopt->sopt_valsize;
2061
2062	if (level != IPPROTO_IPV6) {
2063		return (EINVAL);
2064	}
2065
2066	switch (optname) {
2067	case IPV6_CHECKSUM:
2068		/*
2069		 * For ICMPv6 sockets, no modification allowed for checksum
2070		 * offset, permit "no change" values to help existing apps.
2071		 *
2072		 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2073		 * for an ICMPv6 socket will fail."
2074		 * The current behavior does not meet RFC3542.
2075		 */
2076		switch (op) {
2077		case SOPT_SET:
2078			if (optlen != sizeof(int)) {
2079				error = EINVAL;
2080				break;
2081			}
2082			error = sooptcopyin(sopt, &optval, sizeof(optval),
2083					    sizeof(optval));
2084			if (error)
2085				break;
2086			if ((optval % 2) != 0) {
2087				/* the API assumes even offset values */
2088				error = EINVAL;
2089			} else if (so->so_proto->pr_protocol ==
2090			    IPPROTO_ICMPV6) {
2091				if (optval != icmp6off)
2092					error = EINVAL;
2093			} else
2094				in6p->in6p_cksum = optval;
2095			break;
2096
2097		case SOPT_GET:
2098			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2099				optval = icmp6off;
2100			else
2101				optval = in6p->in6p_cksum;
2102
2103			error = sooptcopyout(sopt, &optval, sizeof(optval));
2104			break;
2105
2106		default:
2107			error = EINVAL;
2108			break;
2109		}
2110		break;
2111
2112	default:
2113		error = ENOPROTOOPT;
2114		break;
2115	}
2116
2117	return (error);
2118}
2119
2120/*
2121 * Set up IP6 options in pcb for insertion in output packets or
2122 * specifying behavior of outgoing packets.
2123 */
2124static int
2125ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m,
2126    struct socket *so, struct sockopt *sopt)
2127{
2128	struct ip6_pktopts *opt = *pktopt;
2129	int error = 0;
2130	struct thread *td = sopt->sopt_td;
2131
2132	/* turn off any old options. */
2133	if (opt) {
2134#ifdef DIAGNOSTIC
2135		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2136		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2137		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2138			printf("ip6_pcbopts: all specified options are cleared.\n");
2139#endif
2140		ip6_clearpktopts(opt, -1);
2141	} else
2142		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2143	*pktopt = NULL;
2144
2145	if (!m || m->m_len == 0) {
2146		/*
2147		 * Only turning off any previous options, regardless of
2148		 * whether the opt is just created or given.
2149		 */
2150		free(opt, M_IP6OPT);
2151		return (0);
2152	}
2153
2154	/*  set options specified by user. */
2155	if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ?
2156	    td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) {
2157		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2158		free(opt, M_IP6OPT);
2159		return (error);
2160	}
2161	*pktopt = opt;
2162	return (0);
2163}
2164
2165/*
2166 * initialize ip6_pktopts.  beware that there are non-zero default values in
2167 * the struct.
2168 */
2169void
2170ip6_initpktopts(struct ip6_pktopts *opt)
2171{
2172
2173	bzero(opt, sizeof(*opt));
2174	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2175	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2176	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2177	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2178}
2179
2180static int
2181ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2182    struct ucred *cred, int uproto)
2183{
2184	struct ip6_pktopts *opt;
2185
2186	if (*pktopt == NULL) {
2187		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2188		    M_WAITOK);
2189		ip6_initpktopts(*pktopt);
2190	}
2191	opt = *pktopt;
2192
2193	return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto));
2194}
2195
2196static int
2197ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2198{
2199	void *optdata = NULL;
2200	int optdatalen = 0;
2201	struct ip6_ext *ip6e;
2202	int error = 0;
2203	struct in6_pktinfo null_pktinfo;
2204	int deftclass = 0, on;
2205	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2206	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2207
2208	switch (optname) {
2209	case IPV6_PKTINFO:
2210		if (pktopt && pktopt->ip6po_pktinfo)
2211			optdata = (void *)pktopt->ip6po_pktinfo;
2212		else {
2213			/* XXX: we don't have to do this every time... */
2214			bzero(&null_pktinfo, sizeof(null_pktinfo));
2215			optdata = (void *)&null_pktinfo;
2216		}
2217		optdatalen = sizeof(struct in6_pktinfo);
2218		break;
2219	case IPV6_TCLASS:
2220		if (pktopt && pktopt->ip6po_tclass >= 0)
2221			optdata = (void *)&pktopt->ip6po_tclass;
2222		else
2223			optdata = (void *)&deftclass;
2224		optdatalen = sizeof(int);
2225		break;
2226	case IPV6_HOPOPTS:
2227		if (pktopt && pktopt->ip6po_hbh) {
2228			optdata = (void *)pktopt->ip6po_hbh;
2229			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2230			optdatalen = (ip6e->ip6e_len + 1) << 3;
2231		}
2232		break;
2233	case IPV6_RTHDR:
2234		if (pktopt && pktopt->ip6po_rthdr) {
2235			optdata = (void *)pktopt->ip6po_rthdr;
2236			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2237			optdatalen = (ip6e->ip6e_len + 1) << 3;
2238		}
2239		break;
2240	case IPV6_RTHDRDSTOPTS:
2241		if (pktopt && pktopt->ip6po_dest1) {
2242			optdata = (void *)pktopt->ip6po_dest1;
2243			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2244			optdatalen = (ip6e->ip6e_len + 1) << 3;
2245		}
2246		break;
2247	case IPV6_DSTOPTS:
2248		if (pktopt && pktopt->ip6po_dest2) {
2249			optdata = (void *)pktopt->ip6po_dest2;
2250			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2251			optdatalen = (ip6e->ip6e_len + 1) << 3;
2252		}
2253		break;
2254	case IPV6_NEXTHOP:
2255		if (pktopt && pktopt->ip6po_nexthop) {
2256			optdata = (void *)pktopt->ip6po_nexthop;
2257			optdatalen = pktopt->ip6po_nexthop->sa_len;
2258		}
2259		break;
2260	case IPV6_USE_MIN_MTU:
2261		if (pktopt)
2262			optdata = (void *)&pktopt->ip6po_minmtu;
2263		else
2264			optdata = (void *)&defminmtu;
2265		optdatalen = sizeof(int);
2266		break;
2267	case IPV6_DONTFRAG:
2268		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2269			on = 1;
2270		else
2271			on = 0;
2272		optdata = (void *)&on;
2273		optdatalen = sizeof(on);
2274		break;
2275	case IPV6_PREFER_TEMPADDR:
2276		if (pktopt)
2277			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2278		else
2279			optdata = (void *)&defpreftemp;
2280		optdatalen = sizeof(int);
2281		break;
2282	default:		/* should not happen */
2283#ifdef DIAGNOSTIC
2284		panic("ip6_getpcbopt: unexpected option\n");
2285#endif
2286		return (ENOPROTOOPT);
2287	}
2288
2289	error = sooptcopyout(sopt, optdata, optdatalen);
2290
2291	return (error);
2292}
2293
2294void
2295ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2296{
2297	if (pktopt == NULL)
2298		return;
2299
2300	if (optname == -1 || optname == IPV6_PKTINFO) {
2301		if (pktopt->ip6po_pktinfo)
2302			free(pktopt->ip6po_pktinfo, M_IP6OPT);
2303		pktopt->ip6po_pktinfo = NULL;
2304	}
2305	if (optname == -1 || optname == IPV6_HOPLIMIT)
2306		pktopt->ip6po_hlim = -1;
2307	if (optname == -1 || optname == IPV6_TCLASS)
2308		pktopt->ip6po_tclass = -1;
2309	if (optname == -1 || optname == IPV6_NEXTHOP) {
2310		if (pktopt->ip6po_nextroute.ro_rt) {
2311			RTFREE(pktopt->ip6po_nextroute.ro_rt);
2312			pktopt->ip6po_nextroute.ro_rt = NULL;
2313		}
2314		if (pktopt->ip6po_nexthop)
2315			free(pktopt->ip6po_nexthop, M_IP6OPT);
2316		pktopt->ip6po_nexthop = NULL;
2317	}
2318	if (optname == -1 || optname == IPV6_HOPOPTS) {
2319		if (pktopt->ip6po_hbh)
2320			free(pktopt->ip6po_hbh, M_IP6OPT);
2321		pktopt->ip6po_hbh = NULL;
2322	}
2323	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2324		if (pktopt->ip6po_dest1)
2325			free(pktopt->ip6po_dest1, M_IP6OPT);
2326		pktopt->ip6po_dest1 = NULL;
2327	}
2328	if (optname == -1 || optname == IPV6_RTHDR) {
2329		if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2330			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2331		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2332		if (pktopt->ip6po_route.ro_rt) {
2333			RTFREE(pktopt->ip6po_route.ro_rt);
2334			pktopt->ip6po_route.ro_rt = NULL;
2335		}
2336	}
2337	if (optname == -1 || optname == IPV6_DSTOPTS) {
2338		if (pktopt->ip6po_dest2)
2339			free(pktopt->ip6po_dest2, M_IP6OPT);
2340		pktopt->ip6po_dest2 = NULL;
2341	}
2342}
2343
2344#define PKTOPT_EXTHDRCPY(type) \
2345do {\
2346	if (src->type) {\
2347		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2348		dst->type = malloc(hlen, M_IP6OPT, canwait);\
2349		if (dst->type == NULL && canwait == M_NOWAIT)\
2350			goto bad;\
2351		bcopy(src->type, dst->type, hlen);\
2352	}\
2353} while (/*CONSTCOND*/ 0)
2354
2355static int
2356copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2357{
2358	if (dst == NULL || src == NULL)  {
2359		printf("ip6_clearpktopts: invalid argument\n");
2360		return (EINVAL);
2361	}
2362
2363	dst->ip6po_hlim = src->ip6po_hlim;
2364	dst->ip6po_tclass = src->ip6po_tclass;
2365	dst->ip6po_flags = src->ip6po_flags;
2366	if (src->ip6po_pktinfo) {
2367		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2368		    M_IP6OPT, canwait);
2369		if (dst->ip6po_pktinfo == NULL)
2370			goto bad;
2371		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2372	}
2373	if (src->ip6po_nexthop) {
2374		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2375		    M_IP6OPT, canwait);
2376		if (dst->ip6po_nexthop == NULL)
2377			goto bad;
2378		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2379		    src->ip6po_nexthop->sa_len);
2380	}
2381	PKTOPT_EXTHDRCPY(ip6po_hbh);
2382	PKTOPT_EXTHDRCPY(ip6po_dest1);
2383	PKTOPT_EXTHDRCPY(ip6po_dest2);
2384	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2385	return (0);
2386
2387  bad:
2388	ip6_clearpktopts(dst, -1);
2389	return (ENOBUFS);
2390}
2391#undef PKTOPT_EXTHDRCPY
2392
2393struct ip6_pktopts *
2394ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2395{
2396	int error;
2397	struct ip6_pktopts *dst;
2398
2399	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2400	if (dst == NULL)
2401		return (NULL);
2402	ip6_initpktopts(dst);
2403
2404	if ((error = copypktopts(dst, src, canwait)) != 0) {
2405		free(dst, M_IP6OPT);
2406		return (NULL);
2407	}
2408
2409	return (dst);
2410}
2411
2412void
2413ip6_freepcbopts(struct ip6_pktopts *pktopt)
2414{
2415	if (pktopt == NULL)
2416		return;
2417
2418	ip6_clearpktopts(pktopt, -1);
2419
2420	free(pktopt, M_IP6OPT);
2421}
2422
2423/*
2424 * Set the IP6 multicast options in response to user setsockopt().
2425 */
2426static int
2427ip6_setmoptions(int optname, struct ip6_moptions **im6op, struct mbuf *m)
2428{
2429	INIT_VNET_NET(curvnet);
2430	INIT_VNET_INET6(curvnet);
2431	int error = 0;
2432	u_int loop, ifindex;
2433	struct ipv6_mreq *mreq;
2434	struct ifnet *ifp;
2435	struct ip6_moptions *im6o = *im6op;
2436	struct route_in6 ro;
2437	struct in6_multi_mship *imm;
2438
2439	if (im6o == NULL) {
2440		/*
2441		 * No multicast option buffer attached to the pcb;
2442		 * allocate one and initialize to default values.
2443		 */
2444		im6o = (struct ip6_moptions *)
2445			malloc(sizeof(*im6o), M_IP6MOPTS, M_WAITOK);
2446
2447		if (im6o == NULL)
2448			return (ENOBUFS);
2449		*im6op = im6o;
2450		im6o->im6o_multicast_ifp = NULL;
2451		im6o->im6o_multicast_hlim = V_ip6_defmcasthlim;
2452		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2453		LIST_INIT(&im6o->im6o_memberships);
2454	}
2455
2456	switch (optname) {
2457
2458	case IPV6_MULTICAST_IF:
2459		/*
2460		 * Select the interface for outgoing multicast packets.
2461		 */
2462		if (m == NULL || m->m_len != sizeof(u_int)) {
2463			error = EINVAL;
2464			break;
2465		}
2466		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2467		if (ifindex < 0 || V_if_index < ifindex) {
2468			error = ENXIO;	/* XXX EINVAL? */
2469			break;
2470		}
2471		ifp = ifnet_byindex(ifindex);
2472		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2473			error = EADDRNOTAVAIL;
2474			break;
2475		}
2476		im6o->im6o_multicast_ifp = ifp;
2477		break;
2478
2479	case IPV6_MULTICAST_HOPS:
2480	    {
2481		/*
2482		 * Set the IP6 hoplimit for outgoing multicast packets.
2483		 */
2484		int optval;
2485		if (m == NULL || m->m_len != sizeof(int)) {
2486			error = EINVAL;
2487			break;
2488		}
2489		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2490		if (optval < -1 || optval >= 256)
2491			error = EINVAL;
2492		else if (optval == -1)
2493			im6o->im6o_multicast_hlim = V_ip6_defmcasthlim;
2494		else
2495			im6o->im6o_multicast_hlim = optval;
2496		break;
2497	    }
2498
2499	case IPV6_MULTICAST_LOOP:
2500		/*
2501		 * Set the loopback flag for outgoing multicast packets.
2502		 * Must be zero or one.
2503		 */
2504		if (m == NULL || m->m_len != sizeof(u_int)) {
2505			error = EINVAL;
2506			break;
2507		}
2508		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2509		if (loop > 1) {
2510			error = EINVAL;
2511			break;
2512		}
2513		im6o->im6o_multicast_loop = loop;
2514		break;
2515
2516	case IPV6_JOIN_GROUP:
2517		/*
2518		 * Add a multicast group membership.
2519		 * Group must be a valid IP6 multicast address.
2520		 */
2521		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2522			error = EINVAL;
2523			break;
2524		}
2525		mreq = mtod(m, struct ipv6_mreq *);
2526
2527		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2528			/*
2529			 * We use the unspecified address to specify to accept
2530			 * all multicast addresses. Only super user is allowed
2531			 * to do this.
2532			 */
2533			/* XXX-BZ might need a better PRIV_NETINET_x for this */
2534			error = priv_check(curthread, PRIV_NETINET_MROUTE);
2535			if (error)
2536				break;
2537		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2538			error = EINVAL;
2539			break;
2540		}
2541
2542		/*
2543		 * If no interface was explicitly specified, choose an
2544		 * appropriate one according to the given multicast address.
2545		 */
2546		if (mreq->ipv6mr_interface == 0) {
2547			struct sockaddr_in6 *dst;
2548
2549			/*
2550			 * Look up the routing table for the
2551			 * address, and choose the outgoing interface.
2552			 *   XXX: is it a good approach?
2553			 */
2554			ro.ro_rt = NULL;
2555			dst = (struct sockaddr_in6 *)&ro.ro_dst;
2556			bzero(dst, sizeof(*dst));
2557			dst->sin6_family = AF_INET6;
2558			dst->sin6_len = sizeof(*dst);
2559			dst->sin6_addr = mreq->ipv6mr_multiaddr;
2560			rtalloc((struct route *)&ro);
2561			if (ro.ro_rt == NULL) {
2562				error = EADDRNOTAVAIL;
2563				break;
2564			}
2565			ifp = ro.ro_rt->rt_ifp;
2566			RTFREE(ro.ro_rt);
2567		} else {
2568			/*
2569			 * If the interface is specified, validate it.
2570			 */
2571			if (mreq->ipv6mr_interface < 0 ||
2572			    V_if_index < mreq->ipv6mr_interface) {
2573				error = ENXIO;	/* XXX EINVAL? */
2574				break;
2575			}
2576			ifp = ifnet_byindex(mreq->ipv6mr_interface);
2577			if (!ifp) {
2578				error = ENXIO;	/* XXX EINVAL? */
2579				break;
2580			}
2581		}
2582
2583		/*
2584		 * See if we found an interface, and confirm that it
2585		 * supports multicast
2586		 */
2587		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2588			error = EADDRNOTAVAIL;
2589			break;
2590		}
2591
2592		if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
2593			error = EADDRNOTAVAIL; /* XXX: should not happen */
2594			break;
2595		}
2596
2597		/*
2598		 * See if the membership already exists.
2599		 */
2600		for (imm = im6o->im6o_memberships.lh_first;
2601		     imm != NULL; imm = imm->i6mm_chain.le_next)
2602			if (imm->i6mm_maddr->in6m_ifp == ifp &&
2603			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2604					       &mreq->ipv6mr_multiaddr))
2605				break;
2606		if (imm != NULL) {
2607			error = EADDRINUSE;
2608			break;
2609		}
2610		/*
2611		 * Everything looks good; add a new record to the multicast
2612		 * address list for the given interface.
2613		 */
2614		imm = in6_joingroup(ifp, &mreq->ipv6mr_multiaddr,  &error, 0);
2615		if (imm == NULL)
2616			break;
2617		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2618		break;
2619
2620	case IPV6_LEAVE_GROUP:
2621		/*
2622		 * Drop a multicast group membership.
2623		 * Group must be a valid IP6 multicast address.
2624		 */
2625		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2626			error = EINVAL;
2627			break;
2628		}
2629		mreq = mtod(m, struct ipv6_mreq *);
2630
2631		/*
2632		 * If an interface address was specified, get a pointer
2633		 * to its ifnet structure.
2634		 */
2635		if (mreq->ipv6mr_interface < 0 ||
2636		    V_if_index < mreq->ipv6mr_interface) {
2637			error = ENXIO;	/* XXX EINVAL? */
2638			break;
2639		}
2640		if (mreq->ipv6mr_interface == 0)
2641			ifp = NULL;
2642		else
2643			ifp = ifnet_byindex(mreq->ipv6mr_interface);
2644
2645		/* Fill in the scope zone ID */
2646		if (ifp) {
2647			if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
2648				/* XXX: should not happen */
2649				error = EADDRNOTAVAIL;
2650				break;
2651			}
2652		} else if (mreq->ipv6mr_interface != 0) {
2653			/*
2654			 * This case happens when the (positive) index is in
2655			 * the valid range, but the corresponding interface has
2656			 * been detached dynamically (XXX).
2657			 */
2658			error = EADDRNOTAVAIL;
2659			break;
2660		} else {	/* ipv6mr_interface == 0 */
2661			struct sockaddr_in6 sa6_mc;
2662
2663			/*
2664			 * The API spec says as follows:
2665			 *  If the interface index is specified as 0, the
2666			 *  system may choose a multicast group membership to
2667			 *  drop by matching the multicast address only.
2668			 * On the other hand, we cannot disambiguate the scope
2669			 * zone unless an interface is provided.  Thus, we
2670			 * check if there's ambiguity with the default scope
2671			 * zone as the last resort.
2672			 */
2673			bzero(&sa6_mc, sizeof(sa6_mc));
2674			sa6_mc.sin6_family = AF_INET6;
2675			sa6_mc.sin6_len = sizeof(sa6_mc);
2676			sa6_mc.sin6_addr = mreq->ipv6mr_multiaddr;
2677			error = sa6_embedscope(&sa6_mc, V_ip6_use_defzone);
2678			if (error != 0)
2679				break;
2680			mreq->ipv6mr_multiaddr = sa6_mc.sin6_addr;
2681		}
2682
2683		/*
2684		 * Find the membership in the membership list.
2685		 */
2686		for (imm = im6o->im6o_memberships.lh_first;
2687		     imm != NULL; imm = imm->i6mm_chain.le_next) {
2688			if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2689			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2690			    &mreq->ipv6mr_multiaddr))
2691				break;
2692		}
2693		if (imm == NULL) {
2694			/* Unable to resolve interface */
2695			error = EADDRNOTAVAIL;
2696			break;
2697		}
2698		/*
2699		 * Give up the multicast address record to which the
2700		 * membership points.
2701		 */
2702		LIST_REMOVE(imm, i6mm_chain);
2703		in6_delmulti(imm->i6mm_maddr);
2704		free(imm, M_IP6MADDR);
2705		break;
2706
2707	default:
2708		error = EOPNOTSUPP;
2709		break;
2710	}
2711
2712	/*
2713	 * If all options have default values, no need to keep the mbuf.
2714	 */
2715	if (im6o->im6o_multicast_ifp == NULL &&
2716	    im6o->im6o_multicast_hlim == V_ip6_defmcasthlim &&
2717	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2718	    im6o->im6o_memberships.lh_first == NULL) {
2719		free(*im6op, M_IP6MOPTS);
2720		*im6op = NULL;
2721	}
2722
2723	return (error);
2724}
2725
2726/*
2727 * Return the IP6 multicast options in response to user getsockopt().
2728 */
2729static int
2730ip6_getmoptions(int optname, struct ip6_moptions *im6o, struct mbuf **mp)
2731{
2732	INIT_VNET_INET6(curvnet);
2733	u_int *hlim, *loop, *ifindex;
2734
2735	*mp = m_get(M_WAIT, MT_HEADER);		/* XXX */
2736
2737	switch (optname) {
2738
2739	case IPV6_MULTICAST_IF:
2740		ifindex = mtod(*mp, u_int *);
2741		(*mp)->m_len = sizeof(u_int);
2742		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2743			*ifindex = 0;
2744		else
2745			*ifindex = im6o->im6o_multicast_ifp->if_index;
2746		return (0);
2747
2748	case IPV6_MULTICAST_HOPS:
2749		hlim = mtod(*mp, u_int *);
2750		(*mp)->m_len = sizeof(u_int);
2751		if (im6o == NULL)
2752			*hlim = V_ip6_defmcasthlim;
2753		else
2754			*hlim = im6o->im6o_multicast_hlim;
2755		return (0);
2756
2757	case IPV6_MULTICAST_LOOP:
2758		loop = mtod(*mp, u_int *);
2759		(*mp)->m_len = sizeof(u_int);
2760		if (im6o == NULL)
2761			*loop = V_ip6_defmcasthlim;
2762		else
2763			*loop = im6o->im6o_multicast_loop;
2764		return (0);
2765
2766	default:
2767		return (EOPNOTSUPP);
2768	}
2769}
2770
2771/*
2772 * Discard the IP6 multicast options.
2773 */
2774void
2775ip6_freemoptions(struct ip6_moptions *im6o)
2776{
2777	struct in6_multi_mship *imm;
2778
2779	if (im6o == NULL)
2780		return;
2781
2782	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2783		LIST_REMOVE(imm, i6mm_chain);
2784		if (imm->i6mm_maddr)
2785			in6_delmulti(imm->i6mm_maddr);
2786		free(imm, M_IP6MADDR);
2787	}
2788	free(im6o, M_IP6MOPTS);
2789}
2790
2791/*
2792 * Set IPv6 outgoing packet options based on advanced API.
2793 */
2794int
2795ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2796    struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto)
2797{
2798	struct cmsghdr *cm = 0;
2799
2800	if (control == NULL || opt == NULL)
2801		return (EINVAL);
2802
2803	ip6_initpktopts(opt);
2804	if (stickyopt) {
2805		int error;
2806
2807		/*
2808		 * If stickyopt is provided, make a local copy of the options
2809		 * for this particular packet, then override them by ancillary
2810		 * objects.
2811		 * XXX: copypktopts() does not copy the cached route to a next
2812		 * hop (if any).  This is not very good in terms of efficiency,
2813		 * but we can allow this since this option should be rarely
2814		 * used.
2815		 */
2816		if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2817			return (error);
2818	}
2819
2820	/*
2821	 * XXX: Currently, we assume all the optional information is stored
2822	 * in a single mbuf.
2823	 */
2824	if (control->m_next)
2825		return (EINVAL);
2826
2827	for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2828	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2829		int error;
2830
2831		if (control->m_len < CMSG_LEN(0))
2832			return (EINVAL);
2833
2834		cm = mtod(control, struct cmsghdr *);
2835		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2836			return (EINVAL);
2837		if (cm->cmsg_level != IPPROTO_IPV6)
2838			continue;
2839
2840		error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2841		    cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
2842		if (error)
2843			return (error);
2844	}
2845
2846	return (0);
2847}
2848
2849/*
2850 * Set a particular packet option, as a sticky option or an ancillary data
2851 * item.  "len" can be 0 only when it's a sticky option.
2852 * We have 4 cases of combination of "sticky" and "cmsg":
2853 * "sticky=0, cmsg=0": impossible
2854 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2855 * "sticky=1, cmsg=0": RFC3542 socket option
2856 * "sticky=1, cmsg=1": RFC2292 socket option
2857 */
2858static int
2859ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2860    struct ucred *cred, int sticky, int cmsg, int uproto)
2861{
2862	INIT_VNET_NET(curvnet);
2863	INIT_VNET_INET6(curvnet);
2864	int minmtupolicy, preftemp;
2865	int error;
2866
2867	if (!sticky && !cmsg) {
2868#ifdef DIAGNOSTIC
2869		printf("ip6_setpktopt: impossible case\n");
2870#endif
2871		return (EINVAL);
2872	}
2873
2874	/*
2875	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2876	 * not be specified in the context of RFC3542.  Conversely,
2877	 * RFC3542 types should not be specified in the context of RFC2292.
2878	 */
2879	if (!cmsg) {
2880		switch (optname) {
2881		case IPV6_2292PKTINFO:
2882		case IPV6_2292HOPLIMIT:
2883		case IPV6_2292NEXTHOP:
2884		case IPV6_2292HOPOPTS:
2885		case IPV6_2292DSTOPTS:
2886		case IPV6_2292RTHDR:
2887		case IPV6_2292PKTOPTIONS:
2888			return (ENOPROTOOPT);
2889		}
2890	}
2891	if (sticky && cmsg) {
2892		switch (optname) {
2893		case IPV6_PKTINFO:
2894		case IPV6_HOPLIMIT:
2895		case IPV6_NEXTHOP:
2896		case IPV6_HOPOPTS:
2897		case IPV6_DSTOPTS:
2898		case IPV6_RTHDRDSTOPTS:
2899		case IPV6_RTHDR:
2900		case IPV6_USE_MIN_MTU:
2901		case IPV6_DONTFRAG:
2902		case IPV6_TCLASS:
2903		case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
2904			return (ENOPROTOOPT);
2905		}
2906	}
2907
2908	switch (optname) {
2909	case IPV6_2292PKTINFO:
2910	case IPV6_PKTINFO:
2911	{
2912		struct ifnet *ifp = NULL;
2913		struct in6_pktinfo *pktinfo;
2914
2915		if (len != sizeof(struct in6_pktinfo))
2916			return (EINVAL);
2917
2918		pktinfo = (struct in6_pktinfo *)buf;
2919
2920		/*
2921		 * An application can clear any sticky IPV6_PKTINFO option by
2922		 * doing a "regular" setsockopt with ipi6_addr being
2923		 * in6addr_any and ipi6_ifindex being zero.
2924		 * [RFC 3542, Section 6]
2925		 */
2926		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2927		    pktinfo->ipi6_ifindex == 0 &&
2928		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2929			ip6_clearpktopts(opt, optname);
2930			break;
2931		}
2932
2933		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2934		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2935			return (EINVAL);
2936		}
2937
2938		/* validate the interface index if specified. */
2939		if (pktinfo->ipi6_ifindex > V_if_index ||
2940		    pktinfo->ipi6_ifindex < 0) {
2941			 return (ENXIO);
2942		}
2943		if (pktinfo->ipi6_ifindex) {
2944			ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
2945			if (ifp == NULL)
2946				return (ENXIO);
2947		}
2948
2949		/*
2950		 * We store the address anyway, and let in6_selectsrc()
2951		 * validate the specified address.  This is because ipi6_addr
2952		 * may not have enough information about its scope zone, and
2953		 * we may need additional information (such as outgoing
2954		 * interface or the scope zone of a destination address) to
2955		 * disambiguate the scope.
2956		 * XXX: the delay of the validation may confuse the
2957		 * application when it is used as a sticky option.
2958		 */
2959		if (opt->ip6po_pktinfo == NULL) {
2960			opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2961			    M_IP6OPT, M_NOWAIT);
2962			if (opt->ip6po_pktinfo == NULL)
2963				return (ENOBUFS);
2964		}
2965		bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
2966		break;
2967	}
2968
2969	case IPV6_2292HOPLIMIT:
2970	case IPV6_HOPLIMIT:
2971	{
2972		int *hlimp;
2973
2974		/*
2975		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2976		 * to simplify the ordering among hoplimit options.
2977		 */
2978		if (optname == IPV6_HOPLIMIT && sticky)
2979			return (ENOPROTOOPT);
2980
2981		if (len != sizeof(int))
2982			return (EINVAL);
2983		hlimp = (int *)buf;
2984		if (*hlimp < -1 || *hlimp > 255)
2985			return (EINVAL);
2986
2987		opt->ip6po_hlim = *hlimp;
2988		break;
2989	}
2990
2991	case IPV6_TCLASS:
2992	{
2993		int tclass;
2994
2995		if (len != sizeof(int))
2996			return (EINVAL);
2997		tclass = *(int *)buf;
2998		if (tclass < -1 || tclass > 255)
2999			return (EINVAL);
3000
3001		opt->ip6po_tclass = tclass;
3002		break;
3003	}
3004
3005	case IPV6_2292NEXTHOP:
3006	case IPV6_NEXTHOP:
3007		if (cred != NULL) {
3008			error = priv_check_cred(cred,
3009			    PRIV_NETINET_SETHDROPTS, 0);
3010			if (error)
3011				return (error);
3012		}
3013
3014		if (len == 0) {	/* just remove the option */
3015			ip6_clearpktopts(opt, IPV6_NEXTHOP);
3016			break;
3017		}
3018
3019		/* check if cmsg_len is large enough for sa_len */
3020		if (len < sizeof(struct sockaddr) || len < *buf)
3021			return (EINVAL);
3022
3023		switch (((struct sockaddr *)buf)->sa_family) {
3024		case AF_INET6:
3025		{
3026			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3027			int error;
3028
3029			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3030				return (EINVAL);
3031
3032			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3033			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3034				return (EINVAL);
3035			}
3036			if ((error = sa6_embedscope(sa6, V_ip6_use_defzone))
3037			    != 0) {
3038				return (error);
3039			}
3040			break;
3041		}
3042		case AF_LINK:	/* should eventually be supported */
3043		default:
3044			return (EAFNOSUPPORT);
3045		}
3046
3047		/* turn off the previous option, then set the new option. */
3048		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3049		opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
3050		if (opt->ip6po_nexthop == NULL)
3051			return (ENOBUFS);
3052		bcopy(buf, opt->ip6po_nexthop, *buf);
3053		break;
3054
3055	case IPV6_2292HOPOPTS:
3056	case IPV6_HOPOPTS:
3057	{
3058		struct ip6_hbh *hbh;
3059		int hbhlen;
3060
3061		/*
3062		 * XXX: We don't allow a non-privileged user to set ANY HbH
3063		 * options, since per-option restriction has too much
3064		 * overhead.
3065		 */
3066		if (cred != NULL) {
3067			error = priv_check_cred(cred,
3068			    PRIV_NETINET_SETHDROPTS, 0);
3069			if (error)
3070				return (error);
3071		}
3072
3073		if (len == 0) {
3074			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3075			break;	/* just remove the option */
3076		}
3077
3078		/* message length validation */
3079		if (len < sizeof(struct ip6_hbh))
3080			return (EINVAL);
3081		hbh = (struct ip6_hbh *)buf;
3082		hbhlen = (hbh->ip6h_len + 1) << 3;
3083		if (len != hbhlen)
3084			return (EINVAL);
3085
3086		/* turn off the previous option, then set the new option. */
3087		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3088		opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
3089		if (opt->ip6po_hbh == NULL)
3090			return (ENOBUFS);
3091		bcopy(hbh, opt->ip6po_hbh, hbhlen);
3092
3093		break;
3094	}
3095
3096	case IPV6_2292DSTOPTS:
3097	case IPV6_DSTOPTS:
3098	case IPV6_RTHDRDSTOPTS:
3099	{
3100		struct ip6_dest *dest, **newdest = NULL;
3101		int destlen;
3102
3103		if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */
3104			error = priv_check_cred(cred,
3105			    PRIV_NETINET_SETHDROPTS, 0);
3106			if (error)
3107				return (error);
3108		}
3109
3110		if (len == 0) {
3111			ip6_clearpktopts(opt, optname);
3112			break;	/* just remove the option */
3113		}
3114
3115		/* message length validation */
3116		if (len < sizeof(struct ip6_dest))
3117			return (EINVAL);
3118		dest = (struct ip6_dest *)buf;
3119		destlen = (dest->ip6d_len + 1) << 3;
3120		if (len != destlen)
3121			return (EINVAL);
3122
3123		/*
3124		 * Determine the position that the destination options header
3125		 * should be inserted; before or after the routing header.
3126		 */
3127		switch (optname) {
3128		case IPV6_2292DSTOPTS:
3129			/*
3130			 * The old advacned API is ambiguous on this point.
3131			 * Our approach is to determine the position based
3132			 * according to the existence of a routing header.
3133			 * Note, however, that this depends on the order of the
3134			 * extension headers in the ancillary data; the 1st
3135			 * part of the destination options header must appear
3136			 * before the routing header in the ancillary data,
3137			 * too.
3138			 * RFC3542 solved the ambiguity by introducing
3139			 * separate ancillary data or option types.
3140			 */
3141			if (opt->ip6po_rthdr == NULL)
3142				newdest = &opt->ip6po_dest1;
3143			else
3144				newdest = &opt->ip6po_dest2;
3145			break;
3146		case IPV6_RTHDRDSTOPTS:
3147			newdest = &opt->ip6po_dest1;
3148			break;
3149		case IPV6_DSTOPTS:
3150			newdest = &opt->ip6po_dest2;
3151			break;
3152		}
3153
3154		/* turn off the previous option, then set the new option. */
3155		ip6_clearpktopts(opt, optname);
3156		*newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
3157		if (*newdest == NULL)
3158			return (ENOBUFS);
3159		bcopy(dest, *newdest, destlen);
3160
3161		break;
3162	}
3163
3164	case IPV6_2292RTHDR:
3165	case IPV6_RTHDR:
3166	{
3167		struct ip6_rthdr *rth;
3168		int rthlen;
3169
3170		if (len == 0) {
3171			ip6_clearpktopts(opt, IPV6_RTHDR);
3172			break;	/* just remove the option */
3173		}
3174
3175		/* message length validation */
3176		if (len < sizeof(struct ip6_rthdr))
3177			return (EINVAL);
3178		rth = (struct ip6_rthdr *)buf;
3179		rthlen = (rth->ip6r_len + 1) << 3;
3180		if (len != rthlen)
3181			return (EINVAL);
3182
3183		switch (rth->ip6r_type) {
3184		case IPV6_RTHDR_TYPE_0:
3185			if (rth->ip6r_len == 0)	/* must contain one addr */
3186				return (EINVAL);
3187			if (rth->ip6r_len % 2) /* length must be even */
3188				return (EINVAL);
3189			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3190				return (EINVAL);
3191			break;
3192		default:
3193			return (EINVAL);	/* not supported */
3194		}
3195
3196		/* turn off the previous option */
3197		ip6_clearpktopts(opt, IPV6_RTHDR);
3198		opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
3199		if (opt->ip6po_rthdr == NULL)
3200			return (ENOBUFS);
3201		bcopy(rth, opt->ip6po_rthdr, rthlen);
3202
3203		break;
3204	}
3205
3206	case IPV6_USE_MIN_MTU:
3207		if (len != sizeof(int))
3208			return (EINVAL);
3209		minmtupolicy = *(int *)buf;
3210		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3211		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3212		    minmtupolicy != IP6PO_MINMTU_ALL) {
3213			return (EINVAL);
3214		}
3215		opt->ip6po_minmtu = minmtupolicy;
3216		break;
3217
3218	case IPV6_DONTFRAG:
3219		if (len != sizeof(int))
3220			return (EINVAL);
3221
3222		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3223			/*
3224			 * we ignore this option for TCP sockets.
3225			 * (RFC3542 leaves this case unspecified.)
3226			 */
3227			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3228		} else
3229			opt->ip6po_flags |= IP6PO_DONTFRAG;
3230		break;
3231
3232	case IPV6_PREFER_TEMPADDR:
3233		if (len != sizeof(int))
3234			return (EINVAL);
3235		preftemp = *(int *)buf;
3236		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3237		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3238		    preftemp != IP6PO_TEMPADDR_PREFER) {
3239			return (EINVAL);
3240		}
3241		opt->ip6po_prefer_tempaddr = preftemp;
3242		break;
3243
3244	default:
3245		return (ENOPROTOOPT);
3246	} /* end of switch */
3247
3248	return (0);
3249}
3250
3251/*
3252 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3253 * packet to the input queue of a specified interface.  Note that this
3254 * calls the output routine of the loopback "driver", but with an interface
3255 * pointer that might NOT be &loif -- easier than replicating that code here.
3256 */
3257void
3258ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
3259{
3260	struct mbuf *copym;
3261	struct ip6_hdr *ip6;
3262
3263	copym = m_copy(m, 0, M_COPYALL);
3264	if (copym == NULL)
3265		return;
3266
3267	/*
3268	 * Make sure to deep-copy IPv6 header portion in case the data
3269	 * is in an mbuf cluster, so that we can safely override the IPv6
3270	 * header portion later.
3271	 */
3272	if ((copym->m_flags & M_EXT) != 0 ||
3273	    copym->m_len < sizeof(struct ip6_hdr)) {
3274		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3275		if (copym == NULL)
3276			return;
3277	}
3278
3279#ifdef DIAGNOSTIC
3280	if (copym->m_len < sizeof(*ip6)) {
3281		m_freem(copym);
3282		return;
3283	}
3284#endif
3285
3286	ip6 = mtod(copym, struct ip6_hdr *);
3287	/*
3288	 * clear embedded scope identifiers if necessary.
3289	 * in6_clearscope will touch the addresses only when necessary.
3290	 */
3291	in6_clearscope(&ip6->ip6_src);
3292	in6_clearscope(&ip6->ip6_dst);
3293
3294	(void)if_simloop(ifp, copym, dst->sin6_family, 0);
3295}
3296
3297/*
3298 * Chop IPv6 header off from the payload.
3299 */
3300static int
3301ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3302{
3303	struct mbuf *mh;
3304	struct ip6_hdr *ip6;
3305
3306	ip6 = mtod(m, struct ip6_hdr *);
3307	if (m->m_len > sizeof(*ip6)) {
3308		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3309		if (mh == 0) {
3310			m_freem(m);
3311			return ENOBUFS;
3312		}
3313		M_MOVE_PKTHDR(mh, m);
3314		MH_ALIGN(mh, sizeof(*ip6));
3315		m->m_len -= sizeof(*ip6);
3316		m->m_data += sizeof(*ip6);
3317		mh->m_next = m;
3318		m = mh;
3319		m->m_len = sizeof(*ip6);
3320		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3321	}
3322	exthdrs->ip6e_ip6 = m;
3323	return 0;
3324}
3325
3326/*
3327 * Compute IPv6 extension header length.
3328 */
3329int
3330ip6_optlen(struct inpcb *in6p)
3331{
3332	int len;
3333
3334	if (!in6p->in6p_outputopts)
3335		return 0;
3336
3337	len = 0;
3338#define elen(x) \
3339    (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3340
3341	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3342	if (in6p->in6p_outputopts->ip6po_rthdr)
3343		/* dest1 is valid with rthdr only */
3344		len += elen(in6p->in6p_outputopts->ip6po_dest1);
3345	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3346	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3347	return len;
3348#undef elen
3349}
3350