ip6_output.c revision 174510
1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $
30 */
31
32/*-
33 * Copyright (c) 1982, 1986, 1988, 1990, 1993
34 *	The Regents of the University of California.  All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 4. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
61 */
62
63#include <sys/cdefs.h>
64__FBSDID("$FreeBSD: head/sys/netinet6/ip6_output.c 174510 2007-12-10 16:03:40Z obrien $");
65
66#include "opt_inet.h"
67#include "opt_inet6.h"
68#include "opt_ipsec.h"
69
70#include <sys/param.h>
71#include <sys/malloc.h>
72#include <sys/mbuf.h>
73#include <sys/proc.h>
74#include <sys/errno.h>
75#include <sys/priv.h>
76#include <sys/protosw.h>
77#include <sys/socket.h>
78#include <sys/socketvar.h>
79#include <sys/kernel.h>
80
81#include <net/if.h>
82#include <net/netisr.h>
83#include <net/route.h>
84#include <net/pfil.h>
85
86#include <netinet/in.h>
87#include <netinet/in_var.h>
88#include <netinet6/in6_var.h>
89#include <netinet/ip6.h>
90#include <netinet/icmp6.h>
91#include <netinet6/ip6_var.h>
92#include <netinet/in_pcb.h>
93#include <netinet/tcp_var.h>
94#include <netinet6/nd6.h>
95
96#ifdef IPSEC
97#include <netipsec/ipsec.h>
98#include <netipsec/ipsec6.h>
99#include <netipsec/key.h>
100#include <netinet6/ip6_ipsec.h>
101#endif /* IPSEC */
102
103#include <netinet6/ip6protosw.h>
104#include <netinet6/scope6_var.h>
105
/*
 * Define the file-local malloc(9) type M_IP6MOPTS, with the short
 * description "ip6_moptions" and the long description
 * "internet multicast options".
 * NOTE(review): presumably used for struct ip6_moptions allocations made
 * by the multicast option helpers declared below -- confirm at the
 * malloc()/free() call sites.
 */
106static MALLOC_DEFINE(M_IP6MOPTS, "ip6_moptions", "internet multicast options");
107
/*
 * Working set of mbufs that ip6_output() uses while it assembles the
 * IPv6 header chain of an outgoing packet.  ip6_output() bzero()s the
 * structure first, so a NULL member means "this header is absent".
 */
108struct ip6_exthdrs {
109	struct mbuf *ip6e_ip6;		/* packet head taken by ip6_output() after ip6_splithdr() */
110	struct mbuf *ip6e_hbh;		/* hop-by-hop options header (built from ip6po_hbh) */
111	struct mbuf *ip6e_dest1;	/* destination options, 1st part; only built when a routing header is given */
112	struct mbuf *ip6e_rthdr;	/* routing header (built from ip6po_rthdr) */
113	struct mbuf *ip6e_dest2;	/* destination options, 2nd part; not counted in the unfragmentable length */
114};
115
/*
 * Forward declarations of this file's static helpers, written as plain
 * ISO C prototypes (no __P(()) wrapper).
 *
 * First group: socket/packet option handling.
 */
116static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **,
117	int, int);
118static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
119	struct socket *, struct sockopt *);
120static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
121static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int,
122	int, int, int);
123
/* Second group: multicast options and header-construction helpers. */
124static int ip6_setmoptions(int, struct ip6_moptions **, struct mbuf *);
125static int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf **);
126static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
127static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
128	struct ip6_frag **);
129static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
130static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
131static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
132	struct ifnet *, struct in6_addr *, u_long *, int *);
133static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
134
135
136/*
137 * Make an extension header from option data.  hp is the source, and
138 * mp is the destination.
 *
 * When hp is non-NULL it is read as a struct ip6_ext, and
 * ((ip6e_len + 1) << 3) bytes -- i.e. (ip6e_len + 1) * 8 -- starting at
 * hp are passed to ip6_copyexthdr() together with mp.  When hp is NULL
 * the macro does nothing.
 *
 * The expansion is not self-contained: it stores the return value of
 * ip6_copyexthdr() in a variable named "error" and, if that value is
 * non-zero, jumps to a label named "freehdrs".  Both must exist in the
 * function that uses the macro.  hp is evaluated more than once, so it
 * must not have side effects.
139 */
140#define MAKE_EXTHDR(hp, mp)						\
141    do {								\
142	if (hp) {							\
143		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
144		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
145		    ((eh)->ip6e_len + 1) << 3);				\
146		if (error)						\
147			goto freehdrs;					\
148	}								\
149    } while (/*CONSTCOND*/ 0)
150
151/*
152 * Form a chain of extension headers.
153 * m is the extension header mbuf to insert (NULL: the macro does nothing)
154 * mp is the previous mbuf in the chain; it is advanced to m
155 * p points at the next-header byte of the preceding header; it is
 *   re-pointed at the first byte of m's data
156 * i is the type of option, i.e. the protocol number written into *p.
 *
 * mp and p are assigned by the expansion, so both must be lvalues.
 * The expansion also reads a variable named "hdrsplit" from the
 * surrounding function and panics when it is zero.
157 */
158#define MAKE_CHAIN(m, mp, p, i)\
159    do {\
160	if (m) {\
161		if (!hdrsplit) \
162			panic("assumption failed: hdr not split"); \
163		*mtod((m), u_char *) = *(p);	/* new header takes over the old next-header value */\
164		*(p) = (i);			/* preceding header now announces this header */\
165		p = mtod((m), u_char *);	/* next insertion patches this header's first byte */\
166		(m)->m_next = (mp)->m_next;	/* splice m in right after mp */\
167		(mp)->m_next = (m);\
168		(mp) = (m);\
169	}\
170    } while (/*CONSTCOND*/ 0)
171
172/*
173 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
174 * header (with pri, len, nxt, hlim, src, dst).
175 * This function may modify ver and hlim only.
176 * The mbuf chain containing the packet will be freed.
177 * The mbuf opt, if present, will not be freed.
178 *
179 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
180 * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
181 * which is rt_rmx.rmx_mtu.
182 *
183 * ifpp - XXX: just for statistics
184 */
185int
186ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
187    struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
188    struct ifnet **ifpp, struct inpcb *inp)
189{
190	struct ip6_hdr *ip6, *mhip6;
191	struct ifnet *ifp, *origifp;
192	struct mbuf *m = m0;
193	struct mbuf *mprev = NULL;
194	int hlen, tlen, len, off;
195	struct route_in6 ip6route;
196	struct rtentry *rt = NULL;
197	struct sockaddr_in6 *dst, src_sa, dst_sa;
198	struct in6_addr odst;
199	int error = 0;
200	struct in6_ifaddr *ia = NULL;
201	u_long mtu;
202	int alwaysfrag, dontfrag;
203	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
204	struct ip6_exthdrs exthdrs;
205	struct in6_addr finaldst, src0, dst0;
206	u_int32_t zone;
207	struct route_in6 *ro_pmtu = NULL;
208	int hdrsplit = 0;
209	int needipsec = 0;
210#ifdef IPSEC
211	struct ipsec_output_state state;
212	struct ip6_rthdr *rh = NULL;
213	int needipsectun = 0;
214	int segleft_org = 0;
215	struct secpolicy *sp = NULL;
216#endif /* IPSEC */
217
218	ip6 = mtod(m, struct ip6_hdr *);
219	if (ip6 == NULL) {
220		printf ("ip6 is NULL");
221		goto bad;
222	}
223
224	finaldst = ip6->ip6_dst;
225
226	bzero(&exthdrs, sizeof(exthdrs));
227
228	if (opt) {
229		/* Hop-by-Hop options header */
230		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
231		/* Destination options header(1st part) */
232		if (opt->ip6po_rthdr) {
233			/*
234			 * Destination options header(1st part)
235			 * This only makes sense with a routing header.
236			 * See Section 9.2 of RFC 3542.
237			 * Disabling this part just for MIP6 convenience is
238			 * a bad idea.  We need to think carefully about a
239			 * way to make the advanced API coexist with MIP6
240			 * options, which might automatically be inserted in
241			 * the kernel.
242			 */
243			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
244		}
245		/* Routing header */
246		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
247		/* Destination options header(2nd part) */
248		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
249	}
250
251	/*
252	 * IPSec checking which handles several cases.
253	 * FAST IPSEC: We re-injected the packet.
254	 */
255#ifdef IPSEC
256	switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
257	{
258	case 1:                 /* Bad packet */
259		goto freehdrs;
260	case -1:                /* Do IPSec */
261		needipsec = 1;
262	case 0:                 /* No IPSec */
263	default:
264		break;
265	}
266#endif /* IPSEC */
267
268	/*
269	 * Calculate the total length of the extension header chain.
270	 * Keep the length of the unfragmentable part for fragmentation.
271	 */
272	optlen = 0;
273	if (exthdrs.ip6e_hbh)
274		optlen += exthdrs.ip6e_hbh->m_len;
275	if (exthdrs.ip6e_dest1)
276		optlen += exthdrs.ip6e_dest1->m_len;
277	if (exthdrs.ip6e_rthdr)
278		optlen += exthdrs.ip6e_rthdr->m_len;
279	unfragpartlen = optlen + sizeof(struct ip6_hdr);
280
281	/* NOTE: we don't add AH/ESP length here. do that later. */
282	if (exthdrs.ip6e_dest2)
283		optlen += exthdrs.ip6e_dest2->m_len;
284
285	/*
286	 * If we need IPsec, or there is at least one extension header,
287	 * separate IP6 header from the payload.
288	 */
289	if ((needipsec || optlen) && !hdrsplit) {
290		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
291			m = NULL;
292			goto freehdrs;
293		}
294		m = exthdrs.ip6e_ip6;
295		hdrsplit++;
296	}
297
298	/* adjust pointer */
299	ip6 = mtod(m, struct ip6_hdr *);
300
301	/* adjust mbuf packet header length */
302	m->m_pkthdr.len += optlen;
303	plen = m->m_pkthdr.len - sizeof(*ip6);
304
305	/* If this is a jumbo payload, insert a jumbo payload option. */
306	if (plen > IPV6_MAXPACKET) {
307		if (!hdrsplit) {
308			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
309				m = NULL;
310				goto freehdrs;
311			}
312			m = exthdrs.ip6e_ip6;
313			hdrsplit++;
314		}
315		/* adjust pointer */
316		ip6 = mtod(m, struct ip6_hdr *);
317		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
318			goto freehdrs;
319		ip6->ip6_plen = 0;
320	} else
321		ip6->ip6_plen = htons(plen);
322
323	/*
324	 * Concatenate headers and fill in next header fields.
325	 * Here we have, on "m"
326	 *	IPv6 payload
327	 * and we insert headers accordingly.  Finally, we should be getting:
328	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
329	 *
330	 * during the header composing process, "m" points to IPv6 header.
331	 * "mprev" points to an extension header prior to esp.
332	 */
333	u_char *nexthdrp = &ip6->ip6_nxt;
334	mprev = m;
335
336	/*
337	 * we treat dest2 specially.  this makes IPsec processing
338	 * much easier.  the goal here is to make mprev point the
339	 * mbuf prior to dest2.
340	 *
341	 * result: IPv6 dest2 payload
342	 * m and mprev will point to IPv6 header.
343	 */
344	if (exthdrs.ip6e_dest2) {
345		if (!hdrsplit)
346			panic("assumption failed: hdr not split");
347		exthdrs.ip6e_dest2->m_next = m->m_next;
348		m->m_next = exthdrs.ip6e_dest2;
349		*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
350		ip6->ip6_nxt = IPPROTO_DSTOPTS;
351	}
352
353	/*
354	 * result: IPv6 hbh dest1 rthdr dest2 payload
355	 * m will point to IPv6 header.  mprev will point to the
356	 * extension header prior to dest2 (rthdr in the above case).
357	 */
358	MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
359	MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
360		   IPPROTO_DSTOPTS);
361	MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
362		   IPPROTO_ROUTING);
363
364#ifdef IPSEC
365	if (!needipsec)
366		goto skip_ipsec2;
367
368	/*
369	 * pointers after IPsec headers are not valid any more.
370	 * other pointers need a great care too.
371	 * (IPsec routines should not mangle mbufs prior to AH/ESP)
372	 */
373	exthdrs.ip6e_dest2 = NULL;
374
375	if (exthdrs.ip6e_rthdr) {
376		rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
377		segleft_org = rh->ip6r_segleft;
378		rh->ip6r_segleft = 0;
379	}
380
381	bzero(&state, sizeof(state));
382	state.m = m;
383	error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
384				    &needipsectun);
385	m = state.m;
386	if (error) {
387		/* mbuf is already reclaimed in ipsec6_output_trans. */
388		m = NULL;
389		switch (error) {
390		case EHOSTUNREACH:
391		case ENETUNREACH:
392		case EMSGSIZE:
393		case ENOBUFS:
394		case ENOMEM:
395			break;
396		default:
397			printf("ip6_output (ipsec): error code %d\n", error);
398			/* FALLTHROUGH */
399		case ENOENT:
400			/* don't show these error codes to the user */
401			error = 0;
402			break;
403		}
404		goto bad;
405	} else if (!needipsectun) {
406		/*
407		 * In the FAST IPSec case we have already
408		 * re-injected the packet and it has been freed
409		 * by the ipsec_done() function.  So, just clean
410		 * up after ourselves.
411		 */
412		m = NULL;
413		goto done;
414	}
415	if (exthdrs.ip6e_rthdr) {
416		/* ah6_output doesn't modify mbuf chain */
417		rh->ip6r_segleft = segleft_org;
418	}
419skip_ipsec2:;
420#endif /* IPSEC */
421
422	/*
423	 * If there is a routing header, replace the destination address field
424	 * with the first hop of the routing header.
425	 */
426	if (exthdrs.ip6e_rthdr) {
427		struct ip6_rthdr *rh =
428			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
429						  struct ip6_rthdr *));
430		struct ip6_rthdr0 *rh0;
431		struct in6_addr *addr;
432		struct sockaddr_in6 sa;
433
434		switch (rh->ip6r_type) {
435		case IPV6_RTHDR_TYPE_0:
436			 rh0 = (struct ip6_rthdr0 *)rh;
437			 addr = (struct in6_addr *)(rh0 + 1);
438
439			 /*
440			  * construct a sockaddr_in6 form of
441			  * the first hop.
442			  *
443			  * XXX: we may not have enough
444			  * information about its scope zone;
445			  * there is no standard API to pass
446			  * the information from the
447			  * application.
448			  */
449			 bzero(&sa, sizeof(sa));
450			 sa.sin6_family = AF_INET6;
451			 sa.sin6_len = sizeof(sa);
452			 sa.sin6_addr = addr[0];
453			 if ((error = sa6_embedscope(&sa,
454			     ip6_use_defzone)) != 0) {
455				 goto bad;
456			 }
457			 ip6->ip6_dst = sa.sin6_addr;
458			 bcopy(&addr[1], &addr[0], sizeof(struct in6_addr)
459			     * (rh0->ip6r0_segleft - 1));
460			 addr[rh0->ip6r0_segleft - 1] = finaldst;
461			 /* XXX */
462			 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
463			 break;
464		default:	/* is it possible? */
465			 error = EINVAL;
466			 goto bad;
467		}
468	}
469
470	/* Source address validation */
471	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
472	    (flags & IPV6_UNSPECSRC) == 0) {
473		error = EOPNOTSUPP;
474		ip6stat.ip6s_badscope++;
475		goto bad;
476	}
477	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
478		error = EOPNOTSUPP;
479		ip6stat.ip6s_badscope++;
480		goto bad;
481	}
482
483	ip6stat.ip6s_localout++;
484
485	/*
486	 * Route packet.
487	 */
488	if (ro == 0) {
489		ro = &ip6route;
490		bzero((caddr_t)ro, sizeof(*ro));
491	}
492	ro_pmtu = ro;
493	if (opt && opt->ip6po_rthdr)
494		ro = &opt->ip6po_route;
495	dst = (struct sockaddr_in6 *)&ro->ro_dst;
496
497again:
498	/*
499	 * if specified, try to fill in the traffic class field.
500	 * do not override if a non-zero value is already set.
501	 * we check the diffserv field and the ecn field separately.
502	 */
503	if (opt && opt->ip6po_tclass >= 0) {
504		int mask = 0;
505
506		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
507			mask |= 0xfc;
508		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
509			mask |= 0x03;
510		if (mask != 0)
511			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
512	}
513
514	/* fill in or override the hop limit field, if necessary. */
515	if (opt && opt->ip6po_hlim != -1)
516		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
517	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
518		if (im6o != NULL)
519			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
520		else
521			ip6->ip6_hlim = ip6_defmcasthlim;
522	}
523
524#ifdef IPSEC
525	/*
526	 * We may re-inject packets into the stack here.
527	 */
528	if (needipsec && needipsectun) {
529		struct ipsec_output_state state;
530
531		/*
532		 * All the extension headers will become inaccessible
533		 * (since they can be encrypted).
534		 * Don't panic, we need no more updates to extension headers
535		 * on inner IPv6 packet (since they are now encapsulated).
536		 *
537		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
538		 */
539		bzero(&exthdrs, sizeof(exthdrs));
540		exthdrs.ip6e_ip6 = m;
541
542		bzero(&state, sizeof(state));
543		state.m = m;
544		state.ro = (struct route *)ro;
545		state.dst = (struct sockaddr *)dst;
546
547		error = ipsec6_output_tunnel(&state, sp, flags);
548
549		m = state.m;
550		ro = (struct route_in6 *)state.ro;
551		dst = (struct sockaddr_in6 *)state.dst;
552		if (error) {
553			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
554			m0 = m = NULL;
555			m = NULL;
556			switch (error) {
557			case EHOSTUNREACH:
558			case ENETUNREACH:
559			case EMSGSIZE:
560			case ENOBUFS:
561			case ENOMEM:
562				break;
563			default:
564				printf("ip6_output (ipsec): error code %d\n", error);
565				/* FALLTHROUGH */
566			case ENOENT:
567				/* don't show these error codes to the user */
568				error = 0;
569				break;
570			}
571			goto bad;
572		} else {
573			/*
574			 * In the FAST IPSec case we have already
575			 * re-injected the packet and it has been freed
576			 * by the ipsec_done() function.  So, just clean
577			 * up after ourselves.
578			 */
579			m = NULL;
580			goto done;
581		}
582
583		exthdrs.ip6e_ip6 = m;
584	}
585#endif /* IPSEC */
586
587	/* adjust pointer */
588	ip6 = mtod(m, struct ip6_hdr *);
589
590	bzero(&dst_sa, sizeof(dst_sa));
591	dst_sa.sin6_family = AF_INET6;
592	dst_sa.sin6_len = sizeof(dst_sa);
593	dst_sa.sin6_addr = ip6->ip6_dst;
594	if ((error = in6_selectroute(&dst_sa, opt, im6o, ro,
595	    &ifp, &rt, 0)) != 0) {
596		switch (error) {
597		case EHOSTUNREACH:
598			ip6stat.ip6s_noroute++;
599			break;
600		case EADDRNOTAVAIL:
601		default:
602			break; /* XXX statistics? */
603		}
604		if (ifp != NULL)
605			in6_ifstat_inc(ifp, ifs6_out_discard);
606		goto bad;
607	}
608	if (rt == NULL) {
609		/*
610		 * If in6_selectroute() does not return a route entry,
611		 * dst may not have been updated.
612		 */
613		*dst = dst_sa;	/* XXX */
614	}
615
616	/*
617	 * then rt (for unicast) and ifp must be non-NULL valid values.
618	 */
619	if ((flags & IPV6_FORWARDING) == 0) {
620		/* XXX: the FORWARDING flag can be set for mrouting. */
621		in6_ifstat_inc(ifp, ifs6_out_request);
622	}
623	if (rt != NULL) {
624		ia = (struct in6_ifaddr *)(rt->rt_ifa);
625		rt->rt_use++;
626	}
627
628	/*
629	 * The outgoing interface must be in the zone of source and
630	 * destination addresses.  We should use ia_ifp to support the
631	 * case of sending packets to an address of our own.
632	 */
633	if (ia != NULL && ia->ia_ifp)
634		origifp = ia->ia_ifp;
635	else
636		origifp = ifp;
637
638	src0 = ip6->ip6_src;
639	if (in6_setscope(&src0, origifp, &zone))
640		goto badscope;
641	bzero(&src_sa, sizeof(src_sa));
642	src_sa.sin6_family = AF_INET6;
643	src_sa.sin6_len = sizeof(src_sa);
644	src_sa.sin6_addr = ip6->ip6_src;
645	if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
646		goto badscope;
647
648	dst0 = ip6->ip6_dst;
649	if (in6_setscope(&dst0, origifp, &zone))
650		goto badscope;
651	/* re-initialize to be sure */
652	bzero(&dst_sa, sizeof(dst_sa));
653	dst_sa.sin6_family = AF_INET6;
654	dst_sa.sin6_len = sizeof(dst_sa);
655	dst_sa.sin6_addr = ip6->ip6_dst;
656	if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
657		goto badscope;
658	}
659
660	/* scope check is done. */
661	goto routefound;
662
663  badscope:
664	ip6stat.ip6s_badscope++;
665	in6_ifstat_inc(origifp, ifs6_out_discard);
666	if (error == 0)
667		error = EHOSTUNREACH; /* XXX */
668	goto bad;
669
670  routefound:
671	if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
672		if (opt && opt->ip6po_nextroute.ro_rt) {
673			/*
674			 * The nexthop is explicitly specified by the
675			 * application.  We assume the next hop is an IPv6
676			 * address.
677			 */
678			dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
679		}
680		else if ((rt->rt_flags & RTF_GATEWAY))
681			dst = (struct sockaddr_in6 *)rt->rt_gateway;
682	}
683
684	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
685		m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
686	} else {
687		struct	in6_multi *in6m;
688
689		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
690
691		in6_ifstat_inc(ifp, ifs6_out_mcast);
692
693		/*
694		 * Confirm that the outgoing interface supports multicast.
695		 */
696		if (!(ifp->if_flags & IFF_MULTICAST)) {
697			ip6stat.ip6s_noroute++;
698			in6_ifstat_inc(ifp, ifs6_out_discard);
699			error = ENETUNREACH;
700			goto bad;
701		}
702		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
703		if (in6m != NULL &&
704		   (im6o == NULL || im6o->im6o_multicast_loop)) {
705			/*
706			 * If we belong to the destination multicast group
707			 * on the outgoing interface, and the caller did not
708			 * forbid loopback, loop back a copy.
709			 */
710			ip6_mloopback(ifp, m, dst);
711		} else {
712			/*
713			 * If we are acting as a multicast router, perform
714			 * multicast forwarding as if the packet had just
715			 * arrived on the interface to which we are about
716			 * to send.  The multicast forwarding function
717			 * recursively calls this function, using the
718			 * IPV6_FORWARDING flag to prevent infinite recursion.
719			 *
720			 * Multicasts that are looped back by ip6_mloopback(),
721			 * above, will be forwarded by the ip6_input() routine,
722			 * if necessary.
723			 */
724			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
725				/*
726				 * XXX: ip6_mforward expects that rcvif is NULL
727				 * when it is called from the originating path.
728				 * However, it is not always the case, since
729				 * some versions of MGETHDR() do not
730				 * initialize the field.
731				 */
732				m->m_pkthdr.rcvif = NULL;
733				if (ip6_mforward(ip6, ifp, m) != 0) {
734					m_freem(m);
735					goto done;
736				}
737			}
738		}
739		/*
740		 * Multicasts with a hoplimit of zero may be looped back,
741		 * above, but must not be transmitted on a network.
742		 * Also, multicasts addressed to the loopback interface
743		 * are not sent -- the above call to ip6_mloopback() will
744		 * loop back a copy if this host actually belongs to the
745		 * destination group on the loopback interface.
746		 */
747		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
748		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
749			m_freem(m);
750			goto done;
751		}
752	}
753
754	/*
755	 * Fill the outgoing interface to tell the upper layer
756	 * to increment per-interface statistics.
757	 */
758	if (ifpp)
759		*ifpp = ifp;
760
761	/* Determine path MTU. */
762	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
763	    &alwaysfrag)) != 0)
764		goto bad;
765
766	/*
767	 * The caller of this function may specify to use the minimum MTU
768	 * in some cases.
769	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
770	 * setting.  The logic is a bit complicated; by default, unicast
771	 * packets will follow path MTU while multicast packets will be sent at
772	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
773	 * including unicast ones will be sent at the minimum MTU.  Multicast
774	 * packets will always be sent at the minimum MTU unless
775	 * IP6PO_MINMTU_DISABLE is explicitly specified.
776	 * See RFC 3542 for more details.
777	 */
778	if (mtu > IPV6_MMTU) {
779		if ((flags & IPV6_MINMTU))
780			mtu = IPV6_MMTU;
781		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
782			mtu = IPV6_MMTU;
783		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
784			 (opt == NULL ||
785			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
786			mtu = IPV6_MMTU;
787		}
788	}
789
790	/*
791	 * clear embedded scope identifiers if necessary.
792	 * in6_clearscope will touch the addresses only when necessary.
793	 */
794	in6_clearscope(&ip6->ip6_src);
795	in6_clearscope(&ip6->ip6_dst);
796
797	/*
798	 * If the outgoing packet contains a hop-by-hop options header,
799	 * it must be examined and processed even by the source node.
800	 * (RFC 2460, section 4.)
801	 */
802	if (exthdrs.ip6e_hbh) {
803		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
804		u_int32_t dummy; /* XXX unused */
805		u_int32_t plen = 0; /* XXX: ip6_process will check the value */
806
807#ifdef DIAGNOSTIC
808		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
809			panic("ip6e_hbh is not continuous");
810#endif
811		/*
812		 *  XXX: if we have to send an ICMPv6 error to the sender,
813		 *       we need the M_LOOP flag since icmp6_error() expects
814		 *       the IPv6 and the hop-by-hop options header are
815		 *       continuous unless the flag is set.
816		 */
817		m->m_flags |= M_LOOP;
818		m->m_pkthdr.rcvif = ifp;
819		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
820		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
821		    &dummy, &plen) < 0) {
822			/* m was already freed at this point */
823			error = EINVAL;/* better error? */
824			goto done;
825		}
826		m->m_flags &= ~M_LOOP; /* XXX */
827		m->m_pkthdr.rcvif = NULL;
828	}
829
830	/* Jump over all PFIL processing if hooks are not active. */
831	if (!PFIL_HOOKED(&inet6_pfil_hook))
832		goto passout;
833
834	odst = ip6->ip6_dst;
835	/* Run through list of hooks for output packets. */
836	error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
837	if (error != 0 || m == NULL)
838		goto done;
839	ip6 = mtod(m, struct ip6_hdr *);
840
841	/* See if destination IP address was changed by packet filter. */
842	if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
843		m->m_flags |= M_SKIP_FIREWALL;
844		/* If destination is now ourself drop to ip6_input(). */
845		if (in6_localaddr(&ip6->ip6_dst)) {
846			if (m->m_pkthdr.rcvif == NULL)
847				m->m_pkthdr.rcvif = loif;
848			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
849				m->m_pkthdr.csum_flags |=
850				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
851				m->m_pkthdr.csum_data = 0xffff;
852			}
853			m->m_pkthdr.csum_flags |=
854			    CSUM_IP_CHECKED | CSUM_IP_VALID;
855			error = netisr_queue(NETISR_IPV6, m);
856			goto done;
857		} else
858			goto again;	/* Redo the routing table lookup. */
859	}
860
861	/* XXX: IPFIREWALL_FORWARD */
862
863passout:
864	/*
865	 * Send the packet to the outgoing interface.
866	 * If necessary, do IPv6 fragmentation before sending.
867	 *
868	 * the logic here is rather complex:
869	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
870	 * 1-a:	send as is if tlen <= path mtu
871	 * 1-b:	fragment if tlen > path mtu
872	 *
873	 * 2: if user asks us not to fragment (dontfrag == 1)
874	 * 2-a:	send as is if tlen <= interface mtu
875	 * 2-b:	error if tlen > interface mtu
876	 *
877	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
878	 *	always fragment
879	 *
880	 * 4: if dontfrag == 1 && alwaysfrag == 1
881	 *	error, as we cannot handle this conflicting request
882	 */
883	tlen = m->m_pkthdr.len;
884
885	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
886		dontfrag = 1;
887	else
888		dontfrag = 0;
889	if (dontfrag && alwaysfrag) {	/* case 4 */
890		/* conflicting request - can't transmit */
891		error = EMSGSIZE;
892		goto bad;
893	}
894	if (dontfrag && tlen > IN6_LINKMTU(ifp)) {	/* case 2-b */
895		/*
896		 * Even if the DONTFRAG option is specified, we cannot send the
897		 * packet when the data length is larger than the MTU of the
898		 * outgoing interface.
899		 * Notify the error by sending IPV6_PATHMTU ancillary data as
900		 * well as returning an error code (the latter is not described
901		 * in the API spec.)
902		 */
903		u_int32_t mtu32;
904		struct ip6ctlparam ip6cp;
905
906		mtu32 = (u_int32_t)mtu;
907		bzero(&ip6cp, sizeof(ip6cp));
908		ip6cp.ip6c_cmdarg = (void *)&mtu32;
909		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
910		    (void *)&ip6cp);
911
912		error = EMSGSIZE;
913		goto bad;
914	}
915
916	/*
917	 * transmit packet without fragmentation
918	 */
919	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
920		struct in6_ifaddr *ia6;
921
922		ip6 = mtod(m, struct ip6_hdr *);
923		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
924		if (ia6) {
925			/* Record statistics for this interface address. */
926			ia6->ia_ifa.if_opackets++;
927			ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
928		}
929		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
930		goto done;
931	}
932
933	/*
934	 * try to fragment the packet.  case 1-b and 3
935	 */
936	if (mtu < IPV6_MMTU) {
937		/* path MTU cannot be less than IPV6_MMTU */
938		error = EMSGSIZE;
939		in6_ifstat_inc(ifp, ifs6_out_fragfail);
940		goto bad;
941	} else if (ip6->ip6_plen == 0) {
942		/* jumbo payload cannot be fragmented */
943		error = EMSGSIZE;
944		in6_ifstat_inc(ifp, ifs6_out_fragfail);
945		goto bad;
946	} else {
947		struct mbuf **mnext, *m_frgpart;
948		struct ip6_frag *ip6f;
949		u_int32_t id = htonl(ip6_randomid());
950		u_char nextproto;
951
952		int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
953
954		/*
955		 * Too large for the destination or interface;
956		 * fragment if possible.
957		 * Must be able to put at least 8 bytes per fragment.
958		 */
959		hlen = unfragpartlen;
960		if (mtu > IPV6_MAXPACKET)
961			mtu = IPV6_MAXPACKET;
962
963		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
964		if (len < 8) {
965			error = EMSGSIZE;
966			in6_ifstat_inc(ifp, ifs6_out_fragfail);
967			goto bad;
968		}
969
970		/*
971		 * Verify that we have any chance at all of being able to queue
972		 *      the packet or packet fragments
973		 */
974		if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
975		    < tlen  /* - hlen */)) {
976			error = ENOBUFS;
977			ip6stat.ip6s_odropped++;
978			goto bad;
979		}
980
981		mnext = &m->m_nextpkt;
982
983		/*
984		 * Change the next header field of the last header in the
985		 * unfragmentable part.
986		 */
987		if (exthdrs.ip6e_rthdr) {
988			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
989			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
990		} else if (exthdrs.ip6e_dest1) {
991			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
992			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
993		} else if (exthdrs.ip6e_hbh) {
994			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
995			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
996		} else {
997			nextproto = ip6->ip6_nxt;
998			ip6->ip6_nxt = IPPROTO_FRAGMENT;
999		}
1000
1001		/*
1002		 * Loop through length of segment after first fragment,
1003		 * make new header and copy data of each part and link onto
1004		 * chain.
1005		 */
1006		m0 = m;
1007		for (off = hlen; off < tlen; off += len) {
1008			MGETHDR(m, M_DONTWAIT, MT_HEADER);
1009			if (!m) {
1010				error = ENOBUFS;
1011				ip6stat.ip6s_odropped++;
1012				goto sendorfree;
1013			}
1014			m->m_pkthdr.rcvif = NULL;
1015			m->m_flags = m0->m_flags & M_COPYFLAGS;
1016			*mnext = m;
1017			mnext = &m->m_nextpkt;
1018			m->m_data += max_linkhdr;
1019			mhip6 = mtod(m, struct ip6_hdr *);
1020			*mhip6 = *ip6;
1021			m->m_len = sizeof(*mhip6);
1022			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1023			if (error) {
1024				ip6stat.ip6s_odropped++;
1025				goto sendorfree;
1026			}
1027			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1028			if (off + len >= tlen)
1029				len = tlen - off;
1030			else
1031				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1032			mhip6->ip6_plen = htons((u_short)(len + hlen +
1033			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1034			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1035				error = ENOBUFS;
1036				ip6stat.ip6s_odropped++;
1037				goto sendorfree;
1038			}
1039			m_cat(m, m_frgpart);
1040			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1041			m->m_pkthdr.rcvif = NULL;
1042			ip6f->ip6f_reserved = 0;
1043			ip6f->ip6f_ident = id;
1044			ip6f->ip6f_nxt = nextproto;
1045			ip6stat.ip6s_ofragments++;
1046			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1047		}
1048
1049		in6_ifstat_inc(ifp, ifs6_out_fragok);
1050	}
1051
1052	/*
1053	 * Remove leading garbages.
1054	 */
1055sendorfree:
1056	m = m0->m_nextpkt;
1057	m0->m_nextpkt = 0;
1058	m_freem(m0);
1059	for (m0 = m; m; m = m0) {
1060		m0 = m->m_nextpkt;
1061		m->m_nextpkt = 0;
1062		if (error == 0) {
1063			/* Record statistics for this interface address. */
1064			if (ia) {
1065				ia->ia_ifa.if_opackets++;
1066				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1067			}
1068			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1069		} else
1070			m_freem(m);
1071	}
1072
1073	if (error == 0)
1074		ip6stat.ip6s_fragmented++;
1075
1076done:
1077	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1078		RTFREE(ro->ro_rt);
1079	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1080		RTFREE(ro_pmtu->ro_rt);
1081	}
1082
1083	return (error);
1084
1085freehdrs:
1086	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1087	m_freem(exthdrs.ip6e_dest1);
1088	m_freem(exthdrs.ip6e_rthdr);
1089	m_freem(exthdrs.ip6e_dest2);
1090	/* FALLTHROUGH */
1091bad:
1092	if (m)
1093		m_freem(m);
1094	goto done;
1095}
1096
1097static int
1098ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1099{
1100	struct mbuf *m;
1101
1102	if (hlen > MCLBYTES)
1103		return (ENOBUFS); /* XXX */
1104
1105	MGET(m, M_DONTWAIT, MT_DATA);
1106	if (!m)
1107		return (ENOBUFS);
1108
1109	if (hlen > MLEN) {
1110		MCLGET(m, M_DONTWAIT);
1111		if ((m->m_flags & M_EXT) == 0) {
1112			m_free(m);
1113			return (ENOBUFS);
1114		}
1115	}
1116	m->m_len = hlen;
1117	if (hdr)
1118		bcopy(hdr, mtod(m, caddr_t), hlen);
1119
1120	*mp = m;
1121	return (0);
1122}
1123
1124/*
1125 * Insert jumbo payload option.
1126 */
1127static int
1128ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1129{
1130	struct mbuf *mopt;
1131	u_char *optbuf;
1132	u_int32_t v;
1133
1134#define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1135
1136	/*
1137	 * If there is no hop-by-hop options header, allocate new one.
1138	 * If there is one but it doesn't have enough space to store the
1139	 * jumbo payload option, allocate a cluster to store the whole options.
1140	 * Otherwise, use it to store the options.
1141	 */
1142	if (exthdrs->ip6e_hbh == 0) {
1143		MGET(mopt, M_DONTWAIT, MT_DATA);
1144		if (mopt == 0)
1145			return (ENOBUFS);
1146		mopt->m_len = JUMBOOPTLEN;
1147		optbuf = mtod(mopt, u_char *);
1148		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1149		exthdrs->ip6e_hbh = mopt;
1150	} else {
1151		struct ip6_hbh *hbh;
1152
1153		mopt = exthdrs->ip6e_hbh;
1154		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1155			/*
1156			 * XXX assumption:
1157			 * - exthdrs->ip6e_hbh is not referenced from places
1158			 *   other than exthdrs.
1159			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1160			 */
1161			int oldoptlen = mopt->m_len;
1162			struct mbuf *n;
1163
1164			/*
1165			 * XXX: give up if the whole (new) hbh header does
1166			 * not fit even in an mbuf cluster.
1167			 */
1168			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1169				return (ENOBUFS);
1170
1171			/*
1172			 * As a consequence, we must always prepare a cluster
1173			 * at this point.
1174			 */
1175			MGET(n, M_DONTWAIT, MT_DATA);
1176			if (n) {
1177				MCLGET(n, M_DONTWAIT);
1178				if ((n->m_flags & M_EXT) == 0) {
1179					m_freem(n);
1180					n = NULL;
1181				}
1182			}
1183			if (!n)
1184				return (ENOBUFS);
1185			n->m_len = oldoptlen + JUMBOOPTLEN;
1186			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1187			    oldoptlen);
1188			optbuf = mtod(n, caddr_t) + oldoptlen;
1189			m_freem(mopt);
1190			mopt = exthdrs->ip6e_hbh = n;
1191		} else {
1192			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1193			mopt->m_len += JUMBOOPTLEN;
1194		}
1195		optbuf[0] = IP6OPT_PADN;
1196		optbuf[1] = 1;
1197
1198		/*
1199		 * Adjust the header length according to the pad and
1200		 * the jumbo payload option.
1201		 */
1202		hbh = mtod(mopt, struct ip6_hbh *);
1203		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1204	}
1205
1206	/* fill in the option. */
1207	optbuf[2] = IP6OPT_JUMBO;
1208	optbuf[3] = 4;
1209	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1210	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1211
1212	/* finally, adjust the packet header length */
1213	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1214
1215	return (0);
1216#undef JUMBOOPTLEN
1217}
1218
1219/*
1220 * Insert fragment header and copy unfragmentable header portions.
1221 */
1222static int
1223ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1224    struct ip6_frag **frghdrp)
1225{
1226	struct mbuf *n, *mlast;
1227
1228	if (hlen > sizeof(struct ip6_hdr)) {
1229		n = m_copym(m0, sizeof(struct ip6_hdr),
1230		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1231		if (n == 0)
1232			return (ENOBUFS);
1233		m->m_next = n;
1234	} else
1235		n = m;
1236
1237	/* Search for the last mbuf of unfragmentable part. */
1238	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1239		;
1240
1241	if ((mlast->m_flags & M_EXT) == 0 &&
1242	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1243		/* use the trailing space of the last mbuf for the fragment hdr */
1244		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1245		    mlast->m_len);
1246		mlast->m_len += sizeof(struct ip6_frag);
1247		m->m_pkthdr.len += sizeof(struct ip6_frag);
1248	} else {
1249		/* allocate a new mbuf for the fragment header */
1250		struct mbuf *mfrg;
1251
1252		MGET(mfrg, M_DONTWAIT, MT_DATA);
1253		if (mfrg == 0)
1254			return (ENOBUFS);
1255		mfrg->m_len = sizeof(struct ip6_frag);
1256		*frghdrp = mtod(mfrg, struct ip6_frag *);
1257		mlast->m_next = mfrg;
1258	}
1259
1260	return (0);
1261}
1262
1263static int
1264ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
1265    struct ifnet *ifp, struct in6_addr *dst, u_long *mtup,
1266    int *alwaysfragp)
1267{
1268	u_int32_t mtu = 0;
1269	int alwaysfrag = 0;
1270	int error = 0;
1271
1272	if (ro_pmtu != ro) {
1273		/* The first hop and the final destination may differ. */
1274		struct sockaddr_in6 *sa6_dst =
1275		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1276		if (ro_pmtu->ro_rt &&
1277		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1278		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1279			RTFREE(ro_pmtu->ro_rt);
1280			ro_pmtu->ro_rt = (struct rtentry *)NULL;
1281		}
1282		if (ro_pmtu->ro_rt == NULL) {
1283			bzero(sa6_dst, sizeof(*sa6_dst));
1284			sa6_dst->sin6_family = AF_INET6;
1285			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1286			sa6_dst->sin6_addr = *dst;
1287
1288			rtalloc((struct route *)ro_pmtu);
1289		}
1290	}
1291	if (ro_pmtu->ro_rt) {
1292		u_int32_t ifmtu;
1293		struct in_conninfo inc;
1294
1295		bzero(&inc, sizeof(inc));
1296		inc.inc_flags = 1; /* IPv6 */
1297		inc.inc6_faddr = *dst;
1298
1299		if (ifp == NULL)
1300			ifp = ro_pmtu->ro_rt->rt_ifp;
1301		ifmtu = IN6_LINKMTU(ifp);
1302		mtu = tcp_hc_getmtu(&inc);
1303		if (mtu)
1304			mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
1305		else
1306			mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1307		if (mtu == 0)
1308			mtu = ifmtu;
1309		else if (mtu < IPV6_MMTU) {
1310			/*
1311			 * RFC2460 section 5, last paragraph:
1312			 * if we record ICMPv6 too big message with
1313			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1314			 * or smaller, with framgent header attached.
1315			 * (fragment header is needed regardless from the
1316			 * packet size, for translators to identify packets)
1317			 */
1318			alwaysfrag = 1;
1319			mtu = IPV6_MMTU;
1320		} else if (mtu > ifmtu) {
1321			/*
1322			 * The MTU on the route is larger than the MTU on
1323			 * the interface!  This shouldn't happen, unless the
1324			 * MTU of the interface has been changed after the
1325			 * interface was brought up.  Change the MTU in the
1326			 * route to match the interface MTU (as long as the
1327			 * field isn't locked).
1328			 */
1329			mtu = ifmtu;
1330			ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1331		}
1332	} else if (ifp) {
1333		mtu = IN6_LINKMTU(ifp);
1334	} else
1335		error = EHOSTUNREACH; /* XXX */
1336
1337	*mtup = mtu;
1338	if (alwaysfragp)
1339		*alwaysfragp = alwaysfrag;
1340	return (error);
1341}
1342
1343/*
1344 * IP6 socket option processing.
 *
 * Dispatches first on sopt->sopt_dir (SOPT_SET / SOPT_GET) and then on
 * sopt->sopt_name for options at level IPPROTO_IPV6.  Any other level
 * yields EINVAL; an option name not listed for the given direction yields
 * ENOPROTOOPT.  A direction that is neither SOPT_SET nor SOPT_GET matches
 * no case and the function returns 0.  Panics when sopt is NULL.
 *
 * Returns 0 on success or an errno value.  Most paths leave through the
 * common "return (error)" at the bottom, but a few cases (the privilege
 * checks for IPV6_2292HOPOPTS / IPV6_2292DSTOPTS and the connection check
 * for IPV6_PATHMTU) return directly.
1345 */
1346int
1347ip6_ctloutput(struct socket *so, struct sockopt *sopt)
1348{
1349	int privileged, optdatalen, uproto;
1350	void *optdata;
1351	struct inpcb *in6p = sotoinpcb(so);
1352	int error, optval;
1353	int level, op, optname;
1354	int optlen;
1355	struct thread *td;
1356
1357	if (sopt) {
1358		level = sopt->sopt_level;
1359		op = sopt->sopt_dir;
1360		optname = sopt->sopt_name;
1361		optlen = sopt->sopt_valsize;
1362		td = sopt->sopt_td;
1363	} else {
1364		panic("ip6_ctloutput: arg soopt is NULL");
1365	}
1366	error = optval = 0;
1367
	/*
	 * privileged is 1 only when a thread is attached to the request and
	 * suser() returns 0 for it; with no thread it is 0.
	 */
1368	privileged = (td == 0 || suser(td)) ? 0 : 1;
1369	uproto = (int)so->so_proto->pr_protocol;
1370
1371	if (level == IPPROTO_IPV6) {
1372		switch (op) {
1373
1374		case SOPT_SET:
1375			switch (optname) {
1376			case IPV6_2292PKTOPTIONS:
1377#ifdef IPV6_PKTOPTIONS
1378			case IPV6_PKTOPTIONS:
1379#endif
1380			{
1381				struct mbuf *m;
1382
				/*
				 * NOTE(review): m is not freed here when
				 * soopt_mcopyin() fails -- presumably that
				 * routine releases it on error; confirm.
				 */
1383				error = soopt_getm(sopt, &m); /* XXX */
1384				if (error != 0)
1385					break;
1386				error = soopt_mcopyin(sopt, m); /* XXX */
1387				if (error != 0)
1388					break;
1389				error = ip6_pcbopts(&in6p->in6p_outputopts,
1390						    m, so, sopt);
1391				m_freem(m); /* XXX */
1392				break;
1393			}
1394
1395			/*
1396			 * Use of some Hop-by-Hop options or some
1397			 * Destination options, might require special
1398			 * privilege.  That is, normal applications
1399			 * (without special privilege) might be forbidden
1400			 * from setting certain options in outgoing packets,
1401			 * and might never see certain options in received
1402			 * packets. [RFC 2292 Section 6]
1403			 * KAME specific note:
1404			 *  KAME prevents non-privileged users from sending or
1405			 *  receiving ANY hbh/dst options in order to avoid
1406			 *  overhead of parsing options in the kernel.
1407			 */
1408			case IPV6_RECVHOPOPTS:
1409			case IPV6_RECVDSTOPTS:
1410			case IPV6_RECVRTHDRDSTOPTS:
1411				if (!privileged) {
1412					error = EPERM;
1413					break;
1414				}
1415				/* FALLTHROUGH */
1416			case IPV6_UNICAST_HOPS:
1417			case IPV6_HOPLIMIT:
1418			case IPV6_FAITH:
1419
1420			case IPV6_RECVPKTINFO:
1421			case IPV6_RECVHOPLIMIT:
1422			case IPV6_RECVRTHDR:
1423			case IPV6_RECVPATHMTU:
1424			case IPV6_RECVTCLASS:
1425			case IPV6_V6ONLY:
1426			case IPV6_AUTOFLOWLABEL:
				/* All of these take exactly one int. */
1427				if (optlen != sizeof(int)) {
1428					error = EINVAL;
1429					break;
1430				}
1431				error = sooptcopyin(sopt, &optval,
1432					sizeof optval, sizeof optval);
1433				if (error)
1434					break;
1435				switch (optname) {
1436
1437				case IPV6_UNICAST_HOPS:
1438					if (optval < -1 || optval >= 256)
1439						error = EINVAL;
1440					else {
1441						/* -1 = kernel default */
1442						in6p->in6p_hops = optval;
1443						if ((in6p->in6p_vflag &
1444						     INP_IPV4) != 0)
1445							in6p->inp_ip_ttl = optval;
1446					}
1447					break;
/*
 * Helper macros working on in6p->in6p_flags with the int in optval:
 *  OPTSET(bit)      sets bit when optval is non-zero, clears it otherwise.
 *  OPTSET2292(bit)  same, but first marks the pcb with IN6P_RFC2292.
 *  OPTBIT(bit)      evaluates to 1 when any of the given bits is set.
 * They are defined here in the middle of the switch; OPTSET is #undef'd
 * further down, while OPTSET2292 and OPTBIT stay defined (OPTBIT is also
 * used by the SOPT_GET half).
 */
1448#define OPTSET(bit) \
1449do { \
1450	if (optval) \
1451		in6p->in6p_flags |= (bit); \
1452	else \
1453		in6p->in6p_flags &= ~(bit); \
1454} while (/*CONSTCOND*/ 0)
1455#define OPTSET2292(bit) \
1456do { \
1457	in6p->in6p_flags |= IN6P_RFC2292; \
1458	if (optval) \
1459		in6p->in6p_flags |= (bit); \
1460	else \
1461		in6p->in6p_flags &= ~(bit); \
1462} while (/*CONSTCOND*/ 0)
1463#define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1464
1465				case IPV6_RECVPKTINFO:
1466					/* cannot mix with RFC2292 */
1467					if (OPTBIT(IN6P_RFC2292)) {
1468						error = EINVAL;
1469						break;
1470					}
1471					OPTSET(IN6P_PKTINFO);
1472					break;
1473
1474				case IPV6_HOPLIMIT:
1475				{
1476					struct ip6_pktopts **optp;
1477
1478					/* cannot mix with RFC2292 */
1479					if (OPTBIT(IN6P_RFC2292)) {
1480						error = EINVAL;
1481						break;
1482					}
1483					optp = &in6p->in6p_outputopts;
1484					error = ip6_pcbopt(IPV6_HOPLIMIT,
1485							   (u_char *)&optval,
1486							   sizeof(optval),
1487							   optp,
1488							   privileged, uproto);
1489					break;
1490				}
1491
1492				case IPV6_RECVHOPLIMIT:
1493					/* cannot mix with RFC2292 */
1494					if (OPTBIT(IN6P_RFC2292)) {
1495						error = EINVAL;
1496						break;
1497					}
1498					OPTSET(IN6P_HOPLIMIT);
1499					break;
1500
1501				case IPV6_RECVHOPOPTS:
1502					/* cannot mix with RFC2292 */
1503					if (OPTBIT(IN6P_RFC2292)) {
1504						error = EINVAL;
1505						break;
1506					}
1507					OPTSET(IN6P_HOPOPTS);
1508					break;
1509
1510				case IPV6_RECVDSTOPTS:
1511					/* cannot mix with RFC2292 */
1512					if (OPTBIT(IN6P_RFC2292)) {
1513						error = EINVAL;
1514						break;
1515					}
1516					OPTSET(IN6P_DSTOPTS);
1517					break;
1518
1519				case IPV6_RECVRTHDRDSTOPTS:
1520					/* cannot mix with RFC2292 */
1521					if (OPTBIT(IN6P_RFC2292)) {
1522						error = EINVAL;
1523						break;
1524					}
1525					OPTSET(IN6P_RTHDRDSTOPTS);
1526					break;
1527
1528				case IPV6_RECVRTHDR:
1529					/* cannot mix with RFC2292 */
1530					if (OPTBIT(IN6P_RFC2292)) {
1531						error = EINVAL;
1532						break;
1533					}
1534					OPTSET(IN6P_RTHDR);
1535					break;
1536
1537				case IPV6_FAITH:
1538					OPTSET(IN6P_FAITH);
1539					break;
1540
1541				case IPV6_RECVPATHMTU:
1542					/*
1543					 * We ignore this option for TCP
1544					 * sockets.
1545					 * (RFC3542 leaves this case
1546					 * unspecified.)
1547					 */
1548					if (uproto != IPPROTO_TCP)
1549						OPTSET(IN6P_MTU);
1550					break;
1551
1552				case IPV6_V6ONLY:
1553					/*
1554					 * make setsockopt(IPV6_V6ONLY)
1555					 * available only prior to bind(2).
1556					 * see ipng mailing list, Jun 22 2001.
1557					 */
1558					if (in6p->in6p_lport ||
1559					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1560						error = EINVAL;
1561						break;
1562					}
1563					OPTSET(IN6P_IPV6_V6ONLY);
					/* Keep the IPv4 capability flag in sync. */
1564					if (optval)
1565						in6p->in6p_vflag &= ~INP_IPV4;
1566					else
1567						in6p->in6p_vflag |= INP_IPV4;
1568					break;
1569				case IPV6_RECVTCLASS:
1570					/* cannot mix with RFC2292 XXX */
1571					if (OPTBIT(IN6P_RFC2292)) {
1572						error = EINVAL;
1573						break;
1574					}
1575					OPTSET(IN6P_TCLASS);
1576					break;
1577				case IPV6_AUTOFLOWLABEL:
1578					OPTSET(IN6P_AUTOFLOWLABEL);
1579					break;
1580
1581				}
1582				break;
1583
1584			case IPV6_TCLASS:
1585			case IPV6_DONTFRAG:
1586			case IPV6_USE_MIN_MTU:
1587			case IPV6_PREFER_TEMPADDR:
				/* int-valued options stored in the output options. */
1588				if (optlen != sizeof(optval)) {
1589					error = EINVAL;
1590					break;
1591				}
1592				error = sooptcopyin(sopt, &optval,
1593					sizeof optval, sizeof optval);
1594				if (error)
1595					break;
1596				{
1597					struct ip6_pktopts **optp;
1598					optp = &in6p->in6p_outputopts;
1599					error = ip6_pcbopt(optname,
1600							   (u_char *)&optval,
1601							   sizeof(optval),
1602							   optp,
1603							   privileged, uproto);
1604					break;
1605				}
1606
1607			case IPV6_2292PKTINFO:
1608			case IPV6_2292HOPLIMIT:
1609			case IPV6_2292HOPOPTS:
1610			case IPV6_2292DSTOPTS:
1611			case IPV6_2292RTHDR:
1612				/* RFC 2292 */
1613				if (optlen != sizeof(int)) {
1614					error = EINVAL;
1615					break;
1616				}
1617				error = sooptcopyin(sopt, &optval,
1618					sizeof optval, sizeof optval);
1619				if (error)
1620					break;
1621				switch (optname) {
1622				case IPV6_2292PKTINFO:
1623					OPTSET2292(IN6P_PKTINFO);
1624					break;
1625				case IPV6_2292HOPLIMIT:
1626					OPTSET2292(IN6P_HOPLIMIT);
1627					break;
1628				case IPV6_2292HOPOPTS:
1629					/*
1630					 * Check super-user privilege.
1631					 * See comments for IPV6_RECVHOPOPTS.
					 * Note: this returns directly instead
					 * of setting error and breaking.
1632					 */
1633					if (!privileged)
1634						return (EPERM);
1635					OPTSET2292(IN6P_HOPOPTS);
1636					break;
1637				case IPV6_2292DSTOPTS:
1638					if (!privileged)
1639						return (EPERM);
1640					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1641					break;
1642				case IPV6_2292RTHDR:
1643					OPTSET2292(IN6P_RTHDR);
1644					break;
1645				}
1646				break;
1647			case IPV6_PKTINFO:
1648			case IPV6_HOPOPTS:
1649			case IPV6_RTHDR:
1650			case IPV6_DSTOPTS:
1651			case IPV6_RTHDRDSTOPTS:
1652			case IPV6_NEXTHOP:
1653			{
1654				/* new advanced API (RFC3542) */
1655				u_char *optbuf;
				/* MCLBYTES-sized staging buffer on the stack. */
1656				u_char optbuf_storage[MCLBYTES];
				/* Shadows the function-level optlen. */
1657				int optlen;
1658				struct ip6_pktopts **optp;
1659
1660				/* cannot mix with RFC2292 */
1661				if (OPTBIT(IN6P_RFC2292)) {
1662					error = EINVAL;
1663					break;
1664				}
1665
1666				/*
1667				 * We only ensure valsize is not too large
1668				 * here.  Further validation will be done
1669				 * later.
1670				 */
1671				error = sooptcopyin(sopt, optbuf_storage,
1672				    sizeof(optbuf_storage), 0);
1673				if (error)
1674					break;
1675				optlen = sopt->sopt_valsize;
1676				optbuf = optbuf_storage;
1677				optp = &in6p->in6p_outputopts;
1678				error = ip6_pcbopt(optname,
1679						   optbuf, optlen,
1680						   optp, privileged, uproto);
1681				break;
1682			}
1683#undef OPTSET
1684
1685			case IPV6_MULTICAST_IF:
1686			case IPV6_MULTICAST_HOPS:
1687			case IPV6_MULTICAST_LOOP:
1688			case IPV6_JOIN_GROUP:
1689			case IPV6_LEAVE_GROUP:
			    /*
			     * NOTE(review): this first check rejects anything
			     * larger than MLEN, so the MCLGET branch below can
			     * never be taken, and the MCLBYTES check looks
			     * redundant as well (presumably MLEN <= MCLBYTES
			     * -- confirm).
			     */
1690			    {
1691				if (sopt->sopt_valsize > MLEN) {
1692					error = EMSGSIZE;
1693					break;
1694				}
1695				/* XXX */
1696			    }
1697			    /* FALLTHROUGH */
1698			    {
1699				struct mbuf *m;
1700
1701				if (sopt->sopt_valsize > MCLBYTES) {
1702					error = EMSGSIZE;
1703					break;
1704				}
1705				/* XXX */
				/* Wait for an mbuf only when a thread is attached. */
1706				MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
1707				if (m == 0) {
1708					error = ENOBUFS;
1709					break;
1710				}
1711				if (sopt->sopt_valsize > MLEN) {
1712					MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
1713					if ((m->m_flags & M_EXT) == 0) {
1714						m_free(m);
1715						error = ENOBUFS;
1716						break;
1717					}
1718				}
1719				m->m_len = sopt->sopt_valsize;
1720				error = sooptcopyin(sopt, mtod(m, char *),
1721						    m->m_len, m->m_len);
1722				if (error) {
1723					(void)m_free(m);
1724					break;
1725				}
1726				error =	ip6_setmoptions(sopt->sopt_name,
1727							&in6p->in6p_moptions,
1728							m);
1729				(void)m_free(m);
1730			    }
1731				break;
1732
1733			case IPV6_PORTRANGE:
1734				error = sooptcopyin(sopt, &optval,
1735				    sizeof optval, sizeof optval);
1736				if (error)
1737					break;
1738
				/* IN6P_LOWPORT and IN6P_HIGHPORT are kept exclusive. */
1739				switch (optval) {
1740				case IPV6_PORTRANGE_DEFAULT:
1741					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1742					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1743					break;
1744
1745				case IPV6_PORTRANGE_HIGH:
1746					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1747					in6p->in6p_flags |= IN6P_HIGHPORT;
1748					break;
1749
1750				case IPV6_PORTRANGE_LOW:
1751					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1752					in6p->in6p_flags |= IN6P_LOWPORT;
1753					break;
1754
1755				default:
1756					error = EINVAL;
1757					break;
1758				}
1759				break;
1760
1761#ifdef IPSEC
1762			case IPV6_IPSEC_POLICY:
1763			    {
1764				caddr_t req = NULL;
1765				size_t len = 0;
1766				struct mbuf *m;
1767
1768				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1769					break;
1770				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1771					break;
1772				if (m) {
1773					req = mtod(m, caddr_t);
1774					len = m->m_len;
1775				}
1776				error = ipsec6_set_policy(in6p, optname, req,
1777							  len, privileged);
1778				m_freem(m);
1779			    }
1780				break;
1781#endif /* IPSEC */
1782
1783			default:
1784				error = ENOPROTOOPT;
1785				break;
1786			}
1787			break;
1788
1789		case SOPT_GET:
1790			switch (optname) {
1791
1792			case IPV6_2292PKTOPTIONS:
1793#ifdef IPV6_PKTOPTIONS
1794			case IPV6_PKTOPTIONS:
1795#endif
1796				/*
1797				 * RFC3542 (effectively) deprecated the
1798				 * semantics of the 2292-style pktoptions.
1799				 * Since it was not reliable in nature (i.e.,
1800				 * applications had to expect the lack of some
1801				 * information after all), it would make sense
1802				 * to simplify this part by always returning
1803				 * empty data.
1804				 */
1805				sopt->sopt_valsize = 0;
1806				break;
1807
1808			case IPV6_RECVHOPOPTS:
1809			case IPV6_RECVDSTOPTS:
1810			case IPV6_RECVRTHDRDSTOPTS:
1811			case IPV6_UNICAST_HOPS:
1812			case IPV6_RECVPKTINFO:
1813			case IPV6_RECVHOPLIMIT:
1814			case IPV6_RECVRTHDR:
1815			case IPV6_RECVPATHMTU:
1816
1817			case IPV6_FAITH:
1818			case IPV6_V6ONLY:
1819			case IPV6_PORTRANGE:
1820			case IPV6_RECVTCLASS:
1821			case IPV6_AUTOFLOWLABEL:
				/*
				 * Compute the int to report in optval; the
				 * copyout is shared after the inner switch.
				 */
1822				switch (optname) {
1823
1824				case IPV6_RECVHOPOPTS:
1825					optval = OPTBIT(IN6P_HOPOPTS);
1826					break;
1827
1828				case IPV6_RECVDSTOPTS:
1829					optval = OPTBIT(IN6P_DSTOPTS);
1830					break;
1831
1832				case IPV6_RECVRTHDRDSTOPTS:
1833					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1834					break;
1835
1836				case IPV6_UNICAST_HOPS:
1837					optval = in6p->in6p_hops;
1838					break;
1839
1840				case IPV6_RECVPKTINFO:
1841					optval = OPTBIT(IN6P_PKTINFO);
1842					break;
1843
1844				case IPV6_RECVHOPLIMIT:
1845					optval = OPTBIT(IN6P_HOPLIMIT);
1846					break;
1847
1848				case IPV6_RECVRTHDR:
1849					optval = OPTBIT(IN6P_RTHDR);
1850					break;
1851
1852				case IPV6_RECVPATHMTU:
1853					optval = OPTBIT(IN6P_MTU);
1854					break;
1855
1856				case IPV6_FAITH:
1857					optval = OPTBIT(IN6P_FAITH);
1858					break;
1859
1860				case IPV6_V6ONLY:
1861					optval = OPTBIT(IN6P_IPV6_V6ONLY);
1862					break;
1863
1864				case IPV6_PORTRANGE:
1865				    {
1866					int flags;
1867					flags = in6p->in6p_flags;
1868					if (flags & IN6P_HIGHPORT)
1869						optval = IPV6_PORTRANGE_HIGH;
1870					else if (flags & IN6P_LOWPORT)
1871						optval = IPV6_PORTRANGE_LOW;
1872					else
1873						optval = 0;
1874					break;
1875				    }
1876				case IPV6_RECVTCLASS:
1877					optval = OPTBIT(IN6P_TCLASS);
1878					break;
1879
1880				case IPV6_AUTOFLOWLABEL:
1881					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
1882					break;
1883				}
1884				if (error)
1885					break;
1886				error = sooptcopyout(sopt, &optval,
1887					sizeof optval);
1888				break;
1889
1890			case IPV6_PATHMTU:
1891			{
1892				u_long pmtu = 0;
1893				struct ip6_mtuinfo mtuinfo;
1894				struct route_in6 sro;
1895
1896				bzero(&sro, sizeof(sro));
1897
				/* Returns directly; nothing to release yet. */
1898				if (!(so->so_state & SS_ISCONNECTED))
1899					return (ENOTCONN);
1900				/*
1901				 * XXX: we do not consider the case of source
1902				 * routing, or optional information to specify
1903				 * the outgoing interface.
1904				 */
1905				error = ip6_getpmtu(&sro, NULL, NULL,
1906				    &in6p->in6p_faddr, &pmtu, NULL);
				/* Drop the route the lookup may have left in sro. */
1907				if (sro.ro_rt)
1908					RTFREE(sro.ro_rt);
1909				if (error)
1910					break;
1911				if (pmtu > IPV6_MAXPACKET)
1912					pmtu = IPV6_MAXPACKET;
1913
1914				bzero(&mtuinfo, sizeof(mtuinfo));
1915				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
1916				optdata = (void *)&mtuinfo;
1917				optdatalen = sizeof(mtuinfo);
1918				error = sooptcopyout(sopt, optdata,
1919				    optdatalen);
1920				break;
1921			}
1922
1923			case IPV6_2292PKTINFO:
1924			case IPV6_2292HOPLIMIT:
1925			case IPV6_2292HOPOPTS:
1926			case IPV6_2292RTHDR:
1927			case IPV6_2292DSTOPTS:
1928				switch (optname) {
1929				case IPV6_2292PKTINFO:
1930					optval = OPTBIT(IN6P_PKTINFO);
1931					break;
1932				case IPV6_2292HOPLIMIT:
1933					optval = OPTBIT(IN6P_HOPLIMIT);
1934					break;
1935				case IPV6_2292HOPOPTS:
1936					optval = OPTBIT(IN6P_HOPOPTS);
1937					break;
1938				case IPV6_2292RTHDR:
1939					optval = OPTBIT(IN6P_RTHDR);
1940					break;
1941				case IPV6_2292DSTOPTS:
					/* 1 when either of the two bits is set. */
1942					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
1943					break;
1944				}
1945				error = sooptcopyout(sopt, &optval,
1946				    sizeof optval);
1947				break;
1948			case IPV6_PKTINFO:
1949			case IPV6_HOPOPTS:
1950			case IPV6_RTHDR:
1951			case IPV6_DSTOPTS:
1952			case IPV6_RTHDRDSTOPTS:
1953			case IPV6_NEXTHOP:
1954			case IPV6_TCLASS:
1955			case IPV6_DONTFRAG:
1956			case IPV6_USE_MIN_MTU:
1957			case IPV6_PREFER_TEMPADDR:
1958				error = ip6_getpcbopt(in6p->in6p_outputopts,
1959				    optname, sopt);
1960				break;
1961
1962			case IPV6_MULTICAST_IF:
1963			case IPV6_MULTICAST_HOPS:
1964			case IPV6_MULTICAST_LOOP:
1965			case IPV6_JOIN_GROUP:
1966			case IPV6_LEAVE_GROUP:
1967			    {
				/*
				 * NOTE(review): m is declared without an
				 * initializer and is passed to m_freem() even
				 * when ip6_getmoptions() fails; this relies on
				 * that routine always storing into m -- confirm.
				 */
1968				struct mbuf *m;
1969				error = ip6_getmoptions(sopt->sopt_name,
1970				    in6p->in6p_moptions, &m);
1971				if (error == 0)
1972					error = sooptcopyout(sopt,
1973					    mtod(m, char *), m->m_len);
1974				m_freem(m);
1975			    }
1976				break;
1977
1978#ifdef IPSEC
1979			case IPV6_IPSEC_POLICY:
1980			  {
1981				caddr_t req = NULL;
1982				size_t len = 0;
1983				struct mbuf *m = NULL;
1984				struct mbuf **mp = &m;
				/*
				 * Saved so the user buffer description can be
				 * put back after the copy-in below.
				 */
1985				size_t ovalsize = sopt->sopt_valsize;
1986				caddr_t oval = (caddr_t)sopt->sopt_val;
1987
1988				error = soopt_getm(sopt, &m); /* XXX */
1989				if (error != 0)
1990					break;
1991				error = soopt_mcopyin(sopt, m); /* XXX */
1992				if (error != 0)
1993					break;
1994				sopt->sopt_valsize = ovalsize;
1995				sopt->sopt_val = oval;
1996				if (m) {
1997					req = mtod(m, caddr_t);
1998					len = m->m_len;
1999				}
2000				error = ipsec6_get_policy(in6p, req, len, mp);
2001				if (error == 0)
2002					error = soopt_mcopyout(sopt, m); /* XXX */
				/* m is released here only on the success path. */
2003				if (error == 0 && m)
2004					m_freem(m);
2005				break;
2006			  }
2007#endif /* IPSEC */
2008
2009			default:
2010				error = ENOPROTOOPT;
2011				break;
2012			}
2013			break;
2014		}
2015	} else {		/* level != IPPROTO_IPV6 */
2016		error = EINVAL;
2017	}
2018	return (error);
2019}
2020
2021int
2022ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
2023{
2024	int error = 0, optval, optlen;
2025	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2026	struct in6pcb *in6p = sotoin6pcb(so);
2027	int level, op, optname;
2028
2029	if (sopt) {
2030		level = sopt->sopt_level;
2031		op = sopt->sopt_dir;
2032		optname = sopt->sopt_name;
2033		optlen = sopt->sopt_valsize;
2034	} else
2035		panic("ip6_raw_ctloutput: arg soopt is NULL");
2036
2037	if (level != IPPROTO_IPV6) {
2038		return (EINVAL);
2039	}
2040
2041	switch (optname) {
2042	case IPV6_CHECKSUM:
2043		/*
2044		 * For ICMPv6 sockets, no modification allowed for checksum
2045		 * offset, permit "no change" values to help existing apps.
2046		 *
2047		 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2048		 * for an ICMPv6 socket will fail."
2049		 * The current behavior does not meet RFC3542.
2050		 */
2051		switch (op) {
2052		case SOPT_SET:
2053			if (optlen != sizeof(int)) {
2054				error = EINVAL;
2055				break;
2056			}
2057			error = sooptcopyin(sopt, &optval, sizeof(optval),
2058					    sizeof(optval));
2059			if (error)
2060				break;
2061			if ((optval % 2) != 0) {
2062				/* the API assumes even offset values */
2063				error = EINVAL;
2064			} else if (so->so_proto->pr_protocol ==
2065			    IPPROTO_ICMPV6) {
2066				if (optval != icmp6off)
2067					error = EINVAL;
2068			} else
2069				in6p->in6p_cksum = optval;
2070			break;
2071
2072		case SOPT_GET:
2073			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2074				optval = icmp6off;
2075			else
2076				optval = in6p->in6p_cksum;
2077
2078			error = sooptcopyout(sopt, &optval, sizeof(optval));
2079			break;
2080
2081		default:
2082			error = EINVAL;
2083			break;
2084		}
2085		break;
2086
2087	default:
2088		error = ENOPROTOOPT;
2089		break;
2090	}
2091
2092	return (error);
2093}
2094
2095/*
2096 * Set up IP6 options in pcb for insertion in output packets or
2097 * specifying behavior of outgoing packets.
2098 */
2099static int
2100ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m,
2101    struct socket *so, struct sockopt *sopt)
2102{
2103	struct ip6_pktopts *opt = *pktopt;
2104	int error = 0;
2105	struct thread *td = sopt->sopt_td;
2106	int priv = 0;
2107
2108	/* turn off any old options. */
2109	if (opt) {
2110#ifdef DIAGNOSTIC
2111		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2112		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2113		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2114			printf("ip6_pcbopts: all specified options are cleared.\n");
2115#endif
2116		ip6_clearpktopts(opt, -1);
2117	} else
2118		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2119	*pktopt = NULL;
2120
2121	if (!m || m->m_len == 0) {
2122		/*
2123		 * Only turning off any previous options, regardless of
2124		 * whether the opt is just created or given.
2125		 */
2126		free(opt, M_IP6OPT);
2127		return (0);
2128	}
2129
2130	/*  set options specified by user. */
2131	if (td && !suser(td))
2132		priv = 1;
2133	if ((error = ip6_setpktopts(m, opt, NULL, priv,
2134	    so->so_proto->pr_protocol)) != 0) {
2135		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2136		free(opt, M_IP6OPT);
2137		return (error);
2138	}
2139	*pktopt = opt;
2140	return (0);
2141}
2142
2143/*
2144 * initialize ip6_pktopts.  beware that there are non-zero default values in
2145 * the struct.
2146 */
2147void
2148ip6_initpktopts(struct ip6_pktopts *opt)
2149{
2150
2151	bzero(opt, sizeof(*opt));
2152	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2153	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2154	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2155	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2156}
2157
2158static int
2159ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2160    int priv, int uproto)
2161{
2162	struct ip6_pktopts *opt;
2163
2164	if (*pktopt == NULL) {
2165		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2166		    M_WAITOK);
2167		ip6_initpktopts(*pktopt);
2168	}
2169	opt = *pktopt;
2170
2171	return (ip6_setpktopt(optname, buf, len, opt, priv, 1, 0, uproto));
2172}
2173
2174static int
2175ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2176{
2177	void *optdata = NULL;
2178	int optdatalen = 0;
2179	struct ip6_ext *ip6e;
2180	int error = 0;
2181	struct in6_pktinfo null_pktinfo;
2182	int deftclass = 0, on;
2183	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2184	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2185
2186	switch (optname) {
2187	case IPV6_PKTINFO:
2188		if (pktopt && pktopt->ip6po_pktinfo)
2189			optdata = (void *)pktopt->ip6po_pktinfo;
2190		else {
2191			/* XXX: we don't have to do this every time... */
2192			bzero(&null_pktinfo, sizeof(null_pktinfo));
2193			optdata = (void *)&null_pktinfo;
2194		}
2195		optdatalen = sizeof(struct in6_pktinfo);
2196		break;
2197	case IPV6_TCLASS:
2198		if (pktopt && pktopt->ip6po_tclass >= 0)
2199			optdata = (void *)&pktopt->ip6po_tclass;
2200		else
2201			optdata = (void *)&deftclass;
2202		optdatalen = sizeof(int);
2203		break;
2204	case IPV6_HOPOPTS:
2205		if (pktopt && pktopt->ip6po_hbh) {
2206			optdata = (void *)pktopt->ip6po_hbh;
2207			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2208			optdatalen = (ip6e->ip6e_len + 1) << 3;
2209		}
2210		break;
2211	case IPV6_RTHDR:
2212		if (pktopt && pktopt->ip6po_rthdr) {
2213			optdata = (void *)pktopt->ip6po_rthdr;
2214			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2215			optdatalen = (ip6e->ip6e_len + 1) << 3;
2216		}
2217		break;
2218	case IPV6_RTHDRDSTOPTS:
2219		if (pktopt && pktopt->ip6po_dest1) {
2220			optdata = (void *)pktopt->ip6po_dest1;
2221			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2222			optdatalen = (ip6e->ip6e_len + 1) << 3;
2223		}
2224		break;
2225	case IPV6_DSTOPTS:
2226		if (pktopt && pktopt->ip6po_dest2) {
2227			optdata = (void *)pktopt->ip6po_dest2;
2228			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2229			optdatalen = (ip6e->ip6e_len + 1) << 3;
2230		}
2231		break;
2232	case IPV6_NEXTHOP:
2233		if (pktopt && pktopt->ip6po_nexthop) {
2234			optdata = (void *)pktopt->ip6po_nexthop;
2235			optdatalen = pktopt->ip6po_nexthop->sa_len;
2236		}
2237		break;
2238	case IPV6_USE_MIN_MTU:
2239		if (pktopt)
2240			optdata = (void *)&pktopt->ip6po_minmtu;
2241		else
2242			optdata = (void *)&defminmtu;
2243		optdatalen = sizeof(int);
2244		break;
2245	case IPV6_DONTFRAG:
2246		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2247			on = 1;
2248		else
2249			on = 0;
2250		optdata = (void *)&on;
2251		optdatalen = sizeof(on);
2252		break;
2253	case IPV6_PREFER_TEMPADDR:
2254		if (pktopt)
2255			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2256		else
2257			optdata = (void *)&defpreftemp;
2258		optdatalen = sizeof(int);
2259		break;
2260	default:		/* should not happen */
2261#ifdef DIAGNOSTIC
2262		panic("ip6_getpcbopt: unexpected option\n");
2263#endif
2264		return (ENOPROTOOPT);
2265	}
2266
2267	error = sooptcopyout(sopt, optdata, optdatalen);
2268
2269	return (error);
2270}
2271
2272void
2273ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2274{
2275	if (pktopt == NULL)
2276		return;
2277
2278	if (optname == -1 || optname == IPV6_PKTINFO) {
2279		if (pktopt->ip6po_pktinfo)
2280			free(pktopt->ip6po_pktinfo, M_IP6OPT);
2281		pktopt->ip6po_pktinfo = NULL;
2282	}
2283	if (optname == -1 || optname == IPV6_HOPLIMIT)
2284		pktopt->ip6po_hlim = -1;
2285	if (optname == -1 || optname == IPV6_TCLASS)
2286		pktopt->ip6po_tclass = -1;
2287	if (optname == -1 || optname == IPV6_NEXTHOP) {
2288		if (pktopt->ip6po_nextroute.ro_rt) {
2289			RTFREE(pktopt->ip6po_nextroute.ro_rt);
2290			pktopt->ip6po_nextroute.ro_rt = NULL;
2291		}
2292		if (pktopt->ip6po_nexthop)
2293			free(pktopt->ip6po_nexthop, M_IP6OPT);
2294		pktopt->ip6po_nexthop = NULL;
2295	}
2296	if (optname == -1 || optname == IPV6_HOPOPTS) {
2297		if (pktopt->ip6po_hbh)
2298			free(pktopt->ip6po_hbh, M_IP6OPT);
2299		pktopt->ip6po_hbh = NULL;
2300	}
2301	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2302		if (pktopt->ip6po_dest1)
2303			free(pktopt->ip6po_dest1, M_IP6OPT);
2304		pktopt->ip6po_dest1 = NULL;
2305	}
2306	if (optname == -1 || optname == IPV6_RTHDR) {
2307		if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2308			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2309		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2310		if (pktopt->ip6po_route.ro_rt) {
2311			RTFREE(pktopt->ip6po_route.ro_rt);
2312			pktopt->ip6po_route.ro_rt = NULL;
2313		}
2314	}
2315	if (optname == -1 || optname == IPV6_DSTOPTS) {
2316		if (pktopt->ip6po_dest2)
2317			free(pktopt->ip6po_dest2, M_IP6OPT);
2318		pktopt->ip6po_dest2 = NULL;
2319	}
2320}
2321
2322#define PKTOPT_EXTHDRCPY(type) \
2323do {\
2324	if (src->type) {\
2325		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2326		dst->type = malloc(hlen, M_IP6OPT, canwait);\
2327		if (dst->type == NULL && canwait == M_NOWAIT)\
2328			goto bad;\
2329		bcopy(src->type, dst->type, hlen);\
2330	}\
2331} while (/*CONSTCOND*/ 0)
2332
2333static int
2334copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2335{
2336	if (dst == NULL || src == NULL)  {
2337		printf("ip6_clearpktopts: invalid argument\n");
2338		return (EINVAL);
2339	}
2340
2341	dst->ip6po_hlim = src->ip6po_hlim;
2342	dst->ip6po_tclass = src->ip6po_tclass;
2343	dst->ip6po_flags = src->ip6po_flags;
2344	if (src->ip6po_pktinfo) {
2345		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2346		    M_IP6OPT, canwait);
2347		if (dst->ip6po_pktinfo == NULL)
2348			goto bad;
2349		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2350	}
2351	if (src->ip6po_nexthop) {
2352		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2353		    M_IP6OPT, canwait);
2354		if (dst->ip6po_nexthop == NULL)
2355			goto bad;
2356		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2357		    src->ip6po_nexthop->sa_len);
2358	}
2359	PKTOPT_EXTHDRCPY(ip6po_hbh);
2360	PKTOPT_EXTHDRCPY(ip6po_dest1);
2361	PKTOPT_EXTHDRCPY(ip6po_dest2);
2362	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2363	return (0);
2364
2365  bad:
2366	ip6_clearpktopts(dst, -1);
2367	return (ENOBUFS);
2368}
2369#undef PKTOPT_EXTHDRCPY
2370
2371struct ip6_pktopts *
2372ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2373{
2374	int error;
2375	struct ip6_pktopts *dst;
2376
2377	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2378	if (dst == NULL)
2379		return (NULL);
2380	ip6_initpktopts(dst);
2381
2382	if ((error = copypktopts(dst, src, canwait)) != 0) {
2383		free(dst, M_IP6OPT);
2384		return (NULL);
2385	}
2386
2387	return (dst);
2388}
2389
2390void
2391ip6_freepcbopts(struct ip6_pktopts *pktopt)
2392{
2393	if (pktopt == NULL)
2394		return;
2395
2396	ip6_clearpktopts(pktopt, -1);
2397
2398	free(pktopt, M_IP6OPT);
2399}
2400
2401/*
2402 * Set the IP6 multicast options in response to user setsockopt().
2403 */
2404static int
2405ip6_setmoptions(int optname, struct ip6_moptions **im6op, struct mbuf *m)
2406{
2407	int error = 0;
2408	u_int loop, ifindex;
2409	struct ipv6_mreq *mreq;
2410	struct ifnet *ifp;
2411	struct ip6_moptions *im6o = *im6op;
2412	struct route_in6 ro;
2413	struct in6_multi_mship *imm;
2414	struct thread *td = curthread;
2415
2416	if (im6o == NULL) {
2417		/*
2418		 * No multicast option buffer attached to the pcb;
2419		 * allocate one and initialize to default values.
2420		 */
2421		im6o = (struct ip6_moptions *)
2422			malloc(sizeof(*im6o), M_IP6MOPTS, M_WAITOK);
2423
2424		if (im6o == NULL)
2425			return (ENOBUFS);
2426		*im6op = im6o;
2427		im6o->im6o_multicast_ifp = NULL;
2428		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2429		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2430		LIST_INIT(&im6o->im6o_memberships);
2431	}
2432
2433	switch (optname) {
2434
2435	case IPV6_MULTICAST_IF:
2436		/*
2437		 * Select the interface for outgoing multicast packets.
2438		 */
2439		if (m == NULL || m->m_len != sizeof(u_int)) {
2440			error = EINVAL;
2441			break;
2442		}
2443		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2444		if (ifindex < 0 || if_index < ifindex) {
2445			error = ENXIO;	/* XXX EINVAL? */
2446			break;
2447		}
2448		ifp = ifnet_byindex(ifindex);
2449		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2450			error = EADDRNOTAVAIL;
2451			break;
2452		}
2453		im6o->im6o_multicast_ifp = ifp;
2454		break;
2455
2456	case IPV6_MULTICAST_HOPS:
2457	    {
2458		/*
2459		 * Set the IP6 hoplimit for outgoing multicast packets.
2460		 */
2461		int optval;
2462		if (m == NULL || m->m_len != sizeof(int)) {
2463			error = EINVAL;
2464			break;
2465		}
2466		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2467		if (optval < -1 || optval >= 256)
2468			error = EINVAL;
2469		else if (optval == -1)
2470			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2471		else
2472			im6o->im6o_multicast_hlim = optval;
2473		break;
2474	    }
2475
2476	case IPV6_MULTICAST_LOOP:
2477		/*
2478		 * Set the loopback flag for outgoing multicast packets.
2479		 * Must be zero or one.
2480		 */
2481		if (m == NULL || m->m_len != sizeof(u_int)) {
2482			error = EINVAL;
2483			break;
2484		}
2485		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2486		if (loop > 1) {
2487			error = EINVAL;
2488			break;
2489		}
2490		im6o->im6o_multicast_loop = loop;
2491		break;
2492
2493	case IPV6_JOIN_GROUP:
2494		/*
2495		 * Add a multicast group membership.
2496		 * Group must be a valid IP6 multicast address.
2497		 */
2498		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2499			error = EINVAL;
2500			break;
2501		}
2502		mreq = mtod(m, struct ipv6_mreq *);
2503
2504		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2505			/*
2506			 * We use the unspecified address to specify to accept
2507			 * all multicast addresses. Only super user is allowed
2508			 * to do this.
2509			 */
2510			if (suser(td)) {
2511				error = EACCES;
2512				break;
2513			}
2514		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2515			error = EINVAL;
2516			break;
2517		}
2518
2519		/*
2520		 * If no interface was explicitly specified, choose an
2521		 * appropriate one according to the given multicast address.
2522		 */
2523		if (mreq->ipv6mr_interface == 0) {
2524			struct sockaddr_in6 *dst;
2525
2526			/*
2527			 * Look up the routing table for the
2528			 * address, and choose the outgoing interface.
2529			 *   XXX: is it a good approach?
2530			 */
2531			ro.ro_rt = NULL;
2532			dst = (struct sockaddr_in6 *)&ro.ro_dst;
2533			bzero(dst, sizeof(*dst));
2534			dst->sin6_family = AF_INET6;
2535			dst->sin6_len = sizeof(*dst);
2536			dst->sin6_addr = mreq->ipv6mr_multiaddr;
2537			rtalloc((struct route *)&ro);
2538			if (ro.ro_rt == NULL) {
2539				error = EADDRNOTAVAIL;
2540				break;
2541			}
2542			ifp = ro.ro_rt->rt_ifp;
2543			RTFREE(ro.ro_rt);
2544		} else {
2545			/*
2546			 * If the interface is specified, validate it.
2547			 */
2548			if (mreq->ipv6mr_interface < 0 ||
2549			    if_index < mreq->ipv6mr_interface) {
2550				error = ENXIO;	/* XXX EINVAL? */
2551				break;
2552			}
2553			ifp = ifnet_byindex(mreq->ipv6mr_interface);
2554			if (!ifp) {
2555				error = ENXIO;	/* XXX EINVAL? */
2556				break;
2557			}
2558		}
2559
2560		/*
2561		 * See if we found an interface, and confirm that it
2562		 * supports multicast
2563		 */
2564		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2565			error = EADDRNOTAVAIL;
2566			break;
2567		}
2568
2569		if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
2570			error = EADDRNOTAVAIL; /* XXX: should not happen */
2571			break;
2572		}
2573
2574		/*
2575		 * See if the membership already exists.
2576		 */
2577		for (imm = im6o->im6o_memberships.lh_first;
2578		     imm != NULL; imm = imm->i6mm_chain.le_next)
2579			if (imm->i6mm_maddr->in6m_ifp == ifp &&
2580			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2581					       &mreq->ipv6mr_multiaddr))
2582				break;
2583		if (imm != NULL) {
2584			error = EADDRINUSE;
2585			break;
2586		}
2587		/*
2588		 * Everything looks good; add a new record to the multicast
2589		 * address list for the given interface.
2590		 */
2591		imm = in6_joingroup(ifp, &mreq->ipv6mr_multiaddr,  &error, 0);
2592		if (imm == NULL)
2593			break;
2594		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2595		break;
2596
2597	case IPV6_LEAVE_GROUP:
2598		/*
2599		 * Drop a multicast group membership.
2600		 * Group must be a valid IP6 multicast address.
2601		 */
2602		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2603			error = EINVAL;
2604			break;
2605		}
2606		mreq = mtod(m, struct ipv6_mreq *);
2607
2608		/*
2609		 * If an interface address was specified, get a pointer
2610		 * to its ifnet structure.
2611		 */
2612		if (mreq->ipv6mr_interface < 0 ||
2613		    if_index < mreq->ipv6mr_interface) {
2614			error = ENXIO;	/* XXX EINVAL? */
2615			break;
2616		}
2617		if (mreq->ipv6mr_interface == 0)
2618			ifp = NULL;
2619		else
2620			ifp = ifnet_byindex(mreq->ipv6mr_interface);
2621
2622		/* Fill in the scope zone ID */
2623		if (ifp) {
2624			if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
2625				/* XXX: should not happen */
2626				error = EADDRNOTAVAIL;
2627				break;
2628			}
2629		} else if (mreq->ipv6mr_interface != 0) {
2630			/*
2631			 * This case happens when the (positive) index is in
2632			 * the valid range, but the corresponding interface has
2633			 * been detached dynamically (XXX).
2634			 */
2635			error = EADDRNOTAVAIL;
2636			break;
2637		} else {	/* ipv6mr_interface == 0 */
2638			struct sockaddr_in6 sa6_mc;
2639
2640			/*
2641			 * The API spec says as follows:
2642			 *  If the interface index is specified as 0, the
2643			 *  system may choose a multicast group membership to
2644			 *  drop by matching the multicast address only.
2645			 * On the other hand, we cannot disambiguate the scope
2646			 * zone unless an interface is provided.  Thus, we
2647			 * check if there's ambiguity with the default scope
2648			 * zone as the last resort.
2649			 */
2650			bzero(&sa6_mc, sizeof(sa6_mc));
2651			sa6_mc.sin6_family = AF_INET6;
2652			sa6_mc.sin6_len = sizeof(sa6_mc);
2653			sa6_mc.sin6_addr = mreq->ipv6mr_multiaddr;
2654			error = sa6_embedscope(&sa6_mc, ip6_use_defzone);
2655			if (error != 0)
2656				break;
2657			mreq->ipv6mr_multiaddr = sa6_mc.sin6_addr;
2658		}
2659
2660		/*
2661		 * Find the membership in the membership list.
2662		 */
2663		for (imm = im6o->im6o_memberships.lh_first;
2664		     imm != NULL; imm = imm->i6mm_chain.le_next) {
2665			if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2666			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2667			    &mreq->ipv6mr_multiaddr))
2668				break;
2669		}
2670		if (imm == NULL) {
2671			/* Unable to resolve interface */
2672			error = EADDRNOTAVAIL;
2673			break;
2674		}
2675		/*
2676		 * Give up the multicast address record to which the
2677		 * membership points.
2678		 */
2679		LIST_REMOVE(imm, i6mm_chain);
2680		in6_delmulti(imm->i6mm_maddr);
2681		free(imm, M_IP6MADDR);
2682		break;
2683
2684	default:
2685		error = EOPNOTSUPP;
2686		break;
2687	}
2688
2689	/*
2690	 * If all options have default values, no need to keep the mbuf.
2691	 */
2692	if (im6o->im6o_multicast_ifp == NULL &&
2693	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2694	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2695	    im6o->im6o_memberships.lh_first == NULL) {
2696		free(*im6op, M_IP6MOPTS);
2697		*im6op = NULL;
2698	}
2699
2700	return (error);
2701}
2702
2703/*
2704 * Return the IP6 multicast options in response to user getsockopt().
2705 */
2706static int
2707ip6_getmoptions(int optname, struct ip6_moptions *im6o, struct mbuf **mp)
2708{
2709	u_int *hlim, *loop, *ifindex;
2710
2711	*mp = m_get(M_TRYWAIT, MT_HEADER);		/* XXX */
2712
2713	switch (optname) {
2714
2715	case IPV6_MULTICAST_IF:
2716		ifindex = mtod(*mp, u_int *);
2717		(*mp)->m_len = sizeof(u_int);
2718		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2719			*ifindex = 0;
2720		else
2721			*ifindex = im6o->im6o_multicast_ifp->if_index;
2722		return (0);
2723
2724	case IPV6_MULTICAST_HOPS:
2725		hlim = mtod(*mp, u_int *);
2726		(*mp)->m_len = sizeof(u_int);
2727		if (im6o == NULL)
2728			*hlim = ip6_defmcasthlim;
2729		else
2730			*hlim = im6o->im6o_multicast_hlim;
2731		return (0);
2732
2733	case IPV6_MULTICAST_LOOP:
2734		loop = mtod(*mp, u_int *);
2735		(*mp)->m_len = sizeof(u_int);
2736		if (im6o == NULL)
2737			*loop = ip6_defmcasthlim;
2738		else
2739			*loop = im6o->im6o_multicast_loop;
2740		return (0);
2741
2742	default:
2743		return (EOPNOTSUPP);
2744	}
2745}
2746
2747/*
2748 * Discard the IP6 multicast options.
2749 */
2750void
2751ip6_freemoptions(struct ip6_moptions *im6o)
2752{
2753	struct in6_multi_mship *imm;
2754
2755	if (im6o == NULL)
2756		return;
2757
2758	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2759		LIST_REMOVE(imm, i6mm_chain);
2760		if (imm->i6mm_maddr)
2761			in6_delmulti(imm->i6mm_maddr);
2762		free(imm, M_IP6MADDR);
2763	}
2764	free(im6o, M_IP6MOPTS);
2765}
2766
2767/*
2768 * Set IPv6 outgoing packet options based on advanced API.
2769 */
2770int
2771ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2772    struct ip6_pktopts *stickyopt, int priv, int uproto)
2773{
2774	struct cmsghdr *cm = 0;
2775
2776	if (control == NULL || opt == NULL)
2777		return (EINVAL);
2778
2779	ip6_initpktopts(opt);
2780	if (stickyopt) {
2781		int error;
2782
2783		/*
2784		 * If stickyopt is provided, make a local copy of the options
2785		 * for this particular packet, then override them by ancillary
2786		 * objects.
2787		 * XXX: copypktopts() does not copy the cached route to a next
2788		 * hop (if any).  This is not very good in terms of efficiency,
2789		 * but we can allow this since this option should be rarely
2790		 * used.
2791		 */
2792		if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2793			return (error);
2794	}
2795
2796	/*
2797	 * XXX: Currently, we assume all the optional information is stored
2798	 * in a single mbuf.
2799	 */
2800	if (control->m_next)
2801		return (EINVAL);
2802
2803	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2804	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2805		int error;
2806
2807		if (control->m_len < CMSG_LEN(0))
2808			return (EINVAL);
2809
2810		cm = mtod(control, struct cmsghdr *);
2811		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2812			return (EINVAL);
2813		if (cm->cmsg_level != IPPROTO_IPV6)
2814			continue;
2815
2816		error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2817		    cm->cmsg_len - CMSG_LEN(0), opt, priv, 0, 1, uproto);
2818		if (error)
2819			return (error);
2820	}
2821
2822	return (0);
2823}
2824
2825/*
2826 * Set a particular packet option, as a sticky option or an ancillary data
2827 * item.  "len" can be 0 only when it's a sticky option.
2828 * We have 4 cases of combination of "sticky" and "cmsg":
2829 * "sticky=0, cmsg=0": impossible
2830 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2831 * "sticky=1, cmsg=0": RFC3542 socket option
2832 * "sticky=1, cmsg=1": RFC2292 socket option
2833 */
2834static int
2835ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2836    int priv, int sticky, int cmsg, int uproto)
2837{
2838	int minmtupolicy, preftemp;
2839
2840	if (!sticky && !cmsg) {
2841#ifdef DIAGNOSTIC
2842		printf("ip6_setpktopt: impossible case\n");
2843#endif
2844		return (EINVAL);
2845	}
2846
2847	/*
2848	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2849	 * not be specified in the context of RFC3542.  Conversely,
2850	 * RFC3542 types should not be specified in the context of RFC2292.
2851	 */
2852	if (!cmsg) {
2853		switch (optname) {
2854		case IPV6_2292PKTINFO:
2855		case IPV6_2292HOPLIMIT:
2856		case IPV6_2292NEXTHOP:
2857		case IPV6_2292HOPOPTS:
2858		case IPV6_2292DSTOPTS:
2859		case IPV6_2292RTHDR:
2860		case IPV6_2292PKTOPTIONS:
2861			return (ENOPROTOOPT);
2862		}
2863	}
2864	if (sticky && cmsg) {
2865		switch (optname) {
2866		case IPV6_PKTINFO:
2867		case IPV6_HOPLIMIT:
2868		case IPV6_NEXTHOP:
2869		case IPV6_HOPOPTS:
2870		case IPV6_DSTOPTS:
2871		case IPV6_RTHDRDSTOPTS:
2872		case IPV6_RTHDR:
2873		case IPV6_USE_MIN_MTU:
2874		case IPV6_DONTFRAG:
2875		case IPV6_TCLASS:
2876		case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
2877			return (ENOPROTOOPT);
2878		}
2879	}
2880
2881	switch (optname) {
2882	case IPV6_2292PKTINFO:
2883	case IPV6_PKTINFO:
2884	{
2885		struct ifnet *ifp = NULL;
2886		struct in6_pktinfo *pktinfo;
2887
2888		if (len != sizeof(struct in6_pktinfo))
2889			return (EINVAL);
2890
2891		pktinfo = (struct in6_pktinfo *)buf;
2892
2893		/*
2894		 * An application can clear any sticky IPV6_PKTINFO option by
2895		 * doing a "regular" setsockopt with ipi6_addr being
2896		 * in6addr_any and ipi6_ifindex being zero.
2897		 * [RFC 3542, Section 6]
2898		 */
2899		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2900		    pktinfo->ipi6_ifindex == 0 &&
2901		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2902			ip6_clearpktopts(opt, optname);
2903			break;
2904		}
2905
2906		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2907		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2908			return (EINVAL);
2909		}
2910
2911		/* validate the interface index if specified. */
2912		if (pktinfo->ipi6_ifindex > if_index ||
2913		    pktinfo->ipi6_ifindex < 0) {
2914			 return (ENXIO);
2915		}
2916		if (pktinfo->ipi6_ifindex) {
2917			ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
2918			if (ifp == NULL)
2919				return (ENXIO);
2920		}
2921
2922		/*
2923		 * We store the address anyway, and let in6_selectsrc()
2924		 * validate the specified address.  This is because ipi6_addr
2925		 * may not have enough information about its scope zone, and
2926		 * we may need additional information (such as outgoing
2927		 * interface or the scope zone of a destination address) to
2928		 * disambiguate the scope.
2929		 * XXX: the delay of the validation may confuse the
2930		 * application when it is used as a sticky option.
2931		 */
2932		if (opt->ip6po_pktinfo == NULL) {
2933			opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2934			    M_IP6OPT, M_NOWAIT);
2935			if (opt->ip6po_pktinfo == NULL)
2936				return (ENOBUFS);
2937		}
2938		bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
2939		break;
2940	}
2941
2942	case IPV6_2292HOPLIMIT:
2943	case IPV6_HOPLIMIT:
2944	{
2945		int *hlimp;
2946
2947		/*
2948		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2949		 * to simplify the ordering among hoplimit options.
2950		 */
2951		if (optname == IPV6_HOPLIMIT && sticky)
2952			return (ENOPROTOOPT);
2953
2954		if (len != sizeof(int))
2955			return (EINVAL);
2956		hlimp = (int *)buf;
2957		if (*hlimp < -1 || *hlimp > 255)
2958			return (EINVAL);
2959
2960		opt->ip6po_hlim = *hlimp;
2961		break;
2962	}
2963
2964	case IPV6_TCLASS:
2965	{
2966		int tclass;
2967
2968		if (len != sizeof(int))
2969			return (EINVAL);
2970		tclass = *(int *)buf;
2971		if (tclass < -1 || tclass > 255)
2972			return (EINVAL);
2973
2974		opt->ip6po_tclass = tclass;
2975		break;
2976	}
2977
2978	case IPV6_2292NEXTHOP:
2979	case IPV6_NEXTHOP:
2980		if (!priv)
2981			return (EPERM);
2982
2983		if (len == 0) {	/* just remove the option */
2984			ip6_clearpktopts(opt, IPV6_NEXTHOP);
2985			break;
2986		}
2987
2988		/* check if cmsg_len is large enough for sa_len */
2989		if (len < sizeof(struct sockaddr) || len < *buf)
2990			return (EINVAL);
2991
2992		switch (((struct sockaddr *)buf)->sa_family) {
2993		case AF_INET6:
2994		{
2995			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
2996			int error;
2997
2998			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
2999				return (EINVAL);
3000
3001			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3002			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3003				return (EINVAL);
3004			}
3005			if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3006			    != 0) {
3007				return (error);
3008			}
3009			break;
3010		}
3011		case AF_LINK:	/* should eventually be supported */
3012		default:
3013			return (EAFNOSUPPORT);
3014		}
3015
3016		/* turn off the previous option, then set the new option. */
3017		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3018		opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
3019		if (opt->ip6po_nexthop == NULL)
3020			return (ENOBUFS);
3021		bcopy(buf, opt->ip6po_nexthop, *buf);
3022		break;
3023
3024	case IPV6_2292HOPOPTS:
3025	case IPV6_HOPOPTS:
3026	{
3027		struct ip6_hbh *hbh;
3028		int hbhlen;
3029
3030		/*
3031		 * XXX: We don't allow a non-privileged user to set ANY HbH
3032		 * options, since per-option restriction has too much
3033		 * overhead.
3034		 */
3035		if (!priv)
3036			return (EPERM);
3037
3038		if (len == 0) {
3039			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3040			break;	/* just remove the option */
3041		}
3042
3043		/* message length validation */
3044		if (len < sizeof(struct ip6_hbh))
3045			return (EINVAL);
3046		hbh = (struct ip6_hbh *)buf;
3047		hbhlen = (hbh->ip6h_len + 1) << 3;
3048		if (len != hbhlen)
3049			return (EINVAL);
3050
3051		/* turn off the previous option, then set the new option. */
3052		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3053		opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
3054		if (opt->ip6po_hbh == NULL)
3055			return (ENOBUFS);
3056		bcopy(hbh, opt->ip6po_hbh, hbhlen);
3057
3058		break;
3059	}
3060
3061	case IPV6_2292DSTOPTS:
3062	case IPV6_DSTOPTS:
3063	case IPV6_RTHDRDSTOPTS:
3064	{
3065		struct ip6_dest *dest, **newdest = NULL;
3066		int destlen;
3067
3068		if (!priv)	/* XXX: see the comment for IPV6_HOPOPTS */
3069			return (EPERM);
3070
3071		if (len == 0) {
3072			ip6_clearpktopts(opt, optname);
3073			break;	/* just remove the option */
3074		}
3075
3076		/* message length validation */
3077		if (len < sizeof(struct ip6_dest))
3078			return (EINVAL);
3079		dest = (struct ip6_dest *)buf;
3080		destlen = (dest->ip6d_len + 1) << 3;
3081		if (len != destlen)
3082			return (EINVAL);
3083
3084		/*
3085		 * Determine the position that the destination options header
3086		 * should be inserted; before or after the routing header.
3087		 */
3088		switch (optname) {
3089		case IPV6_2292DSTOPTS:
3090			/*
3091			 * The old advacned API is ambiguous on this point.
3092			 * Our approach is to determine the position based
3093			 * according to the existence of a routing header.
3094			 * Note, however, that this depends on the order of the
3095			 * extension headers in the ancillary data; the 1st
3096			 * part of the destination options header must appear
3097			 * before the routing header in the ancillary data,
3098			 * too.
3099			 * RFC3542 solved the ambiguity by introducing
3100			 * separate ancillary data or option types.
3101			 */
3102			if (opt->ip6po_rthdr == NULL)
3103				newdest = &opt->ip6po_dest1;
3104			else
3105				newdest = &opt->ip6po_dest2;
3106			break;
3107		case IPV6_RTHDRDSTOPTS:
3108			newdest = &opt->ip6po_dest1;
3109			break;
3110		case IPV6_DSTOPTS:
3111			newdest = &opt->ip6po_dest2;
3112			break;
3113		}
3114
3115		/* turn off the previous option, then set the new option. */
3116		ip6_clearpktopts(opt, optname);
3117		*newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
3118		if (*newdest == NULL)
3119			return (ENOBUFS);
3120		bcopy(dest, *newdest, destlen);
3121
3122		break;
3123	}
3124
3125	case IPV6_2292RTHDR:
3126	case IPV6_RTHDR:
3127	{
3128		struct ip6_rthdr *rth;
3129		int rthlen;
3130
3131		if (len == 0) {
3132			ip6_clearpktopts(opt, IPV6_RTHDR);
3133			break;	/* just remove the option */
3134		}
3135
3136		/* message length validation */
3137		if (len < sizeof(struct ip6_rthdr))
3138			return (EINVAL);
3139		rth = (struct ip6_rthdr *)buf;
3140		rthlen = (rth->ip6r_len + 1) << 3;
3141		if (len != rthlen)
3142			return (EINVAL);
3143
3144		switch (rth->ip6r_type) {
3145		case IPV6_RTHDR_TYPE_0:
3146			if (rth->ip6r_len == 0)	/* must contain one addr */
3147				return (EINVAL);
3148			if (rth->ip6r_len % 2) /* length must be even */
3149				return (EINVAL);
3150			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3151				return (EINVAL);
3152			break;
3153		default:
3154			return (EINVAL);	/* not supported */
3155		}
3156
3157		/* turn off the previous option */
3158		ip6_clearpktopts(opt, IPV6_RTHDR);
3159		opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
3160		if (opt->ip6po_rthdr == NULL)
3161			return (ENOBUFS);
3162		bcopy(rth, opt->ip6po_rthdr, rthlen);
3163
3164		break;
3165	}
3166
3167	case IPV6_USE_MIN_MTU:
3168		if (len != sizeof(int))
3169			return (EINVAL);
3170		minmtupolicy = *(int *)buf;
3171		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3172		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3173		    minmtupolicy != IP6PO_MINMTU_ALL) {
3174			return (EINVAL);
3175		}
3176		opt->ip6po_minmtu = minmtupolicy;
3177		break;
3178
3179	case IPV6_DONTFRAG:
3180		if (len != sizeof(int))
3181			return (EINVAL);
3182
3183		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3184			/*
3185			 * we ignore this option for TCP sockets.
3186			 * (RFC3542 leaves this case unspecified.)
3187			 */
3188			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3189		} else
3190			opt->ip6po_flags |= IP6PO_DONTFRAG;
3191		break;
3192
3193	case IPV6_PREFER_TEMPADDR:
3194		if (len != sizeof(int))
3195			return (EINVAL);
3196		preftemp = *(int *)buf;
3197		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3198		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3199		    preftemp != IP6PO_TEMPADDR_PREFER) {
3200			return (EINVAL);
3201		}
3202		opt->ip6po_prefer_tempaddr = preftemp;
3203		break;
3204
3205	default:
3206		return (ENOPROTOOPT);
3207	} /* end of switch */
3208
3209	return (0);
3210}
3211
3212/*
3213 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3214 * packet to the input queue of a specified interface.  Note that this
3215 * calls the output routine of the loopback "driver", but with an interface
3216 * pointer that might NOT be &loif -- easier than replicating that code here.
3217 */
3218void
3219ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
3220{
3221	struct mbuf *copym;
3222	struct ip6_hdr *ip6;
3223
3224	copym = m_copy(m, 0, M_COPYALL);
3225	if (copym == NULL)
3226		return;
3227
3228	/*
3229	 * Make sure to deep-copy IPv6 header portion in case the data
3230	 * is in an mbuf cluster, so that we can safely override the IPv6
3231	 * header portion later.
3232	 */
3233	if ((copym->m_flags & M_EXT) != 0 ||
3234	    copym->m_len < sizeof(struct ip6_hdr)) {
3235		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3236		if (copym == NULL)
3237			return;
3238	}
3239
3240#ifdef DIAGNOSTIC
3241	if (copym->m_len < sizeof(*ip6)) {
3242		m_freem(copym);
3243		return;
3244	}
3245#endif
3246
3247	ip6 = mtod(copym, struct ip6_hdr *);
3248	/*
3249	 * clear embedded scope identifiers if necessary.
3250	 * in6_clearscope will touch the addresses only when necessary.
3251	 */
3252	in6_clearscope(&ip6->ip6_src);
3253	in6_clearscope(&ip6->ip6_dst);
3254
3255	(void)if_simloop(ifp, copym, dst->sin6_family, 0);
3256}
3257
3258/*
3259 * Chop IPv6 header off from the payload.
3260 */
3261static int
3262ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3263{
3264	struct mbuf *mh;
3265	struct ip6_hdr *ip6;
3266
3267	ip6 = mtod(m, struct ip6_hdr *);
3268	if (m->m_len > sizeof(*ip6)) {
3269		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3270		if (mh == 0) {
3271			m_freem(m);
3272			return ENOBUFS;
3273		}
3274		M_MOVE_PKTHDR(mh, m);
3275		MH_ALIGN(mh, sizeof(*ip6));
3276		m->m_len -= sizeof(*ip6);
3277		m->m_data += sizeof(*ip6);
3278		mh->m_next = m;
3279		m = mh;
3280		m->m_len = sizeof(*ip6);
3281		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3282	}
3283	exthdrs->ip6e_ip6 = m;
3284	return 0;
3285}
3286
3287/*
3288 * Compute IPv6 extension header length.
3289 */
3290int
3291ip6_optlen(struct in6pcb *in6p)
3292{
3293	int len;
3294
3295	if (!in6p->in6p_outputopts)
3296		return 0;
3297
3298	len = 0;
3299#define elen(x) \
3300    (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3301
3302	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3303	if (in6p->in6p_outputopts->ip6po_rthdr)
3304		/* dest1 is valid with rthdr only */
3305		len += elen(in6p->in6p_outputopts->ip6po_dest1);
3306	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3307	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3308	return len;
3309#undef elen
3310}
3311