ip6_output.c revision 148250
1/*	$FreeBSD: head/sys/netinet6/ip6_output.c 148250 2005-07-21 16:39:23Z ume $	*/
2/*	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $	*/
3
4/*-
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*-
34 * Copyright (c) 1982, 1986, 1988, 1990, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 4. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
62 */
63
64#include "opt_ip6fw.h"
65#include "opt_inet.h"
66#include "opt_inet6.h"
67#include "opt_ipsec.h"
68
69#include <sys/param.h>
70#include <sys/malloc.h>
71#include <sys/mbuf.h>
72#include <sys/proc.h>
73#include <sys/errno.h>
74#include <sys/protosw.h>
75#include <sys/socket.h>
76#include <sys/socketvar.h>
77#include <sys/systm.h>
78#include <sys/kernel.h>
79
80#include <net/if.h>
81#include <net/netisr.h>
82#include <net/route.h>
83#include <net/pfil.h>
84
85#include <netinet/in.h>
86#include <netinet/in_var.h>
87#include <netinet6/in6_var.h>
88#include <netinet/ip6.h>
89#include <netinet/icmp6.h>
90#include <netinet6/ip6_var.h>
91#include <netinet/in_pcb.h>
92#include <netinet/tcp_var.h>
93#include <netinet6/nd6.h>
94
95#ifdef IPSEC
96#include <netinet6/ipsec.h>
97#ifdef INET6
98#include <netinet6/ipsec6.h>
99#endif
100#include <netkey/key.h>
101#endif /* IPSEC */
102
103#ifdef FAST_IPSEC
104#include <netipsec/ipsec.h>
105#include <netipsec/ipsec6.h>
106#include <netipsec/key.h>
107#endif /* FAST_IPSEC */
108
109#include <netinet6/ip6_fw.h>
110
111#include <net/net_osdep.h>
112
113#include <netinet6/ip6protosw.h>
114
115static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");	/* file-local malloc type M_IPMOPTS; per its description strings it tags IPv6 multicast option allocations (users are outside this view) */
116
117struct ip6_exthdrs {	/* one mbuf pointer per header that ip6_output() assembles; bzero'ed first, so an absent header is NULL */
118	struct mbuf *ip6e_ip6;	/* mbuf holding the IPv6 header; becomes the packet head after ip6_splithdr() succeeds */
119	struct mbuf *ip6e_hbh;	/* Hop-by-Hop options header, chained first after the IPv6 header */
120	struct mbuf *ip6e_dest1;	/* Destination options header (1st part); only built when a routing header is present */
121	struct mbuf *ip6e_rthdr;	/* Routing header */
122	struct mbuf *ip6e_dest2;	/* Destination options header (2nd part); linked in front of the payload, cleared once IPsec runs */
123};
124
125static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
126			   int, int));	/* forward declarations of file-local helpers; definitions are outside this view */
127static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
128	struct socket *, struct sockopt *));
129static int ip6_getpcbopt __P((struct ip6_pktopts *, int, struct sockopt *));
130static int ip6_setpktopt __P((int, u_char *, int, struct ip6_pktopts *, int,
131	int, int, int));
132
133static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
134static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
135static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));	/* called by MAKE_EXTHDR as (&exthdrs.ip6e_X, header, byte length); nonzero return is an error */
136static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
137	struct ip6_frag **));
138static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));	/* called by ip6_output() when plen > IPV6_MAXPACKET; nonzero return is an error */
139static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));	/* on success the caller continues with exthdrs->ip6e_ip6 as packet head; on error the caller drops its mbuf pointer (presumably already freed -- confirm) */
140static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
141	struct ifnet *, struct in6_addr *, u_long *, int *));	/* ip6_output() passes &mtu and &alwaysfrag as the last two arguments; nonzero return is an error */
142static int copypktopts __P((struct ip6_pktopts *, struct ip6_pktopts *, int));
143
144
145/*
146 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
147 * header (with pri, len, nxt, hlim, src, dst).
148 * This function may modify ver and hlim only.
149 * The mbuf chain containing the packet will be freed.
150 * The mbuf opt, if present, will not be freed.
151 *
152 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
153 * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
154 * which is rt_rmx.rmx_mtu.
155 */
156int
157ip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
158	struct mbuf *m0;
159	struct ip6_pktopts *opt;
160	struct route_in6 *ro;
161	int flags;
162	struct ip6_moptions *im6o;
163	struct ifnet **ifpp;		/* XXX: just for statistics */
164	struct inpcb *inp;
165{
166	struct ip6_hdr *ip6, *mhip6;
167	struct ifnet *ifp, *origifp;
168	struct mbuf *m = m0;
169	int hlen, tlen, len, off;
170	struct route_in6 ip6route;
171	struct sockaddr_in6 *dst;
172	struct in6_addr odst;
173	int error = 0;
174	struct in6_ifaddr *ia = NULL;
175	u_long mtu;
176	int alwaysfrag, dontfrag;
177	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
178	struct ip6_exthdrs exthdrs;
179	struct in6_addr finaldst;
180	struct route_in6 *ro_pmtu = NULL;
181	int hdrsplit = 0;
182	int needipsec = 0;
183#if defined(IPSEC) || defined(FAST_IPSEC)
184	int needipsectun = 0;
185	struct secpolicy *sp = NULL;
186#endif /*IPSEC || FAST_IPSEC*/
187
188	ip6 = mtod(m, struct ip6_hdr *);
189	finaldst = ip6->ip6_dst;
190
191#define MAKE_EXTHDR(hp, mp)						\
192    do {								\
193	if (hp) {							\
194		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
195		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
196		    ((eh)->ip6e_len + 1) << 3);				\
197		if (error)						\
198			goto freehdrs;					\
199	}								\
200    } while (/*CONSTCOND*/ 0)
201
202	bzero(&exthdrs, sizeof(exthdrs));
203
204	if (opt) {
205		/* Hop-by-Hop options header */
206		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
207		/* Destination options header(1st part) */
208		if (opt->ip6po_rthdr) {
209			/*
210			 * Destination options header(1st part)
211			 * This only makes sense with a routing header.
212			 * See Section 9.2 of RFC 3542.
213			 * Disabling this part just for MIP6 convenience is
214			 * a bad idea.  We need to think carefully about a
215			 * way to make the advanced API coexist with MIP6
216			 * options, which might automatically be inserted in
217			 * the kernel.
218			 */
219			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
220		}
221		/* Routing header */
222		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
223		/* Destination options header(2nd part) */
224		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
225	}
226
227#ifdef IPSEC
228	/* get a security policy for this packet */
229	if (inp == NULL)
230		sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
231	else
232		sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
233
234	if (sp == NULL) {
235		ipsec6stat.out_inval++;
236		goto freehdrs;
237	}
238
239	error = 0;
240
241	/* check policy */
242	switch (sp->policy) {
243	case IPSEC_POLICY_DISCARD:
244		/*
245		 * This packet is just discarded.
246		 */
247		ipsec6stat.out_polvio++;
248		goto freehdrs;
249
250	case IPSEC_POLICY_BYPASS:
251	case IPSEC_POLICY_NONE:
252		/* no need to do IPsec. */
253		needipsec = 0;
254		break;
255
256	case IPSEC_POLICY_IPSEC:
257		if (sp->req == NULL) {
258			/* acquire a policy */
259			error = key_spdacquire(sp);
260			goto freehdrs;
261		}
262		needipsec = 1;
263		break;
264
265	case IPSEC_POLICY_ENTRUST:
266	default:
267		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
268	}
269#endif /* IPSEC */
270#ifdef FAST_IPSEC
271	/* get a security policy for this packet */
272	if (inp == NULL)
273		sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
274	else
275		sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
276
277	if (sp == NULL) {
278		newipsecstat.ips_out_inval++;
279		goto freehdrs;
280	}
281
282	error = 0;
283
284	/* check policy */
285	switch (sp->policy) {
286	case IPSEC_POLICY_DISCARD:
287		/*
288		 * This packet is just discarded.
289		 */
290		newipsecstat.ips_out_polvio++;
291		goto freehdrs;
292
293	case IPSEC_POLICY_BYPASS:
294	case IPSEC_POLICY_NONE:
295		/* no need to do IPsec. */
296		needipsec = 0;
297		break;
298
299	case IPSEC_POLICY_IPSEC:
300		if (sp->req == NULL) {
301			/* acquire a policy */
302			error = key_spdacquire(sp);
303			goto freehdrs;
304		}
305		needipsec = 1;
306		break;
307
308	case IPSEC_POLICY_ENTRUST:
309	default:
310		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
311	}
312#endif /* FAST_IPSEC */
313
314	/*
315	 * Calculate the total length of the extension header chain.
316	 * Keep the length of the unfragmentable part for fragmentation.
317	 */
318	optlen = 0;
319	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
320	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
321	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
322	unfragpartlen = optlen + sizeof(struct ip6_hdr);
323	/* NOTE: we don't add AH/ESP length here. do that later. */
324	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
325
326	/*
327	 * If we need IPsec, or there is at least one extension header,
328	 * separate IP6 header from the payload.
329	 */
330	if ((needipsec || optlen) && !hdrsplit) {
331		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
332			m = NULL;
333			goto freehdrs;
334		}
335		m = exthdrs.ip6e_ip6;
336		hdrsplit++;
337	}
338
339	/* adjust pointer */
340	ip6 = mtod(m, struct ip6_hdr *);
341
342	/* adjust mbuf packet header length */
343	m->m_pkthdr.len += optlen;
344	plen = m->m_pkthdr.len - sizeof(*ip6);
345
346	/* If this is a jumbo payload, insert a jumbo payload option. */
347	if (plen > IPV6_MAXPACKET) {
348		if (!hdrsplit) {
349			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
350				m = NULL;
351				goto freehdrs;
352			}
353			m = exthdrs.ip6e_ip6;
354			hdrsplit++;
355		}
356		/* adjust pointer */
357		ip6 = mtod(m, struct ip6_hdr *);
358		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
359			goto freehdrs;
360		ip6->ip6_plen = 0;
361	} else
362		ip6->ip6_plen = htons(plen);
363
364	/*
365	 * Concatenate headers and fill in next header fields.
366	 * Here we have, on "m"
367	 *	IPv6 payload
368	 * and we insert headers accordingly.  Finally, we should be getting:
369	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
370	 *
371	 * during the header composing process, "m" points to IPv6 header.
372	 * "mprev" points to an extension header prior to esp.
373	 */
374	{
375		u_char *nexthdrp = &ip6->ip6_nxt;
376		struct mbuf *mprev = m;
377
378		/*
379		 * we treat dest2 specially.  this makes IPsec processing
380		 * much easier.  the goal here is to make mprev point the
381		 * mbuf prior to dest2.
382		 *
383		 * result: IPv6 dest2 payload
384		 * m and mprev will point to IPv6 header.
385		 */
386		if (exthdrs.ip6e_dest2) {
387			if (!hdrsplit)
388				panic("assumption failed: hdr not split");
389			exthdrs.ip6e_dest2->m_next = m->m_next;
390			m->m_next = exthdrs.ip6e_dest2;
391			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
392			ip6->ip6_nxt = IPPROTO_DSTOPTS;
393		}
394
395#define MAKE_CHAIN(m, mp, p, i)\
396    do {\
397	if (m) {\
398		if (!hdrsplit) \
399			panic("assumption failed: hdr not split"); \
400		*mtod((m), u_char *) = *(p);\
401		*(p) = (i);\
402		p = mtod((m), u_char *);\
403		(m)->m_next = (mp)->m_next;\
404		(mp)->m_next = (m);\
405		(mp) = (m);\
406	}\
407    } while (/*CONSTCOND*/ 0)
408		/*
409		 * result: IPv6 hbh dest1 rthdr dest2 payload
410		 * m will point to IPv6 header.  mprev will point to the
411		 * extension header prior to dest2 (rthdr in the above case).
412		 */
413		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
414		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
415		    IPPROTO_DSTOPTS);
416		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
417		    IPPROTO_ROUTING);
418
419#if defined(IPSEC) || defined(FAST_IPSEC)
420		if (!needipsec)
421			goto skip_ipsec2;
422
423		/*
424		 * pointers after IPsec headers are not valid any more.
425		 * other pointers need a great care too.
426		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
427		 */
428		exthdrs.ip6e_dest2 = NULL;
429
430	    {
431		struct ip6_rthdr *rh = NULL;
432		int segleft_org = 0;
433		struct ipsec_output_state state;
434
435		if (exthdrs.ip6e_rthdr) {
436			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
437			segleft_org = rh->ip6r_segleft;
438			rh->ip6r_segleft = 0;
439		}
440
441		bzero(&state, sizeof(state));
442		state.m = m;
443		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
444		    &needipsectun);
445		m = state.m;
446		if (error) {
447			/* mbuf is already reclaimed in ipsec6_output_trans. */
448			m = NULL;
449			switch (error) {
450			case EHOSTUNREACH:
451			case ENETUNREACH:
452			case EMSGSIZE:
453			case ENOBUFS:
454			case ENOMEM:
455				break;
456			default:
457				printf("ip6_output (ipsec): error code %d\n", error);
458				/* FALLTHROUGH */
459			case ENOENT:
460				/* don't show these error codes to the user */
461				error = 0;
462				break;
463			}
464			goto bad;
465		}
466		if (exthdrs.ip6e_rthdr) {
467			/* ah6_output doesn't modify mbuf chain */
468			rh->ip6r_segleft = segleft_org;
469		}
470	    }
471skip_ipsec2:;
472#endif
473	}
474
475	/*
476	 * If there is a routing header, replace the destination address field
477	 * with the first hop of the routing header.
478	 */
479	if (exthdrs.ip6e_rthdr) {
480		struct ip6_rthdr *rh =
481			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
482						  struct ip6_rthdr *));
483		struct ip6_rthdr0 *rh0;
484		struct in6_addr *addrs;
485
486		switch (rh->ip6r_type) {
487		case IPV6_RTHDR_TYPE_0:
488			 rh0 = (struct ip6_rthdr0 *)rh;
489			 addrs = (struct in6_addr *)(rh0 + 1);
490
491			 ip6->ip6_dst = *addrs;
492			 bcopy((caddr_t)(addrs + 1), (caddr_t)addrs,
493			       sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1)
494				 );
495			 *(addrs + rh0->ip6r0_segleft - 1) = finaldst;
496			 break;
497		default:	/* is it possible? */
498			 error = EINVAL;
499			 goto bad;
500		}
501	}
502
503	/* Source address validation */
504	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
505	    (flags & IPV6_DADOUTPUT) == 0) {
506		error = EOPNOTSUPP;
507		ip6stat.ip6s_badscope++;
508		goto bad;
509	}
510	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
511		error = EOPNOTSUPP;
512		ip6stat.ip6s_badscope++;
513		goto bad;
514	}
515
516	ip6stat.ip6s_localout++;
517
518	/*
519	 * Route packet.
520	 */
521	if (ro == 0) {
522		ro = &ip6route;
523		bzero((caddr_t)ro, sizeof(*ro));
524	}
525	ro_pmtu = ro;
526	if (opt && opt->ip6po_rthdr)
527		ro = &opt->ip6po_route;
528	dst = (struct sockaddr_in6 *)&ro->ro_dst;
529
530again:
531	/*
532	 * If there is a cached route,
533	 * check that it is to the same destination
534	 * and is still up. If not, free it and try again.
535	 */
536	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
537			 dst->sin6_family != AF_INET6 ||
538			 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
539		RTFREE(ro->ro_rt);
540		ro->ro_rt = (struct rtentry *)0;
541	}
542	if (ro->ro_rt == 0) {
543		bzero(dst, sizeof(*dst));
544		dst->sin6_family = AF_INET6;
545		dst->sin6_len = sizeof(struct sockaddr_in6);
546		dst->sin6_addr = ip6->ip6_dst;
547	}
548
549 	/*
550	 * if specified, try to fill in the traffic class field.
551	 * do not override if a non-zero value is already set.
552	 * we check the diffserv field and the ecn field separately.
553	 */
554	if (opt && opt->ip6po_tclass >= 0) {
555		int mask = 0;
556
557		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
558			mask |= 0xfc;
559		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
560			mask |= 0x03;
561		if (mask != 0)
562			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
563	}
564
565	/* fill in or override the hop limit field, if necessary. */
566	if (opt && opt->ip6po_hlim != -1)
567		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
568	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
569		if (im6o != NULL)
570			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
571		else
572			ip6->ip6_hlim = ip6_defmcasthlim;
573	}
574
575#if defined(IPSEC) || defined(FAST_IPSEC)
576	if (needipsec && needipsectun) {
577		struct ipsec_output_state state;
578
579		/*
580		 * All the extension headers will become inaccessible
581		 * (since they can be encrypted).
582		 * Don't panic, we need no more updates to extension headers
583		 * on inner IPv6 packet (since they are now encapsulated).
584		 *
585		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
586		 */
587		bzero(&exthdrs, sizeof(exthdrs));
588		exthdrs.ip6e_ip6 = m;
589
590		bzero(&state, sizeof(state));
591		state.m = m;
592		state.ro = (struct route *)ro;
593		state.dst = (struct sockaddr *)dst;
594
595		error = ipsec6_output_tunnel(&state, sp, flags);
596
597		m = state.m;
598		ro = (struct route_in6 *)state.ro;
599		dst = (struct sockaddr_in6 *)state.dst;
600		if (error) {
601			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
602			m0 = m = NULL;
603			m = NULL;
604			switch (error) {
605			case EHOSTUNREACH:
606			case ENETUNREACH:
607			case EMSGSIZE:
608			case ENOBUFS:
609			case ENOMEM:
610				break;
611			default:
612				printf("ip6_output (ipsec): error code %d\n", error);
613				/* FALLTHROUGH */
614			case ENOENT:
615				/* don't show these error codes to the user */
616				error = 0;
617				break;
618			}
619			goto bad;
620		}
621
622		exthdrs.ip6e_ip6 = m;
623	}
624#endif /* IPSEC */
625
626	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
627		/* Unicast */
628
629#define ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
630#define sin6tosa(sin6)	((struct sockaddr *)(sin6))
631		/* xxx
632		 * interface selection comes here
633		 * if an interface is specified from an upper layer,
634		 * ifp must point it.
635		 */
636		if (ro->ro_rt == 0) {
637			/*
638			 * non-bsdi always clone routes, if parent is
639			 * PRF_CLONING.
640			 */
641			rtalloc((struct route *)ro);
642		}
643		if (ro->ro_rt == 0) {
644			ip6stat.ip6s_noroute++;
645			error = EHOSTUNREACH;
646			/* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
647			goto bad;
648		}
649		/* XXX rt not locked */
650		ia = ifatoia6(ro->ro_rt->rt_ifa);
651		ifp = ro->ro_rt->rt_ifp;
652		ro->ro_rt->rt_rmx.rmx_pksent++;
653		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
654			dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
655		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
656
657		in6_ifstat_inc(ifp, ifs6_out_request);
658
659		/*
660		 * Check if the outgoing interface conflicts with
661		 * the interface specified by ifi6_ifindex (if specified).
662		 * Note that loopback interface is always okay.
663		 * (this may happen when we are sending a packet to one of
664		 *  our own addresses.)
665		 */
666		if (opt && opt->ip6po_pktinfo
667		 && opt->ip6po_pktinfo->ipi6_ifindex) {
668			if (!(ifp->if_flags & IFF_LOOPBACK)
669			 && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
670				ip6stat.ip6s_noroute++;
671				in6_ifstat_inc(ifp, ifs6_out_discard);
672				error = EHOSTUNREACH;
673				goto bad;
674			}
675		}
676
677		if (opt && opt->ip6po_hlim != -1)
678			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
679	} else {
680		/* Multicast */
681		struct	in6_multi *in6m;
682
683		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
684
685		/*
686		 * See if the caller provided any multicast options
687		 */
688		ifp = NULL;
689		if (im6o != NULL) {
690			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
691			if (im6o->im6o_multicast_ifp != NULL)
692				ifp = im6o->im6o_multicast_ifp;
693		} else
694			ip6->ip6_hlim = ip6_defmcasthlim;
695
696		/*
697		 * See if the caller provided the outgoing interface
698		 * as an ancillary data.
699		 * Boundary check for ifindex is assumed to be already done.
700		 */
701		if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex)
702			ifp = ifnet_byindex(opt->ip6po_pktinfo->ipi6_ifindex);
703
704		/*
705		 * If the destination is a node-local scope multicast,
706		 * the packet should be loop-backed only.
707		 */
708		if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
709			/*
710			 * If the outgoing interface is already specified,
711			 * it should be a loopback interface.
712			 */
713			if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
714				ip6stat.ip6s_badscope++;
715				error = ENETUNREACH; /* XXX: better error? */
716				/* XXX correct ifp? */
717				in6_ifstat_inc(ifp, ifs6_out_discard);
718				goto bad;
719			} else {
720				ifp = &loif[0];
721			}
722		}
723
724		if (opt && opt->ip6po_hlim != -1)
725			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
726
727		/*
728		 * If caller did not provide an interface lookup a
729		 * default in the routing table.  This is either a
730		 * default for the specified group (i.e. a host
731		 * route), or a multicast default (a route for the
732		 * ``net'' ff00::/8).
733		 */
734		if (ifp == NULL) {
735			if (ro->ro_rt == 0)
736				ro->ro_rt = rtalloc1((struct sockaddr *)
737						&ro->ro_dst, 0, 0UL);
738			else
739				RT_LOCK(ro->ro_rt);
740			if (ro->ro_rt == 0) {
741				ip6stat.ip6s_noroute++;
742				error = EHOSTUNREACH;
743				/* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
744				goto bad;
745			}
746			ia = ifatoia6(ro->ro_rt->rt_ifa);
747			ifp = ro->ro_rt->rt_ifp;
748			ro->ro_rt->rt_rmx.rmx_pksent++;
749			RT_UNLOCK(ro->ro_rt);
750		}
751
752		if ((flags & IPV6_FORWARDING) == 0)
753			in6_ifstat_inc(ifp, ifs6_out_request);
754		in6_ifstat_inc(ifp, ifs6_out_mcast);
755
756		/*
757		 * Confirm that the outgoing interface supports multicast.
758		 */
759		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
760			ip6stat.ip6s_noroute++;
761			in6_ifstat_inc(ifp, ifs6_out_discard);
762			error = ENETUNREACH;
763			goto bad;
764		}
765		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
766		if (in6m != NULL &&
767		   (im6o == NULL || im6o->im6o_multicast_loop)) {
768			/*
769			 * If we belong to the destination multicast group
770			 * on the outgoing interface, and the caller did not
771			 * forbid loopback, loop back a copy.
772			 */
773			ip6_mloopback(ifp, m, dst);
774		} else {
775			/*
776			 * If we are acting as a multicast router, perform
777			 * multicast forwarding as if the packet had just
778			 * arrived on the interface to which we are about
779			 * to send.  The multicast forwarding function
780			 * recursively calls this function, using the
781			 * IPV6_FORWARDING flag to prevent infinite recursion.
782			 *
783			 * Multicasts that are looped back by ip6_mloopback(),
784			 * above, will be forwarded by the ip6_input() routine,
785			 * if necessary.
786			 */
787			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
788				if (ip6_mforward(ip6, ifp, m) != 0) {
789					m_freem(m);
790					goto done;
791				}
792			}
793		}
794		/*
795		 * Multicasts with a hoplimit of zero may be looped back,
796		 * above, but must not be transmitted on a network.
797		 * Also, multicasts addressed to the loopback interface
798		 * are not sent -- the above call to ip6_mloopback() will
799		 * loop back a copy if this host actually belongs to the
800		 * destination group on the loopback interface.
801		 */
802		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
803		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
804			m_freem(m);
805			goto done;
806		}
807	}
808
809	/*
810	 * Fill the outgoing interface to tell the upper layer
811	 * to increment per-interface statistics.
812	 */
813	if (ifpp)
814		*ifpp = ifp;
815
816	/* Determine path MTU. */
817	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
818	    &alwaysfrag)) != 0)
819		goto bad;
820
821	/*
822	 * The caller of this function may specify to use the minimum MTU
823	 * in some cases.
824	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
825	 * setting.  The logic is a bit complicated; by default, unicast
826	 * packets will follow path MTU while multicast packets will be sent at
827	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
828	 * including unicast ones will be sent at the minimum MTU.  Multicast
829	 * packets will always be sent at the minimum MTU unless
830	 * IP6PO_MINMTU_DISABLE is explicitly specified.
831	 * See RFC 3542 for more details.
832	 */
833	if (mtu > IPV6_MMTU) {
834		if ((flags & IPV6_MINMTU))
835			mtu = IPV6_MMTU;
836		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
837			mtu = IPV6_MMTU;
838		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
839			 (opt == NULL ||
840			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
841			mtu = IPV6_MMTU;
842		}
843	}
844
845	/* Fake scoped addresses */
846	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
847		/*
848		 * If source or destination address is a scoped address, and
849		 * the packet is going to be sent to a loopback interface,
850		 * we should keep the original interface.
851		 */
852
853		/*
854		 * XXX: this is a very experimental and temporary solution.
855		 * We eventually have sockaddr_in6 and use the sin6_scope_id
856		 * field of the structure here.
857		 * We rely on the consistency between two scope zone ids
858		 * of source and destination, which should already be assured.
859		 * Larger scopes than link will be supported in the future.
860		 */
861		origifp = NULL;
862		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
863			origifp = ifnet_byindex(ntohs(ip6->ip6_src.s6_addr16[1]));
864		else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
865			origifp = ifnet_byindex(ntohs(ip6->ip6_dst.s6_addr16[1]));
866		/*
867		 * XXX: origifp can be NULL even in those two cases above.
868		 * For example, if we remove the (only) link-local address
869		 * from the loopback interface, and try to send a link-local
870		 * address without link-id information.  Then the source
871		 * address is ::1, and the destination address is the
872		 * link-local address with its s6_addr16[1] being zero.
873		 * What is worse, if the packet goes to the loopback interface
874		 * by a default rejected route, the null pointer would be
875		 * passed to looutput, and the kernel would hang.
876		 * The following last resort would prevent such disaster.
877		 */
878		if (origifp == NULL)
879			origifp = ifp;
880	}
881	else
882		origifp = ifp;
883	/*
884	 * clear embedded scope identifiers if necessary.
885	 * in6_clearscope will touch the addresses only when necessary.
886	 */
887	in6_clearscope(&ip6->ip6_src);
888	in6_clearscope(&ip6->ip6_dst);
889
890	/*
891	 * Check with the firewall...
892	 */
893	if (ip6_fw_enable && ip6_fw_chk_ptr) {
894		u_short port = 0;
895		m->m_pkthdr.rcvif = NULL;	/* XXX */
896		/* If ipfw says divert, we have to just drop packet */
897		if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
898			m_freem(m);
899			goto done;
900		}
901		if (!m) {
902			error = EACCES;
903			goto done;
904		}
905	}
906
907	/*
908	 * If the outgoing packet contains a hop-by-hop options header,
909	 * it must be examined and processed even by the source node.
910	 * (RFC 2460, section 4.)
911	 */
912	if (exthdrs.ip6e_hbh) {
913		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
914		u_int32_t dummy; /* XXX unused */
915		u_int32_t plen = 0; /* XXX: ip6_process will check the value */
916
917#ifdef DIAGNOSTIC
918		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
919			panic("ip6e_hbh is not continuous");
920#endif
921		/*
922		 *  XXX: if we have to send an ICMPv6 error to the sender,
923		 *       we need the M_LOOP flag since icmp6_error() expects
924		 *       the IPv6 and the hop-by-hop options header are
925		 *       continuous unless the flag is set.
926		 */
927		m->m_flags |= M_LOOP;
928		m->m_pkthdr.rcvif = ifp;
929		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
930		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
931		    &dummy, &plen) < 0) {
932			/* m was already freed at this point */
933			error = EINVAL;/* better error? */
934			goto done;
935		}
936		m->m_flags &= ~M_LOOP; /* XXX */
937		m->m_pkthdr.rcvif = NULL;
938	}
939
940	/* Jump over all PFIL processing if hooks are not active. */
941	if (inet6_pfil_hook.ph_busy_count == -1)
942		goto passout;
943
944	odst = ip6->ip6_dst;
945	/* Run through list of hooks for output packets. */
946	error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
947	if (error != 0 || m == NULL)
948		goto done;
949	ip6 = mtod(m, struct ip6_hdr *);
950
951	/* See if destination IP address was changed by packet filter. */
952	if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
953		m->m_flags |= M_SKIP_FIREWALL;
954		/* If destination is now ourself drop to ip6_input(). */
955		if (in6_localaddr(&ip6->ip6_dst)) {
956			if (m->m_pkthdr.rcvif == NULL)
957				m->m_pkthdr.rcvif = loif;
958			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
959				m->m_pkthdr.csum_flags |=
960				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
961				m->m_pkthdr.csum_data = 0xffff;
962			}
963			m->m_pkthdr.csum_flags |=
964			    CSUM_IP_CHECKED | CSUM_IP_VALID;
965			error = netisr_queue(NETISR_IPV6, m);
966			goto done;
967		} else
968			goto again;	/* Redo the routing table lookup. */
969	}
970
971	/* XXX: IPFIREWALL_FORWARD */
972
973passout:
974	/*
975	 * Send the packet to the outgoing interface.
976	 * If necessary, do IPv6 fragmentation before sending.
977	 *
978	 * the logic here is rather complex:
979	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
980	 * 1-a:	send as is if tlen <= path mtu
981	 * 1-b:	fragment if tlen > path mtu
982	 *
983	 * 2: if user asks us not to fragment (dontfrag == 1)
984	 * 2-a:	send as is if tlen <= interface mtu
985	 * 2-b:	error if tlen > interface mtu
986	 *
987	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
988	 *	always fragment
989	 *
990	 * 4: if dontfrag == 1 && alwaysfrag == 1
991	 *	error, as we cannot handle this conflicting request
992	 */
993	tlen = m->m_pkthdr.len;
994
995	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
996		dontfrag = 1;
997	else
998		dontfrag = 0;
999	if (dontfrag && alwaysfrag) {	/* case 4 */
1000		/* conflicting request - can't transmit */
1001		error = EMSGSIZE;
1002		goto bad;
1003	}
1004	if (dontfrag && tlen > IN6_LINKMTU(ifp)) {	/* case 2-b */
1005		/*
1006		 * Even if the DONTFRAG option is specified, we cannot send the
1007		 * packet when the data length is larger than the MTU of the
1008		 * outgoing interface.
1009		 * Notify the error by sending IPV6_PATHMTU ancillary data as
1010		 * well as returning an error code (the latter is not described
1011		 * in the API spec.)
1012		 */
1013		u_int32_t mtu32;
1014		struct ip6ctlparam ip6cp;
1015
1016		mtu32 = (u_int32_t)mtu;
1017		bzero(&ip6cp, sizeof(ip6cp));
1018		ip6cp.ip6c_cmdarg = (void *)&mtu32;
1019		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1020		    (void *)&ip6cp);
1021
1022		error = EMSGSIZE;
1023		goto bad;
1024	}
1025
1026	/*
1027	 * transmit packet without fragmentation
1028	 */
1029	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
1030		struct in6_ifaddr *ia6;
1031
1032		ip6 = mtod(m, struct ip6_hdr *);
1033		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
1034		if (ia6) {
1035			/* Record statistics for this interface address. */
1036			ia6->ia_ifa.if_opackets++;
1037			ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
1038		}
1039#ifdef IPSEC
1040		/* clean ipsec history once it goes out of the node */
1041		ipsec_delaux(m);
1042#endif
1043		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1044		goto done;
1045	}
1046
1047	/*
1048	 * try to fragment the packet.  case 1-b and 3
1049	 */
1050	if (mtu < IPV6_MMTU) {
1051		/* path MTU cannot be less than IPV6_MMTU */
1052		error = EMSGSIZE;
1053		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1054		goto bad;
1055	} else if (ip6->ip6_plen == 0) {
1056		/* jumbo payload cannot be fragmented */
1057		error = EMSGSIZE;
1058		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1059		goto bad;
1060	} else {
1061		struct mbuf **mnext, *m_frgpart;
1062		struct ip6_frag *ip6f;
1063		u_int32_t id = htonl(ip6_randomid());
1064		u_char nextproto;
1065#if 0
1066		struct ip6ctlparam ip6cp;
1067		u_int32_t mtu32;
1068#endif
1069		int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
1070
1071		/*
1072		 * Too large for the destination or interface;
1073		 * fragment if possible.
1074		 * Must be able to put at least 8 bytes per fragment.
1075		 */
1076		hlen = unfragpartlen;
1077		if (mtu > IPV6_MAXPACKET)
1078			mtu = IPV6_MAXPACKET;
1079
1080#if 0
1081		/*
1082		 * It is believed this code is a leftover from the
1083		 * development of the IPV6_RECVPATHMTU sockopt and
1084		 * associated work to implement RFC3542.
1085		 * It's not entirely clear what the intent of the API
1086		 * is at this point, so disable this code for now.
1087		 * The IPV6_RECVPATHMTU sockopt and/or IPV6_DONTFRAG
1088		 * will send notifications if the application requests.
1089		 */
1090
1091		/* Notify a proper path MTU to applications. */
1092		mtu32 = (u_int32_t)mtu;
1093		bzero(&ip6cp, sizeof(ip6cp));
1094		ip6cp.ip6c_cmdarg = (void *)&mtu32;
1095		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1096		    (void *)&ip6cp);
1097#endif
1098
1099		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1100		if (len < 8) {
1101			error = EMSGSIZE;
1102			in6_ifstat_inc(ifp, ifs6_out_fragfail);
1103			goto bad;
1104		}
1105
1106		/*
1107		 * Verify that we have any chance at all of being able to queue
1108		 *      the packet or packet fragments
1109		 */
1110		if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
1111		    < tlen  /* - hlen */)) {
1112			error = ENOBUFS;
1113			ip6stat.ip6s_odropped++;
1114			goto bad;
1115		}
1116
1117		mnext = &m->m_nextpkt;
1118
1119		/*
1120		 * Change the next header field of the last header in the
1121		 * unfragmentable part.
1122		 */
1123		if (exthdrs.ip6e_rthdr) {
1124			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1125			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1126		} else if (exthdrs.ip6e_dest1) {
1127			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1128			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1129		} else if (exthdrs.ip6e_hbh) {
1130			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1131			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1132		} else {
1133			nextproto = ip6->ip6_nxt;
1134			ip6->ip6_nxt = IPPROTO_FRAGMENT;
1135		}
1136
1137		/*
1138		 * Loop through length of segment after first fragment,
1139		 * make new header and copy data of each part and link onto
1140		 * chain.
1141		 */
1142		m0 = m;
1143		for (off = hlen; off < tlen; off += len) {
1144			MGETHDR(m, M_DONTWAIT, MT_HEADER);
1145			if (!m) {
1146				error = ENOBUFS;
1147				ip6stat.ip6s_odropped++;
1148				goto sendorfree;
1149			}
1150			m->m_pkthdr.rcvif = NULL;
1151			m->m_flags = m0->m_flags & M_COPYFLAGS;
1152			*mnext = m;
1153			mnext = &m->m_nextpkt;
1154			m->m_data += max_linkhdr;
1155			mhip6 = mtod(m, struct ip6_hdr *);
1156			*mhip6 = *ip6;
1157			m->m_len = sizeof(*mhip6);
1158			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1159			if (error) {
1160				ip6stat.ip6s_odropped++;
1161				goto sendorfree;
1162			}
1163			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1164			if (off + len >= tlen)
1165				len = tlen - off;
1166			else
1167				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1168			mhip6->ip6_plen = htons((u_short)(len + hlen +
1169			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1170			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1171				error = ENOBUFS;
1172				ip6stat.ip6s_odropped++;
1173				goto sendorfree;
1174			}
1175			m_cat(m, m_frgpart);
1176			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1177			m->m_pkthdr.rcvif = NULL;
1178			ip6f->ip6f_reserved = 0;
1179			ip6f->ip6f_ident = id;
1180			ip6f->ip6f_nxt = nextproto;
1181			ip6stat.ip6s_ofragments++;
1182			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1183		}
1184
1185		in6_ifstat_inc(ifp, ifs6_out_fragok);
1186	}
1187
1188	/*
1189	 * Remove leading garbages.
1190	 */
1191sendorfree:
1192	m = m0->m_nextpkt;
1193	m0->m_nextpkt = 0;
1194	m_freem(m0);
1195	for (m0 = m; m; m = m0) {
1196		m0 = m->m_nextpkt;
1197		m->m_nextpkt = 0;
1198		if (error == 0) {
1199 			/* Record statistics for this interface address. */
1200 			if (ia) {
1201 				ia->ia_ifa.if_opackets++;
1202 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1203 			}
1204#ifdef IPSEC
1205			/* clean ipsec history once it goes out of the node */
1206			ipsec_delaux(m);
1207#endif
1208			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1209		} else
1210			m_freem(m);
1211	}
1212
1213	if (error == 0)
1214		ip6stat.ip6s_fragmented++;
1215
1216done:
1217	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1218		RTFREE(ro->ro_rt);
1219	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1220		RTFREE(ro_pmtu->ro_rt);
1221	}
1222
1223#ifdef IPSEC
1224	if (sp != NULL)
1225		key_freesp(sp);
1226#endif /* IPSEC */
1227#ifdef FAST_IPSEC
1228	if (sp != NULL)
1229		KEY_FREESP(&sp);
1230#endif /* FAST_IPSEC */
1231
1232	return (error);
1233
1234freehdrs:
1235	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1236	m_freem(exthdrs.ip6e_dest1);
1237	m_freem(exthdrs.ip6e_rthdr);
1238	m_freem(exthdrs.ip6e_dest2);
1239	/* FALLTHROUGH */
1240bad:
1241	m_freem(m);
1242	goto done;
1243}
1244
1245static int
1246ip6_copyexthdr(mp, hdr, hlen)
1247	struct mbuf **mp;
1248	caddr_t hdr;
1249	int hlen;
1250{
1251	struct mbuf *m;
1252
1253	if (hlen > MCLBYTES)
1254		return (ENOBUFS); /* XXX */
1255
1256	MGET(m, M_DONTWAIT, MT_DATA);
1257	if (!m)
1258		return (ENOBUFS);
1259
1260	if (hlen > MLEN) {
1261		MCLGET(m, M_DONTWAIT);
1262		if ((m->m_flags & M_EXT) == 0) {
1263			m_free(m);
1264			return (ENOBUFS);
1265		}
1266	}
1267	m->m_len = hlen;
1268	if (hdr)
1269		bcopy(hdr, mtod(m, caddr_t), hlen);
1270
1271	*mp = m;
1272	return (0);
1273}
1274
1275/*
1276 * Insert jumbo payload option.
1277 */
1278static int
1279ip6_insert_jumboopt(exthdrs, plen)
1280	struct ip6_exthdrs *exthdrs;
1281	u_int32_t plen;
1282{
1283	struct mbuf *mopt;
1284	u_char *optbuf;
1285	u_int32_t v;
1286
1287#define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1288
1289	/*
1290	 * If there is no hop-by-hop options header, allocate new one.
1291	 * If there is one but it doesn't have enough space to store the
1292	 * jumbo payload option, allocate a cluster to store the whole options.
1293	 * Otherwise, use it to store the options.
1294	 */
1295	if (exthdrs->ip6e_hbh == 0) {
1296		MGET(mopt, M_DONTWAIT, MT_DATA);
1297		if (mopt == 0)
1298			return (ENOBUFS);
1299		mopt->m_len = JUMBOOPTLEN;
1300		optbuf = mtod(mopt, u_char *);
1301		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1302		exthdrs->ip6e_hbh = mopt;
1303	} else {
1304		struct ip6_hbh *hbh;
1305
1306		mopt = exthdrs->ip6e_hbh;
1307		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1308			/*
1309			 * XXX assumption:
1310			 * - exthdrs->ip6e_hbh is not referenced from places
1311			 *   other than exthdrs.
1312			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1313			 */
1314			int oldoptlen = mopt->m_len;
1315			struct mbuf *n;
1316
1317			/*
1318			 * XXX: give up if the whole (new) hbh header does
1319			 * not fit even in an mbuf cluster.
1320			 */
1321			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1322				return (ENOBUFS);
1323
1324			/*
1325			 * As a consequence, we must always prepare a cluster
1326			 * at this point.
1327			 */
1328			MGET(n, M_DONTWAIT, MT_DATA);
1329			if (n) {
1330				MCLGET(n, M_DONTWAIT);
1331				if ((n->m_flags & M_EXT) == 0) {
1332					m_freem(n);
1333					n = NULL;
1334				}
1335			}
1336			if (!n)
1337				return (ENOBUFS);
1338			n->m_len = oldoptlen + JUMBOOPTLEN;
1339			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1340			    oldoptlen);
1341			optbuf = mtod(n, caddr_t) + oldoptlen;
1342			m_freem(mopt);
1343			mopt = exthdrs->ip6e_hbh = n;
1344		} else {
1345			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1346			mopt->m_len += JUMBOOPTLEN;
1347		}
1348		optbuf[0] = IP6OPT_PADN;
1349		optbuf[1] = 1;
1350
1351		/*
1352		 * Adjust the header length according to the pad and
1353		 * the jumbo payload option.
1354		 */
1355		hbh = mtod(mopt, struct ip6_hbh *);
1356		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1357	}
1358
1359	/* fill in the option. */
1360	optbuf[2] = IP6OPT_JUMBO;
1361	optbuf[3] = 4;
1362	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1363	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1364
1365	/* finally, adjust the packet header length */
1366	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1367
1368	return (0);
1369#undef JUMBOOPTLEN
1370}
1371
1372/*
1373 * Insert fragment header and copy unfragmentable header portions.
1374 */
1375static int
1376ip6_insertfraghdr(m0, m, hlen, frghdrp)
1377	struct mbuf *m0, *m;
1378	int hlen;
1379	struct ip6_frag **frghdrp;
1380{
1381	struct mbuf *n, *mlast;
1382
1383	if (hlen > sizeof(struct ip6_hdr)) {
1384		n = m_copym(m0, sizeof(struct ip6_hdr),
1385		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1386		if (n == 0)
1387			return (ENOBUFS);
1388		m->m_next = n;
1389	} else
1390		n = m;
1391
1392	/* Search for the last mbuf of unfragmentable part. */
1393	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1394		;
1395
1396	if ((mlast->m_flags & M_EXT) == 0 &&
1397	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1398		/* use the trailing space of the last mbuf for the fragment hdr */
1399		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1400		    mlast->m_len);
1401		mlast->m_len += sizeof(struct ip6_frag);
1402		m->m_pkthdr.len += sizeof(struct ip6_frag);
1403	} else {
1404		/* allocate a new mbuf for the fragment header */
1405		struct mbuf *mfrg;
1406
1407		MGET(mfrg, M_DONTWAIT, MT_DATA);
1408		if (mfrg == 0)
1409			return (ENOBUFS);
1410		mfrg->m_len = sizeof(struct ip6_frag);
1411		*frghdrp = mtod(mfrg, struct ip6_frag *);
1412		mlast->m_next = mfrg;
1413	}
1414
1415	return (0);
1416}
1417
1418static int
1419ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
1420	struct route_in6 *ro_pmtu, *ro;
1421	struct ifnet *ifp;
1422	struct in6_addr *dst;
1423	u_long *mtup;
1424	int *alwaysfragp;
1425{
1426	u_int32_t mtu = 0;
1427	int alwaysfrag = 0;
1428	int error = 0;
1429
1430	if (ro_pmtu != ro) {
1431		/* The first hop and the final destination may differ. */
1432		struct sockaddr_in6 *sa6_dst =
1433		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1434		if (ro_pmtu->ro_rt &&
1435		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1436		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1437			RTFREE(ro_pmtu->ro_rt);
1438			ro_pmtu->ro_rt = (struct rtentry *)NULL;
1439		}
1440		if (ro_pmtu->ro_rt == NULL) {
1441			bzero(sa6_dst, sizeof(*sa6_dst));
1442			sa6_dst->sin6_family = AF_INET6;
1443			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1444			sa6_dst->sin6_addr = *dst;
1445
1446			rtalloc((struct route *)ro_pmtu);
1447		}
1448	}
1449	if (ro_pmtu->ro_rt) {
1450		u_int32_t ifmtu;
1451		struct in_conninfo inc;
1452
1453		bzero(&inc, sizeof(inc));
1454		inc.inc_flags = 1; /* IPv6 */
1455		inc.inc6_faddr = *dst;
1456
1457		if (ifp == NULL)
1458			ifp = ro_pmtu->ro_rt->rt_ifp;
1459		ifmtu = IN6_LINKMTU(ifp);
1460		mtu = tcp_hc_getmtu(&inc);
1461		if (mtu)
1462			mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
1463		else
1464			mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1465		if (mtu == 0)
1466			mtu = ifmtu;
1467		else if (mtu < IPV6_MMTU) {
1468			/*
1469			 * RFC2460 section 5, last paragraph:
1470			 * if we record ICMPv6 too big message with
1471			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1472			 * or smaller, with framgent header attached.
1473			 * (fragment header is needed regardless from the
1474			 * packet size, for translators to identify packets)
1475			 */
1476			alwaysfrag = 1;
1477			mtu = IPV6_MMTU;
1478		} else if (mtu > ifmtu) {
1479			/*
1480			 * The MTU on the route is larger than the MTU on
1481			 * the interface!  This shouldn't happen, unless the
1482			 * MTU of the interface has been changed after the
1483			 * interface was brought up.  Change the MTU in the
1484			 * route to match the interface MTU (as long as the
1485			 * field isn't locked).
1486			 */
1487			mtu = ifmtu;
1488			ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1489		}
1490	} else if (ifp) {
1491		mtu = IN6_LINKMTU(ifp);
1492	} else
1493		error = EHOSTUNREACH; /* XXX */
1494
1495	*mtup = mtu;
1496	if (alwaysfragp)
1497		*alwaysfragp = alwaysfrag;
1498	return (error);
1499}
1500
1501/*
1502 * IP6 socket option processing.
 *
 * Handles get and set requests for the socket `so' as described by
 * `sopt'; the level, direction, option name, value size and requesting
 * thread are read from sopt.  Panics if sopt is NULL.
 *
 * Only level IPPROTO_IPV6 is served; any other level yields EINVAL.
 * Within that level the request is dispatched on the direction (SOPT_SET
 * or SOPT_GET; any other direction falls through with error 0) and then
 * on the option name.  Unknown option names yield ENOPROTOOPT.
 *
 * Returns 0 on success or an errno value.  Note that a few branches
 * return directly (EPERM, EINVAL, ENOTCONN) instead of going through
 * the common return at the bottom.
1503 */
1504int
1505ip6_ctloutput(so, sopt)
1506	struct socket *so;
1507	struct sockopt *sopt;
1508{
1509	int privileged, optdatalen, uproto;
1510	void *optdata;
1511	struct inpcb *in6p = sotoinpcb(so);
1512	int error, optval;
1513	int level, op, optname;
1514	int optlen;
1515	struct thread *td;
1516
	/* Cache the request parameters; a NULL sopt is treated as fatal. */
1517	if (sopt) {
1518		level = sopt->sopt_level;
1519		op = sopt->sopt_dir;
1520		optname = sopt->sopt_name;
1521		optlen = sopt->sopt_valsize;
1522		td = sopt->sopt_td;
1523	} else {
1524		panic("ip6_ctloutput: arg soopt is NULL");
1525	}
1526	error = optval = 0;
1527
	/*
	 * privileged is 1 only when a thread is supplied and suser()
	 * returns 0 for it; with no thread it is 0.
	 */
1528	privileged = (td == 0 || suser(td)) ? 0 : 1;
1529	uproto = (int)so->so_proto->pr_protocol;
1530
1531	if (level == IPPROTO_IPV6) {
1532		switch (op) {
1533
1534		case SOPT_SET:
1535			switch (optname) {
1536			case IPV6_2292PKTOPTIONS:
1537#ifdef IPV6_PKTOPTIONS
1538			case IPV6_PKTOPTIONS:
1539#endif
1540			{
1541				struct mbuf *m;
1542
				/* Pull the user data into an mbuf chain. */
1543				error = soopt_getm(sopt, &m); /* XXX */
1544				if (error != 0)
1545					break;
1546				error = soopt_mcopyin(sopt, m); /* XXX */
1547				if (error != 0)
1548					break;
1549				error = ip6_pcbopts(&in6p->in6p_outputopts,
1550						    m, so, sopt);
1551				m_freem(m); /* XXX */
1552				break;
1553			}
1554
1555			/*
1556			 * Use of some Hop-by-Hop options or some
1557			 * Destination options, might require special
1558			 * privilege.  That is, normal applications
1559			 * (without special privilege) might be forbidden
1560			 * from setting certain options in outgoing packets,
1561			 * and might never see certain options in received
1562			 * packets. [RFC 2292 Section 6]
1563			 * KAME specific note:
1564			 *  KAME prevents non-privileged users from sending or
1565			 *  receiving ANY hbh/dst options in order to avoid
1566			 *  overhead of parsing options in the kernel.
1567			 */
1568			case IPV6_RECVHOPOPTS:
1569			case IPV6_RECVDSTOPTS:
1570			case IPV6_RECVRTHDRDSTOPTS:
1571				if (!privileged) {
1572					error = EPERM;
1573					break;
1574				}
1575				/* FALLTHROUGH */
1576			case IPV6_UNICAST_HOPS:
1577			case IPV6_HOPLIMIT:
1578			case IPV6_FAITH:
1579
1580			case IPV6_RECVPKTINFO:
1581			case IPV6_RECVHOPLIMIT:
1582			case IPV6_RECVRTHDR:
1583			case IPV6_RECVPATHMTU:
1584			case IPV6_RECVTCLASS:
1585			case IPV6_V6ONLY:
1586			case IPV6_AUTOFLOWLABEL:
				/* All of these take exactly one int. */
1587				if (optlen != sizeof(int)) {
1588					error = EINVAL;
1589					break;
1590				}
1591				error = sooptcopyin(sopt, &optval,
1592					sizeof optval, sizeof optval);
1593				if (error)
1594					break;
1595				switch (optname) {
1596
1597				case IPV6_UNICAST_HOPS:
1598					if (optval < -1 || optval >= 256)
1599						error = EINVAL;
1600					else {
1601						/* -1 = kernel default */
1602						in6p->in6p_hops = optval;
1603						if ((in6p->in6p_vflag &
1604						     INP_IPV4) != 0)
1605							in6p->inp_ip_ttl = optval;
1606					}
1607					break;
/*
 * Helper macros used by the option cases that follow:
 * OPTSET sets or clears (bit) in in6p_flags depending on optval,
 * OPTSET2292 does the same after also setting IN6P_RFC2292, and
 * OPTBIT evaluates to 1 if any of (bit) is set in in6p_flags, else 0.
 */
1608#define OPTSET(bit) \
1609do { \
1610	if (optval) \
1611		in6p->in6p_flags |= (bit); \
1612	else \
1613		in6p->in6p_flags &= ~(bit); \
1614} while (/*CONSTCOND*/ 0)
1615#define OPTSET2292(bit) \
1616do { \
1617	in6p->in6p_flags |= IN6P_RFC2292; \
1618	if (optval) \
1619		in6p->in6p_flags |= (bit); \
1620	else \
1621		in6p->in6p_flags &= ~(bit); \
1622} while (/*CONSTCOND*/ 0)
1623#define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1624
1625				case IPV6_RECVPKTINFO:
1626					/* cannot mix with RFC2292 */
1627					if (OPTBIT(IN6P_RFC2292)) {
1628						error = EINVAL;
1629						break;
1630					}
1631					OPTSET(IN6P_PKTINFO);
1632					break;
1633
1634				case IPV6_HOPLIMIT:
1635				{
1636					struct ip6_pktopts **optp;
1637
1638					/* cannot mix with RFC2292 */
1639					if (OPTBIT(IN6P_RFC2292)) {
1640						error = EINVAL;
1641						break;
1642					}
1643					optp = &in6p->in6p_outputopts;
1644					error = ip6_pcbopt(IPV6_HOPLIMIT,
1645							   (u_char *)&optval,
1646							   sizeof(optval),
1647							   optp,
1648							   privileged, uproto);
1649					break;
1650				}
1651
1652				case IPV6_RECVHOPLIMIT:
1653					/* cannot mix with RFC2292 */
1654					if (OPTBIT(IN6P_RFC2292)) {
1655						error = EINVAL;
1656						break;
1657					}
1658					OPTSET(IN6P_HOPLIMIT);
1659					break;
1660
1661				case IPV6_RECVHOPOPTS:
1662					/* cannot mix with RFC2292 */
1663					if (OPTBIT(IN6P_RFC2292)) {
1664						error = EINVAL;
1665						break;
1666					}
1667					OPTSET(IN6P_HOPOPTS);
1668					break;
1669
1670				case IPV6_RECVDSTOPTS:
1671					/* cannot mix with RFC2292 */
1672					if (OPTBIT(IN6P_RFC2292)) {
1673						error = EINVAL;
1674						break;
1675					}
1676					OPTSET(IN6P_DSTOPTS);
1677					break;
1678
1679				case IPV6_RECVRTHDRDSTOPTS:
1680					/* cannot mix with RFC2292 */
1681					if (OPTBIT(IN6P_RFC2292)) {
1682						error = EINVAL;
1683						break;
1684					}
1685					OPTSET(IN6P_RTHDRDSTOPTS);
1686					break;
1687
1688				case IPV6_RECVRTHDR:
1689					/* cannot mix with RFC2292 */
1690					if (OPTBIT(IN6P_RFC2292)) {
1691						error = EINVAL;
1692						break;
1693					}
1694					OPTSET(IN6P_RTHDR);
1695					break;
1696
1697				case IPV6_FAITH:
1698					OPTSET(IN6P_FAITH);
1699					break;
1700
1701				case IPV6_RECVPATHMTU:
1702					/*
1703					 * We ignore this option for TCP
1704					 * sockets.
1705					 * (RFC3542 leaves this case
1706					 * unspecified.)
1707					 */
1708					if (uproto != IPPROTO_TCP)
1709						OPTSET(IN6P_MTU);
1710					break;
1711
1712				case IPV6_V6ONLY:
1713					/*
1714					 * make setsockopt(IPV6_V6ONLY)
1715					 * available only prior to bind(2).
1716					 * see ipng mailing list, Jun 22 2001.
1717					 */
1718					if (in6p->in6p_lport ||
1719					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1720						error = EINVAL;
1721						break;
1722					}
1723					OPTSET(IN6P_IPV6_V6ONLY);
					/* Keep the INP_IPV4 vflag in step. */
1724					if (optval)
1725						in6p->in6p_vflag &= ~INP_IPV4;
1726					else
1727						in6p->in6p_vflag |= INP_IPV4;
1728					break;
1729				case IPV6_RECVTCLASS:
1730					/* cannot mix with RFC2292 XXX */
1731					if (OPTBIT(IN6P_RFC2292)) {
1732						error = EINVAL;
1733						break;
1734					}
1735					OPTSET(IN6P_TCLASS);
1736					break;
1737				case IPV6_AUTOFLOWLABEL:
1738					OPTSET(IN6P_AUTOFLOWLABEL);
1739					break;
1740
1741				}
1742				break;
1743
			/* int-valued options stored via ip6_pcbopt(). */
1744			case IPV6_TCLASS:
1745			case IPV6_DONTFRAG:
1746			case IPV6_USE_MIN_MTU:
1747			case IPV6_PREFER_TEMPADDR:
1748				if (optlen != sizeof(optval)) {
1749					error = EINVAL;
1750					break;
1751				}
1752				error = sooptcopyin(sopt, &optval,
1753					sizeof optval, sizeof optval);
1754				if (error)
1755					break;
1756				{
1757					struct ip6_pktopts **optp;
1758					optp = &in6p->in6p_outputopts;
1759					error = ip6_pcbopt(optname,
1760							   (u_char *)&optval,
1761							   sizeof(optval),
1762							   optp,
1763							   privileged, uproto);
1764					break;
1765				}
1766
1767			case IPV6_2292PKTINFO:
1768			case IPV6_2292HOPLIMIT:
1769			case IPV6_2292HOPOPTS:
1770			case IPV6_2292DSTOPTS:
1771			case IPV6_2292RTHDR:
1772				/* RFC 2292 */
1773				if (optlen != sizeof(int)) {
1774					error = EINVAL;
1775					break;
1776				}
1777				error = sooptcopyin(sopt, &optval,
1778					sizeof optval, sizeof optval);
1779				if (error)
1780					break;
1781				switch (optname) {
1782				case IPV6_2292PKTINFO:
1783					OPTSET2292(IN6P_PKTINFO);
1784					break;
1785				case IPV6_2292HOPLIMIT:
1786					OPTSET2292(IN6P_HOPLIMIT);
1787					break;
1788				case IPV6_2292HOPOPTS:
1789					/*
1790					 * Check super-user privilege.
1791					 * See comments for IPV6_RECVHOPOPTS.
					 * Note: returns directly instead of
					 * setting error and breaking.
1792					 */
1793					if (!privileged)
1794						return (EPERM);
1795					OPTSET2292(IN6P_HOPOPTS);
1796					break;
1797				case IPV6_2292DSTOPTS:
1798					if (!privileged)
1799						return (EPERM);
1800					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1801					break;
1802				case IPV6_2292RTHDR:
1803					OPTSET2292(IN6P_RTHDR);
1804					break;
1805				}
1806				break;
1807			case IPV6_PKTINFO:
1808			case IPV6_HOPOPTS:
1809			case IPV6_RTHDR:
1810			case IPV6_DSTOPTS:
1811			case IPV6_RTHDRDSTOPTS:
1812			case IPV6_NEXTHOP:
1813			{
1814				/* new advanced API (RFC3542) */
1815				u_char *optbuf;
				/* NB: shadows the function-level optlen. */
1816				int optlen;
1817				struct ip6_pktopts **optp;
1818
1819				/* cannot mix with RFC2292 */
1820				if (OPTBIT(IN6P_RFC2292)) {
1821					error = EINVAL;
1822					break;
1823				}
1824
				/* Everything but PKTINFO and RTHDR needs privilege. */
1825				switch (optname) {
1826				case IPV6_HOPOPTS:
1827				case IPV6_DSTOPTS:
1828				case IPV6_RTHDRDSTOPTS:
1829				case IPV6_NEXTHOP:
1830					if (!privileged)
1831						error = EPERM;
1832					break;
1833				}
1834				if (error)
1835					break;
1836
				/* Pick the largest value size accepted for this option. */
1837				switch (optname) {
1838				case IPV6_PKTINFO:
1839					optlen = sizeof(struct in6_pktinfo);
1840					break;
1841				case IPV6_NEXTHOP:
1842					optlen = SOCK_MAXADDRLEN;
1843					break;
1844				default:
1845					optlen = IPV6_MAXOPTHDR;
1846					break;
1847				}
1848				if (sopt->sopt_valsize > optlen) {
1849					error = EINVAL;
1850					break;
1851				}
1852
				/* Copy the value into a temporary buffer, freed below. */
1853				optlen = sopt->sopt_valsize;
1854				optbuf = malloc(optlen, M_TEMP, M_WAITOK);
1855				error = sooptcopyin(sopt, optbuf, optlen,
1856				    optlen);
1857				if (error) {
1858					free(optbuf, M_TEMP);
1859					break;
1860				}
1861
1862				optp = &in6p->in6p_outputopts;
1863				error = ip6_pcbopt(optname,
1864						   optbuf, optlen,
1865						   optp, privileged, uproto);
1866				free(optbuf, M_TEMP);
1867				break;
1868			}
1869#undef OPTSET
1870
1871			case IPV6_MULTICAST_IF:
1872			case IPV6_MULTICAST_HOPS:
1873			case IPV6_MULTICAST_LOOP:
1874			case IPV6_JOIN_GROUP:
1875			case IPV6_LEAVE_GROUP:
1876			    {
				/*
				 * Values larger than MLEN are rejected here,
				 * so the "> MLEN" cluster path in the next
				 * block is not taken for these options.
				 */
1877				if (sopt->sopt_valsize > MLEN) {
1878					error = EMSGSIZE;
1879					break;
1880				}
1881				/* XXX */
1882			    }
1883			    /* FALLTHROUGH */
1884			    {
1885				struct mbuf *m;
1886
1887				if (sopt->sopt_valsize > MCLBYTES) {
1888					error = EMSGSIZE;
1889					break;
1890				}
1891				/* XXX */
1892				MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_HEADER);
1893				if (m == 0) {
1894					error = ENOBUFS;
1895					break;
1896				}
1897				if (sopt->sopt_valsize > MLEN) {
1898					MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
1899					if ((m->m_flags & M_EXT) == 0) {
1900						m_free(m);
1901						error = ENOBUFS;
1902						break;
1903					}
1904				}
1905				m->m_len = sopt->sopt_valsize;
1906				error = sooptcopyin(sopt, mtod(m, char *),
1907						    m->m_len, m->m_len);
1908				if (error) {
1909					(void)m_free(m);
1910					break;
1911				}
1912				error =	ip6_setmoptions(sopt->sopt_name,
1913							&in6p->in6p_moptions,
1914							m);
1915				(void)m_free(m);
1916			    }
1917				break;
1918
1919			case IPV6_PORTRANGE:
1920				error = sooptcopyin(sopt, &optval,
1921				    sizeof optval, sizeof optval);
1922				if (error)
1923					break;
1924
				/* LOWPORT and HIGHPORT are kept mutually exclusive. */
1925				switch (optval) {
1926				case IPV6_PORTRANGE_DEFAULT:
1927					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1928					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1929					break;
1930
1931				case IPV6_PORTRANGE_HIGH:
1932					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1933					in6p->in6p_flags |= IN6P_HIGHPORT;
1934					break;
1935
1936				case IPV6_PORTRANGE_LOW:
1937					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1938					in6p->in6p_flags |= IN6P_LOWPORT;
1939					break;
1940
1941				default:
1942					error = EINVAL;
1943					break;
1944				}
1945				break;
1946
1947#if defined(IPSEC) || defined(FAST_IPSEC)
1948			case IPV6_IPSEC_POLICY:
1949			    {
1950				caddr_t req = NULL;
1951				size_t len = 0;
1952				struct mbuf *m;
1953
1954				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1955					break;
1956				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1957					break;
1958				if (m) {
1959					req = mtod(m, caddr_t);
1960					len = m->m_len;
1961				}
1962				error = ipsec6_set_policy(in6p, optname, req,
1963							  len, privileged);
1964				m_freem(m);
1965			    }
1966				break;
1967#endif /* KAME IPSEC */
1968
1969			case IPV6_FW_ADD:
1970			case IPV6_FW_DEL:
1971			case IPV6_FW_FLUSH:
1972			case IPV6_FW_ZERO:
1973			    {
1974				struct mbuf *m;
1975				struct mbuf **mp = &m;
1976
				/* Direct return when no firewall hook is installed. */
1977				if (ip6_fw_ctl_ptr == NULL)
1978					return EINVAL;
1979				/* XXX */
1980				if ((error = soopt_getm(sopt, &m)) != 0)
1981					break;
1982				/* XXX */
1983				if ((error = soopt_mcopyin(sopt, m)) != 0)
1984					break;
				/*
				 * NOTE(review): m is not freed here; presumably
				 * the hook takes ownership -- confirm.
				 */
1985				error = (*ip6_fw_ctl_ptr)(optname, mp);
1986				m = *mp;
1987			    }
1988				break;
1989
1990			default:
1991				error = ENOPROTOOPT;
1992				break;
1993			}
1994			break;
1995
1996		case SOPT_GET:
1997			switch (optname) {
1998
1999			case IPV6_2292PKTOPTIONS:
2000#ifdef IPV6_PKTOPTIONS
2001			case IPV6_PKTOPTIONS:
2002#endif
2003				/*
2004				 * RFC3542 (effectively) deprecated the
2005				 * semantics of the 2292-style pktoptions.
2006				 * Since it was not reliable in nature (i.e.,
2007				 * applications had to expect the lack of some
2008				 * information after all), it would make sense
2009				 * to simplify this part by always returning
2010				 * empty data.
2011				 */
2012				sopt->sopt_valsize = 0;
2013				break;
2014
			/* Options reported as a single int via optval. */
2015			case IPV6_RECVHOPOPTS:
2016			case IPV6_RECVDSTOPTS:
2017			case IPV6_RECVRTHDRDSTOPTS:
2018			case IPV6_UNICAST_HOPS:
2019			case IPV6_RECVPKTINFO:
2020			case IPV6_RECVHOPLIMIT:
2021			case IPV6_RECVRTHDR:
2022			case IPV6_RECVPATHMTU:
2023
2024			case IPV6_FAITH:
2025			case IPV6_V6ONLY:
2026			case IPV6_PORTRANGE:
2027			case IPV6_RECVTCLASS:
2028			case IPV6_AUTOFLOWLABEL:
2029				switch (optname) {
2030
2031				case IPV6_RECVHOPOPTS:
2032					optval = OPTBIT(IN6P_HOPOPTS);
2033					break;
2034
2035				case IPV6_RECVDSTOPTS:
2036					optval = OPTBIT(IN6P_DSTOPTS);
2037					break;
2038
2039				case IPV6_RECVRTHDRDSTOPTS:
2040					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
2041					break;
2042
2043				case IPV6_UNICAST_HOPS:
2044					optval = in6p->in6p_hops;
2045					break;
2046
2047				case IPV6_RECVPKTINFO:
2048					optval = OPTBIT(IN6P_PKTINFO);
2049					break;
2050
2051				case IPV6_RECVHOPLIMIT:
2052					optval = OPTBIT(IN6P_HOPLIMIT);
2053					break;
2054
2055				case IPV6_RECVRTHDR:
2056					optval = OPTBIT(IN6P_RTHDR);
2057					break;
2058
2059				case IPV6_RECVPATHMTU:
2060					optval = OPTBIT(IN6P_MTU);
2061					break;
2062
2063				case IPV6_FAITH:
2064					optval = OPTBIT(IN6P_FAITH);
2065					break;
2066
2067				case IPV6_V6ONLY:
2068					optval = OPTBIT(IN6P_IPV6_V6ONLY);
2069					break;
2070
2071				case IPV6_PORTRANGE:
2072				    {
2073					int flags;
2074					flags = in6p->in6p_flags;
					/* HIGHPORT is tested first, then LOWPORT. */
2075					if (flags & IN6P_HIGHPORT)
2076						optval = IPV6_PORTRANGE_HIGH;
2077					else if (flags & IN6P_LOWPORT)
2078						optval = IPV6_PORTRANGE_LOW;
2079					else
2080						optval = 0;
2081					break;
2082				    }
2083				case IPV6_RECVTCLASS:
2084					optval = OPTBIT(IN6P_TCLASS);
2085					break;
2086
2087				case IPV6_AUTOFLOWLABEL:
2088					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2089					break;
2090				}
				/* No case above assigns error, so this test does not fire. */
2091				if (error)
2092					break;
2093				error = sooptcopyout(sopt, &optval,
2094					sizeof optval);
2095				break;
2096
2097			case IPV6_PATHMTU:
2098			{
2099				u_long pmtu = 0;
2100				struct ip6_mtuinfo mtuinfo;
				/* Scratch route: zeroed here, released after the lookup. */
2101				struct route_in6 sro;
2102
2103				bzero(&sro, sizeof(sro));
2104
				/* Direct return for sockets that are not connected. */
2105				if (!(so->so_state & SS_ISCONNECTED))
2106					return (ENOTCONN);
2107				/*
2108				 * XXX: we dot not consider the case of source
2109				 * routing, or optional information to specify
2110				 * the outgoing interface.
2111				 */
2112				error = ip6_getpmtu(&sro, NULL, NULL,
2113				    &in6p->in6p_faddr, &pmtu, NULL);
2114				if (sro.ro_rt)
2115					RTFREE(sro.ro_rt);
2116				if (error)
2117					break;
2118				if (pmtu > IPV6_MAXPACKET)
2119					pmtu = IPV6_MAXPACKET;
2120
2121				bzero(&mtuinfo, sizeof(mtuinfo));
2122				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2123				optdata = (void *)&mtuinfo;
2124				optdatalen = sizeof(mtuinfo);
2125				error = sooptcopyout(sopt, optdata,
2126				    optdatalen);
2127				break;
2128			}
2129
			/* RFC 2292 flag options, reported from in6p_flags. */
2130			case IPV6_2292PKTINFO:
2131			case IPV6_2292HOPLIMIT:
2132			case IPV6_2292HOPOPTS:
2133			case IPV6_2292RTHDR:
2134			case IPV6_2292DSTOPTS:
2135				switch (optname) {
2136				case IPV6_2292PKTINFO:
2137					optval = OPTBIT(IN6P_PKTINFO);
2138					break;
2139				case IPV6_2292HOPLIMIT:
2140					optval = OPTBIT(IN6P_HOPLIMIT);
2141					break;
2142				case IPV6_2292HOPOPTS:
2143					optval = OPTBIT(IN6P_HOPOPTS);
2144					break;
2145				case IPV6_2292RTHDR:
2146					optval = OPTBIT(IN6P_RTHDR);
2147					break;
2148				case IPV6_2292DSTOPTS:
2149					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2150					break;
2151				}
2152				error = sooptcopyout(sopt, &optval,
2153				    sizeof optval);
2154				break;
			/* Options whose value is produced by ip6_getpcbopt(). */
2155			case IPV6_PKTINFO:
2156			case IPV6_HOPOPTS:
2157			case IPV6_RTHDR:
2158			case IPV6_DSTOPTS:
2159			case IPV6_RTHDRDSTOPTS:
2160			case IPV6_NEXTHOP:
2161			case IPV6_TCLASS:
2162			case IPV6_DONTFRAG:
2163			case IPV6_USE_MIN_MTU:
2164			case IPV6_PREFER_TEMPADDR:
2165				error = ip6_getpcbopt(in6p->in6p_outputopts,
2166				    optname, sopt);
2167				break;
2168
2169			case IPV6_MULTICAST_IF:
2170			case IPV6_MULTICAST_HOPS:
2171			case IPV6_MULTICAST_LOOP:
2172			case IPV6_JOIN_GROUP:
2173			case IPV6_LEAVE_GROUP:
2174			    {
2175				struct mbuf *m;
				/*
				 * NOTE(review): m is freed even when
				 * ip6_getmoptions() fails; this relies on that
				 * function always storing through &m -- confirm.
				 */
2176				error = ip6_getmoptions(sopt->sopt_name,
2177				    in6p->in6p_moptions, &m);
2178				if (error == 0)
2179					error = sooptcopyout(sopt,
2180					    mtod(m, char *), m->m_len);
2181				m_freem(m);
2182			    }
2183				break;
2184
2185#if defined(IPSEC) || defined(FAST_IPSEC)
2186			case IPV6_IPSEC_POLICY:
2187			  {
2188				caddr_t req = NULL;
2189				size_t len = 0;
2190				struct mbuf *m = NULL;
2191				struct mbuf **mp = &m;
				/* Saved so they can be restored after the copy-in below. */
2192				size_t ovalsize = sopt->sopt_valsize;
2193				caddr_t oval = (caddr_t)sopt->sopt_val;
2194
2195				error = soopt_getm(sopt, &m); /* XXX */
2196				if (error != 0)
2197					break;
2198				error = soopt_mcopyin(sopt, m); /* XXX */
2199				if (error != 0)
2200					break;
2201				sopt->sopt_valsize = ovalsize;
2202				sopt->sopt_val = oval;
2203				if (m) {
2204					req = mtod(m, caddr_t);
2205					len = m->m_len;
2206				}
2207				error = ipsec6_get_policy(in6p, req, len, mp);
2208				if (error == 0)
2209					error = soopt_mcopyout(sopt, m); /* XXX */
2210				if (error == 0 && m)
2211					m_freem(m);
2212				break;
2213			  }
2214#endif /* KAME IPSEC */
2215
2216			case IPV6_FW_GET:
2217			  {
2218				struct mbuf *m;
2219				struct mbuf **mp = &m;
2220
				/* Direct return when no firewall hook is installed. */
2221				if (ip6_fw_ctl_ptr == NULL)
2222			        {
2223					return EINVAL;
2224				}
2225				error = (*ip6_fw_ctl_ptr)(optname, mp);
2226				if (error == 0)
2227					error = soopt_mcopyout(sopt, m); /* XXX */
2228				if (error == 0 && m)
2229					m_freem(m);
2230			  }
2231				break;
2232
2233			default:
2234				error = ENOPROTOOPT;
2235				break;
2236			}
2237			break;
2238		}
2239	} else {		/* level != IPPROTO_IPV6 */
2240		error = EINVAL;
2241	}
2242	return (error);
2243}
2244
2245int
2246ip6_raw_ctloutput(so, sopt)
2247	struct socket *so;
2248	struct sockopt *sopt;
2249{
2250	int error = 0, optval, optlen;
2251	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2252	struct in6pcb *in6p = sotoin6pcb(so);
2253	int level, op, optname;
2254
2255	if (sopt) {
2256		level = sopt->sopt_level;
2257		op = sopt->sopt_dir;
2258		optname = sopt->sopt_name;
2259		optlen = sopt->sopt_valsize;
2260	} else
2261		panic("ip6_raw_ctloutput: arg soopt is NULL");
2262
2263	if (level != IPPROTO_IPV6) {
2264		return (EINVAL);
2265	}
2266
2267	switch (optname) {
2268	case IPV6_CHECKSUM:
2269		/*
2270		 * For ICMPv6 sockets, no modification allowed for checksum
2271		 * offset, permit "no change" values to help existing apps.
2272		 *
2273		 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2274		 * for an ICMPv6 socket will fail."
2275		 * The current behavior does not meet RFC3542.
2276		 */
2277		switch (op) {
2278		case SOPT_SET:
2279			if (optlen != sizeof(int)) {
2280				error = EINVAL;
2281				break;
2282			}
2283			error = sooptcopyin(sopt, &optval, sizeof(optval),
2284					    sizeof(optval));
2285			if (error)
2286				break;
2287			if ((optval % 2) != 0) {
2288				/* the API assumes even offset values */
2289				error = EINVAL;
2290			} else if (so->so_proto->pr_protocol ==
2291			    IPPROTO_ICMPV6) {
2292				if (optval != icmp6off)
2293					error = EINVAL;
2294			} else
2295				in6p->in6p_cksum = optval;
2296			break;
2297
2298		case SOPT_GET:
2299			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2300				optval = icmp6off;
2301			else
2302				optval = in6p->in6p_cksum;
2303
2304			error = sooptcopyout(sopt, &optval, sizeof(optval));
2305			break;
2306
2307		default:
2308			error = EINVAL;
2309			break;
2310		}
2311		break;
2312
2313	default:
2314		error = ENOPROTOOPT;
2315		break;
2316	}
2317
2318	return (error);
2319}
2320
2321/*
2322 * Set up IP6 options in pcb for insertion in output packets or
2323 * specifying behavior of outgoing packets.
2324 */
2325static int
2326ip6_pcbopts(pktopt, m, so, sopt)
2327	struct ip6_pktopts **pktopt;
2328	struct mbuf *m;
2329	struct socket *so;
2330	struct sockopt *sopt;
2331{
2332	struct ip6_pktopts *opt = *pktopt;
2333	int error = 0;
2334	struct thread *td = sopt->sopt_td;
2335	int priv = 0;
2336
2337	/* turn off any old options. */
2338	if (opt) {
2339#ifdef DIAGNOSTIC
2340		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2341		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2342		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2343			printf("ip6_pcbopts: all specified options are cleared.\n");
2344#endif
2345		ip6_clearpktopts(opt, -1);
2346	} else
2347		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2348	*pktopt = NULL;
2349
2350	if (!m || m->m_len == 0) {
2351		/*
2352		 * Only turning off any previous options, regardless of
2353		 * whether the opt is just created or given.
2354		 */
2355		free(opt, M_IP6OPT);
2356		return (0);
2357	}
2358
2359	/*  set options specified by user. */
2360	if (td && !suser(td))
2361		priv = 1;
2362	if ((error = ip6_setpktopts(m, opt, NULL, priv,
2363	    so->so_proto->pr_protocol)) != 0) {
2364		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2365		free(opt, M_IP6OPT);
2366		return (error);
2367	}
2368	*pktopt = opt;
2369	return (0);
2370}
2371
2372/*
2373 * initialize ip6_pktopts.  beware that there are non-zero default values in
2374 * the struct.
2375 */
2376void
2377ip6_initpktopts(opt)
2378	struct ip6_pktopts *opt;
2379{
2380
2381	bzero(opt, sizeof(*opt));
2382	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2383	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2384	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2385	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2386}
2387
2388static int
2389ip6_pcbopt(optname, buf, len, pktopt, priv, uproto)
2390	int optname, len, priv;
2391	u_char *buf;
2392	struct ip6_pktopts **pktopt;
2393	int uproto;
2394{
2395	struct ip6_pktopts *opt;
2396
2397	if (*pktopt == NULL) {
2398		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2399		    M_WAITOK);
2400		ip6_initpktopts(*pktopt);
2401	}
2402	opt = *pktopt;
2403
2404	return (ip6_setpktopt(optname, buf, len, opt, priv, 1, 0, uproto));
2405}
2406
2407static int
2408ip6_getpcbopt(pktopt, optname, sopt)
2409	struct ip6_pktopts *pktopt;
2410	struct sockopt *sopt;
2411	int optname;
2412{
2413	void *optdata = NULL;
2414	int optdatalen = 0;
2415	struct ip6_ext *ip6e;
2416	int error = 0;
2417	struct in6_pktinfo null_pktinfo;
2418	int deftclass = 0, on;
2419	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2420	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2421
2422	switch (optname) {
2423	case IPV6_PKTINFO:
2424		if (pktopt && pktopt->ip6po_pktinfo)
2425			optdata = (void *)pktopt->ip6po_pktinfo;
2426		else {
2427			/* XXX: we don't have to do this every time... */
2428			bzero(&null_pktinfo, sizeof(null_pktinfo));
2429			optdata = (void *)&null_pktinfo;
2430		}
2431		optdatalen = sizeof(struct in6_pktinfo);
2432		break;
2433	case IPV6_TCLASS:
2434		if (pktopt && pktopt->ip6po_tclass >= 0)
2435			optdata = (void *)&pktopt->ip6po_tclass;
2436		else
2437			optdata = (void *)&deftclass;
2438		optdatalen = sizeof(int);
2439		break;
2440	case IPV6_HOPOPTS:
2441		if (pktopt && pktopt->ip6po_hbh) {
2442			optdata = (void *)pktopt->ip6po_hbh;
2443			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2444			optdatalen = (ip6e->ip6e_len + 1) << 3;
2445		}
2446		break;
2447	case IPV6_RTHDR:
2448		if (pktopt && pktopt->ip6po_rthdr) {
2449			optdata = (void *)pktopt->ip6po_rthdr;
2450			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2451			optdatalen = (ip6e->ip6e_len + 1) << 3;
2452		}
2453		break;
2454	case IPV6_RTHDRDSTOPTS:
2455		if (pktopt && pktopt->ip6po_dest1) {
2456			optdata = (void *)pktopt->ip6po_dest1;
2457			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2458			optdatalen = (ip6e->ip6e_len + 1) << 3;
2459		}
2460		break;
2461	case IPV6_DSTOPTS:
2462		if (pktopt && pktopt->ip6po_dest2) {
2463			optdata = (void *)pktopt->ip6po_dest2;
2464			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2465			optdatalen = (ip6e->ip6e_len + 1) << 3;
2466		}
2467		break;
2468	case IPV6_NEXTHOP:
2469		if (pktopt && pktopt->ip6po_nexthop) {
2470			optdata = (void *)pktopt->ip6po_nexthop;
2471			optdatalen = pktopt->ip6po_nexthop->sa_len;
2472		}
2473		break;
2474	case IPV6_USE_MIN_MTU:
2475		if (pktopt)
2476			optdata = (void *)&pktopt->ip6po_minmtu;
2477		else
2478			optdata = (void *)&defminmtu;
2479		optdatalen = sizeof(int);
2480		break;
2481	case IPV6_DONTFRAG:
2482		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2483			on = 1;
2484		else
2485			on = 0;
2486		optdata = (void *)&on;
2487		optdatalen = sizeof(on);
2488		break;
2489	case IPV6_PREFER_TEMPADDR:
2490		if (pktopt)
2491			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2492		else
2493			optdata = (void *)&defpreftemp;
2494		optdatalen = sizeof(int);
2495		break;
2496	default:		/* should not happen */
2497#ifdef DIAGNOSTIC
2498		panic("ip6_getpcbopt: unexpected option\n");
2499#endif
2500		return (ENOPROTOOPT);
2501	}
2502
2503	error = sooptcopyout(sopt, optdata, optdatalen);
2504
2505	return (error);
2506}
2507
2508void
2509ip6_clearpktopts(pktopt, optname)
2510	struct ip6_pktopts *pktopt;
2511	int optname;
2512{
2513	if (pktopt == NULL)
2514		return;
2515
2516	if (optname == -1 || optname == IPV6_PKTINFO) {
2517		if (pktopt->ip6po_pktinfo)
2518			free(pktopt->ip6po_pktinfo, M_IP6OPT);
2519		pktopt->ip6po_pktinfo = NULL;
2520	}
2521	if (optname == -1 || optname == IPV6_HOPLIMIT)
2522		pktopt->ip6po_hlim = -1;
2523	if (optname == -1 || optname == IPV6_TCLASS)
2524		pktopt->ip6po_tclass = -1;
2525	if (optname == -1 || optname == IPV6_NEXTHOP) {
2526		if (pktopt->ip6po_nextroute.ro_rt) {
2527			RTFREE(pktopt->ip6po_nextroute.ro_rt);
2528			pktopt->ip6po_nextroute.ro_rt = NULL;
2529		}
2530		if (pktopt->ip6po_nexthop)
2531			free(pktopt->ip6po_nexthop, M_IP6OPT);
2532		pktopt->ip6po_nexthop = NULL;
2533	}
2534	if (optname == -1 || optname == IPV6_HOPOPTS) {
2535		if (pktopt->ip6po_hbh)
2536			free(pktopt->ip6po_hbh, M_IP6OPT);
2537		pktopt->ip6po_hbh = NULL;
2538	}
2539	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2540		if (pktopt->ip6po_dest1)
2541			free(pktopt->ip6po_dest1, M_IP6OPT);
2542		pktopt->ip6po_dest1 = NULL;
2543	}
2544	if (optname == -1 || optname == IPV6_RTHDR) {
2545		if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2546			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2547		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2548		if (pktopt->ip6po_route.ro_rt) {
2549			RTFREE(pktopt->ip6po_route.ro_rt);
2550			pktopt->ip6po_route.ro_rt = NULL;
2551		}
2552	}
2553	if (optname == -1 || optname == IPV6_DSTOPTS) {
2554		if (pktopt->ip6po_dest2)
2555			free(pktopt->ip6po_dest2, M_IP6OPT);
2556		pktopt->ip6po_dest2 = NULL;
2557	}
2558}
2559
2560#define PKTOPT_EXTHDRCPY(type) \
2561do {\
2562	if (src->type) {\
2563		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2564		dst->type = malloc(hlen, M_IP6OPT, canwait);\
2565		if (dst->type == NULL && canwait == M_NOWAIT)\
2566			goto bad;\
2567		bcopy(src->type, dst->type, hlen);\
2568	}\
2569} while (/*CONSTCOND*/ 0)
2570
2571static int
2572copypktopts(dst, src, canwait)
2573	struct ip6_pktopts *dst, *src;
2574	int canwait;
2575{
2576	if (dst == NULL || src == NULL)  {
2577		printf("ip6_clearpktopts: invalid argument\n");
2578		return (EINVAL);
2579	}
2580
2581	dst->ip6po_hlim = src->ip6po_hlim;
2582	dst->ip6po_tclass = src->ip6po_tclass;
2583	dst->ip6po_flags = src->ip6po_flags;
2584	if (src->ip6po_pktinfo) {
2585		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2586		    M_IP6OPT, canwait);
2587		if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2588			goto bad;
2589		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2590	}
2591	if (src->ip6po_nexthop) {
2592		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2593		    M_IP6OPT, canwait);
2594		if (dst->ip6po_nexthop == NULL)
2595			goto bad;
2596		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2597		    src->ip6po_nexthop->sa_len);
2598	}
2599	PKTOPT_EXTHDRCPY(ip6po_hbh);
2600	PKTOPT_EXTHDRCPY(ip6po_dest1);
2601	PKTOPT_EXTHDRCPY(ip6po_dest2);
2602	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2603	return (0);
2604
2605  bad:
2606	if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2607	if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2608	if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2609	if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2610	if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2611	if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2612	return (ENOBUFS);
2613}
2614#undef PKTOPT_EXTHDRCPY
2615
2616struct ip6_pktopts *
2617ip6_copypktopts(src, canwait)
2618	struct ip6_pktopts *src;
2619	int canwait;
2620{
2621	int error;
2622	struct ip6_pktopts *dst;
2623
2624	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2625	if (dst == NULL && canwait == M_NOWAIT)
2626		return (NULL);
2627	ip6_initpktopts(dst);
2628
2629	if ((error = copypktopts(dst, src, canwait)) != 0) {
2630		free(dst, M_IP6OPT);
2631		return (NULL);
2632	}
2633
2634	return (dst);
2635}
2636
2637void
2638ip6_freepcbopts(pktopt)
2639	struct ip6_pktopts *pktopt;
2640{
2641	if (pktopt == NULL)
2642		return;
2643
2644	ip6_clearpktopts(pktopt, -1);
2645
2646	free(pktopt, M_IP6OPT);
2647}
2648
/*
 * Set the IP6 multicast options in response to user setsockopt().
 *
 * optname: the option to set; IPV6_MULTICAST_IF, IPV6_MULTICAST_HOPS,
 *	    IPV6_MULTICAST_LOOP, IPV6_JOIN_GROUP and IPV6_LEAVE_GROUP are
 *	    handled, anything else yields EOPNOTSUPP.
 * im6op:   address of the pointer to the multicast option structure.  If
 *	    the pointer is NULL a structure with default values is
 *	    allocated first.  On return, if every option holds its default
 *	    value and no membership is recorded, the structure is freed and
 *	    the pointer reset to NULL (this happens on error returns too).
 * m:	    mbuf holding the option value; its length must match the size
 *	    the option expects exactly, otherwise EINVAL is returned.
 *
 * Returns 0 on success or an errno value.
 */
static int
ip6_setmoptions(optname, im6op, m)
	int optname;
	struct ip6_moptions **im6op;
	struct mbuf *m;
{
	int error = 0;
	u_int loop, ifindex;
	struct ipv6_mreq *mreq;
	struct ifnet *ifp;
	struct ip6_moptions *im6o = *im6op;
	struct route_in6 ro;
	struct sockaddr_in6 *dst;
	struct in6_multi_mship *imm;
	struct thread *td = curthread;

	if (im6o == NULL) {
		/*
		 * No multicast option buffer attached to the pcb;
		 * allocate one and initialize to default values.
		 */
		im6o = (struct ip6_moptions *)
			malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);

		if (im6o == NULL)
			return (ENOBUFS);
		*im6op = im6o;
		im6o->im6o_multicast_ifp = NULL;
		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
		LIST_INIT(&im6o->im6o_memberships);
	}

	switch (optname) {

	case IPV6_MULTICAST_IF:
		/*
		 * Select the interface for outgoing multicast packets.
		 */
		if (m == NULL || m->m_len != sizeof(u_int)) {
			error = EINVAL;
			break;
		}
		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
		/*
		 * ifindex is a u_int, so of the two range tests below only
		 * the upper bound can actually fail.
		 */
		if (ifindex < 0 || if_index < ifindex) {
			error = ENXIO;	/* XXX EINVAL? */
			break;
		}
		ifp = ifnet_byindex(ifindex);
		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
			error = EADDRNOTAVAIL;
			break;
		}
		im6o->im6o_multicast_ifp = ifp;
		break;

	case IPV6_MULTICAST_HOPS:
	    {
		/*
		 * Set the IP6 hoplimit for outgoing multicast packets.
		 * Accepted values are -1 (meaning: go back to the system
		 * default, ip6_defmcasthlim) and 0 through 255.
		 */
		int optval;
		if (m == NULL || m->m_len != sizeof(int)) {
			error = EINVAL;
			break;
		}
		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
		if (optval < -1 || optval >= 256)
			error = EINVAL;
		else if (optval == -1)
			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
		else
			im6o->im6o_multicast_hlim = optval;
		break;
	    }

	case IPV6_MULTICAST_LOOP:
		/*
		 * Set the loopback flag for outgoing multicast packets.
		 * Must be zero or one.
		 */
		if (m == NULL || m->m_len != sizeof(u_int)) {
			error = EINVAL;
			break;
		}
		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
		if (loop > 1) {
			error = EINVAL;
			break;
		}
		im6o->im6o_multicast_loop = loop;
		break;

	case IPV6_JOIN_GROUP:
		/*
		 * Add a multicast group membership.
		 * Group must be a valid IP6 multicast address.
		 */
		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
			error = EINVAL;
			break;
		}
		mreq = mtod(m, struct ipv6_mreq *);
		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
			/*
			 * We use the unspecified address to specify to accept
			 * all multicast addresses. Only super user is allowed
			 * to do this.
			 */
			if (suser(td)) {
				error = EACCES;
				break;
			}
		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
			error = EINVAL;
			break;
		}

		/*
		 * If the interface is specified, validate it.
		 */
		if (mreq->ipv6mr_interface < 0 ||
		    if_index < mreq->ipv6mr_interface) {
			error = ENXIO;	/* XXX EINVAL? */
			break;
		}
		/*
		 * If no interface was explicitly specified, choose an
		 * appropriate one according to the given multicast address.
		 */
		if (mreq->ipv6mr_interface == 0) {
			/*
			 * If the multicast address is in node-local scope,
			 * the interface should be a loopback interface.
			 * Otherwise, look up the routing table for the
			 * address, and choose the outgoing interface.
			 *   XXX: is it a good approach?
			 */
			if (IN6_IS_ADDR_MC_INTFACELOCAL(&mreq->ipv6mr_multiaddr)) {
				ifp = &loif[0];
			} else {
				/*
				 * The route is looked up only to learn its
				 * interface and is released right after.
				 */
				ro.ro_rt = NULL;
				dst = (struct sockaddr_in6 *)&ro.ro_dst;
				bzero(dst, sizeof(*dst));
				dst->sin6_len = sizeof(struct sockaddr_in6);
				dst->sin6_family = AF_INET6;
				dst->sin6_addr = mreq->ipv6mr_multiaddr;
				rtalloc((struct route *)&ro);
				if (ro.ro_rt == NULL) {
					error = EADDRNOTAVAIL;
					break;
				}
				ifp = ro.ro_rt->rt_ifp;
				RTFREE(ro.ro_rt);
			}
		} else
			ifp = ifnet_byindex(mreq->ipv6mr_interface);

		/*
		 * See if we found an interface, and confirm that it
		 * supports multicast
		 */
		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
			error = EADDRNOTAVAIL;
			break;
		}
		/*
		 * Put interface index into the multicast address,
		 * if the address has link-local scope.
		 * Note that this modifies the request held in the mbuf.
		 */
		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
			mreq->ipv6mr_multiaddr.s6_addr16[1] =
			    htons(ifp->if_index);
		}
		/*
		 * See if the membership already exists.
		 */
		for (imm = im6o->im6o_memberships.lh_first;
		     imm != NULL; imm = imm->i6mm_chain.le_next)
			if (imm->i6mm_maddr->in6m_ifp == ifp &&
			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
					       &mreq->ipv6mr_multiaddr))
				break;
		if (imm != NULL) {
			error = EADDRINUSE;
			break;
		}
		/*
		 * Everything looks good; add a new record to the multicast
		 * address list for the given interface.
		 */
		imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK);
		if (imm == NULL) {
			error = ENOBUFS;
			break;
		}
		/*
		 * in6_addmulti() is handed &error; when it returns NULL the
		 * half-built membership record is dropped again.
		 */
		if ((imm->i6mm_maddr =
		     in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
			free(imm, M_IPMADDR);
			break;
		}
		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
		break;

	case IPV6_LEAVE_GROUP:
		/*
		 * Drop a multicast group membership.
		 * Group must be a valid IP6 multicast address.
		 */
		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
			error = EINVAL;
			break;
		}
		mreq = mtod(m, struct ipv6_mreq *);
		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
			/* Same privilege rule as for joining (see above). */
			if (suser(td)) {
				error = EACCES;
				break;
			}
		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
			error = EINVAL;
			break;
		}
		/*
		 * If an interface address was specified, get a pointer
		 * to its ifnet structure.
		 */
		if (mreq->ipv6mr_interface < 0
		 || if_index < mreq->ipv6mr_interface) {
			error = ENXIO;	/* XXX EINVAL? */
			break;
		}
		ifp = ifnet_byindex(mreq->ipv6mr_interface);
		/*
		 * Put interface index into the multicast address,
		 * if the address has link-local scope.
		 * Unlike the join case, the index comes straight from the
		 * request rather than from a resolved interface.
		 */
		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
			mreq->ipv6mr_multiaddr.s6_addr16[1]
				= htons(mreq->ipv6mr_interface);
		}

		/*
		 * Find the membership in the membership list.
		 * When no interface could be resolved (ifp == NULL) the
		 * first entry with a matching address is taken.
		 */
		for (imm = im6o->im6o_memberships.lh_first;
		     imm != NULL; imm = imm->i6mm_chain.le_next) {
			if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
			    &mreq->ipv6mr_multiaddr))
				break;
		}
		if (imm == NULL) {
			/* Unable to resolve interface */
			error = EADDRNOTAVAIL;
			break;
		}
		/*
		 * Give up the multicast address record to which the
		 * membership points.
		 */
		LIST_REMOVE(imm, i6mm_chain);
		in6_delmulti(imm->i6mm_maddr);
		free(imm, M_IPMADDR);
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	/*
	 * If all options have default values, no need to keep the option
	 * structure.  This is reached on error paths as well, so a structure
	 * allocated above for a request that then failed is released here.
	 */
	if (im6o->im6o_multicast_ifp == NULL &&
	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
	    im6o->im6o_memberships.lh_first == NULL) {
		free(*im6op, M_IPMOPTS);
		*im6op = NULL;
	}

	return (error);
}
2936
2937/*
2938 * Return the IP6 multicast options in response to user getsockopt().
2939 */
2940static int
2941ip6_getmoptions(optname, im6o, mp)
2942	int optname;
2943	struct ip6_moptions *im6o;
2944	struct mbuf **mp;
2945{
2946	u_int *hlim, *loop, *ifindex;
2947
2948	*mp = m_get(M_TRYWAIT, MT_HEADER);		/* XXX */
2949
2950	switch (optname) {
2951
2952	case IPV6_MULTICAST_IF:
2953		ifindex = mtod(*mp, u_int *);
2954		(*mp)->m_len = sizeof(u_int);
2955		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2956			*ifindex = 0;
2957		else
2958			*ifindex = im6o->im6o_multicast_ifp->if_index;
2959		return (0);
2960
2961	case IPV6_MULTICAST_HOPS:
2962		hlim = mtod(*mp, u_int *);
2963		(*mp)->m_len = sizeof(u_int);
2964		if (im6o == NULL)
2965			*hlim = ip6_defmcasthlim;
2966		else
2967			*hlim = im6o->im6o_multicast_hlim;
2968		return (0);
2969
2970	case IPV6_MULTICAST_LOOP:
2971		loop = mtod(*mp, u_int *);
2972		(*mp)->m_len = sizeof(u_int);
2973		if (im6o == NULL)
2974			*loop = ip6_defmcasthlim;
2975		else
2976			*loop = im6o->im6o_multicast_loop;
2977		return (0);
2978
2979	default:
2980		return (EOPNOTSUPP);
2981	}
2982}
2983
2984/*
2985 * Discard the IP6 multicast options.
2986 */
2987void
2988ip6_freemoptions(im6o)
2989	struct ip6_moptions *im6o;
2990{
2991	struct in6_multi_mship *imm;
2992
2993	if (im6o == NULL)
2994		return;
2995
2996	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2997		LIST_REMOVE(imm, i6mm_chain);
2998		if (imm->i6mm_maddr)
2999			in6_delmulti(imm->i6mm_maddr);
3000		free(imm, M_IPMADDR);
3001	}
3002	free(im6o, M_IPMOPTS);
3003}
3004
3005/*
3006 * Set IPv6 outgoing packet options based on advanced API.
3007 */
3008int
3009ip6_setpktopts(control, opt, stickyopt, priv, uproto)
3010	struct mbuf *control;
3011	struct ip6_pktopts *opt, *stickyopt;
3012	int priv, uproto;
3013{
3014	struct cmsghdr *cm = 0;
3015
3016	if (control == NULL || opt == NULL)
3017		return (EINVAL);
3018
3019	ip6_initpktopts(opt);
3020	if (stickyopt) {
3021		int error;
3022
3023		/*
3024		 * If stickyopt is provided, make a local copy of the options
3025		 * for this particular packet, then override them by ancillary
3026		 * objects.
3027		 * XXX: copypktopts() does not copy the cached route to a next
3028		 * hop (if any).  This is not very good in terms of efficiency,
3029		 * but we can allow this since this option should be rarely
3030		 * used.
3031		 */
3032		if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
3033			return (error);
3034	}
3035
3036	/*
3037	 * XXX: Currently, we assume all the optional information is stored
3038	 * in a single mbuf.
3039	 */
3040	if (control->m_next)
3041		return (EINVAL);
3042
3043	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
3044	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
3045		int error;
3046
3047		if (control->m_len < CMSG_LEN(0))
3048			return (EINVAL);
3049
3050		cm = mtod(control, struct cmsghdr *);
3051		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
3052			return (EINVAL);
3053		if (cm->cmsg_level != IPPROTO_IPV6)
3054			continue;
3055
3056		error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
3057		    cm->cmsg_len - CMSG_LEN(0), opt, priv, 0, 1, uproto);
3058		if (error)
3059			return (error);
3060	}
3061
3062	return (0);
3063}
3064
3065/*
3066 * Set a particular packet option, as a sticky option or an ancillary data
3067 * item.  "len" can be 0 only when it's a sticky option.
3068 * We have 4 cases of combination of "sticky" and "cmsg":
3069 * "sticky=0, cmsg=0": impossible
3070 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
3071 * "sticky=1, cmsg=0": RFC3542 socket option
3072 * "sticky=1, cmsg=1": RFC2292 socket option
3073 */
3074static int
3075ip6_setpktopt(optname, buf, len, opt, priv, sticky, cmsg, uproto)
3076	int optname, len, priv, sticky, cmsg, uproto;
3077	u_char *buf;
3078	struct ip6_pktopts *opt;
3079{
3080	int minmtupolicy, preftemp;
3081
3082	if (!sticky && !cmsg) {
3083#ifdef DIAGNOSTIC
3084		printf("ip6_setpktopt: impossible case\n");
3085#endif
3086		return (EINVAL);
3087	}
3088
3089	/*
3090	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3091	 * not be specified in the context of RFC3542.  Conversely,
3092	 * RFC3542 types should not be specified in the context of RFC2292.
3093	 */
3094	if (!cmsg) {
3095		switch (optname) {
3096		case IPV6_2292PKTINFO:
3097		case IPV6_2292HOPLIMIT:
3098		case IPV6_2292NEXTHOP:
3099		case IPV6_2292HOPOPTS:
3100		case IPV6_2292DSTOPTS:
3101		case IPV6_2292RTHDR:
3102		case IPV6_2292PKTOPTIONS:
3103			return (ENOPROTOOPT);
3104		}
3105	}
3106	if (sticky && cmsg) {
3107		switch (optname) {
3108		case IPV6_PKTINFO:
3109		case IPV6_HOPLIMIT:
3110		case IPV6_NEXTHOP:
3111		case IPV6_HOPOPTS:
3112		case IPV6_DSTOPTS:
3113		case IPV6_RTHDRDSTOPTS:
3114		case IPV6_RTHDR:
3115		case IPV6_USE_MIN_MTU:
3116		case IPV6_DONTFRAG:
3117		case IPV6_TCLASS:
3118		case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
3119			return (ENOPROTOOPT);
3120		}
3121	}
3122
3123	switch (optname) {
3124	case IPV6_2292PKTINFO:
3125	case IPV6_PKTINFO:
3126	{
3127		struct ifnet *ifp = NULL;
3128		struct in6_pktinfo *pktinfo;
3129
3130		if (len != sizeof(struct in6_pktinfo))
3131			return (EINVAL);
3132
3133		pktinfo = (struct in6_pktinfo *)buf;
3134
3135		/*
3136		 * An application can clear any sticky IPV6_PKTINFO option by
3137		 * doing a "regular" setsockopt with ipi6_addr being
3138		 * in6addr_any and ipi6_ifindex being zero.
3139		 * [RFC 3542, Section 6]
3140		 */
3141		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3142		    pktinfo->ipi6_ifindex == 0 &&
3143		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3144			ip6_clearpktopts(opt, optname);
3145			break;
3146		}
3147
3148		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3149		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3150			return (EINVAL);
3151		}
3152
3153		/* validate the interface index if specified. */
3154		if (pktinfo->ipi6_ifindex > if_index ||
3155		    pktinfo->ipi6_ifindex < 0) {
3156			 return (ENXIO);
3157		}
3158		if (pktinfo->ipi6_ifindex) {
3159			ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
3160			if (ifp == NULL)
3161				return (ENXIO);
3162		}
3163
3164		/*
3165		 * We store the address anyway, and let in6_selectsrc()
3166		 * validate the specified address.  This is because ipi6_addr
3167		 * may not have enough information about its scope zone, and
3168		 * we may need additional information (such as outgoing
3169		 * interface or the scope zone of a destination address) to
3170		 * disambiguate the scope.
3171		 * XXX: the delay of the validation may confuse the
3172		 * application when it is used as a sticky option.
3173		 */
3174		if (opt->ip6po_pktinfo == NULL) {
3175			opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
3176			    M_IP6OPT, M_NOWAIT);
3177			if (opt->ip6po_pktinfo == NULL)
3178				return (ENOBUFS);
3179		}
3180		bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
3181		break;
3182	}
3183
3184	case IPV6_2292HOPLIMIT:
3185	case IPV6_HOPLIMIT:
3186	{
3187		int *hlimp;
3188
3189		/*
3190		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3191		 * to simplify the ordering among hoplimit options.
3192		 */
3193		if (optname == IPV6_HOPLIMIT && sticky)
3194			return (ENOPROTOOPT);
3195
3196		if (len != sizeof(int))
3197			return (EINVAL);
3198		hlimp = (int *)buf;
3199		if (*hlimp < -1 || *hlimp > 255)
3200			return (EINVAL);
3201
3202		opt->ip6po_hlim = *hlimp;
3203		break;
3204	}
3205
3206	case IPV6_TCLASS:
3207	{
3208		int tclass;
3209
3210		if (len != sizeof(int))
3211			return (EINVAL);
3212		tclass = *(int *)buf;
3213		if (tclass < -1 || tclass > 255)
3214			return (EINVAL);
3215
3216		opt->ip6po_tclass = tclass;
3217		break;
3218	}
3219
3220	case IPV6_2292NEXTHOP:
3221	case IPV6_NEXTHOP:
3222		if (!priv)
3223			return (EPERM);
3224
3225		if (len == 0) {	/* just remove the option */
3226			ip6_clearpktopts(opt, IPV6_NEXTHOP);
3227			break;
3228		}
3229
3230		/* check if cmsg_len is large enough for sa_len */
3231		if (len < sizeof(struct sockaddr) || len < *buf)
3232			return (EINVAL);
3233
3234		switch (((struct sockaddr *)buf)->sa_family) {
3235		case AF_INET6:
3236		{
3237			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3238#if 0
3239			int error;
3240#endif
3241
3242			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3243				return (EINVAL);
3244
3245			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3246			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3247				return (EINVAL);
3248			}
3249#if 0
3250			if ((error = scope6_check_id(sa6, ip6_use_defzone))
3251			    != 0) {
3252				return (error);
3253			}
3254#endif
3255			sa6->sin6_scope_id = 0; /* XXX */
3256			break;
3257		}
3258		case AF_LINK:	/* should eventually be supported */
3259		default:
3260			return (EAFNOSUPPORT);
3261		}
3262
3263		/* turn off the previous option, then set the new option. */
3264		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3265		opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_WAITOK);
3266		bcopy(buf, opt->ip6po_nexthop, *buf);
3267		break;
3268
3269	case IPV6_2292HOPOPTS:
3270	case IPV6_HOPOPTS:
3271	{
3272		struct ip6_hbh *hbh;
3273		int hbhlen;
3274
3275		/*
3276		 * XXX: We don't allow a non-privileged user to set ANY HbH
3277		 * options, since per-option restriction has too much
3278		 * overhead.
3279		 */
3280		if (!priv)
3281			return (EPERM);
3282
3283		if (len == 0) {
3284			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3285			break;	/* just remove the option */
3286		}
3287
3288		/* message length validation */
3289		if (len < sizeof(struct ip6_hbh))
3290			return (EINVAL);
3291		hbh = (struct ip6_hbh *)buf;
3292		hbhlen = (hbh->ip6h_len + 1) << 3;
3293		if (len != hbhlen)
3294			return (EINVAL);
3295
3296		/* turn off the previous option, then set the new option. */
3297		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3298		opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_WAITOK);
3299		bcopy(hbh, opt->ip6po_hbh, hbhlen);
3300
3301		break;
3302	}
3303
3304	case IPV6_2292DSTOPTS:
3305	case IPV6_DSTOPTS:
3306	case IPV6_RTHDRDSTOPTS:
3307	{
3308		struct ip6_dest *dest, **newdest = NULL;
3309		int destlen;
3310
3311		if (!priv)	/* XXX: see the comment for IPV6_HOPOPTS */
3312			return (EPERM);
3313
3314		if (len == 0) {
3315			ip6_clearpktopts(opt, optname);
3316			break;	/* just remove the option */
3317		}
3318
3319		/* message length validation */
3320		if (len < sizeof(struct ip6_dest))
3321			return (EINVAL);
3322		dest = (struct ip6_dest *)buf;
3323		destlen = (dest->ip6d_len + 1) << 3;
3324		if (len != destlen)
3325			return (EINVAL);
3326
3327		/*
3328		 * Determine the position that the destination options header
3329		 * should be inserted; before or after the routing header.
3330		 */
3331		switch (optname) {
3332		case IPV6_2292DSTOPTS:
3333			/*
3334			 * The old advacned API is ambiguous on this point.
3335			 * Our approach is to determine the position based
3336			 * according to the existence of a routing header.
3337			 * Note, however, that this depends on the order of the
3338			 * extension headers in the ancillary data; the 1st
3339			 * part of the destination options header must appear
3340			 * before the routing header in the ancillary data,
3341			 * too.
3342			 * RFC3542 solved the ambiguity by introducing
3343			 * separate ancillary data or option types.
3344			 */
3345			if (opt->ip6po_rthdr == NULL)
3346				newdest = &opt->ip6po_dest1;
3347			else
3348				newdest = &opt->ip6po_dest2;
3349			break;
3350		case IPV6_RTHDRDSTOPTS:
3351			newdest = &opt->ip6po_dest1;
3352			break;
3353		case IPV6_DSTOPTS:
3354			newdest = &opt->ip6po_dest2;
3355			break;
3356		}
3357
3358		/* turn off the previous option, then set the new option. */
3359		ip6_clearpktopts(opt, optname);
3360		*newdest = malloc(destlen, M_IP6OPT, M_WAITOK);
3361		bcopy(dest, *newdest, destlen);
3362
3363		break;
3364	}
3365
3366	case IPV6_2292RTHDR:
3367	case IPV6_RTHDR:
3368	{
3369		struct ip6_rthdr *rth;
3370		int rthlen;
3371
3372		if (len == 0) {
3373			ip6_clearpktopts(opt, IPV6_RTHDR);
3374			break;	/* just remove the option */
3375		}
3376
3377		/* message length validation */
3378		if (len < sizeof(struct ip6_rthdr))
3379			return (EINVAL);
3380		rth = (struct ip6_rthdr *)buf;
3381		rthlen = (rth->ip6r_len + 1) << 3;
3382		if (len != rthlen)
3383			return (EINVAL);
3384
3385		switch (rth->ip6r_type) {
3386		case IPV6_RTHDR_TYPE_0:
3387			if (rth->ip6r_len == 0)	/* must contain one addr */
3388				return (EINVAL);
3389			if (rth->ip6r_len % 2) /* length must be even */
3390				return (EINVAL);
3391			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3392				return (EINVAL);
3393			break;
3394		default:
3395			return (EINVAL);	/* not supported */
3396		}
3397
3398		/* turn off the previous option */
3399		ip6_clearpktopts(opt, IPV6_RTHDR);
3400		opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_WAITOK);
3401		bcopy(rth, opt->ip6po_rthdr, rthlen);
3402
3403		break;
3404	}
3405
3406	case IPV6_USE_MIN_MTU:
3407		if (len != sizeof(int))
3408			return (EINVAL);
3409		minmtupolicy = *(int *)buf;
3410		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3411		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3412		    minmtupolicy != IP6PO_MINMTU_ALL) {
3413			return (EINVAL);
3414		}
3415		opt->ip6po_minmtu = minmtupolicy;
3416		break;
3417
3418	case IPV6_DONTFRAG:
3419		if (len != sizeof(int))
3420			return (EINVAL);
3421
3422		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3423			/*
3424			 * we ignore this option for TCP sockets.
3425			 * (RFC3542 leaves this case unspecified.)
3426			 */
3427			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3428		} else
3429			opt->ip6po_flags |= IP6PO_DONTFRAG;
3430		break;
3431
3432	case IPV6_PREFER_TEMPADDR:
3433		if (len != sizeof(int))
3434			return (EINVAL);
3435		preftemp = *(int *)buf;
3436		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3437		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3438		    preftemp != IP6PO_TEMPADDR_PREFER) {
3439			return (EINVAL);
3440		}
3441		opt->ip6po_prefer_tempaddr = preftemp;
3442		break;
3443
3444	default:
3445		return (ENOPROTOOPT);
3446	} /* end of switch */
3447
3448	return (0);
3449}
3450
3451/*
3452 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3453 * packet to the input queue of a specified interface.  Note that this
3454 * calls the output routine of the loopback "driver", but with an interface
3455 * pointer that might NOT be &loif -- easier than replicating that code here.
3456 */
3457void
3458ip6_mloopback(ifp, m, dst)
3459	struct ifnet *ifp;
3460	struct mbuf *m;
3461	struct sockaddr_in6 *dst;
3462{
3463	struct mbuf *copym;
3464	struct ip6_hdr *ip6;
3465
3466	copym = m_copy(m, 0, M_COPYALL);
3467	if (copym == NULL)
3468		return;
3469
3470	/*
3471	 * Make sure to deep-copy IPv6 header portion in case the data
3472	 * is in an mbuf cluster, so that we can safely override the IPv6
3473	 * header portion later.
3474	 */
3475	if ((copym->m_flags & M_EXT) != 0 ||
3476	    copym->m_len < sizeof(struct ip6_hdr)) {
3477		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3478		if (copym == NULL)
3479			return;
3480	}
3481
3482#ifdef DIAGNOSTIC
3483	if (copym->m_len < sizeof(*ip6)) {
3484		m_freem(copym);
3485		return;
3486	}
3487#endif
3488
3489	ip6 = mtod(copym, struct ip6_hdr *);
3490	/*
3491	 * clear embedded scope identifiers if necessary.
3492	 * in6_clearscope will touch the addresses only when necessary.
3493	 */
3494	in6_clearscope(&ip6->ip6_src);
3495	in6_clearscope(&ip6->ip6_dst);
3496
3497	(void)if_simloop(ifp, copym, dst->sin6_family, 0);
3498}
3499
3500/*
3501 * Chop IPv6 header off from the payload.
3502 */
3503static int
3504ip6_splithdr(m, exthdrs)
3505	struct mbuf *m;
3506	struct ip6_exthdrs *exthdrs;
3507{
3508	struct mbuf *mh;
3509	struct ip6_hdr *ip6;
3510
3511	ip6 = mtod(m, struct ip6_hdr *);
3512	if (m->m_len > sizeof(*ip6)) {
3513		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3514		if (mh == 0) {
3515			m_freem(m);
3516			return ENOBUFS;
3517		}
3518		M_MOVE_PKTHDR(mh, m);
3519		MH_ALIGN(mh, sizeof(*ip6));
3520		m->m_len -= sizeof(*ip6);
3521		m->m_data += sizeof(*ip6);
3522		mh->m_next = m;
3523		m = mh;
3524		m->m_len = sizeof(*ip6);
3525		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3526	}
3527	exthdrs->ip6e_ip6 = m;
3528	return 0;
3529}
3530
3531/*
3532 * Compute IPv6 extension header length.
3533 */
3534int
3535ip6_optlen(in6p)
3536	struct in6pcb *in6p;
3537{
3538	int len;
3539
3540	if (!in6p->in6p_outputopts)
3541		return 0;
3542
3543	len = 0;
3544#define elen(x) \
3545    (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3546
3547	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3548	if (in6p->in6p_outputopts->ip6po_rthdr)
3549		/* dest1 is valid with rthdr only */
3550		len += elen(in6p->in6p_outputopts->ip6po_dest1);
3551	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3552	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3553	return len;
3554#undef elen
3555}
3556