ip6_output.c revision 128019
1/*	$FreeBSD: head/sys/netinet6/ip6_output.c 128019 2004-04-07 20:46:16Z imp $	*/
2/*	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1988, 1990, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 4. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
62 */
63
64#include "opt_ip6fw.h"
65#include "opt_inet.h"
66#include "opt_inet6.h"
67#include "opt_ipsec.h"
68#include "opt_pfil_hooks.h"
69#include "opt_random_ip_id.h"
70
71#include <sys/param.h>
72#include <sys/malloc.h>
73#include <sys/mbuf.h>
74#include <sys/proc.h>
75#include <sys/errno.h>
76#include <sys/protosw.h>
77#include <sys/socket.h>
78#include <sys/socketvar.h>
79#include <sys/systm.h>
80#include <sys/kernel.h>
81
82#include <net/if.h>
83#include <net/route.h>
84#ifdef PFIL_HOOKS
85#include <net/pfil.h>
86#endif
87
88#include <netinet/in.h>
89#include <netinet/in_var.h>
90#include <netinet6/in6_var.h>
91#include <netinet/ip6.h>
92#include <netinet/icmp6.h>
93#include <netinet6/ip6_var.h>
94#include <netinet/in_pcb.h>
95#include <netinet/tcp_var.h>
96#include <netinet6/nd6.h>
97
98#ifdef IPSEC
99#include <netinet6/ipsec.h>
100#ifdef INET6
101#include <netinet6/ipsec6.h>
102#endif
103#include <netkey/key.h>
104#endif /* IPSEC */
105
106#ifdef FAST_IPSEC
107#include <netipsec/ipsec.h>
108#include <netipsec/ipsec6.h>
109#include <netipsec/key.h>
110#endif /* FAST_IPSEC */
111
112#include <netinet6/ip6_fw.h>
113
114#include <net/net_osdep.h>
115
116#include <netinet6/ip6protosw.h>
117
/*
 * File-local malloc type M_IPMOPTS, registered with the short name
 * "ip6_moptions" and the description "internet multicast options".
 * NOTE(review): no allocation using this type is visible in this part of
 * the file; presumably it tags multicast option storage -- confirm.
 */
118static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
119
/*
 * Working set of mbufs used by ip6_output() while it assembles the header
 * chain of an outgoing packet.  ip6_output() zeroes the structure, fills
 * one mbuf per extension header requested through the packet options, and
 * later links them between the IPv6 header and the payload.  A NULL member
 * means that header is not present.
 */
120struct ip6_exthdrs {
121	struct mbuf *ip6e_ip6;		/* IPv6 header mbuf; read back after ip6_splithdr() */
122	struct mbuf *ip6e_hbh;		/* Hop-by-Hop options header */
123	struct mbuf *ip6e_dest1;	/* Destination options header, 1st part (only built with a routing header) */
124	struct mbuf *ip6e_rthdr;	/* Routing header */
125	struct mbuf *ip6e_dest2;	/* Destination options header, 2nd part */
126};
127
/*
 * Forward declarations of the file-local (static) helpers, written with
 * the __P(()) prototype wrapper.  Of these, ip6_output() visibly calls
 * ip6_copyexthdr() (through MAKE_EXTHDR), ip6_splithdr(),
 * ip6_insert_jumboopt() and ip6_getpmtu(); each returns an error code
 * that ip6_output() treats as non-zero on failure.
 */
128static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
129			   int, int));
130static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
131	struct socket *, struct sockopt *));
132static int ip6_getpcbopt __P((struct ip6_pktopts *, int, struct sockopt *));
133static int ip6_setpktoption __P((int, u_char *, int, struct ip6_pktopts *, int,
134	int, int, int));
135
136static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
137static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
138static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));
139static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
140	struct ip6_frag **));
141static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));
142static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));
143static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
144	struct ifnet *, struct in6_addr *, u_long *, int *));
145
146
147/*
148 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
149 * header (with pri, len, nxt, hlim, src, dst).
150 * This function may modify ver and hlim only.
151 * The mbuf chain containing the packet will be freed.
152 * The mbuf opt, if present, will not be freed.
153 *
154 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
155 * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
156 * which is rt_rmx.rmx_mtu.
157 */
158int
159ip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
160	struct mbuf *m0;
161	struct ip6_pktopts *opt;
162	struct route_in6 *ro;
163	int flags;
164	struct ip6_moptions *im6o;
165	struct ifnet **ifpp;		/* XXX: just for statistics */
166	struct inpcb *inp;
167{
168	struct ip6_hdr *ip6, *mhip6;
169	struct ifnet *ifp, *origifp;
170	struct mbuf *m = m0;
171	int hlen, tlen, len, off;
172	struct route_in6 ip6route;
173	struct sockaddr_in6 *dst;
174	int error = 0;
175	struct in6_ifaddr *ia = NULL;
176	u_long mtu;
177	int alwaysfrag, dontfrag;
178	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
179	struct ip6_exthdrs exthdrs;
180	struct in6_addr finaldst;
181	struct route_in6 *ro_pmtu = NULL;
182	int hdrsplit = 0;
183	int needipsec = 0;
184#if defined(IPSEC) || defined(FAST_IPSEC)
185	int needipsectun = 0;
186	struct secpolicy *sp = NULL;
187#endif /*IPSEC || FAST_IPSEC*/
188
189	ip6 = mtod(m, struct ip6_hdr *);
190	finaldst = ip6->ip6_dst;
191
192#define MAKE_EXTHDR(hp, mp)						\
193    do {								\
194	if (hp) {							\
195		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
196		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
197		    ((eh)->ip6e_len + 1) << 3);				\
198		if (error)						\
199			goto freehdrs;					\
200	}								\
201    } while (/*CONSTCOND*/ 0)
202
203	bzero(&exthdrs, sizeof(exthdrs));
204
205	if (opt) {
206		/* Hop-by-Hop options header */
207		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
208		/* Destination options header(1st part) */
209		if (opt->ip6po_rthdr) {
210			/*
211			 * Destination options header(1st part)
212			 * This only makes sense with a routing header.
213			 * See Section 9.2 of RFC 3542.
214			 * Disabling this part just for MIP6 convenience is
215			 * a bad idea.  We need to think carefully about a
216			 * way to make the advanced API coexist with MIP6
217			 * options, which might automatically be inserted in
218			 * the kernel.
219			 */
220			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
221		}
222		/* Routing header */
223		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
224		/* Destination options header(2nd part) */
225		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
226	}
227
228#ifdef IPSEC
229	/* get a security policy for this packet */
230	if (inp == NULL)
231		sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
232	else
233		sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
234
235	if (sp == NULL) {
236		ipsec6stat.out_inval++;
237		goto freehdrs;
238	}
239
240	error = 0;
241
242	/* check policy */
243	switch (sp->policy) {
244	case IPSEC_POLICY_DISCARD:
245		/*
246		 * This packet is just discarded.
247		 */
248		ipsec6stat.out_polvio++;
249		goto freehdrs;
250
251	case IPSEC_POLICY_BYPASS:
252	case IPSEC_POLICY_NONE:
253		/* no need to do IPsec. */
254		needipsec = 0;
255		break;
256
257	case IPSEC_POLICY_IPSEC:
258		if (sp->req == NULL) {
259			/* acquire a policy */
260			error = key_spdacquire(sp);
261			goto freehdrs;
262		}
263		needipsec = 1;
264		break;
265
266	case IPSEC_POLICY_ENTRUST:
267	default:
268		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
269	}
270#endif /* IPSEC */
271#ifdef FAST_IPSEC
272	/* get a security policy for this packet */
273	if (inp == NULL)
274		sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
275	else
276		sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
277
278	if (sp == NULL) {
279		newipsecstat.ips_out_inval++;
280		goto freehdrs;
281	}
282
283	error = 0;
284
285	/* check policy */
286	switch (sp->policy) {
287	case IPSEC_POLICY_DISCARD:
288		/*
289		 * This packet is just discarded.
290		 */
291		newipsecstat.ips_out_polvio++;
292		goto freehdrs;
293
294	case IPSEC_POLICY_BYPASS:
295	case IPSEC_POLICY_NONE:
296		/* no need to do IPsec. */
297		needipsec = 0;
298		break;
299
300	case IPSEC_POLICY_IPSEC:
301		if (sp->req == NULL) {
302			/* acquire a policy */
303			error = key_spdacquire(sp);
304			goto freehdrs;
305		}
306		needipsec = 1;
307		break;
308
309	case IPSEC_POLICY_ENTRUST:
310	default:
311		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
312	}
313#endif /* FAST_IPSEC */
314
315	/*
316	 * Calculate the total length of the extension header chain.
317	 * Keep the length of the unfragmentable part for fragmentation.
318	 */
319	optlen = 0;
320	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
321	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
322	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
323	unfragpartlen = optlen + sizeof(struct ip6_hdr);
324	/* NOTE: we don't add AH/ESP length here. do that later. */
325	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
326
327	/*
328	 * If we need IPsec, or there is at least one extension header,
329	 * separate IP6 header from the payload.
330	 */
331	if ((needipsec || optlen) && !hdrsplit) {
332		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
333			m = NULL;
334			goto freehdrs;
335		}
336		m = exthdrs.ip6e_ip6;
337		hdrsplit++;
338	}
339
340	/* adjust pointer */
341	ip6 = mtod(m, struct ip6_hdr *);
342
343	/* adjust mbuf packet header length */
344	m->m_pkthdr.len += optlen;
345	plen = m->m_pkthdr.len - sizeof(*ip6);
346
347	/* If this is a jumbo payload, insert a jumbo payload option. */
348	if (plen > IPV6_MAXPACKET) {
349		if (!hdrsplit) {
350			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
351				m = NULL;
352				goto freehdrs;
353			}
354			m = exthdrs.ip6e_ip6;
355			hdrsplit++;
356		}
357		/* adjust pointer */
358		ip6 = mtod(m, struct ip6_hdr *);
359		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
360			goto freehdrs;
361		ip6->ip6_plen = 0;
362	} else
363		ip6->ip6_plen = htons(plen);
364
365	/*
366	 * Concatenate headers and fill in next header fields.
367	 * Here we have, on "m"
368	 *	IPv6 payload
369	 * and we insert headers accordingly.  Finally, we should be getting:
370	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
371	 *
372	 * during the header composing process, "m" points to IPv6 header.
373	 * "mprev" points to an extension header prior to esp.
374	 */
375	{
376		u_char *nexthdrp = &ip6->ip6_nxt;
377		struct mbuf *mprev = m;
378
379		/*
380		 * we treat dest2 specially.  this makes IPsec processing
381		 * much easier.  the goal here is to make mprev point the
382		 * mbuf prior to dest2.
383		 *
384		 * result: IPv6 dest2 payload
385		 * m and mprev will point to IPv6 header.
386		 */
387		if (exthdrs.ip6e_dest2) {
388			if (!hdrsplit)
389				panic("assumption failed: hdr not split");
390			exthdrs.ip6e_dest2->m_next = m->m_next;
391			m->m_next = exthdrs.ip6e_dest2;
392			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
393			ip6->ip6_nxt = IPPROTO_DSTOPTS;
394		}
395
396#define MAKE_CHAIN(m, mp, p, i)\
397    do {\
398	if (m) {\
399		if (!hdrsplit) \
400			panic("assumption failed: hdr not split"); \
401		*mtod((m), u_char *) = *(p);\
402		*(p) = (i);\
403		p = mtod((m), u_char *);\
404		(m)->m_next = (mp)->m_next;\
405		(mp)->m_next = (m);\
406		(mp) = (m);\
407	}\
408    } while (/*CONSTCOND*/ 0)
409		/*
410		 * result: IPv6 hbh dest1 rthdr dest2 payload
411		 * m will point to IPv6 header.  mprev will point to the
412		 * extension header prior to dest2 (rthdr in the above case).
413		 */
414		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
415		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
416		    IPPROTO_DSTOPTS);
417		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
418		    IPPROTO_ROUTING);
419
420#if defined(IPSEC) || defined(FAST_IPSEC)
421		if (!needipsec)
422			goto skip_ipsec2;
423
424		/*
425		 * pointers after IPsec headers are not valid any more.
426		 * other pointers need a great care too.
427		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
428		 */
429		exthdrs.ip6e_dest2 = NULL;
430
431	    {
432		struct ip6_rthdr *rh = NULL;
433		int segleft_org = 0;
434		struct ipsec_output_state state;
435
436		if (exthdrs.ip6e_rthdr) {
437			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
438			segleft_org = rh->ip6r_segleft;
439			rh->ip6r_segleft = 0;
440		}
441
442		bzero(&state, sizeof(state));
443		state.m = m;
444		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
445		    &needipsectun);
446		m = state.m;
447		if (error) {
448			/* mbuf is already reclaimed in ipsec6_output_trans. */
449			m = NULL;
450			switch (error) {
451			case EHOSTUNREACH:
452			case ENETUNREACH:
453			case EMSGSIZE:
454			case ENOBUFS:
455			case ENOMEM:
456				break;
457			default:
458				printf("ip6_output (ipsec): error code %d\n", error);
459				/* FALLTHROUGH */
460			case ENOENT:
461				/* don't show these error codes to the user */
462				error = 0;
463				break;
464			}
465			goto bad;
466		}
467		if (exthdrs.ip6e_rthdr) {
468			/* ah6_output doesn't modify mbuf chain */
469			rh->ip6r_segleft = segleft_org;
470		}
471	    }
472skip_ipsec2:;
473#endif
474	}
475
476	/*
477	 * If there is a routing header, replace the destination address field
478	 * with the first hop of the routing header.
479	 */
480	if (exthdrs.ip6e_rthdr) {
481		struct ip6_rthdr *rh =
482			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
483						  struct ip6_rthdr *));
484		struct ip6_rthdr0 *rh0;
485		struct in6_addr *addrs;
486
487		switch (rh->ip6r_type) {
488		case IPV6_RTHDR_TYPE_0:
489			 rh0 = (struct ip6_rthdr0 *)rh;
490			 addrs = (struct in6_addr *)(rh0 + 1);
491
492			 ip6->ip6_dst = *addrs;
493			 bcopy((caddr_t)(addrs + 1), (caddr_t)addrs,
494			       sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1)
495				 );
496			 *(addrs + rh0->ip6r0_segleft - 1) = finaldst;
497			 break;
498		default:	/* is it possible? */
499			 error = EINVAL;
500			 goto bad;
501		}
502	}
503
504	/* Source address validation */
505	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
506	    (flags & IPV6_DADOUTPUT) == 0) {
507		error = EOPNOTSUPP;
508		ip6stat.ip6s_badscope++;
509		goto bad;
510	}
511	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
512		error = EOPNOTSUPP;
513		ip6stat.ip6s_badscope++;
514		goto bad;
515	}
516
517	ip6stat.ip6s_localout++;
518
519	/*
520	 * Route packet.
521	 */
522	if (ro == 0) {
523		ro = &ip6route;
524		bzero((caddr_t)ro, sizeof(*ro));
525	}
526	ro_pmtu = ro;
527	if (opt && opt->ip6po_rthdr)
528		ro = &opt->ip6po_route;
529	dst = (struct sockaddr_in6 *)&ro->ro_dst;
530
531	/*
532	 * If there is a cached route,
533	 * check that it is to the same destination
534	 * and is still up. If not, free it and try again.
535	 */
536	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
537			 dst->sin6_family != AF_INET6 ||
538			 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
539		RTFREE(ro->ro_rt);
540		ro->ro_rt = (struct rtentry *)0;
541	}
542	if (ro->ro_rt == 0) {
543		bzero(dst, sizeof(*dst));
544		dst->sin6_family = AF_INET6;
545		dst->sin6_len = sizeof(struct sockaddr_in6);
546		dst->sin6_addr = ip6->ip6_dst;
547	}
548
549 	/*
550	 * if specified, try to fill in the traffic class field.
551	 * do not override if a non-zero value is already set.
552	 * we check the diffserv field and the ecn field separately.
553	 */
554	if (opt && opt->ip6po_tclass >= 0) {
555		int mask = 0;
556
557		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
558			mask |= 0xfc;
559		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
560			mask |= 0x03;
561		if (mask != 0)
562			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
563	}
564
565	/* fill in or override the hop limit field, if necessary. */
566	if (opt && opt->ip6po_hlim != -1)
567		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
568	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
569		if (im6o != NULL)
570			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
571		else
572			ip6->ip6_hlim = ip6_defmcasthlim;
573	}
574
575#if defined(IPSEC) || defined(FAST_IPSEC)
576	if (needipsec && needipsectun) {
577		struct ipsec_output_state state;
578
579		/*
580		 * All the extension headers will become inaccessible
581		 * (since they can be encrypted).
582		 * Don't panic, we need no more updates to extension headers
583		 * on inner IPv6 packet (since they are now encapsulated).
584		 *
585		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
586		 */
587		bzero(&exthdrs, sizeof(exthdrs));
588		exthdrs.ip6e_ip6 = m;
589
590		bzero(&state, sizeof(state));
591		state.m = m;
592		state.ro = (struct route *)ro;
593		state.dst = (struct sockaddr *)dst;
594
595		error = ipsec6_output_tunnel(&state, sp, flags);
596
597		m = state.m;
598		ro = (struct route_in6 *)state.ro;
599		dst = (struct sockaddr_in6 *)state.dst;
600		if (error) {
601			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
602			m0 = m = NULL;
603			m = NULL;
604			switch (error) {
605			case EHOSTUNREACH:
606			case ENETUNREACH:
607			case EMSGSIZE:
608			case ENOBUFS:
609			case ENOMEM:
610				break;
611			default:
612				printf("ip6_output (ipsec): error code %d\n", error);
613				/* FALLTHROUGH */
614			case ENOENT:
615				/* don't show these error codes to the user */
616				error = 0;
617				break;
618			}
619			goto bad;
620		}
621
622		exthdrs.ip6e_ip6 = m;
623	}
624#endif /* IPSEC */
625
626	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
627		/* Unicast */
628
629#define ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
630#define sin6tosa(sin6)	((struct sockaddr *)(sin6))
631		/* xxx
632		 * interface selection comes here
633		 * if an interface is specified from an upper layer,
634		 * ifp must point it.
635		 */
636		if (ro->ro_rt == 0) {
637			/*
638			 * non-bsdi always clone routes, if parent is
639			 * PRF_CLONING.
640			 */
641			rtalloc((struct route *)ro);
642		}
643		if (ro->ro_rt == 0) {
644			ip6stat.ip6s_noroute++;
645			error = EHOSTUNREACH;
646			/* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
647			goto bad;
648		}
649		/* XXX rt not locked */
650		ia = ifatoia6(ro->ro_rt->rt_ifa);
651		ifp = ro->ro_rt->rt_ifp;
652		ro->ro_rt->rt_rmx.rmx_pksent++;
653		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
654			dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
655		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
656
657		in6_ifstat_inc(ifp, ifs6_out_request);
658
659		/*
660		 * Check if the outgoing interface conflicts with
661		 * the interface specified by ifi6_ifindex (if specified).
662		 * Note that loopback interface is always okay.
663		 * (this may happen when we are sending a packet to one of
664		 *  our own addresses.)
665		 */
666		if (opt && opt->ip6po_pktinfo
667		 && opt->ip6po_pktinfo->ipi6_ifindex) {
668			if (!(ifp->if_flags & IFF_LOOPBACK)
669			 && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
670				ip6stat.ip6s_noroute++;
671				in6_ifstat_inc(ifp, ifs6_out_discard);
672				error = EHOSTUNREACH;
673				goto bad;
674			}
675		}
676
677		if (opt && opt->ip6po_hlim != -1)
678			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
679	} else {
680		/* Multicast */
681		struct	in6_multi *in6m;
682
683		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
684
685		/*
686		 * See if the caller provided any multicast options
687		 */
688		ifp = NULL;
689		if (im6o != NULL) {
690			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
691			if (im6o->im6o_multicast_ifp != NULL)
692				ifp = im6o->im6o_multicast_ifp;
693		} else
694			ip6->ip6_hlim = ip6_defmcasthlim;
695
696		/*
697		 * See if the caller provided the outgoing interface
698		 * as an ancillary data.
699		 * Boundary check for ifindex is assumed to be already done.
700		 */
701		if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex)
702			ifp = ifnet_byindex(opt->ip6po_pktinfo->ipi6_ifindex);
703
704		/*
705		 * If the destination is a node-local scope multicast,
706		 * the packet should be loop-backed only.
707		 */
708		if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
709			/*
710			 * If the outgoing interface is already specified,
711			 * it should be a loopback interface.
712			 */
713			if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
714				ip6stat.ip6s_badscope++;
715				error = ENETUNREACH; /* XXX: better error? */
716				/* XXX correct ifp? */
717				in6_ifstat_inc(ifp, ifs6_out_discard);
718				goto bad;
719			} else {
720				ifp = &loif[0];
721			}
722		}
723
724		if (opt && opt->ip6po_hlim != -1)
725			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
726
727		/*
728		 * If caller did not provide an interface lookup a
729		 * default in the routing table.  This is either a
730		 * default for the specified group (i.e. a host
731		 * route), or a multicast default (a route for the
732		 * ``net'' ff00::/8).
733		 */
734		if (ifp == NULL) {
735			if (ro->ro_rt == 0)
736				ro->ro_rt = rtalloc1((struct sockaddr *)
737						&ro->ro_dst, 0, 0UL);
738			else
739				RT_LOCK(ro->ro_rt);
740			if (ro->ro_rt == 0) {
741				ip6stat.ip6s_noroute++;
742				error = EHOSTUNREACH;
743				/* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
744				goto bad;
745			}
746			ia = ifatoia6(ro->ro_rt->rt_ifa);
747			ifp = ro->ro_rt->rt_ifp;
748			ro->ro_rt->rt_rmx.rmx_pksent++;
749			RT_UNLOCK(ro->ro_rt);
750		}
751
752		if ((flags & IPV6_FORWARDING) == 0)
753			in6_ifstat_inc(ifp, ifs6_out_request);
754		in6_ifstat_inc(ifp, ifs6_out_mcast);
755
756		/*
757		 * Confirm that the outgoing interface supports multicast.
758		 */
759		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
760			ip6stat.ip6s_noroute++;
761			in6_ifstat_inc(ifp, ifs6_out_discard);
762			error = ENETUNREACH;
763			goto bad;
764		}
765		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
766		if (in6m != NULL &&
767		   (im6o == NULL || im6o->im6o_multicast_loop)) {
768			/*
769			 * If we belong to the destination multicast group
770			 * on the outgoing interface, and the caller did not
771			 * forbid loopback, loop back a copy.
772			 */
773			ip6_mloopback(ifp, m, dst);
774		} else {
775			/*
776			 * If we are acting as a multicast router, perform
777			 * multicast forwarding as if the packet had just
778			 * arrived on the interface to which we are about
779			 * to send.  The multicast forwarding function
780			 * recursively calls this function, using the
781			 * IPV6_FORWARDING flag to prevent infinite recursion.
782			 *
783			 * Multicasts that are looped back by ip6_mloopback(),
784			 * above, will be forwarded by the ip6_input() routine,
785			 * if necessary.
786			 */
787			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
788				if (ip6_mforward(ip6, ifp, m) != 0) {
789					m_freem(m);
790					goto done;
791				}
792			}
793		}
794		/*
795		 * Multicasts with a hoplimit of zero may be looped back,
796		 * above, but must not be transmitted on a network.
797		 * Also, multicasts addressed to the loopback interface
798		 * are not sent -- the above call to ip6_mloopback() will
799		 * loop back a copy if this host actually belongs to the
800		 * destination group on the loopback interface.
801		 */
802		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
803		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
804			m_freem(m);
805			goto done;
806		}
807	}
808
809	/*
810	 * Fill the outgoing interface to tell the upper layer
811	 * to increment per-interface statistics.
812	 */
813	if (ifpp)
814		*ifpp = ifp;
815
816	/* Determine path MTU. */
817	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
818	    &alwaysfrag)) != 0)
819		goto bad;
820
821	/*
822	 * The caller of this function may specify to use the minimum MTU
823	 * in some cases.
824	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
825	 * setting.  The logic is a bit complicated; by default, unicast
826	 * packets will follow path MTU while multicast packets will be sent at
827	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
828	 * including unicast ones will be sent at the minimum MTU.  Multicast
829	 * packets will always be sent at the minimum MTU unless
830	 * IP6PO_MINMTU_DISABLE is explicitly specified.
831	 * See RFC 3542 for more details.
832	 */
833	if (mtu > IPV6_MMTU) {
834		if ((flags & IPV6_MINMTU))
835			mtu = IPV6_MMTU;
836		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
837			mtu = IPV6_MMTU;
838		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
839			 (opt == NULL ||
840			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
841			mtu = IPV6_MMTU;
842		}
843	}
844
845	/* Fake scoped addresses */
846	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
847		/*
848		 * If source or destination address is a scoped address, and
849		 * the packet is going to be sent to a loopback interface,
850		 * we should keep the original interface.
851		 */
852
853		/*
854		 * XXX: this is a very experimental and temporary solution.
855		 * We eventually have sockaddr_in6 and use the sin6_scope_id
856		 * field of the structure here.
857		 * We rely on the consistency between two scope zone ids
858		 * of source and destination, which should already be assured.
859		 * Larger scopes than link will be supported in the future.
860		 */
861		origifp = NULL;
862		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
863			origifp = ifnet_byindex(ntohs(ip6->ip6_src.s6_addr16[1]));
864		else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
865			origifp = ifnet_byindex(ntohs(ip6->ip6_dst.s6_addr16[1]));
866		/*
867		 * XXX: origifp can be NULL even in those two cases above.
868		 * For example, if we remove the (only) link-local address
869		 * from the loopback interface, and try to send a link-local
870		 * address without link-id information.  Then the source
871		 * address is ::1, and the destination address is the
872		 * link-local address with its s6_addr16[1] being zero.
873		 * What is worse, if the packet goes to the loopback interface
874		 * by a default rejected route, the null pointer would be
875		 * passed to looutput, and the kernel would hang.
876		 * The following last resort would prevent such disaster.
877		 */
878		if (origifp == NULL)
879			origifp = ifp;
880	}
881	else
882		origifp = ifp;
883	/*
884	 * clear embedded scope identifiers if necessary.
885	 * in6_clearscope will touch the addresses only when necessary.
886	 */
887	in6_clearscope(&ip6->ip6_src);
888	in6_clearscope(&ip6->ip6_dst);
889
890	/*
891	 * Check with the firewall...
892	 */
893	if (ip6_fw_enable && ip6_fw_chk_ptr) {
894		u_short port = 0;
895		m->m_pkthdr.rcvif = NULL;	/* XXX */
896		/* If ipfw says divert, we have to just drop packet */
897		if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
898			m_freem(m);
899			goto done;
900		}
901		if (!m) {
902			error = EACCES;
903			goto done;
904		}
905	}
906
907	/*
908	 * If the outgoing packet contains a hop-by-hop options header,
909	 * it must be examined and processed even by the source node.
910	 * (RFC 2460, section 4.)
911	 */
912	if (exthdrs.ip6e_hbh) {
913		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
914		u_int32_t dummy1; /* XXX unused */
915		u_int32_t dummy2; /* XXX unused */
916
917#ifdef DIAGNOSTIC
918		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
919			panic("ip6e_hbh is not continuous");
920#endif
921		/*
922		 *  XXX: if we have to send an ICMPv6 error to the sender,
923		 *       we need the M_LOOP flag since icmp6_error() expects
924		 *       the IPv6 and the hop-by-hop options header are
925		 *       continuous unless the flag is set.
926		 */
927		m->m_flags |= M_LOOP;
928		m->m_pkthdr.rcvif = ifp;
929		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
930		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
931		    &dummy1, &dummy2) < 0) {
932			/* m was already freed at this point */
933			error = EINVAL;/* better error? */
934			goto done;
935		}
936		m->m_flags &= ~M_LOOP; /* XXX */
937		m->m_pkthdr.rcvif = NULL;
938	}
939
940#ifdef PFIL_HOOKS
941	/*
942	 * Run through list of hooks for output packets.
943	 */
944	error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT);
945	if (error != 0 || m == NULL)
946		goto done;
947	ip6 = mtod(m, struct ip6_hdr *);
948#endif /* PFIL_HOOKS */
949
950	/*
951	 * Send the packet to the outgoing interface.
952	 * If necessary, do IPv6 fragmentation before sending.
953	 *
954	 * the logic here is rather complex:
955	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
956	 * 1-a:	send as is if tlen <= path mtu
957	 * 1-b:	fragment if tlen > path mtu
958	 *
959	 * 2: if user asks us not to fragment (dontfrag == 1)
960	 * 2-a:	send as is if tlen <= interface mtu
961	 * 2-b:	error if tlen > interface mtu
962	 *
963	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
964	 *	always fragment
965	 *
966	 * 4: if dontfrag == 1 && alwaysfrag == 1
967	 *	error, as we cannot handle this conflicting request
968	 */
969	tlen = m->m_pkthdr.len;
970
971	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
972		dontfrag = 1;
973	else
974		dontfrag = 0;
975	if (dontfrag && alwaysfrag) {	/* case 4 */
976		/* conflicting request - can't transmit */
977		error = EMSGSIZE;
978		goto bad;
979	}
980	if (dontfrag && tlen > IN6_LINKMTU(ifp)) {	/* case 2-b */
981		/*
982		 * Even if the DONTFRAG option is specified, we cannot send the
983		 * packet when the data length is larger than the MTU of the
984		 * outgoing interface.
985		 * Notify the error by sending IPV6_PATHMTU ancillary data as
986		 * well as returning an error code (the latter is not described
987		 * in the API spec.)
988		 */
989		u_int32_t mtu32;
990		struct ip6ctlparam ip6cp;
991
992		mtu32 = (u_int32_t)mtu;
993		bzero(&ip6cp, sizeof(ip6cp));
994		ip6cp.ip6c_cmdarg = (void *)&mtu32;
995		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
996		    (void *)&ip6cp);
997
998		error = EMSGSIZE;
999		goto bad;
1000	}
1001
1002	/*
1003	 * transmit packet without fragmentation
1004	 */
1005	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
1006		struct in6_ifaddr *ia6;
1007
1008		ip6 = mtod(m, struct ip6_hdr *);
1009		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
1010		if (ia6) {
1011			/* Record statistics for this interface address. */
1012			ia6->ia_ifa.if_opackets++;
1013			ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
1014		}
1015#ifdef IPSEC
1016		/* clean ipsec history once it goes out of the node */
1017		ipsec_delaux(m);
1018#endif
1019		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1020		goto done;
1021	}
1022
1023	/*
1024	 * try to fragment the packet.  case 1-b and 3
1025	 */
1026	if (mtu < IPV6_MMTU) {
1027		/* path MTU cannot be less than IPV6_MMTU */
1028		error = EMSGSIZE;
1029		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1030		goto bad;
1031	} else if (ip6->ip6_plen == 0) {
1032		/* jumbo payload cannot be fragmented */
1033		error = EMSGSIZE;
1034		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1035		goto bad;
1036	} else {
1037		struct mbuf **mnext, *m_frgpart;
1038		struct ip6_frag *ip6f;
1039#ifdef RANDOM_IP_ID
1040		u_int32_t id = htonl(ip6_randomid());
1041#else
1042		u_int32_t id = htonl(ip6_id++);
1043#endif
1044		u_char nextproto;
1045		struct ip6ctlparam ip6cp;
1046		u_int32_t mtu32;
1047
1048		/*
1049		 * Too large for the destination or interface;
1050		 * fragment if possible.
1051		 * Must be able to put at least 8 bytes per fragment.
1052		 */
1053		hlen = unfragpartlen;
1054		if (mtu > IPV6_MAXPACKET)
1055			mtu = IPV6_MAXPACKET;
1056
1057		/* Notify a proper path MTU to applications. */
1058		mtu32 = (u_int32_t)mtu;
1059		bzero(&ip6cp, sizeof(ip6cp));
1060		ip6cp.ip6c_cmdarg = (void *)&mtu32;
1061		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1062		    (void *)&ip6cp);
1063
1064		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1065		if (len < 8) {
1066			error = EMSGSIZE;
1067			in6_ifstat_inc(ifp, ifs6_out_fragfail);
1068			goto bad;
1069		}
1070
1071		mnext = &m->m_nextpkt;
1072
1073		/*
1074		 * Change the next header field of the last header in the
1075		 * unfragmentable part.
1076		 */
1077		if (exthdrs.ip6e_rthdr) {
1078			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1079			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1080		} else if (exthdrs.ip6e_dest1) {
1081			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1082			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1083		} else if (exthdrs.ip6e_hbh) {
1084			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1085			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1086		} else {
1087			nextproto = ip6->ip6_nxt;
1088			ip6->ip6_nxt = IPPROTO_FRAGMENT;
1089		}
1090
1091		/*
1092		 * Loop through length of segment after first fragment,
1093		 * make new header and copy data of each part and link onto
1094		 * chain.
1095		 */
1096		m0 = m;
1097		for (off = hlen; off < tlen; off += len) {
1098			MGETHDR(m, M_DONTWAIT, MT_HEADER);
1099			if (!m) {
1100				error = ENOBUFS;
1101				ip6stat.ip6s_odropped++;
1102				goto sendorfree;
1103			}
1104			m->m_pkthdr.rcvif = NULL;
1105			m->m_flags = m0->m_flags & M_COPYFLAGS;
1106			*mnext = m;
1107			mnext = &m->m_nextpkt;
1108			m->m_data += max_linkhdr;
1109			mhip6 = mtod(m, struct ip6_hdr *);
1110			*mhip6 = *ip6;
1111			m->m_len = sizeof(*mhip6);
1112			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1113			if (error) {
1114				ip6stat.ip6s_odropped++;
1115				goto sendorfree;
1116			}
1117			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1118			if (off + len >= tlen)
1119				len = tlen - off;
1120			else
1121				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1122			mhip6->ip6_plen = htons((u_short)(len + hlen +
1123			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1124			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1125				error = ENOBUFS;
1126				ip6stat.ip6s_odropped++;
1127				goto sendorfree;
1128			}
1129			m_cat(m, m_frgpart);
1130			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1131			m->m_pkthdr.rcvif = (struct ifnet *)0;
1132			ip6f->ip6f_reserved = 0;
1133			ip6f->ip6f_ident = id;
1134			ip6f->ip6f_nxt = nextproto;
1135			ip6stat.ip6s_ofragments++;
1136			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1137		}
1138
1139		in6_ifstat_inc(ifp, ifs6_out_fragok);
1140	}
1141
1142	/*
1143	 * Remove leading garbages.
1144	 */
1145sendorfree:
1146	m = m0->m_nextpkt;
1147	m0->m_nextpkt = 0;
1148	m_freem(m0);
1149	for (m0 = m; m; m = m0) {
1150		m0 = m->m_nextpkt;
1151		m->m_nextpkt = 0;
1152		if (error == 0) {
1153 			/* Record statistics for this interface address. */
1154 			if (ia) {
1155 				ia->ia_ifa.if_opackets++;
1156 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1157 			}
1158#ifdef IPSEC
1159			/* clean ipsec history once it goes out of the node */
1160			ipsec_delaux(m);
1161#endif
1162			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1163		} else
1164			m_freem(m);
1165	}
1166
1167	if (error == 0)
1168		ip6stat.ip6s_fragmented++;
1169
1170done:
1171	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1172		RTFREE(ro->ro_rt);
1173	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1174		RTFREE(ro_pmtu->ro_rt);
1175	}
1176
1177#ifdef IPSEC
1178	if (sp != NULL)
1179		key_freesp(sp);
1180#endif /* IPSEC */
1181#ifdef FAST_IPSEC
1182	if (sp != NULL)
1183		KEY_FREESP(&sp);
1184#endif /* FAST_IPSEC */
1185
1186	return (error);
1187
1188freehdrs:
1189	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1190	m_freem(exthdrs.ip6e_dest1);
1191	m_freem(exthdrs.ip6e_rthdr);
1192	m_freem(exthdrs.ip6e_dest2);
1193	/* FALLTHROUGH */
1194bad:
1195	m_freem(m);
1196	goto done;
1197}
1198
1199static int
1200ip6_copyexthdr(mp, hdr, hlen)
1201	struct mbuf **mp;
1202	caddr_t hdr;
1203	int hlen;
1204{
1205	struct mbuf *m;
1206
1207	if (hlen > MCLBYTES)
1208		return (ENOBUFS); /* XXX */
1209
1210	MGET(m, M_DONTWAIT, MT_DATA);
1211	if (!m)
1212		return (ENOBUFS);
1213
1214	if (hlen > MLEN) {
1215		MCLGET(m, M_DONTWAIT);
1216		if ((m->m_flags & M_EXT) == 0) {
1217			m_free(m);
1218			return (ENOBUFS);
1219		}
1220	}
1221	m->m_len = hlen;
1222	if (hdr)
1223		bcopy(hdr, mtod(m, caddr_t), hlen);
1224
1225	*mp = m;
1226	return (0);
1227}
1228
1229/*
1230 * Insert jumbo payload option.
1231 */
1232static int
1233ip6_insert_jumboopt(exthdrs, plen)
1234	struct ip6_exthdrs *exthdrs;
1235	u_int32_t plen;
1236{
1237	struct mbuf *mopt;
1238	u_char *optbuf;
1239	u_int32_t v;
1240
1241#define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1242
1243	/*
1244	 * If there is no hop-by-hop options header, allocate new one.
1245	 * If there is one but it doesn't have enough space to store the
1246	 * jumbo payload option, allocate a cluster to store the whole options.
1247	 * Otherwise, use it to store the options.
1248	 */
1249	if (exthdrs->ip6e_hbh == 0) {
1250		MGET(mopt, M_DONTWAIT, MT_DATA);
1251		if (mopt == 0)
1252			return (ENOBUFS);
1253		mopt->m_len = JUMBOOPTLEN;
1254		optbuf = mtod(mopt, u_char *);
1255		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1256		exthdrs->ip6e_hbh = mopt;
1257	} else {
1258		struct ip6_hbh *hbh;
1259
1260		mopt = exthdrs->ip6e_hbh;
1261		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1262			/*
1263			 * XXX assumption:
1264			 * - exthdrs->ip6e_hbh is not referenced from places
1265			 *   other than exthdrs.
1266			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1267			 */
1268			int oldoptlen = mopt->m_len;
1269			struct mbuf *n;
1270
1271			/*
1272			 * XXX: give up if the whole (new) hbh header does
1273			 * not fit even in an mbuf cluster.
1274			 */
1275			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1276				return (ENOBUFS);
1277
1278			/*
1279			 * As a consequence, we must always prepare a cluster
1280			 * at this point.
1281			 */
1282			MGET(n, M_DONTWAIT, MT_DATA);
1283			if (n) {
1284				MCLGET(n, M_DONTWAIT);
1285				if ((n->m_flags & M_EXT) == 0) {
1286					m_freem(n);
1287					n = NULL;
1288				}
1289			}
1290			if (!n)
1291				return (ENOBUFS);
1292			n->m_len = oldoptlen + JUMBOOPTLEN;
1293			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1294			    oldoptlen);
1295			optbuf = mtod(n, caddr_t) + oldoptlen;
1296			m_freem(mopt);
1297			mopt = exthdrs->ip6e_hbh = n;
1298		} else {
1299			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1300			mopt->m_len += JUMBOOPTLEN;
1301		}
1302		optbuf[0] = IP6OPT_PADN;
1303		optbuf[1] = 1;
1304
1305		/*
1306		 * Adjust the header length according to the pad and
1307		 * the jumbo payload option.
1308		 */
1309		hbh = mtod(mopt, struct ip6_hbh *);
1310		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1311	}
1312
1313	/* fill in the option. */
1314	optbuf[2] = IP6OPT_JUMBO;
1315	optbuf[3] = 4;
1316	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1317	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1318
1319	/* finally, adjust the packet header length */
1320	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1321
1322	return (0);
1323#undef JUMBOOPTLEN
1324}
1325
1326/*
1327 * Insert fragment header and copy unfragmentable header portions.
1328 */
1329static int
1330ip6_insertfraghdr(m0, m, hlen, frghdrp)
1331	struct mbuf *m0, *m;
1332	int hlen;
1333	struct ip6_frag **frghdrp;
1334{
1335	struct mbuf *n, *mlast;
1336
1337	if (hlen > sizeof(struct ip6_hdr)) {
1338		n = m_copym(m0, sizeof(struct ip6_hdr),
1339		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1340		if (n == 0)
1341			return (ENOBUFS);
1342		m->m_next = n;
1343	} else
1344		n = m;
1345
1346	/* Search for the last mbuf of unfragmentable part. */
1347	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1348		;
1349
1350	if ((mlast->m_flags & M_EXT) == 0 &&
1351	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1352		/* use the trailing space of the last mbuf for the fragment hdr */
1353		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1354		    mlast->m_len);
1355		mlast->m_len += sizeof(struct ip6_frag);
1356		m->m_pkthdr.len += sizeof(struct ip6_frag);
1357	} else {
1358		/* allocate a new mbuf for the fragment header */
1359		struct mbuf *mfrg;
1360
1361		MGET(mfrg, M_DONTWAIT, MT_DATA);
1362		if (mfrg == 0)
1363			return (ENOBUFS);
1364		mfrg->m_len = sizeof(struct ip6_frag);
1365		*frghdrp = mtod(mfrg, struct ip6_frag *);
1366		mlast->m_next = mfrg;
1367	}
1368
1369	return (0);
1370}
1371
1372static int
1373ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
1374	struct route_in6 *ro_pmtu, *ro;
1375	struct ifnet *ifp;
1376	struct in6_addr *dst;
1377	u_long *mtup;
1378	int *alwaysfragp;
1379{
1380	u_int32_t mtu = 0;
1381	int alwaysfrag = 0;
1382	int error = 0;
1383
1384	if (ro_pmtu != ro) {
1385		/* The first hop and the final destination may differ. */
1386		struct sockaddr_in6 *sa6_dst =
1387		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1388		if (ro_pmtu->ro_rt &&
1389		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1390		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1391			RTFREE(ro_pmtu->ro_rt);
1392			ro_pmtu->ro_rt = (struct rtentry *)NULL;
1393		}
1394		if (ro_pmtu->ro_rt == NULL) {
1395			bzero(sa6_dst, sizeof(*sa6_dst));
1396			sa6_dst->sin6_family = AF_INET6;
1397			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1398			sa6_dst->sin6_addr = *dst;
1399
1400			rtalloc((struct route *)ro_pmtu);
1401		}
1402	}
1403	if (ro_pmtu->ro_rt) {
1404		u_int32_t ifmtu;
1405		struct in_conninfo inc;
1406
1407		bzero(&inc, sizeof(inc));
1408		inc.inc_flags = 1; /* IPv6 */
1409		inc.inc6_faddr = *dst;
1410
1411		if (ifp == NULL)
1412			ifp = ro_pmtu->ro_rt->rt_ifp;
1413		ifmtu = IN6_LINKMTU(ifp);
1414		mtu = tcp_hc_getmtu(&inc);
1415		if (mtu)
1416			mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
1417		else
1418			mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1419		if (mtu == 0)
1420			mtu = ifmtu;
1421		else if (mtu < IPV6_MMTU) {
1422			/*
1423			 * RFC2460 section 5, last paragraph:
1424			 * if we record ICMPv6 too big message with
1425			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1426			 * or smaller, with framgent header attached.
1427			 * (fragment header is needed regardless from the
1428			 * packet size, for translators to identify packets)
1429			 */
1430			alwaysfrag = 1;
1431			mtu = IPV6_MMTU;
1432		} else if (mtu > ifmtu) {
1433			/*
1434			 * The MTU on the route is larger than the MTU on
1435			 * the interface!  This shouldn't happen, unless the
1436			 * MTU of the interface has been changed after the
1437			 * interface was brought up.  Change the MTU in the
1438			 * route to match the interface MTU (as long as the
1439			 * field isn't locked).
1440			 */
1441			mtu = ifmtu;
1442			ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1443		}
1444	} else if (ifp) {
1445		mtu = IN6_LINKMTU(ifp);
1446	} else
1447		error = EHOSTUNREACH; /* XXX */
1448
1449	*mtup = mtu;
1450	if (alwaysfragp)
1451		*alwaysfragp = alwaysfrag;
1452	return (error);
1453}
1454
1455/*
1456 * IP6 socket option processing.
1457 */
1458int
1459ip6_ctloutput(so, sopt)
1460	struct socket *so;
1461	struct sockopt *sopt;
1462{
1463	int privileged, optdatalen, uproto;
1464	void *optdata;
1465	struct inpcb *in6p = sotoinpcb(so);
1466	int error, optval;
1467	int level, op, optname;
1468	int optlen;
1469	struct thread *td;
1470
1471	if (sopt) {
1472		level = sopt->sopt_level;
1473		op = sopt->sopt_dir;
1474		optname = sopt->sopt_name;
1475		optlen = sopt->sopt_valsize;
1476		td = sopt->sopt_td;
1477	} else {
1478		panic("ip6_ctloutput: arg soopt is NULL");
1479	}
1480	error = optval = 0;
1481
1482	privileged = (td == 0 || suser(td)) ? 0 : 1;
1483	uproto = (int)so->so_proto->pr_protocol;
1484
1485	if (level == IPPROTO_IPV6) {
1486		switch (op) {
1487
1488		case SOPT_SET:
1489			switch (optname) {
1490			case IPV6_2292PKTOPTIONS:
1491#ifdef IPV6_PKTOPTIONS
1492			case IPV6_PKTOPTIONS:
1493#endif
1494			{
1495				struct mbuf *m;
1496
1497				error = soopt_getm(sopt, &m); /* XXX */
1498				if (error != 0)
1499					break;
1500				error = soopt_mcopyin(sopt, m); /* XXX */
1501				if (error != 0)
1502					break;
1503				error = ip6_pcbopts(&in6p->in6p_outputopts,
1504						    m, so, sopt);
1505				m_freem(m); /* XXX */
1506				break;
1507			}
1508
1509			/*
1510			 * Use of some Hop-by-Hop options or some
1511			 * Destination options, might require special
1512			 * privilege.  That is, normal applications
1513			 * (without special privilege) might be forbidden
1514			 * from setting certain options in outgoing packets,
1515			 * and might never see certain options in received
1516			 * packets. [RFC 2292 Section 6]
1517			 * KAME specific note:
1518			 *  KAME prevents non-privileged users from sending or
1519			 *  receiving ANY hbh/dst options in order to avoid
1520			 *  overhead of parsing options in the kernel.
1521			 */
1522			case IPV6_RECVHOPOPTS:
1523			case IPV6_RECVDSTOPTS:
1524			case IPV6_RECVRTHDRDSTOPTS:
1525				if (!privileged) {
1526					error = EPERM;
1527					break;
1528				}
1529				/* FALLTHROUGH */
1530			case IPV6_UNICAST_HOPS:
1531			case IPV6_HOPLIMIT:
1532			case IPV6_FAITH:
1533
1534			case IPV6_RECVPKTINFO:
1535			case IPV6_RECVHOPLIMIT:
1536			case IPV6_RECVRTHDR:
1537			case IPV6_RECVPATHMTU:
1538			case IPV6_RECVTCLASS:
1539			case IPV6_V6ONLY:
1540			case IPV6_AUTOFLOWLABEL:
1541				if (optlen != sizeof(int)) {
1542					error = EINVAL;
1543					break;
1544				}
1545				error = sooptcopyin(sopt, &optval,
1546					sizeof optval, sizeof optval);
1547				if (error)
1548					break;
1549				switch (optname) {
1550
1551				case IPV6_UNICAST_HOPS:
1552					if (optval < -1 || optval >= 256)
1553						error = EINVAL;
1554					else {
1555						/* -1 = kernel default */
1556						in6p->in6p_hops = optval;
1557						if ((in6p->in6p_vflag &
1558						     INP_IPV4) != 0)
1559							in6p->inp_ip_ttl = optval;
1560					}
1561					break;
1562#define OPTSET(bit) \
1563do { \
1564	if (optval) \
1565		in6p->in6p_flags |= (bit); \
1566	else \
1567		in6p->in6p_flags &= ~(bit); \
1568} while (/*CONSTCOND*/ 0)
1569#define OPTSET2292(bit) \
1570do { \
1571	in6p->in6p_flags |= IN6P_RFC2292; \
1572	if (optval) \
1573		in6p->in6p_flags |= (bit); \
1574	else \
1575		in6p->in6p_flags &= ~(bit); \
1576} while (/*CONSTCOND*/ 0)
1577#define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1578
1579				case IPV6_RECVPKTINFO:
1580					/* cannot mix with RFC2292 */
1581					if (OPTBIT(IN6P_RFC2292)) {
1582						error = EINVAL;
1583						break;
1584					}
1585					OPTSET(IN6P_PKTINFO);
1586					break;
1587
1588				case IPV6_HOPLIMIT:
1589				{
1590					struct ip6_pktopts **optp;
1591
1592					/* cannot mix with RFC2292 */
1593					if (OPTBIT(IN6P_RFC2292)) {
1594						error = EINVAL;
1595						break;
1596					}
1597					optp = &in6p->in6p_outputopts;
1598					error = ip6_pcbopt(IPV6_HOPLIMIT,
1599							   (u_char *)&optval,
1600							   sizeof(optval),
1601							   optp,
1602							   privileged, uproto);
1603					break;
1604				}
1605
1606				case IPV6_RECVHOPLIMIT:
1607					/* cannot mix with RFC2292 */
1608					if (OPTBIT(IN6P_RFC2292)) {
1609						error = EINVAL;
1610						break;
1611					}
1612					OPTSET(IN6P_HOPLIMIT);
1613					break;
1614
1615				case IPV6_RECVHOPOPTS:
1616					/* cannot mix with RFC2292 */
1617					if (OPTBIT(IN6P_RFC2292)) {
1618						error = EINVAL;
1619						break;
1620					}
1621					OPTSET(IN6P_HOPOPTS);
1622					break;
1623
1624				case IPV6_RECVDSTOPTS:
1625					/* cannot mix with RFC2292 */
1626					if (OPTBIT(IN6P_RFC2292)) {
1627						error = EINVAL;
1628						break;
1629					}
1630					OPTSET(IN6P_DSTOPTS);
1631					break;
1632
1633				case IPV6_RECVRTHDRDSTOPTS:
1634					/* cannot mix with RFC2292 */
1635					if (OPTBIT(IN6P_RFC2292)) {
1636						error = EINVAL;
1637						break;
1638					}
1639					OPTSET(IN6P_RTHDRDSTOPTS);
1640					break;
1641
1642				case IPV6_RECVRTHDR:
1643					/* cannot mix with RFC2292 */
1644					if (OPTBIT(IN6P_RFC2292)) {
1645						error = EINVAL;
1646						break;
1647					}
1648					OPTSET(IN6P_RTHDR);
1649					break;
1650
1651				case IPV6_FAITH:
1652					OPTSET(IN6P_FAITH);
1653					break;
1654
1655				case IPV6_RECVPATHMTU:
1656					/*
1657					 * We ignore this option for TCP
1658					 * sockets.
1659					 * (rfc2292bis leaves this case
1660					 * unspecified.)
1661					 */
1662					if (uproto != IPPROTO_TCP)
1663						OPTSET(IN6P_MTU);
1664					break;
1665
1666				case IPV6_V6ONLY:
1667					/*
1668					 * make setsockopt(IPV6_V6ONLY)
1669					 * available only prior to bind(2).
1670					 * see ipng mailing list, Jun 22 2001.
1671					 */
1672					if (in6p->in6p_lport ||
1673					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1674						error = EINVAL;
1675						break;
1676					}
1677					OPTSET(IN6P_IPV6_V6ONLY);
1678					if (optval)
1679						in6p->in6p_vflag &= ~INP_IPV4;
1680					else
1681						in6p->in6p_vflag |= INP_IPV4;
1682					break;
1683				case IPV6_RECVTCLASS:
1684					/* cannot mix with RFC2292 XXX */
1685					if (OPTBIT(IN6P_RFC2292)) {
1686						error = EINVAL;
1687						break;
1688					}
1689					OPTSET(IN6P_TCLASS);
1690					break;
1691				case IPV6_AUTOFLOWLABEL:
1692					OPTSET(IN6P_AUTOFLOWLABEL);
1693					break;
1694
1695				}
1696				break;
1697
1698			case IPV6_TCLASS:
1699			case IPV6_DONTFRAG:
1700			case IPV6_USE_MIN_MTU:
1701			case IPV6_PREFER_TEMPADDR:
1702				if (optlen != sizeof(optval)) {
1703					error = EINVAL;
1704					break;
1705				}
1706				error = sooptcopyin(sopt, &optval,
1707					sizeof optval, sizeof optval);
1708				if (error)
1709					break;
1710				{
1711					struct ip6_pktopts **optp;
1712					optp = &in6p->in6p_outputopts;
1713					error = ip6_pcbopt(optname,
1714							   (u_char *)&optval,
1715							   sizeof(optval),
1716							   optp,
1717							   privileged, uproto);
1718					break;
1719				}
1720
1721			case IPV6_2292PKTINFO:
1722			case IPV6_2292HOPLIMIT:
1723			case IPV6_2292HOPOPTS:
1724			case IPV6_2292DSTOPTS:
1725			case IPV6_2292RTHDR:
1726				/* RFC 2292 */
1727				if (optlen != sizeof(int)) {
1728					error = EINVAL;
1729					break;
1730				}
1731				error = sooptcopyin(sopt, &optval,
1732					sizeof optval, sizeof optval);
1733				if (error)
1734					break;
1735				switch (optname) {
1736				case IPV6_2292PKTINFO:
1737					OPTSET2292(IN6P_PKTINFO);
1738					break;
1739				case IPV6_2292HOPLIMIT:
1740					OPTSET2292(IN6P_HOPLIMIT);
1741					break;
1742				case IPV6_2292HOPOPTS:
1743					/*
1744					 * Check super-user privilege.
1745					 * See comments for IPV6_RECVHOPOPTS.
1746					 */
1747					if (!privileged)
1748						return (EPERM);
1749					OPTSET2292(IN6P_HOPOPTS);
1750					break;
1751				case IPV6_2292DSTOPTS:
1752					if (!privileged)
1753						return (EPERM);
1754					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1755					break;
1756				case IPV6_2292RTHDR:
1757					OPTSET2292(IN6P_RTHDR);
1758					break;
1759				}
1760				break;
1761			case IPV6_PKTINFO:
1762			case IPV6_HOPOPTS:
1763			case IPV6_RTHDR:
1764			case IPV6_DSTOPTS:
1765			case IPV6_RTHDRDSTOPTS:
1766			case IPV6_NEXTHOP:
1767			{
1768				/* new advanced API (2292bis) */
1769				u_char *optbuf;
1770				int optlen;
1771				struct ip6_pktopts **optp;
1772
1773				/* cannot mix with RFC2292 */
1774				if (OPTBIT(IN6P_RFC2292)) {
1775					error = EINVAL;
1776					break;
1777				}
1778
1779				switch (optname) {
1780				case IPV6_HOPOPTS:
1781				case IPV6_DSTOPTS:
1782				case IPV6_RTHDRDSTOPTS:
1783				case IPV6_NEXTHOP:
1784					if (!privileged)
1785						error = EPERM;
1786					break;
1787				}
1788				if (error)
1789					break;
1790
1791				switch (optname) {
1792				case IPV6_PKTINFO:
1793					optlen = sizeof(struct in6_pktinfo);
1794					break;
1795				case IPV6_NEXTHOP:
1796					optlen = SOCK_MAXADDRLEN;
1797					break;
1798				default:
1799					optlen = IPV6_MAXOPTHDR;
1800					break;
1801				}
1802				if (sopt->sopt_valsize > optlen) {
1803					error = EINVAL;
1804					break;
1805				}
1806
1807				optlen = sopt->sopt_valsize;
1808				optbuf = malloc(optlen, M_TEMP, M_WAITOK);
1809				error = sooptcopyin(sopt, optbuf, optlen,
1810				    optlen);
1811				if (error) {
1812					free(optbuf, M_TEMP);
1813					break;
1814				}
1815
1816				optp = &in6p->in6p_outputopts;
1817				error = ip6_pcbopt(optname,
1818						   optbuf, optlen,
1819						   optp, privileged, uproto);
1820				free(optbuf, M_TEMP);
1821				break;
1822			}
1823#undef OPTSET
1824
1825			case IPV6_MULTICAST_IF:
1826			case IPV6_MULTICAST_HOPS:
1827			case IPV6_MULTICAST_LOOP:
1828			case IPV6_JOIN_GROUP:
1829			case IPV6_LEAVE_GROUP:
1830			    {
1831				if (sopt->sopt_valsize > MLEN) {
1832					error = EMSGSIZE;
1833					break;
1834				}
1835				/* XXX */
1836			    }
1837			    /* FALLTHROUGH */
1838			    {
1839				struct mbuf *m;
1840
1841				if (sopt->sopt_valsize > MCLBYTES) {
1842					error = EMSGSIZE;
1843					break;
1844				}
1845				/* XXX */
1846				MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_HEADER);
1847				if (m == 0) {
1848					error = ENOBUFS;
1849					break;
1850				}
1851				if (sopt->sopt_valsize > MLEN) {
1852					MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
1853					if ((m->m_flags & M_EXT) == 0) {
1854						m_free(m);
1855						error = ENOBUFS;
1856						break;
1857					}
1858				}
1859				m->m_len = sopt->sopt_valsize;
1860				error = sooptcopyin(sopt, mtod(m, char *),
1861						    m->m_len, m->m_len);
1862				if (error) {
1863					(void)m_free(m);
1864					break;
1865				}
1866				error =	ip6_setmoptions(sopt->sopt_name,
1867							&in6p->in6p_moptions,
1868							m);
1869				(void)m_free(m);
1870			    }
1871				break;
1872
1873			case IPV6_PORTRANGE:
1874				error = sooptcopyin(sopt, &optval,
1875				    sizeof optval, sizeof optval);
1876				if (error)
1877					break;
1878
1879				switch (optval) {
1880				case IPV6_PORTRANGE_DEFAULT:
1881					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1882					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1883					break;
1884
1885				case IPV6_PORTRANGE_HIGH:
1886					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1887					in6p->in6p_flags |= IN6P_HIGHPORT;
1888					break;
1889
1890				case IPV6_PORTRANGE_LOW:
1891					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1892					in6p->in6p_flags |= IN6P_LOWPORT;
1893					break;
1894
1895				default:
1896					error = EINVAL;
1897					break;
1898				}
1899				break;
1900
1901#if defined(IPSEC) || defined(FAST_IPSEC)
1902			case IPV6_IPSEC_POLICY:
1903			    {
1904				caddr_t req = NULL;
1905				size_t len = 0;
1906				struct mbuf *m;
1907
1908				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1909					break;
1910				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1911					break;
1912				if (m) {
1913					req = mtod(m, caddr_t);
1914					len = m->m_len;
1915				}
1916				error = ipsec6_set_policy(in6p, optname, req,
1917							  len, privileged);
1918				m_freem(m);
1919			    }
1920				break;
1921#endif /* KAME IPSEC */
1922
1923			case IPV6_FW_ADD:
1924			case IPV6_FW_DEL:
1925			case IPV6_FW_FLUSH:
1926			case IPV6_FW_ZERO:
1927			    {
1928				struct mbuf *m;
1929				struct mbuf **mp = &m;
1930
1931				if (ip6_fw_ctl_ptr == NULL)
1932					return EINVAL;
1933				/* XXX */
1934				if ((error = soopt_getm(sopt, &m)) != 0)
1935					break;
1936				/* XXX */
1937				if ((error = soopt_mcopyin(sopt, m)) != 0)
1938					break;
1939				error = (*ip6_fw_ctl_ptr)(optname, mp);
1940				m = *mp;
1941			    }
1942				break;
1943
1944			default:
1945				error = ENOPROTOOPT;
1946				break;
1947			}
1948			break;
1949
1950		case SOPT_GET:
1951			switch (optname) {
1952
1953			case IPV6_2292PKTOPTIONS:
1954#ifdef IPV6_PKTOPTIONS
1955			case IPV6_PKTOPTIONS:
1956#endif
1957				/*
1958				 * RFC3542 (effectively) deprecated the
1959				 * semantics of the 2292-style pktoptions.
1960				 * Since it was not reliable in nature (i.e.,
1961				 * applications had to expect the lack of some
1962				 * information after all), it would make sense
1963				 * to simplify this part by always returning
1964				 * empty data.
1965				 */
1966				sopt->sopt_valsize = 0;
1967				break;
1968
1969			case IPV6_RECVHOPOPTS:
1970			case IPV6_RECVDSTOPTS:
1971			case IPV6_RECVRTHDRDSTOPTS:
1972			case IPV6_UNICAST_HOPS:
1973			case IPV6_RECVPKTINFO:
1974			case IPV6_RECVHOPLIMIT:
1975			case IPV6_RECVRTHDR:
1976			case IPV6_RECVPATHMTU:
1977
1978			case IPV6_FAITH:
1979			case IPV6_V6ONLY:
1980			case IPV6_PORTRANGE:
1981			case IPV6_RECVTCLASS:
1982			case IPV6_AUTOFLOWLABEL:
1983				switch (optname) {
1984
1985				case IPV6_RECVHOPOPTS:
1986					optval = OPTBIT(IN6P_HOPOPTS);
1987					break;
1988
1989				case IPV6_RECVDSTOPTS:
1990					optval = OPTBIT(IN6P_DSTOPTS);
1991					break;
1992
1993				case IPV6_RECVRTHDRDSTOPTS:
1994					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1995					break;
1996
1997				case IPV6_UNICAST_HOPS:
1998					optval = in6p->in6p_hops;
1999					break;
2000
2001				case IPV6_RECVPKTINFO:
2002					optval = OPTBIT(IN6P_PKTINFO);
2003					break;
2004
2005				case IPV6_RECVHOPLIMIT:
2006					optval = OPTBIT(IN6P_HOPLIMIT);
2007					break;
2008
2009				case IPV6_RECVRTHDR:
2010					optval = OPTBIT(IN6P_RTHDR);
2011					break;
2012
2013				case IPV6_RECVPATHMTU:
2014					optval = OPTBIT(IN6P_MTU);
2015					break;
2016
2017				case IPV6_FAITH:
2018					optval = OPTBIT(IN6P_FAITH);
2019					break;
2020
2021				case IPV6_V6ONLY:
2022					optval = OPTBIT(IN6P_IPV6_V6ONLY);
2023					break;
2024
2025				case IPV6_PORTRANGE:
2026				    {
2027					int flags;
2028					flags = in6p->in6p_flags;
2029					if (flags & IN6P_HIGHPORT)
2030						optval = IPV6_PORTRANGE_HIGH;
2031					else if (flags & IN6P_LOWPORT)
2032						optval = IPV6_PORTRANGE_LOW;
2033					else
2034						optval = 0;
2035					break;
2036				    }
2037				case IPV6_RECVTCLASS:
2038					optval = OPTBIT(IN6P_TCLASS);
2039					break;
2040
2041				case IPV6_AUTOFLOWLABEL:
2042					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2043					break;
2044				}
2045				if (error)
2046					break;
2047				error = sooptcopyout(sopt, &optval,
2048					sizeof optval);
2049				break;
2050
2051			case IPV6_PATHMTU:
2052			{
2053				u_long pmtu = 0;
2054				struct ip6_mtuinfo mtuinfo;
2055				struct route_in6 sro;
2056
2057				bzero(&sro, sizeof(sro));
2058
2059				if (!(so->so_state & SS_ISCONNECTED))
2060					return (ENOTCONN);
2061				/*
2062				 * XXX: we dot not consider the case of source
2063				 * routing, or optional information to specify
2064				 * the outgoing interface.
2065				 */
2066				error = ip6_getpmtu(&sro, NULL, NULL,
2067				    &in6p->in6p_faddr, &pmtu, NULL);
2068				if (sro.ro_rt)
2069					RTFREE(sro.ro_rt);
2070				if (error)
2071					break;
2072				if (pmtu > IPV6_MAXPACKET)
2073					pmtu = IPV6_MAXPACKET;
2074
2075				bzero(&mtuinfo, sizeof(mtuinfo));
2076				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2077				optdata = (void *)&mtuinfo;
2078				optdatalen = sizeof(mtuinfo);
2079				error = sooptcopyout(sopt, optdata,
2080				    optdatalen);
2081				break;
2082			}
2083
2084			case IPV6_2292PKTINFO:
2085			case IPV6_2292HOPLIMIT:
2086			case IPV6_2292HOPOPTS:
2087			case IPV6_2292RTHDR:
2088			case IPV6_2292DSTOPTS:
2089				switch (optname) {
2090				case IPV6_2292PKTINFO:
2091					optval = OPTBIT(IN6P_PKTINFO);
2092					break;
2093				case IPV6_2292HOPLIMIT:
2094					optval = OPTBIT(IN6P_HOPLIMIT);
2095					break;
2096				case IPV6_2292HOPOPTS:
2097					optval = OPTBIT(IN6P_HOPOPTS);
2098					break;
2099				case IPV6_2292RTHDR:
2100					optval = OPTBIT(IN6P_RTHDR);
2101					break;
2102				case IPV6_2292DSTOPTS:
2103					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2104					break;
2105				}
2106				error = sooptcopyout(sopt, &optval,
2107				    sizeof optval);
2108				break;
2109			case IPV6_PKTINFO:
2110			case IPV6_HOPOPTS:
2111			case IPV6_RTHDR:
2112			case IPV6_DSTOPTS:
2113			case IPV6_RTHDRDSTOPTS:
2114			case IPV6_NEXTHOP:
2115			case IPV6_TCLASS:
2116			case IPV6_DONTFRAG:
2117			case IPV6_USE_MIN_MTU:
2118			case IPV6_PREFER_TEMPADDR:
2119				error = ip6_getpcbopt(in6p->in6p_outputopts,
2120				    optname, sopt);
2121				break;
2122
2123			case IPV6_MULTICAST_IF:
2124			case IPV6_MULTICAST_HOPS:
2125			case IPV6_MULTICAST_LOOP:
2126			case IPV6_JOIN_GROUP:
2127			case IPV6_LEAVE_GROUP:
2128			    {
2129				struct mbuf *m;
2130				error = ip6_getmoptions(sopt->sopt_name,
2131				    in6p->in6p_moptions, &m);
2132				if (error == 0)
2133					error = sooptcopyout(sopt,
2134					    mtod(m, char *), m->m_len);
2135				m_freem(m);
2136			    }
2137				break;
2138
2139#if defined(IPSEC) || defined(FAST_IPSEC)
2140			case IPV6_IPSEC_POLICY:
2141			  {
2142				caddr_t req = NULL;
2143				size_t len = 0;
2144				struct mbuf *m = NULL;
2145				struct mbuf **mp = &m;
2146				size_t ovalsize = sopt->sopt_valsize;
2147				caddr_t oval = (caddr_t)sopt->sopt_val;
2148
2149				error = soopt_getm(sopt, &m); /* XXX */
2150				if (error != 0)
2151					break;
2152				error = soopt_mcopyin(sopt, m); /* XXX */
2153				if (error != 0)
2154					break;
2155				sopt->sopt_valsize = ovalsize;
2156				sopt->sopt_val = oval;
2157				if (m) {
2158					req = mtod(m, caddr_t);
2159					len = m->m_len;
2160				}
2161				error = ipsec6_get_policy(in6p, req, len, mp);
2162				if (error == 0)
2163					error = soopt_mcopyout(sopt, m); /* XXX */
2164				if (error == 0 && m)
2165					m_freem(m);
2166				break;
2167			  }
2168#endif /* KAME IPSEC */
2169
2170			case IPV6_FW_GET:
2171			  {
2172				struct mbuf *m;
2173				struct mbuf **mp = &m;
2174
2175				if (ip6_fw_ctl_ptr == NULL)
2176			        {
2177					return EINVAL;
2178				}
2179				error = (*ip6_fw_ctl_ptr)(optname, mp);
2180				if (error == 0)
2181					error = soopt_mcopyout(sopt, m); /* XXX */
2182				if (error == 0 && m)
2183					m_freem(m);
2184			  }
2185				break;
2186
2187			default:
2188				error = ENOPROTOOPT;
2189				break;
2190			}
2191			break;
2192		}
2193	} else {		/* level != IPPROTO_IPV6 */
2194		error = EINVAL;
2195	}
2196	return (error);
2197}
2198
2199int
2200ip6_raw_ctloutput(so, sopt)
2201	struct socket *so;
2202	struct sockopt *sopt;
2203{
2204	int error = 0, optval, optlen;
2205	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2206	struct in6pcb *in6p = sotoin6pcb(so);
2207	int level, op, optname;
2208
2209	if (sopt) {
2210		level = sopt->sopt_level;
2211		op = sopt->sopt_dir;
2212		optname = sopt->sopt_name;
2213		optlen = sopt->sopt_valsize;
2214	} else
2215		panic("ip6_raw_ctloutput: arg soopt is NULL");
2216
2217	if (level != IPPROTO_IPV6) {
2218		return (EINVAL);
2219	}
2220
2221	switch (optname) {
2222	case IPV6_CHECKSUM:
2223		/*
2224		 * For ICMPv6 sockets, no modification allowed for checksum
2225		 * offset, permit "no change" values to help existing apps.
2226		 *
2227		 * XXX 2292bis says: "An attempt to set IPV6_CHECKSUM
2228		 * for an ICMPv6 socket will fail."
2229		 * The current behavior does not meet 2292bis.
2230		 */
2231		switch (op) {
2232		case SOPT_SET:
2233			if (optlen != sizeof(int)) {
2234				error = EINVAL;
2235				break;
2236			}
2237			error = sooptcopyin(sopt, &optval, sizeof(optval),
2238					    sizeof(optval));
2239			if (error)
2240				break;
2241			if ((optval % 2) != 0) {
2242				/* the API assumes even offset values */
2243				error = EINVAL;
2244			} else if (so->so_proto->pr_protocol ==
2245			    IPPROTO_ICMPV6) {
2246				if (optval != icmp6off)
2247					error = EINVAL;
2248			} else
2249				in6p->in6p_cksum = optval;
2250			break;
2251
2252		case SOPT_GET:
2253			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2254				optval = icmp6off;
2255			else
2256				optval = in6p->in6p_cksum;
2257
2258			error = sooptcopyout(sopt, &optval, sizeof(optval));
2259			break;
2260
2261		default:
2262			error = EINVAL;
2263			break;
2264		}
2265		break;
2266
2267	default:
2268		error = ENOPROTOOPT;
2269		break;
2270	}
2271
2272	return (error);
2273}
2274
2275/*
2276 * Set up IP6 options in pcb for insertion in output packets or
2277 * specifying behavior of outgoing packets.
2278 */
2279static int
2280ip6_pcbopts(pktopt, m, so, sopt)
2281	struct ip6_pktopts **pktopt;
2282	struct mbuf *m;
2283	struct socket *so;
2284	struct sockopt *sopt;
2285{
2286	struct ip6_pktopts *opt = *pktopt;
2287	int error = 0;
2288	struct thread *td = sopt->sopt_td;
2289	int priv = 0;
2290
2291	/* turn off any old options. */
2292	if (opt) {
2293#ifdef DIAGNOSTIC
2294		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2295		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2296		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2297			printf("ip6_pcbopts: all specified options are cleared.\n");
2298#endif
2299		ip6_clearpktopts(opt, -1);
2300	} else
2301		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2302	*pktopt = NULL;
2303
2304	if (!m || m->m_len == 0) {
2305		/*
2306		 * Only turning off any previous options, regardless of
2307		 * whether the opt is just created or given.
2308		 */
2309		free(opt, M_IP6OPT);
2310		return (0);
2311	}
2312
2313	/*  set options specified by user. */
2314	if (td && !suser(td))
2315		priv = 1;
2316	if ((error = ip6_setpktoptions(m, opt, NULL, priv, 1,
2317	    so->so_proto->pr_protocol)) != 0) {
2318		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2319		free(opt, M_IP6OPT);
2320		return (error);
2321	}
2322	*pktopt = opt;
2323	return (0);
2324}
2325
2326/*
2327 * initialize ip6_pktopts.  beware that there are non-zero default values in
2328 * the struct.
2329 */
2330void
2331init_ip6pktopts(opt)
2332	struct ip6_pktopts *opt;
2333{
2334
2335	bzero(opt, sizeof(*opt));
2336	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2337	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2338	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2339	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2340}
2341
2342static int
2343ip6_pcbopt(optname, buf, len, pktopt, priv, uproto)
2344	int optname, len, priv;
2345	u_char *buf;
2346	struct ip6_pktopts **pktopt;
2347	int uproto;
2348{
2349	struct ip6_pktopts *opt;
2350
2351	if (*pktopt == NULL) {
2352		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2353		    M_WAITOK);
2354		init_ip6pktopts(*pktopt);
2355		(*pktopt)->needfree = 1;
2356	}
2357	opt = *pktopt;
2358
2359	return (ip6_setpktoption(optname, buf, len, opt, priv, 1, 0, uproto));
2360}
2361
2362static int
2363ip6_getpcbopt(pktopt, optname, sopt)
2364	struct ip6_pktopts *pktopt;
2365	struct sockopt *sopt;
2366	int optname;
2367{
2368	void *optdata = NULL;
2369	int optdatalen = 0;
2370	struct ip6_ext *ip6e;
2371	int error = 0;
2372	struct in6_pktinfo null_pktinfo;
2373	int deftclass = 0, on;
2374	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2375	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2376
2377	switch (optname) {
2378	case IPV6_PKTINFO:
2379		if (pktopt && pktopt->ip6po_pktinfo)
2380			optdata = (void *)pktopt->ip6po_pktinfo;
2381		else {
2382			/* XXX: we don't have to do this every time... */
2383			bzero(&null_pktinfo, sizeof(null_pktinfo));
2384			optdata = (void *)&null_pktinfo;
2385		}
2386		optdatalen = sizeof(struct in6_pktinfo);
2387		break;
2388	case IPV6_TCLASS:
2389		if (pktopt && pktopt->ip6po_tclass >= 0)
2390			optdata = (void *)&pktopt->ip6po_tclass;
2391		else
2392			optdata = (void *)&deftclass;
2393		optdatalen = sizeof(int);
2394		break;
2395	case IPV6_HOPOPTS:
2396		if (pktopt && pktopt->ip6po_hbh) {
2397			optdata = (void *)pktopt->ip6po_hbh;
2398			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2399			optdatalen = (ip6e->ip6e_len + 1) << 3;
2400		}
2401		break;
2402	case IPV6_RTHDR:
2403		if (pktopt && pktopt->ip6po_rthdr) {
2404			optdata = (void *)pktopt->ip6po_rthdr;
2405			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2406			optdatalen = (ip6e->ip6e_len + 1) << 3;
2407		}
2408		break;
2409	case IPV6_RTHDRDSTOPTS:
2410		if (pktopt && pktopt->ip6po_dest1) {
2411			optdata = (void *)pktopt->ip6po_dest1;
2412			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2413			optdatalen = (ip6e->ip6e_len + 1) << 3;
2414		}
2415		break;
2416	case IPV6_DSTOPTS:
2417		if (pktopt && pktopt->ip6po_dest2) {
2418			optdata = (void *)pktopt->ip6po_dest2;
2419			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2420			optdatalen = (ip6e->ip6e_len + 1) << 3;
2421		}
2422		break;
2423	case IPV6_NEXTHOP:
2424		if (pktopt && pktopt->ip6po_nexthop) {
2425			optdata = (void *)pktopt->ip6po_nexthop;
2426			optdatalen = pktopt->ip6po_nexthop->sa_len;
2427		}
2428		break;
2429	case IPV6_USE_MIN_MTU:
2430		if (pktopt)
2431			optdata = (void *)&pktopt->ip6po_minmtu;
2432		else
2433			optdata = (void *)&defminmtu;
2434		optdatalen = sizeof(int);
2435		break;
2436	case IPV6_DONTFRAG:
2437		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2438			on = 1;
2439		else
2440			on = 0;
2441		optdata = (void *)&on;
2442		optdatalen = sizeof(on);
2443		break;
2444	case IPV6_PREFER_TEMPADDR:
2445		if (pktopt)
2446			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2447		else
2448			optdata = (void *)&defpreftemp;
2449		optdatalen = sizeof(int);
2450		break;
2451	default:		/* should not happen */
2452#ifdef DIAGNOSTIC
2453		panic("ip6_getpcbopt: unexpected option\n");
2454#endif
2455		return (ENOPROTOOPT);
2456	}
2457
2458	error = sooptcopyout(sopt, optdata, optdatalen);
2459
2460	return (error);
2461}
2462
2463void
2464ip6_clearpktopts(pktopt, optname)
2465	struct ip6_pktopts *pktopt;
2466	int optname;
2467{
2468	int needfree;
2469
2470	if (pktopt == NULL)
2471		return;
2472
2473	needfree = pktopt->needfree;
2474
2475	if (optname == -1 || optname == IPV6_PKTINFO) {
2476		if (needfree && pktopt->ip6po_pktinfo)
2477			free(pktopt->ip6po_pktinfo, M_IP6OPT);
2478		pktopt->ip6po_pktinfo = NULL;
2479	}
2480	if (optname == -1 || optname == IPV6_HOPLIMIT)
2481		pktopt->ip6po_hlim = -1;
2482	if (optname == -1 || optname == IPV6_TCLASS)
2483		pktopt->ip6po_tclass = -1;
2484	if (optname == -1 || optname == IPV6_NEXTHOP) {
2485		if (pktopt->ip6po_nextroute.ro_rt) {
2486			RTFREE(pktopt->ip6po_nextroute.ro_rt);
2487			pktopt->ip6po_nextroute.ro_rt = NULL;
2488		}
2489		if (needfree && pktopt->ip6po_nexthop)
2490			free(pktopt->ip6po_nexthop, M_IP6OPT);
2491		pktopt->ip6po_nexthop = NULL;
2492	}
2493	if (optname == -1 || optname == IPV6_HOPOPTS) {
2494		if (needfree && pktopt->ip6po_hbh)
2495			free(pktopt->ip6po_hbh, M_IP6OPT);
2496		pktopt->ip6po_hbh = NULL;
2497	}
2498	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2499		if (needfree && pktopt->ip6po_dest1)
2500			free(pktopt->ip6po_dest1, M_IP6OPT);
2501		pktopt->ip6po_dest1 = NULL;
2502	}
2503	if (optname == -1 || optname == IPV6_RTHDR) {
2504		if (needfree && pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2505			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2506		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2507		if (pktopt->ip6po_route.ro_rt) {
2508			RTFREE(pktopt->ip6po_route.ro_rt);
2509			pktopt->ip6po_route.ro_rt = NULL;
2510		}
2511	}
2512	if (optname == -1 || optname == IPV6_DSTOPTS) {
2513		if (needfree && pktopt->ip6po_dest2)
2514			free(pktopt->ip6po_dest2, M_IP6OPT);
2515		pktopt->ip6po_dest2 = NULL;
2516	}
2517}
2518
2519#define PKTOPT_EXTHDRCPY(type) \
2520do {\
2521	if (src->type) {\
2522		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2523		dst->type = malloc(hlen, M_IP6OPT, canwait);\
2524		if (dst->type == NULL && canwait == M_NOWAIT)\
2525			goto bad;\
2526		bcopy(src->type, dst->type, hlen);\
2527	}\
2528} while (/*CONSTCOND*/ 0)
2529
2530struct ip6_pktopts *
2531ip6_copypktopts(src, canwait)
2532	struct ip6_pktopts *src;
2533	int canwait;
2534{
2535	struct ip6_pktopts *dst;
2536
2537	if (src == NULL) {
2538		printf("ip6_clearpktopts: invalid argument\n");
2539		return (NULL);
2540	}
2541
2542	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2543	if (dst == NULL && canwait == M_NOWAIT)
2544		return (NULL);
2545	bzero(dst, sizeof(*dst));
2546	dst->needfree = 1;
2547
2548	dst->ip6po_hlim = src->ip6po_hlim;
2549	dst->ip6po_tclass = src->ip6po_tclass;
2550	dst->ip6po_flags = src->ip6po_flags;
2551	if (src->ip6po_pktinfo) {
2552		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2553		    M_IP6OPT, canwait);
2554		if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2555			goto bad;
2556		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2557	}
2558	if (src->ip6po_nexthop) {
2559		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2560		    M_IP6OPT, canwait);
2561		if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
2562			goto bad;
2563		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2564		    src->ip6po_nexthop->sa_len);
2565	}
2566	PKTOPT_EXTHDRCPY(ip6po_hbh);
2567	PKTOPT_EXTHDRCPY(ip6po_dest1);
2568	PKTOPT_EXTHDRCPY(ip6po_dest2);
2569	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2570	return (dst);
2571
2572  bad:
2573	if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2574	if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2575	if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2576	if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2577	if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2578	if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2579	free(dst, M_IP6OPT);
2580	return (NULL);
2581}
2582#undef PKTOPT_EXTHDRCPY
2583
2584void
2585ip6_freepcbopts(pktopt)
2586	struct ip6_pktopts *pktopt;
2587{
2588	if (pktopt == NULL)
2589		return;
2590
2591	ip6_clearpktopts(pktopt, -1);
2592
2593	free(pktopt, M_IP6OPT);
2594}
2595
2596/*
2597 * Set the IP6 multicast options in response to user setsockopt().
2598 */
2599static int
2600ip6_setmoptions(optname, im6op, m)
2601	int optname;
2602	struct ip6_moptions **im6op;
2603	struct mbuf *m;
2604{
2605	int error = 0;
2606	u_int loop, ifindex;
2607	struct ipv6_mreq *mreq;
2608	struct ifnet *ifp;
2609	struct ip6_moptions *im6o = *im6op;
2610	struct route_in6 ro;
2611	struct sockaddr_in6 *dst;
2612	struct in6_multi_mship *imm;
2613	struct thread *td = curthread;
2614
2615	if (im6o == NULL) {
2616		/*
2617		 * No multicast option buffer attached to the pcb;
2618		 * allocate one and initialize to default values.
2619		 */
2620		im6o = (struct ip6_moptions *)
2621			malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
2622
2623		if (im6o == NULL)
2624			return (ENOBUFS);
2625		*im6op = im6o;
2626		im6o->im6o_multicast_ifp = NULL;
2627		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2628		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2629		LIST_INIT(&im6o->im6o_memberships);
2630	}
2631
2632	switch (optname) {
2633
2634	case IPV6_MULTICAST_IF:
2635		/*
2636		 * Select the interface for outgoing multicast packets.
2637		 */
2638		if (m == NULL || m->m_len != sizeof(u_int)) {
2639			error = EINVAL;
2640			break;
2641		}
2642		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2643		if (ifindex < 0 || if_index < ifindex) {
2644			error = ENXIO;	/* XXX EINVAL? */
2645			break;
2646		}
2647		ifp = ifnet_byindex(ifindex);
2648		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2649			error = EADDRNOTAVAIL;
2650			break;
2651		}
2652		im6o->im6o_multicast_ifp = ifp;
2653		break;
2654
2655	case IPV6_MULTICAST_HOPS:
2656	    {
2657		/*
2658		 * Set the IP6 hoplimit for outgoing multicast packets.
2659		 */
2660		int optval;
2661		if (m == NULL || m->m_len != sizeof(int)) {
2662			error = EINVAL;
2663			break;
2664		}
2665		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2666		if (optval < -1 || optval >= 256)
2667			error = EINVAL;
2668		else if (optval == -1)
2669			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2670		else
2671			im6o->im6o_multicast_hlim = optval;
2672		break;
2673	    }
2674
2675	case IPV6_MULTICAST_LOOP:
2676		/*
2677		 * Set the loopback flag for outgoing multicast packets.
2678		 * Must be zero or one.
2679		 */
2680		if (m == NULL || m->m_len != sizeof(u_int)) {
2681			error = EINVAL;
2682			break;
2683		}
2684		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2685		if (loop > 1) {
2686			error = EINVAL;
2687			break;
2688		}
2689		im6o->im6o_multicast_loop = loop;
2690		break;
2691
2692	case IPV6_JOIN_GROUP:
2693		/*
2694		 * Add a multicast group membership.
2695		 * Group must be a valid IP6 multicast address.
2696		 */
2697		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2698			error = EINVAL;
2699			break;
2700		}
2701		mreq = mtod(m, struct ipv6_mreq *);
2702		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2703			/*
2704			 * We use the unspecified address to specify to accept
2705			 * all multicast addresses. Only super user is allowed
2706			 * to do this.
2707			 */
2708			if (suser(td)) {
2709				error = EACCES;
2710				break;
2711			}
2712		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2713			error = EINVAL;
2714			break;
2715		}
2716
2717		/*
2718		 * If the interface is specified, validate it.
2719		 */
2720		if (mreq->ipv6mr_interface < 0 ||
2721		    if_index < mreq->ipv6mr_interface) {
2722			error = ENXIO;	/* XXX EINVAL? */
2723			break;
2724		}
2725		/*
2726		 * If no interface was explicitly specified, choose an
2727		 * appropriate one according to the given multicast address.
2728		 */
2729		if (mreq->ipv6mr_interface == 0) {
2730			/*
2731			 * If the multicast address is in node-local scope,
2732			 * the interface should be a loopback interface.
2733			 * Otherwise, look up the routing table for the
2734			 * address, and choose the outgoing interface.
2735			 *   XXX: is it a good approach?
2736			 */
2737			if (IN6_IS_ADDR_MC_INTFACELOCAL(&mreq->ipv6mr_multiaddr)) {
2738				ifp = &loif[0];
2739			} else {
2740				ro.ro_rt = NULL;
2741				dst = (struct sockaddr_in6 *)&ro.ro_dst;
2742				bzero(dst, sizeof(*dst));
2743				dst->sin6_len = sizeof(struct sockaddr_in6);
2744				dst->sin6_family = AF_INET6;
2745				dst->sin6_addr = mreq->ipv6mr_multiaddr;
2746				rtalloc((struct route *)&ro);
2747				if (ro.ro_rt == NULL) {
2748					error = EADDRNOTAVAIL;
2749					break;
2750				}
2751				ifp = ro.ro_rt->rt_ifp;
2752				RTFREE(ro.ro_rt);
2753			}
2754		} else
2755			ifp = ifnet_byindex(mreq->ipv6mr_interface);
2756
2757		/*
2758		 * See if we found an interface, and confirm that it
2759		 * supports multicast
2760		 */
2761		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2762			error = EADDRNOTAVAIL;
2763			break;
2764		}
2765		/*
2766		 * Put interface index into the multicast address,
2767		 * if the address has link-local scope.
2768		 */
2769		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2770			mreq->ipv6mr_multiaddr.s6_addr16[1] =
2771			    htons(ifp->if_index);
2772		}
2773		/*
2774		 * See if the membership already exists.
2775		 */
2776		for (imm = im6o->im6o_memberships.lh_first;
2777		     imm != NULL; imm = imm->i6mm_chain.le_next)
2778			if (imm->i6mm_maddr->in6m_ifp == ifp &&
2779			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2780					       &mreq->ipv6mr_multiaddr))
2781				break;
2782		if (imm != NULL) {
2783			error = EADDRINUSE;
2784			break;
2785		}
2786		/*
2787		 * Everything looks good; add a new record to the multicast
2788		 * address list for the given interface.
2789		 */
2790		imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK);
2791		if (imm == NULL) {
2792			error = ENOBUFS;
2793			break;
2794		}
2795		if ((imm->i6mm_maddr =
2796		     in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
2797			free(imm, M_IPMADDR);
2798			break;
2799		}
2800		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2801		break;
2802
2803	case IPV6_LEAVE_GROUP:
2804		/*
2805		 * Drop a multicast group membership.
2806		 * Group must be a valid IP6 multicast address.
2807		 */
2808		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2809			error = EINVAL;
2810			break;
2811		}
2812		mreq = mtod(m, struct ipv6_mreq *);
2813		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2814			if (suser(td)) {
2815				error = EACCES;
2816				break;
2817			}
2818		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2819			error = EINVAL;
2820			break;
2821		}
2822		/*
2823		 * If an interface address was specified, get a pointer
2824		 * to its ifnet structure.
2825		 */
2826		if (mreq->ipv6mr_interface < 0
2827		 || if_index < mreq->ipv6mr_interface) {
2828			error = ENXIO;	/* XXX EINVAL? */
2829			break;
2830		}
2831		ifp = ifnet_byindex(mreq->ipv6mr_interface);
2832		/*
2833		 * Put interface index into the multicast address,
2834		 * if the address has link-local scope.
2835		 */
2836		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2837			mreq->ipv6mr_multiaddr.s6_addr16[1]
2838				= htons(mreq->ipv6mr_interface);
2839		}
2840
2841		/*
2842		 * Find the membership in the membership list.
2843		 */
2844		for (imm = im6o->im6o_memberships.lh_first;
2845		     imm != NULL; imm = imm->i6mm_chain.le_next) {
2846			if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2847			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2848			    &mreq->ipv6mr_multiaddr))
2849				break;
2850		}
2851		if (imm == NULL) {
2852			/* Unable to resolve interface */
2853			error = EADDRNOTAVAIL;
2854			break;
2855		}
2856		/*
2857		 * Give up the multicast address record to which the
2858		 * membership points.
2859		 */
2860		LIST_REMOVE(imm, i6mm_chain);
2861		in6_delmulti(imm->i6mm_maddr);
2862		free(imm, M_IPMADDR);
2863		break;
2864
2865	default:
2866		error = EOPNOTSUPP;
2867		break;
2868	}
2869
2870	/*
2871	 * If all options have default values, no need to keep the mbuf.
2872	 */
2873	if (im6o->im6o_multicast_ifp == NULL &&
2874	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2875	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2876	    im6o->im6o_memberships.lh_first == NULL) {
2877		free(*im6op, M_IPMOPTS);
2878		*im6op = NULL;
2879	}
2880
2881	return (error);
2882}
2883
2884/*
2885 * Return the IP6 multicast options in response to user getsockopt().
2886 */
2887static int
2888ip6_getmoptions(optname, im6o, mp)
2889	int optname;
2890	struct ip6_moptions *im6o;
2891	struct mbuf **mp;
2892{
2893	u_int *hlim, *loop, *ifindex;
2894
2895	*mp = m_get(M_TRYWAIT, MT_HEADER);		/* XXX */
2896
2897	switch (optname) {
2898
2899	case IPV6_MULTICAST_IF:
2900		ifindex = mtod(*mp, u_int *);
2901		(*mp)->m_len = sizeof(u_int);
2902		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2903			*ifindex = 0;
2904		else
2905			*ifindex = im6o->im6o_multicast_ifp->if_index;
2906		return (0);
2907
2908	case IPV6_MULTICAST_HOPS:
2909		hlim = mtod(*mp, u_int *);
2910		(*mp)->m_len = sizeof(u_int);
2911		if (im6o == NULL)
2912			*hlim = ip6_defmcasthlim;
2913		else
2914			*hlim = im6o->im6o_multicast_hlim;
2915		return (0);
2916
2917	case IPV6_MULTICAST_LOOP:
2918		loop = mtod(*mp, u_int *);
2919		(*mp)->m_len = sizeof(u_int);
2920		if (im6o == NULL)
2921			*loop = ip6_defmcasthlim;
2922		else
2923			*loop = im6o->im6o_multicast_loop;
2924		return (0);
2925
2926	default:
2927		return (EOPNOTSUPP);
2928	}
2929}
2930
2931/*
2932 * Discard the IP6 multicast options.
2933 */
2934void
2935ip6_freemoptions(im6o)
2936	struct ip6_moptions *im6o;
2937{
2938	struct in6_multi_mship *imm;
2939
2940	if (im6o == NULL)
2941		return;
2942
2943	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2944		LIST_REMOVE(imm, i6mm_chain);
2945		if (imm->i6mm_maddr)
2946			in6_delmulti(imm->i6mm_maddr);
2947		free(imm, M_IPMADDR);
2948	}
2949	free(im6o, M_IPMOPTS);
2950}
2951
2952/*
2953 * Set IPv6 outgoing packet options based on advanced API.
2954 */
2955int
2956ip6_setpktoptions(control, opt, stickyopt, priv, needcopy, uproto)
2957	struct mbuf *control;
2958	struct ip6_pktopts *opt, *stickyopt;
2959	int priv, needcopy, uproto;
2960{
2961	struct cmsghdr *cm = 0;
2962
2963	if (control == 0 || opt == 0)
2964		return (EINVAL);
2965
2966	if (stickyopt) {
2967		/*
2968		 * If stickyopt is provided, make a local copy of the options
2969		 * for this particular packet, then override them by ancillary
2970		 * objects.
2971		 * XXX: need to gain a reference for the cached route of the
2972		 * next hop in case of the overriding.
2973		 */
2974		*opt = *stickyopt;
2975		if (opt->ip6po_nextroute.ro_rt) {
2976			RT_LOCK(opt->ip6po_nextroute.ro_rt);
2977			RT_ADDREF(opt->ip6po_nextroute.ro_rt);
2978			RT_UNLOCK(opt->ip6po_nextroute.ro_rt);
2979		}
2980	} else
2981		init_ip6pktopts(opt);
2982	opt->needfree = needcopy;
2983
2984	/*
2985	 * XXX: Currently, we assume all the optional information is stored
2986	 * in a single mbuf.
2987	 */
2988	if (control->m_next)
2989		return (EINVAL);
2990
2991	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2992	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2993		int error;
2994
2995		if (control->m_len < CMSG_LEN(0))
2996			return (EINVAL);
2997
2998		cm = mtod(control, struct cmsghdr *);
2999		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
3000			return (EINVAL);
3001		if (cm->cmsg_level != IPPROTO_IPV6)
3002			continue;
3003
3004		error = ip6_setpktoption(cm->cmsg_type, CMSG_DATA(cm),
3005		    cm->cmsg_len - CMSG_LEN(0), opt, priv, needcopy, 1, uproto);
3006		if (error)
3007			return (error);
3008	}
3009
3010	return (0);
3011}
3012
3013/*
3014 * Set a particular packet option, as a sticky option or an ancillary data
3015 * item.  "len" can be 0 only when it's a sticky option.
3016 * We have 4 cases of combination of "sticky" and "cmsg":
3017 * "sticky=0, cmsg=0": impossible
3018 * "sticky=0, cmsg=1": RFC2292 or rfc2292bis ancillary data
3019 * "sticky=1, cmsg=0": rfc2292bis socket option
3020 * "sticky=1, cmsg=1": RFC2292 socket option
3021 */
3022static int
3023ip6_setpktoption(optname, buf, len, opt, priv, sticky, cmsg, uproto)
3024	int optname, len, priv, sticky, cmsg, uproto;
3025	u_char *buf;
3026	struct ip6_pktopts *opt;
3027{
3028	int minmtupolicy, preftemp;
3029
3030	if (!sticky && !cmsg) {
3031#ifdef DIAGNOSTIC
3032		printf("ip6_setpktoption: impossible case\n");
3033#endif
3034		return (EINVAL);
3035	}
3036
3037	/*
3038	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3039	 * not be specified in the context of rfc2292bis.  Conversely,
3040	 * rfc2292bis types should not be specified in the context of RFC2292.
3041	 */
3042	if (!cmsg) {
3043		switch (optname) {
3044		case IPV6_2292PKTINFO:
3045		case IPV6_2292HOPLIMIT:
3046		case IPV6_2292NEXTHOP:
3047		case IPV6_2292HOPOPTS:
3048		case IPV6_2292DSTOPTS:
3049		case IPV6_2292RTHDR:
3050		case IPV6_2292PKTOPTIONS:
3051			return (ENOPROTOOPT);
3052		}
3053	}
3054	if (sticky && cmsg) {
3055		switch (optname) {
3056		case IPV6_PKTINFO:
3057		case IPV6_HOPLIMIT:
3058		case IPV6_NEXTHOP:
3059		case IPV6_HOPOPTS:
3060		case IPV6_DSTOPTS:
3061		case IPV6_RTHDRDSTOPTS:
3062		case IPV6_RTHDR:
3063		case IPV6_USE_MIN_MTU:
3064		case IPV6_DONTFRAG:
3065		case IPV6_TCLASS:
3066		case IPV6_PREFER_TEMPADDR: /* XXX: not an rfc2292bis option */
3067			return (ENOPROTOOPT);
3068		}
3069	}
3070
3071	switch (optname) {
3072	case IPV6_2292PKTINFO:
3073	case IPV6_PKTINFO:
3074	{
3075		struct ifnet *ifp = NULL;
3076		struct in6_pktinfo *pktinfo;
3077
3078		if (len != sizeof(struct in6_pktinfo))
3079			return (EINVAL);
3080
3081		pktinfo = (struct in6_pktinfo *)buf;
3082
3083		/*
3084		 * An application can clear any sticky IPV6_PKTINFO option by
3085		 * doing a "regular" setsockopt with ipi6_addr being
3086		 * in6addr_any and ipi6_ifindex being zero.
3087		 * [RFC 3542, Section 6]
3088		 */
3089		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3090		    pktinfo->ipi6_ifindex == 0 &&
3091		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3092			ip6_clearpktopts(opt, optname);
3093			break;
3094		}
3095
3096		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3097		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3098			return (EINVAL);
3099		}
3100
3101		/* validate the interface index if specified. */
3102		if (pktinfo->ipi6_ifindex > if_index ||
3103		    pktinfo->ipi6_ifindex < 0) {
3104			 return (ENXIO);
3105		}
3106		if (pktinfo->ipi6_ifindex) {
3107			ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
3108			if (ifp == NULL)
3109				return (ENXIO);
3110		}
3111
3112		/*
3113		 * We store the address anyway, and let in6_selectsrc()
3114		 * validate the specified address.  This is because ipi6_addr
3115		 * may not have enough information about its scope zone, and
3116		 * we may need additional information (such as outgoing
3117		 * interface or the scope zone of a destination address) to
3118		 * disambiguate the scope.
3119		 * XXX: the delay of the validation may confuse the
3120		 * application when it is used as a sticky option.
3121		 */
3122		if (sticky) {
3123			if (opt->ip6po_pktinfo == NULL) {
3124				opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
3125				    M_IP6OPT, M_WAITOK);
3126			}
3127			bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
3128		} else
3129			opt->ip6po_pktinfo = pktinfo;
3130		break;
3131	}
3132
3133	case IPV6_2292HOPLIMIT:
3134	case IPV6_HOPLIMIT:
3135	{
3136		int *hlimp;
3137
3138		/*
3139		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3140		 * to simplify the ordering among hoplimit options.
3141		 */
3142		if (optname == IPV6_HOPLIMIT && sticky)
3143			return (ENOPROTOOPT);
3144
3145		if (len != sizeof(int))
3146			return (EINVAL);
3147		hlimp = (int *)buf;
3148		if (*hlimp < -1 || *hlimp > 255)
3149			return (EINVAL);
3150
3151		opt->ip6po_hlim = *hlimp;
3152		break;
3153	}
3154
3155	case IPV6_TCLASS:
3156	{
3157		int tclass;
3158
3159		if (len != sizeof(int))
3160			return (EINVAL);
3161		tclass = *(int *)buf;
3162		if (tclass < -1 || tclass > 255)
3163			return (EINVAL);
3164
3165		opt->ip6po_tclass = tclass;
3166		break;
3167	}
3168
3169	case IPV6_2292NEXTHOP:
3170	case IPV6_NEXTHOP:
3171		if (!priv)
3172			return (EPERM);
3173
3174		if (len == 0) {	/* just remove the option */
3175			ip6_clearpktopts(opt, IPV6_NEXTHOP);
3176			break;
3177		}
3178
3179		/* check if cmsg_len is large enough for sa_len */
3180		if (len < sizeof(struct sockaddr) || len < *buf)
3181			return (EINVAL);
3182
3183		switch (((struct sockaddr *)buf)->sa_family) {
3184		case AF_INET6:
3185		{
3186			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3187#if 0
3188			int error;
3189#endif
3190
3191			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3192				return (EINVAL);
3193
3194			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3195			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3196				return (EINVAL);
3197			}
3198#if 0
3199			if ((error = scope6_check_id(sa6, ip6_use_defzone))
3200			    != 0) {
3201				return (error);
3202			}
3203#endif
3204			sa6->sin6_scope_id = 0; /* XXX */
3205			break;
3206		}
3207		case AF_LINK:	/* should eventually be supported */
3208		default:
3209			return (EAFNOSUPPORT);
3210		}
3211
3212		/* turn off the previous option, then set the new option. */
3213		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3214		if (sticky) {
3215			opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_WAITOK);
3216			bcopy(buf, opt->ip6po_nexthop, *buf);
3217		} else
3218			opt->ip6po_nexthop = (struct sockaddr *)buf;
3219		break;
3220
3221	case IPV6_2292HOPOPTS:
3222	case IPV6_HOPOPTS:
3223	{
3224		struct ip6_hbh *hbh;
3225		int hbhlen;
3226
3227		/*
3228		 * XXX: We don't allow a non-privileged user to set ANY HbH
3229		 * options, since per-option restriction has too much
3230		 * overhead.
3231		 */
3232		if (!priv)
3233			return (EPERM);
3234
3235		if (len == 0) {
3236			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3237			break;	/* just remove the option */
3238		}
3239
3240		/* message length validation */
3241		if (len < sizeof(struct ip6_hbh))
3242			return (EINVAL);
3243		hbh = (struct ip6_hbh *)buf;
3244		hbhlen = (hbh->ip6h_len + 1) << 3;
3245		if (len != hbhlen)
3246			return (EINVAL);
3247
3248		/* turn off the previous option, then set the new option. */
3249		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3250		if (sticky) {
3251			opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_WAITOK);
3252			bcopy(hbh, opt->ip6po_hbh, hbhlen);
3253		} else
3254			opt->ip6po_hbh = hbh;
3255
3256		break;
3257	}
3258
3259	case IPV6_2292DSTOPTS:
3260	case IPV6_DSTOPTS:
3261	case IPV6_RTHDRDSTOPTS:
3262	{
3263		struct ip6_dest *dest, **newdest = NULL;
3264		int destlen;
3265
3266		if (!priv)	/* XXX: see the comment for IPV6_HOPOPTS */
3267			return (EPERM);
3268
3269		if (len == 0) {
3270			ip6_clearpktopts(opt, optname);
3271			break;	/* just remove the option */
3272		}
3273
3274		/* message length validation */
3275		if (len < sizeof(struct ip6_dest))
3276			return (EINVAL);
3277		dest = (struct ip6_dest *)buf;
3278		destlen = (dest->ip6d_len + 1) << 3;
3279		if (len != destlen)
3280			return (EINVAL);
3281
3282		/*
3283		 * Determine the position that the destination options header
3284		 * should be inserted; before or after the routing header.
3285		 */
3286		switch (optname) {
3287		case IPV6_2292DSTOPTS:
3288			/*
3289			 * The old advacned API is ambiguous on this point.
3290			 * Our approach is to determine the position based
3291			 * according to the existence of a routing header.
3292			 * Note, however, that this depends on the order of the
3293			 * extension headers in the ancillary data; the 1st
3294			 * part of the destination options header must appear
3295			 * before the routing header in the ancillary data,
3296			 * too.
3297			 * RFC2292bis solved the ambiguity by introducing
3298			 * separate ancillary data or option types.
3299			 */
3300			if (opt->ip6po_rthdr == NULL)
3301				newdest = &opt->ip6po_dest1;
3302			else
3303				newdest = &opt->ip6po_dest2;
3304			break;
3305		case IPV6_RTHDRDSTOPTS:
3306			newdest = &opt->ip6po_dest1;
3307			break;
3308		case IPV6_DSTOPTS:
3309			newdest = &opt->ip6po_dest2;
3310			break;
3311		}
3312
3313		/* turn off the previous option, then set the new option. */
3314		ip6_clearpktopts(opt, optname);
3315		if (sticky) {
3316			*newdest = malloc(destlen, M_IP6OPT, M_WAITOK);
3317			bcopy(dest, *newdest, destlen);
3318		} else
3319			*newdest = dest;
3320
3321		break;
3322	}
3323
3324	case IPV6_2292RTHDR:
3325	case IPV6_RTHDR:
3326	{
3327		struct ip6_rthdr *rth;
3328		int rthlen;
3329
3330		if (len == 0) {
3331			ip6_clearpktopts(opt, IPV6_RTHDR);
3332			break;	/* just remove the option */
3333		}
3334
3335		/* message length validation */
3336		if (len < sizeof(struct ip6_rthdr))
3337			return (EINVAL);
3338		rth = (struct ip6_rthdr *)buf;
3339		rthlen = (rth->ip6r_len + 1) << 3;
3340		if (len != rthlen)
3341			return (EINVAL);
3342
3343		switch (rth->ip6r_type) {
3344		case IPV6_RTHDR_TYPE_0:
3345			if (rth->ip6r_len == 0)	/* must contain one addr */
3346				return (EINVAL);
3347			if (rth->ip6r_len % 2) /* length must be even */
3348				return (EINVAL);
3349			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3350				return (EINVAL);
3351			break;
3352		default:
3353			return (EINVAL);	/* not supported */
3354		}
3355
3356		/* turn off the previous option */
3357		ip6_clearpktopts(opt, IPV6_RTHDR);
3358		if (sticky) {
3359			opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_WAITOK);
3360			bcopy(rth, opt->ip6po_rthdr, rthlen);
3361		} else
3362			opt->ip6po_rthdr = rth;
3363
3364		break;
3365	}
3366
3367	case IPV6_USE_MIN_MTU:
3368		if (len != sizeof(int))
3369			return (EINVAL);
3370		minmtupolicy = *(int *)buf;
3371		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3372		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3373		    minmtupolicy != IP6PO_MINMTU_ALL) {
3374			return (EINVAL);
3375		}
3376		opt->ip6po_minmtu = minmtupolicy;
3377		break;
3378
3379	case IPV6_DONTFRAG:
3380		if (len != sizeof(int))
3381			return (EINVAL);
3382
3383		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3384			/*
3385			 * we ignore this option for TCP sockets.
3386			 * (rfc2292bis leaves this case unspecified.)
3387			 */
3388			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3389		} else
3390			opt->ip6po_flags |= IP6PO_DONTFRAG;
3391		break;
3392
3393	case IPV6_PREFER_TEMPADDR:
3394		if (len != sizeof(int))
3395			return (EINVAL);
3396		preftemp = *(int *)buf;
3397		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3398		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3399		    preftemp != IP6PO_TEMPADDR_PREFER) {
3400			return (EINVAL);
3401		}
3402		opt->ip6po_prefer_tempaddr = preftemp;
3403		break;
3404
3405	default:
3406		return (ENOPROTOOPT);
3407	} /* end of switch */
3408
3409	return (0);
3410}
3411
3412/*
3413 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3414 * packet to the input queue of a specified interface.  Note that this
3415 * calls the output routine of the loopback "driver", but with an interface
3416 * pointer that might NOT be &loif -- easier than replicating that code here.
3417 */
3418void
3419ip6_mloopback(ifp, m, dst)
3420	struct ifnet *ifp;
3421	struct mbuf *m;
3422	struct sockaddr_in6 *dst;
3423{
3424	struct mbuf *copym;
3425	struct ip6_hdr *ip6;
3426
3427	copym = m_copy(m, 0, M_COPYALL);
3428	if (copym == NULL)
3429		return;
3430
3431	/*
3432	 * Make sure to deep-copy IPv6 header portion in case the data
3433	 * is in an mbuf cluster, so that we can safely override the IPv6
3434	 * header portion later.
3435	 */
3436	if ((copym->m_flags & M_EXT) != 0 ||
3437	    copym->m_len < sizeof(struct ip6_hdr)) {
3438		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3439		if (copym == NULL)
3440			return;
3441	}
3442
3443#ifdef DIAGNOSTIC
3444	if (copym->m_len < sizeof(*ip6)) {
3445		m_freem(copym);
3446		return;
3447	}
3448#endif
3449
3450	ip6 = mtod(copym, struct ip6_hdr *);
3451	/*
3452	 * clear embedded scope identifiers if necessary.
3453	 * in6_clearscope will touch the addresses only when necessary.
3454	 */
3455	in6_clearscope(&ip6->ip6_src);
3456	in6_clearscope(&ip6->ip6_dst);
3457
3458	(void)if_simloop(ifp, copym, dst->sin6_family, 0);
3459}
3460
3461/*
3462 * Chop IPv6 header off from the payload.
3463 */
3464static int
3465ip6_splithdr(m, exthdrs)
3466	struct mbuf *m;
3467	struct ip6_exthdrs *exthdrs;
3468{
3469	struct mbuf *mh;
3470	struct ip6_hdr *ip6;
3471
3472	ip6 = mtod(m, struct ip6_hdr *);
3473	if (m->m_len > sizeof(*ip6)) {
3474		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3475		if (mh == 0) {
3476			m_freem(m);
3477			return ENOBUFS;
3478		}
3479		M_MOVE_PKTHDR(mh, m);
3480		MH_ALIGN(mh, sizeof(*ip6));
3481		m->m_len -= sizeof(*ip6);
3482		m->m_data += sizeof(*ip6);
3483		mh->m_next = m;
3484		m = mh;
3485		m->m_len = sizeof(*ip6);
3486		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3487	}
3488	exthdrs->ip6e_ip6 = m;
3489	return 0;
3490}
3491
3492/*
3493 * Compute IPv6 extension header length.
3494 */
3495int
3496ip6_optlen(in6p)
3497	struct in6pcb *in6p;
3498{
3499	int len;
3500
3501	if (!in6p->in6p_outputopts)
3502		return 0;
3503
3504	len = 0;
3505#define elen(x) \
3506    (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3507
3508	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3509	if (in6p->in6p_outputopts->ip6po_rthdr)
3510		/* dest1 is valid with rthdr only */
3511		len += elen(in6p->in6p_outputopts->ip6po_dest1);
3512	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3513	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3514	return len;
3515#undef elen
3516}
3517