ip6_output.c revision 129196
1/*	$FreeBSD: head/sys/netinet6/ip6_output.c 129196 2004-05-14 03:57:17Z wpaul $	*/
2/*	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1988, 1990, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 4. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
62 */
63
64#include "opt_ip6fw.h"
65#include "opt_inet.h"
66#include "opt_inet6.h"
67#include "opt_ipsec.h"
68#include "opt_pfil_hooks.h"
69#include "opt_random_ip_id.h"
70
71#include <sys/param.h>
72#include <sys/malloc.h>
73#include <sys/mbuf.h>
74#include <sys/proc.h>
75#include <sys/errno.h>
76#include <sys/protosw.h>
77#include <sys/socket.h>
78#include <sys/socketvar.h>
79#include <sys/systm.h>
80#include <sys/kernel.h>
81
82#include <net/if.h>
83#include <net/route.h>
84#ifdef PFIL_HOOKS
85#include <net/pfil.h>
86#endif
87
88#include <netinet/in.h>
89#include <netinet/in_var.h>
90#include <netinet6/in6_var.h>
91#include <netinet/ip6.h>
92#include <netinet/icmp6.h>
93#include <netinet6/ip6_var.h>
94#include <netinet/in_pcb.h>
95#include <netinet/tcp_var.h>
96#include <netinet6/nd6.h>
97
98#ifdef IPSEC
99#include <netinet6/ipsec.h>
100#ifdef INET6
101#include <netinet6/ipsec6.h>
102#endif
103#include <netkey/key.h>
104#endif /* IPSEC */
105
106#ifdef FAST_IPSEC
107#include <netipsec/ipsec.h>
108#include <netipsec/ipsec6.h>
109#include <netipsec/key.h>
110#endif /* FAST_IPSEC */
111
112#include <netinet6/ip6_fw.h>
113
114#include <net/net_osdep.h>
115
116#include <netinet6/ip6protosw.h>
117
/*
 * File-local malloc type M_IPMOPTS, registered under the short name
 * "ip6_moptions" with the description "internet multicast options".
 * NOTE(review): presumably the type tag used when allocating
 * struct ip6_moptions in ip6_setmoptions() -- confirm against the
 * allocation sites, which are outside this part of the file.
 */
117static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
119
/*
 * Working set of mbufs that ip6_output() uses while assembling the header
 * chain of an outgoing packet.  ip6_output() zeroes the whole structure
 * with bzero() before filling it in, so a NULL member means "this header
 * is not present".
 */
120struct ip6_exthdrs {
121	struct mbuf *ip6e_ip6;	/* IPv6 header; ip6_output() reads it back after ip6_splithdr() */
122	struct mbuf *ip6e_hbh;	/* hop-by-hop options header, copied from ip6po_hbh */
123	struct mbuf *ip6e_dest1;	/* 1st destination options header (ip6po_dest1); only built when a routing header is given */
124	struct mbuf *ip6e_rthdr;	/* routing header, copied from ip6po_rthdr */
125	struct mbuf *ip6e_dest2;	/* 2nd destination options header (ip6po_dest2); linked in right after the IPv6 header first */
126};
127
/*
 * Forward declarations of the file-local (static) helper routines.
 * All of them return int and are declared through the __P(()) prototype
 * wrapper macro.
 */
/* Socket/packet option handling. */
128static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
129			   int, int));
130static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
131	struct socket *, struct sockopt *));
132static int ip6_getpcbopt __P((struct ip6_pktopts *, int, struct sockopt *));
133static int ip6_setpktoption __P((int, u_char *, int, struct ip6_pktopts *, int,
134	int, int, int));
135
/* Multicast option set/get. */
136static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
137static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
/*
 * Header construction helpers.  ip6_output() calls ip6_copyexthdr() (via
 * its MAKE_EXTHDR macro) to copy each user-supplied extension header into
 * an mbuf, ip6_splithdr() to separate the IPv6 header from the payload,
 * ip6_insert_jumboopt() when the payload length exceeds IPV6_MAXPACKET,
 * and ip6_getpmtu() to determine the path MTU before transmission.
 * NOTE(review): ip6_insertfraghdr() is presumably used by the
 * fragmentation path -- confirm against the rest of ip6_output().
 */
138static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));
139static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
140	struct ip6_frag **));
141static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));
142static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));
143static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
144	struct ifnet *, struct in6_addr *, u_long *, int *));
145
146
147/*
148 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
149 * header (with pri, len, nxt, hlim, src, dst).
150 * This function may modify ver and hlim only.
151 * The mbuf chain containing the packet will be freed.
152 * The mbuf opt, if present, will not be freed.
153 *
154 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
155 * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
156 * which is rt_rmx.rmx_mtu.
157 */
158int
159ip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
160	struct mbuf *m0;
161	struct ip6_pktopts *opt;
162	struct route_in6 *ro;
163	int flags;
164	struct ip6_moptions *im6o;
165	struct ifnet **ifpp;		/* XXX: just for statistics */
166	struct inpcb *inp;
167{
168	struct ip6_hdr *ip6, *mhip6;
169	struct ifnet *ifp, *origifp;
170	struct mbuf *m = m0;
171	int hlen, tlen, len, off;
172	struct route_in6 ip6route;
173	struct sockaddr_in6 *dst;
174	int error = 0;
175	struct in6_ifaddr *ia = NULL;
176	u_long mtu;
177	int alwaysfrag, dontfrag;
178	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
179	struct ip6_exthdrs exthdrs;
180	struct in6_addr finaldst;
181	struct route_in6 *ro_pmtu = NULL;
182	int hdrsplit = 0;
183	int needipsec = 0;
184#if defined(IPSEC) || defined(FAST_IPSEC)
185	int needipsectun = 0;
186	struct secpolicy *sp = NULL;
187#endif /*IPSEC || FAST_IPSEC*/
188
189	ip6 = mtod(m, struct ip6_hdr *);
190	finaldst = ip6->ip6_dst;
191
192#define MAKE_EXTHDR(hp, mp)						\
193    do {								\
194	if (hp) {							\
195		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
196		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
197		    ((eh)->ip6e_len + 1) << 3);				\
198		if (error)						\
199			goto freehdrs;					\
200	}								\
201    } while (/*CONSTCOND*/ 0)
202
203	bzero(&exthdrs, sizeof(exthdrs));
204
205	if (opt) {
206		/* Hop-by-Hop options header */
207		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
208		/* Destination options header(1st part) */
209		if (opt->ip6po_rthdr) {
210			/*
211			 * Destination options header(1st part)
212			 * This only makes sense with a routing header.
213			 * See Section 9.2 of RFC 3542.
214			 * Disabling this part just for MIP6 convenience is
215			 * a bad idea.  We need to think carefully about a
216			 * way to make the advanced API coexist with MIP6
217			 * options, which might automatically be inserted in
218			 * the kernel.
219			 */
220			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
221		}
222		/* Routing header */
223		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
224		/* Destination options header(2nd part) */
225		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
226	}
227
228#ifdef IPSEC
229	/* get a security policy for this packet */
230	if (inp == NULL)
231		sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
232	else
233		sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
234
235	if (sp == NULL) {
236		ipsec6stat.out_inval++;
237		goto freehdrs;
238	}
239
240	error = 0;
241
242	/* check policy */
243	switch (sp->policy) {
244	case IPSEC_POLICY_DISCARD:
245		/*
246		 * This packet is just discarded.
247		 */
248		ipsec6stat.out_polvio++;
249		goto freehdrs;
250
251	case IPSEC_POLICY_BYPASS:
252	case IPSEC_POLICY_NONE:
253		/* no need to do IPsec. */
254		needipsec = 0;
255		break;
256
257	case IPSEC_POLICY_IPSEC:
258		if (sp->req == NULL) {
259			/* acquire a policy */
260			error = key_spdacquire(sp);
261			goto freehdrs;
262		}
263		needipsec = 1;
264		break;
265
266	case IPSEC_POLICY_ENTRUST:
267	default:
268		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
269	}
270#endif /* IPSEC */
271#ifdef FAST_IPSEC
272	/* get a security policy for this packet */
273	if (inp == NULL)
274		sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
275	else
276		sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
277
278	if (sp == NULL) {
279		newipsecstat.ips_out_inval++;
280		goto freehdrs;
281	}
282
283	error = 0;
284
285	/* check policy */
286	switch (sp->policy) {
287	case IPSEC_POLICY_DISCARD:
288		/*
289		 * This packet is just discarded.
290		 */
291		newipsecstat.ips_out_polvio++;
292		goto freehdrs;
293
294	case IPSEC_POLICY_BYPASS:
295	case IPSEC_POLICY_NONE:
296		/* no need to do IPsec. */
297		needipsec = 0;
298		break;
299
300	case IPSEC_POLICY_IPSEC:
301		if (sp->req == NULL) {
302			/* acquire a policy */
303			error = key_spdacquire(sp);
304			goto freehdrs;
305		}
306		needipsec = 1;
307		break;
308
309	case IPSEC_POLICY_ENTRUST:
310	default:
311		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
312	}
313#endif /* FAST_IPSEC */
314
315	/*
316	 * Calculate the total length of the extension header chain.
317	 * Keep the length of the unfragmentable part for fragmentation.
318	 */
319	optlen = 0;
320	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
321	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
322	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
323	unfragpartlen = optlen + sizeof(struct ip6_hdr);
324	/* NOTE: we don't add AH/ESP length here. do that later. */
325	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
326
327	/*
328	 * If we need IPsec, or there is at least one extension header,
329	 * separate IP6 header from the payload.
330	 */
331	if ((needipsec || optlen) && !hdrsplit) {
332		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
333			m = NULL;
334			goto freehdrs;
335		}
336		m = exthdrs.ip6e_ip6;
337		hdrsplit++;
338	}
339
340	/* adjust pointer */
341	ip6 = mtod(m, struct ip6_hdr *);
342
343	/* adjust mbuf packet header length */
344	m->m_pkthdr.len += optlen;
345	plen = m->m_pkthdr.len - sizeof(*ip6);
346
347	/* If this is a jumbo payload, insert a jumbo payload option. */
348	if (plen > IPV6_MAXPACKET) {
349		if (!hdrsplit) {
350			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
351				m = NULL;
352				goto freehdrs;
353			}
354			m = exthdrs.ip6e_ip6;
355			hdrsplit++;
356		}
357		/* adjust pointer */
358		ip6 = mtod(m, struct ip6_hdr *);
359		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
360			goto freehdrs;
361		ip6->ip6_plen = 0;
362	} else
363		ip6->ip6_plen = htons(plen);
364
365	/*
366	 * Concatenate headers and fill in next header fields.
367	 * Here we have, on "m"
368	 *	IPv6 payload
369	 * and we insert headers accordingly.  Finally, we should be getting:
370	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
371	 *
372	 * during the header composing process, "m" points to IPv6 header.
373	 * "mprev" points to an extension header prior to esp.
374	 */
375	{
376		u_char *nexthdrp = &ip6->ip6_nxt;
377		struct mbuf *mprev = m;
378
379		/*
380		 * we treat dest2 specially.  this makes IPsec processing
381		 * much easier.  the goal here is to make mprev point the
382		 * mbuf prior to dest2.
383		 *
384		 * result: IPv6 dest2 payload
385		 * m and mprev will point to IPv6 header.
386		 */
387		if (exthdrs.ip6e_dest2) {
388			if (!hdrsplit)
389				panic("assumption failed: hdr not split");
390			exthdrs.ip6e_dest2->m_next = m->m_next;
391			m->m_next = exthdrs.ip6e_dest2;
392			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
393			ip6->ip6_nxt = IPPROTO_DSTOPTS;
394		}
395
396#define MAKE_CHAIN(m, mp, p, i)\
397    do {\
398	if (m) {\
399		if (!hdrsplit) \
400			panic("assumption failed: hdr not split"); \
401		*mtod((m), u_char *) = *(p);\
402		*(p) = (i);\
403		p = mtod((m), u_char *);\
404		(m)->m_next = (mp)->m_next;\
405		(mp)->m_next = (m);\
406		(mp) = (m);\
407	}\
408    } while (/*CONSTCOND*/ 0)
409		/*
410		 * result: IPv6 hbh dest1 rthdr dest2 payload
411		 * m will point to IPv6 header.  mprev will point to the
412		 * extension header prior to dest2 (rthdr in the above case).
413		 */
414		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
415		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
416		    IPPROTO_DSTOPTS);
417		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
418		    IPPROTO_ROUTING);
419
420#if defined(IPSEC) || defined(FAST_IPSEC)
421		if (!needipsec)
422			goto skip_ipsec2;
423
424		/*
425		 * pointers after IPsec headers are not valid any more.
426		 * other pointers need a great care too.
427		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
428		 */
429		exthdrs.ip6e_dest2 = NULL;
430
431	    {
432		struct ip6_rthdr *rh = NULL;
433		int segleft_org = 0;
434		struct ipsec_output_state state;
435
436		if (exthdrs.ip6e_rthdr) {
437			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
438			segleft_org = rh->ip6r_segleft;
439			rh->ip6r_segleft = 0;
440		}
441
442		bzero(&state, sizeof(state));
443		state.m = m;
444		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
445		    &needipsectun);
446		m = state.m;
447		if (error) {
448			/* mbuf is already reclaimed in ipsec6_output_trans. */
449			m = NULL;
450			switch (error) {
451			case EHOSTUNREACH:
452			case ENETUNREACH:
453			case EMSGSIZE:
454			case ENOBUFS:
455			case ENOMEM:
456				break;
457			default:
458				printf("ip6_output (ipsec): error code %d\n", error);
459				/* FALLTHROUGH */
460			case ENOENT:
461				/* don't show these error codes to the user */
462				error = 0;
463				break;
464			}
465			goto bad;
466		}
467		if (exthdrs.ip6e_rthdr) {
468			/* ah6_output doesn't modify mbuf chain */
469			rh->ip6r_segleft = segleft_org;
470		}
471	    }
472skip_ipsec2:;
473#endif
474	}
475
476	/*
477	 * If there is a routing header, replace the destination address field
478	 * with the first hop of the routing header.
479	 */
480	if (exthdrs.ip6e_rthdr) {
481		struct ip6_rthdr *rh =
482			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
483						  struct ip6_rthdr *));
484		struct ip6_rthdr0 *rh0;
485		struct in6_addr *addrs;
486
487		switch (rh->ip6r_type) {
488		case IPV6_RTHDR_TYPE_0:
489			 rh0 = (struct ip6_rthdr0 *)rh;
490			 addrs = (struct in6_addr *)(rh0 + 1);
491
492			 ip6->ip6_dst = *addrs;
493			 bcopy((caddr_t)(addrs + 1), (caddr_t)addrs,
494			       sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1)
495				 );
496			 *(addrs + rh0->ip6r0_segleft - 1) = finaldst;
497			 break;
498		default:	/* is it possible? */
499			 error = EINVAL;
500			 goto bad;
501		}
502	}
503
504	/* Source address validation */
505	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
506	    (flags & IPV6_DADOUTPUT) == 0) {
507		error = EOPNOTSUPP;
508		ip6stat.ip6s_badscope++;
509		goto bad;
510	}
511	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
512		error = EOPNOTSUPP;
513		ip6stat.ip6s_badscope++;
514		goto bad;
515	}
516
517	ip6stat.ip6s_localout++;
518
519	/*
520	 * Route packet.
521	 */
522	if (ro == 0) {
523		ro = &ip6route;
524		bzero((caddr_t)ro, sizeof(*ro));
525	}
526	ro_pmtu = ro;
527	if (opt && opt->ip6po_rthdr)
528		ro = &opt->ip6po_route;
529	dst = (struct sockaddr_in6 *)&ro->ro_dst;
530
531	/*
532	 * If there is a cached route,
533	 * check that it is to the same destination
534	 * and is still up. If not, free it and try again.
535	 */
536	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
537			 dst->sin6_family != AF_INET6 ||
538			 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
539		RTFREE(ro->ro_rt);
540		ro->ro_rt = (struct rtentry *)0;
541	}
542	if (ro->ro_rt == 0) {
543		bzero(dst, sizeof(*dst));
544		dst->sin6_family = AF_INET6;
545		dst->sin6_len = sizeof(struct sockaddr_in6);
546		dst->sin6_addr = ip6->ip6_dst;
547	}
548
549 	/*
550	 * if specified, try to fill in the traffic class field.
551	 * do not override if a non-zero value is already set.
552	 * we check the diffserv field and the ecn field separately.
553	 */
554	if (opt && opt->ip6po_tclass >= 0) {
555		int mask = 0;
556
557		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
558			mask |= 0xfc;
559		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
560			mask |= 0x03;
561		if (mask != 0)
562			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
563	}
564
565	/* fill in or override the hop limit field, if necessary. */
566	if (opt && opt->ip6po_hlim != -1)
567		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
568	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
569		if (im6o != NULL)
570			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
571		else
572			ip6->ip6_hlim = ip6_defmcasthlim;
573	}
574
575#if defined(IPSEC) || defined(FAST_IPSEC)
576	if (needipsec && needipsectun) {
577		struct ipsec_output_state state;
578
579		/*
580		 * All the extension headers will become inaccessible
581		 * (since they can be encrypted).
582		 * Don't panic, we need no more updates to extension headers
583		 * on inner IPv6 packet (since they are now encapsulated).
584		 *
585		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
586		 */
587		bzero(&exthdrs, sizeof(exthdrs));
588		exthdrs.ip6e_ip6 = m;
589
590		bzero(&state, sizeof(state));
591		state.m = m;
592		state.ro = (struct route *)ro;
593		state.dst = (struct sockaddr *)dst;
594
595		error = ipsec6_output_tunnel(&state, sp, flags);
596
597		m = state.m;
598		ro = (struct route_in6 *)state.ro;
599		dst = (struct sockaddr_in6 *)state.dst;
600		if (error) {
601			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
602			m0 = m = NULL;
603			m = NULL;
604			switch (error) {
605			case EHOSTUNREACH:
606			case ENETUNREACH:
607			case EMSGSIZE:
608			case ENOBUFS:
609			case ENOMEM:
610				break;
611			default:
612				printf("ip6_output (ipsec): error code %d\n", error);
613				/* FALLTHROUGH */
614			case ENOENT:
615				/* don't show these error codes to the user */
616				error = 0;
617				break;
618			}
619			goto bad;
620		}
621
622		exthdrs.ip6e_ip6 = m;
623	}
624#endif /* IPSEC */
625
626	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
627		/* Unicast */
628
629#define ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
630#define sin6tosa(sin6)	((struct sockaddr *)(sin6))
631		/* xxx
632		 * interface selection comes here
633		 * if an interface is specified from an upper layer,
634		 * ifp must point it.
635		 */
636		if (ro->ro_rt == 0) {
637			/*
638			 * non-bsdi always clone routes, if parent is
639			 * PRF_CLONING.
640			 */
641			rtalloc((struct route *)ro);
642		}
643		if (ro->ro_rt == 0) {
644			ip6stat.ip6s_noroute++;
645			error = EHOSTUNREACH;
646			/* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
647			goto bad;
648		}
649		/* XXX rt not locked */
650		ia = ifatoia6(ro->ro_rt->rt_ifa);
651		ifp = ro->ro_rt->rt_ifp;
652		ro->ro_rt->rt_rmx.rmx_pksent++;
653		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
654			dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
655		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
656
657		in6_ifstat_inc(ifp, ifs6_out_request);
658
659		/*
660		 * Check if the outgoing interface conflicts with
661		 * the interface specified by ifi6_ifindex (if specified).
662		 * Note that loopback interface is always okay.
663		 * (this may happen when we are sending a packet to one of
664		 *  our own addresses.)
665		 */
666		if (opt && opt->ip6po_pktinfo
667		 && opt->ip6po_pktinfo->ipi6_ifindex) {
668			if (!(ifp->if_flags & IFF_LOOPBACK)
669			 && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
670				ip6stat.ip6s_noroute++;
671				in6_ifstat_inc(ifp, ifs6_out_discard);
672				error = EHOSTUNREACH;
673				goto bad;
674			}
675		}
676
677		if (opt && opt->ip6po_hlim != -1)
678			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
679	} else {
680		/* Multicast */
681		struct	in6_multi *in6m;
682
683		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
684
685		/*
686		 * See if the caller provided any multicast options
687		 */
688		ifp = NULL;
689		if (im6o != NULL) {
690			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
691			if (im6o->im6o_multicast_ifp != NULL)
692				ifp = im6o->im6o_multicast_ifp;
693		} else
694			ip6->ip6_hlim = ip6_defmcasthlim;
695
696		/*
697		 * See if the caller provided the outgoing interface
698		 * as an ancillary data.
699		 * Boundary check for ifindex is assumed to be already done.
700		 */
701		if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex)
702			ifp = ifnet_byindex(opt->ip6po_pktinfo->ipi6_ifindex);
703
704		/*
705		 * If the destination is a node-local scope multicast,
706		 * the packet should be loop-backed only.
707		 */
708		if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
709			/*
710			 * If the outgoing interface is already specified,
711			 * it should be a loopback interface.
712			 */
713			if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
714				ip6stat.ip6s_badscope++;
715				error = ENETUNREACH; /* XXX: better error? */
716				/* XXX correct ifp? */
717				in6_ifstat_inc(ifp, ifs6_out_discard);
718				goto bad;
719			} else {
720				ifp = &loif[0];
721			}
722		}
723
724		if (opt && opt->ip6po_hlim != -1)
725			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
726
727		/*
728		 * If caller did not provide an interface lookup a
729		 * default in the routing table.  This is either a
730		 * default for the specified group (i.e. a host
731		 * route), or a multicast default (a route for the
732		 * ``net'' ff00::/8).
733		 */
734		if (ifp == NULL) {
735			if (ro->ro_rt == 0)
736				ro->ro_rt = rtalloc1((struct sockaddr *)
737						&ro->ro_dst, 0, 0UL);
738			else
739				RT_LOCK(ro->ro_rt);
740			if (ro->ro_rt == 0) {
741				ip6stat.ip6s_noroute++;
742				error = EHOSTUNREACH;
743				/* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
744				goto bad;
745			}
746			ia = ifatoia6(ro->ro_rt->rt_ifa);
747			ifp = ro->ro_rt->rt_ifp;
748			ro->ro_rt->rt_rmx.rmx_pksent++;
749			RT_UNLOCK(ro->ro_rt);
750		}
751
752		if ((flags & IPV6_FORWARDING) == 0)
753			in6_ifstat_inc(ifp, ifs6_out_request);
754		in6_ifstat_inc(ifp, ifs6_out_mcast);
755
756		/*
757		 * Confirm that the outgoing interface supports multicast.
758		 */
759		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
760			ip6stat.ip6s_noroute++;
761			in6_ifstat_inc(ifp, ifs6_out_discard);
762			error = ENETUNREACH;
763			goto bad;
764		}
765		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
766		if (in6m != NULL &&
767		   (im6o == NULL || im6o->im6o_multicast_loop)) {
768			/*
769			 * If we belong to the destination multicast group
770			 * on the outgoing interface, and the caller did not
771			 * forbid loopback, loop back a copy.
772			 */
773			ip6_mloopback(ifp, m, dst);
774		} else {
775			/*
776			 * If we are acting as a multicast router, perform
777			 * multicast forwarding as if the packet had just
778			 * arrived on the interface to which we are about
779			 * to send.  The multicast forwarding function
780			 * recursively calls this function, using the
781			 * IPV6_FORWARDING flag to prevent infinite recursion.
782			 *
783			 * Multicasts that are looped back by ip6_mloopback(),
784			 * above, will be forwarded by the ip6_input() routine,
785			 * if necessary.
786			 */
787			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
788				if (ip6_mforward(ip6, ifp, m) != 0) {
789					m_freem(m);
790					goto done;
791				}
792			}
793		}
794		/*
795		 * Multicasts with a hoplimit of zero may be looped back,
796		 * above, but must not be transmitted on a network.
797		 * Also, multicasts addressed to the loopback interface
798		 * are not sent -- the above call to ip6_mloopback() will
799		 * loop back a copy if this host actually belongs to the
800		 * destination group on the loopback interface.
801		 */
802		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
803		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
804			m_freem(m);
805			goto done;
806		}
807	}
808
809	/*
810	 * Fill the outgoing interface to tell the upper layer
811	 * to increment per-interface statistics.
812	 */
813	if (ifpp)
814		*ifpp = ifp;
815
816	/* Determine path MTU. */
817	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
818	    &alwaysfrag)) != 0)
819		goto bad;
820
821	/*
822	 * The caller of this function may specify to use the minimum MTU
823	 * in some cases.
824	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
825	 * setting.  The logic is a bit complicated; by default, unicast
826	 * packets will follow path MTU while multicast packets will be sent at
827	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
828	 * including unicast ones will be sent at the minimum MTU.  Multicast
829	 * packets will always be sent at the minimum MTU unless
830	 * IP6PO_MINMTU_DISABLE is explicitly specified.
831	 * See RFC 3542 for more details.
832	 */
833	if (mtu > IPV6_MMTU) {
834		if ((flags & IPV6_MINMTU))
835			mtu = IPV6_MMTU;
836		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
837			mtu = IPV6_MMTU;
838		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
839			 (opt == NULL ||
840			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
841			mtu = IPV6_MMTU;
842		}
843	}
844
845	/* Fake scoped addresses */
846	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
847		/*
848		 * If source or destination address is a scoped address, and
849		 * the packet is going to be sent to a loopback interface,
850		 * we should keep the original interface.
851		 */
852
853		/*
854		 * XXX: this is a very experimental and temporary solution.
855		 * We eventually have sockaddr_in6 and use the sin6_scope_id
856		 * field of the structure here.
857		 * We rely on the consistency between two scope zone ids
858		 * of source and destination, which should already be assured.
859		 * Larger scopes than link will be supported in the future.
860		 */
861		origifp = NULL;
862		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
863			origifp = ifnet_byindex(ntohs(ip6->ip6_src.s6_addr16[1]));
864		else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
865			origifp = ifnet_byindex(ntohs(ip6->ip6_dst.s6_addr16[1]));
866		/*
867		 * XXX: origifp can be NULL even in those two cases above.
868		 * For example, if we remove the (only) link-local address
869		 * from the loopback interface, and try to send a link-local
870		 * address without link-id information.  Then the source
871		 * address is ::1, and the destination address is the
872		 * link-local address with its s6_addr16[1] being zero.
873		 * What is worse, if the packet goes to the loopback interface
874		 * by a default rejected route, the null pointer would be
875		 * passed to looutput, and the kernel would hang.
876		 * The following last resort would prevent such disaster.
877		 */
878		if (origifp == NULL)
879			origifp = ifp;
880	}
881	else
882		origifp = ifp;
883	/*
884	 * clear embedded scope identifiers if necessary.
885	 * in6_clearscope will touch the addresses only when necessary.
886	 */
887	in6_clearscope(&ip6->ip6_src);
888	in6_clearscope(&ip6->ip6_dst);
889
890	/*
891	 * Check with the firewall...
892	 */
893	if (ip6_fw_enable && ip6_fw_chk_ptr) {
894		u_short port = 0;
895		m->m_pkthdr.rcvif = NULL;	/* XXX */
896		/* If ipfw says divert, we have to just drop packet */
897		if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
898			m_freem(m);
899			goto done;
900		}
901		if (!m) {
902			error = EACCES;
903			goto done;
904		}
905	}
906
907	/*
908	 * If the outgoing packet contains a hop-by-hop options header,
909	 * it must be examined and processed even by the source node.
910	 * (RFC 2460, section 4.)
911	 */
912	if (exthdrs.ip6e_hbh) {
913		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
914		u_int32_t dummy1; /* XXX unused */
915		u_int32_t dummy2; /* XXX unused */
916
917#ifdef DIAGNOSTIC
918		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
919			panic("ip6e_hbh is not continuous");
920#endif
921		/*
922		 *  XXX: if we have to send an ICMPv6 error to the sender,
923		 *       we need the M_LOOP flag since icmp6_error() expects
924		 *       the IPv6 and the hop-by-hop options header are
925		 *       continuous unless the flag is set.
926		 */
927		m->m_flags |= M_LOOP;
928		m->m_pkthdr.rcvif = ifp;
929		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
930		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
931		    &dummy1, &dummy2) < 0) {
932			/* m was already freed at this point */
933			error = EINVAL;/* better error? */
934			goto done;
935		}
936		m->m_flags &= ~M_LOOP; /* XXX */
937		m->m_pkthdr.rcvif = NULL;
938	}
939
940#ifdef PFIL_HOOKS
941	/*
942	 * Run through list of hooks for output packets.
943	 */
944	error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT);
945	if (error != 0 || m == NULL)
946		goto done;
947	ip6 = mtod(m, struct ip6_hdr *);
948#endif /* PFIL_HOOKS */
949
950	/*
951	 * Send the packet to the outgoing interface.
952	 * If necessary, do IPv6 fragmentation before sending.
953	 *
954	 * the logic here is rather complex:
955	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
956	 * 1-a:	send as is if tlen <= path mtu
957	 * 1-b:	fragment if tlen > path mtu
958	 *
959	 * 2: if user asks us not to fragment (dontfrag == 1)
960	 * 2-a:	send as is if tlen <= interface mtu
961	 * 2-b:	error if tlen > interface mtu
962	 *
963	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
964	 *	always fragment
965	 *
966	 * 4: if dontfrag == 1 && alwaysfrag == 1
967	 *	error, as we cannot handle this conflicting request
968	 */
969	tlen = m->m_pkthdr.len;
970
971	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
972		dontfrag = 1;
973	else
974		dontfrag = 0;
975	if (dontfrag && alwaysfrag) {	/* case 4 */
976		/* conflicting request - can't transmit */
977		error = EMSGSIZE;
978		goto bad;
979	}
980	if (dontfrag && tlen > IN6_LINKMTU(ifp)) {	/* case 2-b */
981		/*
982		 * Even if the DONTFRAG option is specified, we cannot send the
983		 * packet when the data length is larger than the MTU of the
984		 * outgoing interface.
985		 * Notify the error by sending IPV6_PATHMTU ancillary data as
986		 * well as returning an error code (the latter is not described
987		 * in the API spec.)
988		 */
989		u_int32_t mtu32;
990		struct ip6ctlparam ip6cp;
991
992		mtu32 = (u_int32_t)mtu;
993		bzero(&ip6cp, sizeof(ip6cp));
994		ip6cp.ip6c_cmdarg = (void *)&mtu32;
995		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
996		    (void *)&ip6cp);
997
998		error = EMSGSIZE;
999		goto bad;
1000	}
1001
1002	/*
1003	 * transmit packet without fragmentation
1004	 */
1005	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
1006		struct in6_ifaddr *ia6;
1007
1008		ip6 = mtod(m, struct ip6_hdr *);
1009		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
1010		if (ia6) {
1011			/* Record statistics for this interface address. */
1012			ia6->ia_ifa.if_opackets++;
1013			ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
1014		}
1015#ifdef IPSEC
1016		/* clean ipsec history once it goes out of the node */
1017		ipsec_delaux(m);
1018#endif
1019		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1020		goto done;
1021	}
1022
1023	/*
1024	 * try to fragment the packet.  case 1-b and 3
1025	 */
1026	if (mtu < IPV6_MMTU) {
1027		/* path MTU cannot be less than IPV6_MMTU */
1028		error = EMSGSIZE;
1029		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1030		goto bad;
1031	} else if (ip6->ip6_plen == 0) {
1032		/* jumbo payload cannot be fragmented */
1033		error = EMSGSIZE;
1034		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1035		goto bad;
1036	} else {
1037		struct mbuf **mnext, *m_frgpart;
1038		struct ip6_frag *ip6f;
1039#ifdef RANDOM_IP_ID
1040		u_int32_t id = htonl(ip6_randomid());
1041#else
1042		u_int32_t id = htonl(ip6_id++);
1043#endif
1044		u_char nextproto;
1045		struct ip6ctlparam ip6cp;
1046		u_int32_t mtu32;
1047		int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
1048
1049		/*
1050		 * Too large for the destination or interface;
1051		 * fragment if possible.
1052		 * Must be able to put at least 8 bytes per fragment.
1053		 */
1054		hlen = unfragpartlen;
1055		if (mtu > IPV6_MAXPACKET)
1056			mtu = IPV6_MAXPACKET;
1057
1058		/* Notify a proper path MTU to applications. */
1059		mtu32 = (u_int32_t)mtu;
1060		bzero(&ip6cp, sizeof(ip6cp));
1061		ip6cp.ip6c_cmdarg = (void *)&mtu32;
1062		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1063		    (void *)&ip6cp);
1064
1065		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1066		if (len < 8) {
1067			error = EMSGSIZE;
1068			in6_ifstat_inc(ifp, ifs6_out_fragfail);
1069			goto bad;
1070		}
1071
1072		/*
1073		 * Verify that we have any chance at all of being able to queue
1074		 *      the packet or packet fragments
1075		 */
1076		if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
1077		    < tlen  /* - hlen */)) {
1078			error = ENOBUFS;
1079			ip6stat.ip6s_odropped++;
1080			goto bad;
1081		}
1082
1083		mnext = &m->m_nextpkt;
1084
1085		/*
1086		 * Change the next header field of the last header in the
1087		 * unfragmentable part.
1088		 */
1089		if (exthdrs.ip6e_rthdr) {
1090			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1091			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1092		} else if (exthdrs.ip6e_dest1) {
1093			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1094			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1095		} else if (exthdrs.ip6e_hbh) {
1096			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1097			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1098		} else {
1099			nextproto = ip6->ip6_nxt;
1100			ip6->ip6_nxt = IPPROTO_FRAGMENT;
1101		}
1102
1103		/*
1104		 * Loop through length of segment after first fragment,
1105		 * make new header and copy data of each part and link onto
1106		 * chain.
1107		 */
1108		m0 = m;
1109		for (off = hlen; off < tlen; off += len) {
1110			MGETHDR(m, M_DONTWAIT, MT_HEADER);
1111			if (!m) {
1112				error = ENOBUFS;
1113				ip6stat.ip6s_odropped++;
1114				goto sendorfree;
1115			}
1116			m->m_pkthdr.rcvif = NULL;
1117			m->m_flags = m0->m_flags & M_COPYFLAGS;
1118			*mnext = m;
1119			mnext = &m->m_nextpkt;
1120			m->m_data += max_linkhdr;
1121			mhip6 = mtod(m, struct ip6_hdr *);
1122			*mhip6 = *ip6;
1123			m->m_len = sizeof(*mhip6);
1124			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1125			if (error) {
1126				ip6stat.ip6s_odropped++;
1127				goto sendorfree;
1128			}
1129			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1130			if (off + len >= tlen)
1131				len = tlen - off;
1132			else
1133				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1134			mhip6->ip6_plen = htons((u_short)(len + hlen +
1135			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1136			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1137				error = ENOBUFS;
1138				ip6stat.ip6s_odropped++;
1139				goto sendorfree;
1140			}
1141			m_cat(m, m_frgpart);
1142			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1143			m->m_pkthdr.rcvif = (struct ifnet *)0;
1144			ip6f->ip6f_reserved = 0;
1145			ip6f->ip6f_ident = id;
1146			ip6f->ip6f_nxt = nextproto;
1147			ip6stat.ip6s_ofragments++;
1148			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1149		}
1150
1151		in6_ifstat_inc(ifp, ifs6_out_fragok);
1152	}
1153
1154	/*
1155	 * Remove leading garbages.
1156	 */
1157sendorfree:
1158	m = m0->m_nextpkt;
1159	m0->m_nextpkt = 0;
1160	m_freem(m0);
1161	for (m0 = m; m; m = m0) {
1162		m0 = m->m_nextpkt;
1163		m->m_nextpkt = 0;
1164		if (error == 0) {
1165 			/* Record statistics for this interface address. */
1166 			if (ia) {
1167 				ia->ia_ifa.if_opackets++;
1168 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1169 			}
1170#ifdef IPSEC
1171			/* clean ipsec history once it goes out of the node */
1172			ipsec_delaux(m);
1173#endif
1174			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1175		} else
1176			m_freem(m);
1177	}
1178
1179	if (error == 0)
1180		ip6stat.ip6s_fragmented++;
1181
1182done:
1183	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1184		RTFREE(ro->ro_rt);
1185	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1186		RTFREE(ro_pmtu->ro_rt);
1187	}
1188
1189#ifdef IPSEC
1190	if (sp != NULL)
1191		key_freesp(sp);
1192#endif /* IPSEC */
1193#ifdef FAST_IPSEC
1194	if (sp != NULL)
1195		KEY_FREESP(&sp);
1196#endif /* FAST_IPSEC */
1197
1198	return (error);
1199
1200freehdrs:
1201	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1202	m_freem(exthdrs.ip6e_dest1);
1203	m_freem(exthdrs.ip6e_rthdr);
1204	m_freem(exthdrs.ip6e_dest2);
1205	/* FALLTHROUGH */
1206bad:
1207	m_freem(m);
1208	goto done;
1209}
1210
1211static int
1212ip6_copyexthdr(mp, hdr, hlen)
1213	struct mbuf **mp;
1214	caddr_t hdr;
1215	int hlen;
1216{
1217	struct mbuf *m;
1218
1219	if (hlen > MCLBYTES)
1220		return (ENOBUFS); /* XXX */
1221
1222	MGET(m, M_DONTWAIT, MT_DATA);
1223	if (!m)
1224		return (ENOBUFS);
1225
1226	if (hlen > MLEN) {
1227		MCLGET(m, M_DONTWAIT);
1228		if ((m->m_flags & M_EXT) == 0) {
1229			m_free(m);
1230			return (ENOBUFS);
1231		}
1232	}
1233	m->m_len = hlen;
1234	if (hdr)
1235		bcopy(hdr, mtod(m, caddr_t), hlen);
1236
1237	*mp = m;
1238	return (0);
1239}
1240
1241/*
1242 * Insert jumbo payload option.
1243 */
1244static int
1245ip6_insert_jumboopt(exthdrs, plen)
1246	struct ip6_exthdrs *exthdrs;
1247	u_int32_t plen;
1248{
1249	struct mbuf *mopt;
1250	u_char *optbuf;
1251	u_int32_t v;
1252
1253#define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1254
1255	/*
1256	 * If there is no hop-by-hop options header, allocate new one.
1257	 * If there is one but it doesn't have enough space to store the
1258	 * jumbo payload option, allocate a cluster to store the whole options.
1259	 * Otherwise, use it to store the options.
1260	 */
1261	if (exthdrs->ip6e_hbh == 0) {
1262		MGET(mopt, M_DONTWAIT, MT_DATA);
1263		if (mopt == 0)
1264			return (ENOBUFS);
1265		mopt->m_len = JUMBOOPTLEN;
1266		optbuf = mtod(mopt, u_char *);
1267		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1268		exthdrs->ip6e_hbh = mopt;
1269	} else {
1270		struct ip6_hbh *hbh;
1271
1272		mopt = exthdrs->ip6e_hbh;
1273		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1274			/*
1275			 * XXX assumption:
1276			 * - exthdrs->ip6e_hbh is not referenced from places
1277			 *   other than exthdrs.
1278			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1279			 */
1280			int oldoptlen = mopt->m_len;
1281			struct mbuf *n;
1282
1283			/*
1284			 * XXX: give up if the whole (new) hbh header does
1285			 * not fit even in an mbuf cluster.
1286			 */
1287			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1288				return (ENOBUFS);
1289
1290			/*
1291			 * As a consequence, we must always prepare a cluster
1292			 * at this point.
1293			 */
1294			MGET(n, M_DONTWAIT, MT_DATA);
1295			if (n) {
1296				MCLGET(n, M_DONTWAIT);
1297				if ((n->m_flags & M_EXT) == 0) {
1298					m_freem(n);
1299					n = NULL;
1300				}
1301			}
1302			if (!n)
1303				return (ENOBUFS);
1304			n->m_len = oldoptlen + JUMBOOPTLEN;
1305			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1306			    oldoptlen);
1307			optbuf = mtod(n, caddr_t) + oldoptlen;
1308			m_freem(mopt);
1309			mopt = exthdrs->ip6e_hbh = n;
1310		} else {
1311			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1312			mopt->m_len += JUMBOOPTLEN;
1313		}
1314		optbuf[0] = IP6OPT_PADN;
1315		optbuf[1] = 1;
1316
1317		/*
1318		 * Adjust the header length according to the pad and
1319		 * the jumbo payload option.
1320		 */
1321		hbh = mtod(mopt, struct ip6_hbh *);
1322		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1323	}
1324
1325	/* fill in the option. */
1326	optbuf[2] = IP6OPT_JUMBO;
1327	optbuf[3] = 4;
1328	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1329	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1330
1331	/* finally, adjust the packet header length */
1332	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1333
1334	return (0);
1335#undef JUMBOOPTLEN
1336}
1337
1338/*
1339 * Insert fragment header and copy unfragmentable header portions.
1340 */
1341static int
1342ip6_insertfraghdr(m0, m, hlen, frghdrp)
1343	struct mbuf *m0, *m;
1344	int hlen;
1345	struct ip6_frag **frghdrp;
1346{
1347	struct mbuf *n, *mlast;
1348
1349	if (hlen > sizeof(struct ip6_hdr)) {
1350		n = m_copym(m0, sizeof(struct ip6_hdr),
1351		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1352		if (n == 0)
1353			return (ENOBUFS);
1354		m->m_next = n;
1355	} else
1356		n = m;
1357
1358	/* Search for the last mbuf of unfragmentable part. */
1359	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1360		;
1361
1362	if ((mlast->m_flags & M_EXT) == 0 &&
1363	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1364		/* use the trailing space of the last mbuf for the fragment hdr */
1365		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1366		    mlast->m_len);
1367		mlast->m_len += sizeof(struct ip6_frag);
1368		m->m_pkthdr.len += sizeof(struct ip6_frag);
1369	} else {
1370		/* allocate a new mbuf for the fragment header */
1371		struct mbuf *mfrg;
1372
1373		MGET(mfrg, M_DONTWAIT, MT_DATA);
1374		if (mfrg == 0)
1375			return (ENOBUFS);
1376		mfrg->m_len = sizeof(struct ip6_frag);
1377		*frghdrp = mtod(mfrg, struct ip6_frag *);
1378		mlast->m_next = mfrg;
1379	}
1380
1381	return (0);
1382}
1383
1384static int
1385ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
1386	struct route_in6 *ro_pmtu, *ro;
1387	struct ifnet *ifp;
1388	struct in6_addr *dst;
1389	u_long *mtup;
1390	int *alwaysfragp;
1391{
1392	u_int32_t mtu = 0;
1393	int alwaysfrag = 0;
1394	int error = 0;
1395
1396	if (ro_pmtu != ro) {
1397		/* The first hop and the final destination may differ. */
1398		struct sockaddr_in6 *sa6_dst =
1399		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1400		if (ro_pmtu->ro_rt &&
1401		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1402		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1403			RTFREE(ro_pmtu->ro_rt);
1404			ro_pmtu->ro_rt = (struct rtentry *)NULL;
1405		}
1406		if (ro_pmtu->ro_rt == NULL) {
1407			bzero(sa6_dst, sizeof(*sa6_dst));
1408			sa6_dst->sin6_family = AF_INET6;
1409			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1410			sa6_dst->sin6_addr = *dst;
1411
1412			rtalloc((struct route *)ro_pmtu);
1413		}
1414	}
1415	if (ro_pmtu->ro_rt) {
1416		u_int32_t ifmtu;
1417		struct in_conninfo inc;
1418
1419		bzero(&inc, sizeof(inc));
1420		inc.inc_flags = 1; /* IPv6 */
1421		inc.inc6_faddr = *dst;
1422
1423		if (ifp == NULL)
1424			ifp = ro_pmtu->ro_rt->rt_ifp;
1425		ifmtu = IN6_LINKMTU(ifp);
1426		mtu = tcp_hc_getmtu(&inc);
1427		if (mtu)
1428			mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
1429		else
1430			mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1431		if (mtu == 0)
1432			mtu = ifmtu;
1433		else if (mtu < IPV6_MMTU) {
1434			/*
1435			 * RFC2460 section 5, last paragraph:
1436			 * if we record ICMPv6 too big message with
1437			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1438			 * or smaller, with framgent header attached.
1439			 * (fragment header is needed regardless from the
1440			 * packet size, for translators to identify packets)
1441			 */
1442			alwaysfrag = 1;
1443			mtu = IPV6_MMTU;
1444		} else if (mtu > ifmtu) {
1445			/*
1446			 * The MTU on the route is larger than the MTU on
1447			 * the interface!  This shouldn't happen, unless the
1448			 * MTU of the interface has been changed after the
1449			 * interface was brought up.  Change the MTU in the
1450			 * route to match the interface MTU (as long as the
1451			 * field isn't locked).
1452			 */
1453			mtu = ifmtu;
1454			ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1455		}
1456	} else if (ifp) {
1457		mtu = IN6_LINKMTU(ifp);
1458	} else
1459		error = EHOSTUNREACH; /* XXX */
1460
1461	*mtup = mtu;
1462	if (alwaysfragp)
1463		*alwaysfragp = alwaysfrag;
1464	return (error);
1465}
1466
1467/*
1468 * IP6 socket option processing.
1469 */
1470int
1471ip6_ctloutput(so, sopt)
1472	struct socket *so;
1473	struct sockopt *sopt;
1474{
1475	int privileged, optdatalen, uproto;
1476	void *optdata;
1477	struct inpcb *in6p = sotoinpcb(so);
1478	int error, optval;
1479	int level, op, optname;
1480	int optlen;
1481	struct thread *td;
1482
1483	if (sopt) {
1484		level = sopt->sopt_level;
1485		op = sopt->sopt_dir;
1486		optname = sopt->sopt_name;
1487		optlen = sopt->sopt_valsize;
1488		td = sopt->sopt_td;
1489	} else {
1490		panic("ip6_ctloutput: arg soopt is NULL");
1491	}
1492	error = optval = 0;
1493
1494	privileged = (td == 0 || suser(td)) ? 0 : 1;
1495	uproto = (int)so->so_proto->pr_protocol;
1496
1497	if (level == IPPROTO_IPV6) {
1498		switch (op) {
1499
1500		case SOPT_SET:
1501			switch (optname) {
1502			case IPV6_2292PKTOPTIONS:
1503#ifdef IPV6_PKTOPTIONS
1504			case IPV6_PKTOPTIONS:
1505#endif
1506			{
1507				struct mbuf *m;
1508
1509				error = soopt_getm(sopt, &m); /* XXX */
1510				if (error != 0)
1511					break;
1512				error = soopt_mcopyin(sopt, m); /* XXX */
1513				if (error != 0)
1514					break;
1515				error = ip6_pcbopts(&in6p->in6p_outputopts,
1516						    m, so, sopt);
1517				m_freem(m); /* XXX */
1518				break;
1519			}
1520
1521			/*
1522			 * Use of some Hop-by-Hop options or some
1523			 * Destination options, might require special
1524			 * privilege.  That is, normal applications
1525			 * (without special privilege) might be forbidden
1526			 * from setting certain options in outgoing packets,
1527			 * and might never see certain options in received
1528			 * packets. [RFC 2292 Section 6]
1529			 * KAME specific note:
1530			 *  KAME prevents non-privileged users from sending or
1531			 *  receiving ANY hbh/dst options in order to avoid
1532			 *  overhead of parsing options in the kernel.
1533			 */
1534			case IPV6_RECVHOPOPTS:
1535			case IPV6_RECVDSTOPTS:
1536			case IPV6_RECVRTHDRDSTOPTS:
1537				if (!privileged) {
1538					error = EPERM;
1539					break;
1540				}
1541				/* FALLTHROUGH */
1542			case IPV6_UNICAST_HOPS:
1543			case IPV6_HOPLIMIT:
1544			case IPV6_FAITH:
1545
1546			case IPV6_RECVPKTINFO:
1547			case IPV6_RECVHOPLIMIT:
1548			case IPV6_RECVRTHDR:
1549			case IPV6_RECVPATHMTU:
1550			case IPV6_RECVTCLASS:
1551			case IPV6_V6ONLY:
1552			case IPV6_AUTOFLOWLABEL:
1553				if (optlen != sizeof(int)) {
1554					error = EINVAL;
1555					break;
1556				}
1557				error = sooptcopyin(sopt, &optval,
1558					sizeof optval, sizeof optval);
1559				if (error)
1560					break;
1561				switch (optname) {
1562
1563				case IPV6_UNICAST_HOPS:
1564					if (optval < -1 || optval >= 256)
1565						error = EINVAL;
1566					else {
1567						/* -1 = kernel default */
1568						in6p->in6p_hops = optval;
1569						if ((in6p->in6p_vflag &
1570						     INP_IPV4) != 0)
1571							in6p->inp_ip_ttl = optval;
1572					}
1573					break;
1574#define OPTSET(bit) \
1575do { \
1576	if (optval) \
1577		in6p->in6p_flags |= (bit); \
1578	else \
1579		in6p->in6p_flags &= ~(bit); \
1580} while (/*CONSTCOND*/ 0)
1581#define OPTSET2292(bit) \
1582do { \
1583	in6p->in6p_flags |= IN6P_RFC2292; \
1584	if (optval) \
1585		in6p->in6p_flags |= (bit); \
1586	else \
1587		in6p->in6p_flags &= ~(bit); \
1588} while (/*CONSTCOND*/ 0)
1589#define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1590
1591				case IPV6_RECVPKTINFO:
1592					/* cannot mix with RFC2292 */
1593					if (OPTBIT(IN6P_RFC2292)) {
1594						error = EINVAL;
1595						break;
1596					}
1597					OPTSET(IN6P_PKTINFO);
1598					break;
1599
1600				case IPV6_HOPLIMIT:
1601				{
1602					struct ip6_pktopts **optp;
1603
1604					/* cannot mix with RFC2292 */
1605					if (OPTBIT(IN6P_RFC2292)) {
1606						error = EINVAL;
1607						break;
1608					}
1609					optp = &in6p->in6p_outputopts;
1610					error = ip6_pcbopt(IPV6_HOPLIMIT,
1611							   (u_char *)&optval,
1612							   sizeof(optval),
1613							   optp,
1614							   privileged, uproto);
1615					break;
1616				}
1617
1618				case IPV6_RECVHOPLIMIT:
1619					/* cannot mix with RFC2292 */
1620					if (OPTBIT(IN6P_RFC2292)) {
1621						error = EINVAL;
1622						break;
1623					}
1624					OPTSET(IN6P_HOPLIMIT);
1625					break;
1626
1627				case IPV6_RECVHOPOPTS:
1628					/* cannot mix with RFC2292 */
1629					if (OPTBIT(IN6P_RFC2292)) {
1630						error = EINVAL;
1631						break;
1632					}
1633					OPTSET(IN6P_HOPOPTS);
1634					break;
1635
1636				case IPV6_RECVDSTOPTS:
1637					/* cannot mix with RFC2292 */
1638					if (OPTBIT(IN6P_RFC2292)) {
1639						error = EINVAL;
1640						break;
1641					}
1642					OPTSET(IN6P_DSTOPTS);
1643					break;
1644
1645				case IPV6_RECVRTHDRDSTOPTS:
1646					/* cannot mix with RFC2292 */
1647					if (OPTBIT(IN6P_RFC2292)) {
1648						error = EINVAL;
1649						break;
1650					}
1651					OPTSET(IN6P_RTHDRDSTOPTS);
1652					break;
1653
1654				case IPV6_RECVRTHDR:
1655					/* cannot mix with RFC2292 */
1656					if (OPTBIT(IN6P_RFC2292)) {
1657						error = EINVAL;
1658						break;
1659					}
1660					OPTSET(IN6P_RTHDR);
1661					break;
1662
1663				case IPV6_FAITH:
1664					OPTSET(IN6P_FAITH);
1665					break;
1666
1667				case IPV6_RECVPATHMTU:
1668					/*
1669					 * We ignore this option for TCP
1670					 * sockets.
1671					 * (rfc2292bis leaves this case
1672					 * unspecified.)
1673					 */
1674					if (uproto != IPPROTO_TCP)
1675						OPTSET(IN6P_MTU);
1676					break;
1677
1678				case IPV6_V6ONLY:
1679					/*
1680					 * make setsockopt(IPV6_V6ONLY)
1681					 * available only prior to bind(2).
1682					 * see ipng mailing list, Jun 22 2001.
1683					 */
1684					if (in6p->in6p_lport ||
1685					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1686						error = EINVAL;
1687						break;
1688					}
1689					OPTSET(IN6P_IPV6_V6ONLY);
1690					if (optval)
1691						in6p->in6p_vflag &= ~INP_IPV4;
1692					else
1693						in6p->in6p_vflag |= INP_IPV4;
1694					break;
1695				case IPV6_RECVTCLASS:
1696					/* cannot mix with RFC2292 XXX */
1697					if (OPTBIT(IN6P_RFC2292)) {
1698						error = EINVAL;
1699						break;
1700					}
1701					OPTSET(IN6P_TCLASS);
1702					break;
1703				case IPV6_AUTOFLOWLABEL:
1704					OPTSET(IN6P_AUTOFLOWLABEL);
1705					break;
1706
1707				}
1708				break;
1709
1710			case IPV6_TCLASS:
1711			case IPV6_DONTFRAG:
1712			case IPV6_USE_MIN_MTU:
1713			case IPV6_PREFER_TEMPADDR:
1714				if (optlen != sizeof(optval)) {
1715					error = EINVAL;
1716					break;
1717				}
1718				error = sooptcopyin(sopt, &optval,
1719					sizeof optval, sizeof optval);
1720				if (error)
1721					break;
1722				{
1723					struct ip6_pktopts **optp;
1724					optp = &in6p->in6p_outputopts;
1725					error = ip6_pcbopt(optname,
1726							   (u_char *)&optval,
1727							   sizeof(optval),
1728							   optp,
1729							   privileged, uproto);
1730					break;
1731				}
1732
1733			case IPV6_2292PKTINFO:
1734			case IPV6_2292HOPLIMIT:
1735			case IPV6_2292HOPOPTS:
1736			case IPV6_2292DSTOPTS:
1737			case IPV6_2292RTHDR:
1738				/* RFC 2292 */
1739				if (optlen != sizeof(int)) {
1740					error = EINVAL;
1741					break;
1742				}
1743				error = sooptcopyin(sopt, &optval,
1744					sizeof optval, sizeof optval);
1745				if (error)
1746					break;
1747				switch (optname) {
1748				case IPV6_2292PKTINFO:
1749					OPTSET2292(IN6P_PKTINFO);
1750					break;
1751				case IPV6_2292HOPLIMIT:
1752					OPTSET2292(IN6P_HOPLIMIT);
1753					break;
1754				case IPV6_2292HOPOPTS:
1755					/*
1756					 * Check super-user privilege.
1757					 * See comments for IPV6_RECVHOPOPTS.
1758					 */
1759					if (!privileged)
1760						return (EPERM);
1761					OPTSET2292(IN6P_HOPOPTS);
1762					break;
1763				case IPV6_2292DSTOPTS:
1764					if (!privileged)
1765						return (EPERM);
1766					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1767					break;
1768				case IPV6_2292RTHDR:
1769					OPTSET2292(IN6P_RTHDR);
1770					break;
1771				}
1772				break;
1773			case IPV6_PKTINFO:
1774			case IPV6_HOPOPTS:
1775			case IPV6_RTHDR:
1776			case IPV6_DSTOPTS:
1777			case IPV6_RTHDRDSTOPTS:
1778			case IPV6_NEXTHOP:
1779			{
1780				/* new advanced API (2292bis) */
1781				u_char *optbuf;
1782				int optlen;
1783				struct ip6_pktopts **optp;
1784
1785				/* cannot mix with RFC2292 */
1786				if (OPTBIT(IN6P_RFC2292)) {
1787					error = EINVAL;
1788					break;
1789				}
1790
1791				switch (optname) {
1792				case IPV6_HOPOPTS:
1793				case IPV6_DSTOPTS:
1794				case IPV6_RTHDRDSTOPTS:
1795				case IPV6_NEXTHOP:
1796					if (!privileged)
1797						error = EPERM;
1798					break;
1799				}
1800				if (error)
1801					break;
1802
1803				switch (optname) {
1804				case IPV6_PKTINFO:
1805					optlen = sizeof(struct in6_pktinfo);
1806					break;
1807				case IPV6_NEXTHOP:
1808					optlen = SOCK_MAXADDRLEN;
1809					break;
1810				default:
1811					optlen = IPV6_MAXOPTHDR;
1812					break;
1813				}
1814				if (sopt->sopt_valsize > optlen) {
1815					error = EINVAL;
1816					break;
1817				}
1818
1819				optlen = sopt->sopt_valsize;
1820				optbuf = malloc(optlen, M_TEMP, M_WAITOK);
1821				error = sooptcopyin(sopt, optbuf, optlen,
1822				    optlen);
1823				if (error) {
1824					free(optbuf, M_TEMP);
1825					break;
1826				}
1827
1828				optp = &in6p->in6p_outputopts;
1829				error = ip6_pcbopt(optname,
1830						   optbuf, optlen,
1831						   optp, privileged, uproto);
1832				free(optbuf, M_TEMP);
1833				break;
1834			}
1835#undef OPTSET
1836
1837			case IPV6_MULTICAST_IF:
1838			case IPV6_MULTICAST_HOPS:
1839			case IPV6_MULTICAST_LOOP:
1840			case IPV6_JOIN_GROUP:
1841			case IPV6_LEAVE_GROUP:
1842			    {
1843				if (sopt->sopt_valsize > MLEN) {
1844					error = EMSGSIZE;
1845					break;
1846				}
1847				/* XXX */
1848			    }
1849			    /* FALLTHROUGH */
1850			    {
1851				struct mbuf *m;
1852
1853				if (sopt->sopt_valsize > MCLBYTES) {
1854					error = EMSGSIZE;
1855					break;
1856				}
1857				/* XXX */
1858				MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_HEADER);
1859				if (m == 0) {
1860					error = ENOBUFS;
1861					break;
1862				}
1863				if (sopt->sopt_valsize > MLEN) {
1864					MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
1865					if ((m->m_flags & M_EXT) == 0) {
1866						m_free(m);
1867						error = ENOBUFS;
1868						break;
1869					}
1870				}
1871				m->m_len = sopt->sopt_valsize;
1872				error = sooptcopyin(sopt, mtod(m, char *),
1873						    m->m_len, m->m_len);
1874				if (error) {
1875					(void)m_free(m);
1876					break;
1877				}
1878				error =	ip6_setmoptions(sopt->sopt_name,
1879							&in6p->in6p_moptions,
1880							m);
1881				(void)m_free(m);
1882			    }
1883				break;
1884
1885			case IPV6_PORTRANGE:
1886				error = sooptcopyin(sopt, &optval,
1887				    sizeof optval, sizeof optval);
1888				if (error)
1889					break;
1890
1891				switch (optval) {
1892				case IPV6_PORTRANGE_DEFAULT:
1893					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1894					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1895					break;
1896
1897				case IPV6_PORTRANGE_HIGH:
1898					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1899					in6p->in6p_flags |= IN6P_HIGHPORT;
1900					break;
1901
1902				case IPV6_PORTRANGE_LOW:
1903					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1904					in6p->in6p_flags |= IN6P_LOWPORT;
1905					break;
1906
1907				default:
1908					error = EINVAL;
1909					break;
1910				}
1911				break;
1912
1913#if defined(IPSEC) || defined(FAST_IPSEC)
1914			case IPV6_IPSEC_POLICY:
1915			    {
1916				caddr_t req = NULL;
1917				size_t len = 0;
1918				struct mbuf *m;
1919
1920				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1921					break;
1922				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1923					break;
1924				if (m) {
1925					req = mtod(m, caddr_t);
1926					len = m->m_len;
1927				}
1928				error = ipsec6_set_policy(in6p, optname, req,
1929							  len, privileged);
1930				m_freem(m);
1931			    }
1932				break;
1933#endif /* KAME IPSEC */
1934
1935			case IPV6_FW_ADD:
1936			case IPV6_FW_DEL:
1937			case IPV6_FW_FLUSH:
1938			case IPV6_FW_ZERO:
1939			    {
1940				struct mbuf *m;
1941				struct mbuf **mp = &m;
1942
1943				if (ip6_fw_ctl_ptr == NULL)
1944					return EINVAL;
1945				/* XXX */
1946				if ((error = soopt_getm(sopt, &m)) != 0)
1947					break;
1948				/* XXX */
1949				if ((error = soopt_mcopyin(sopt, m)) != 0)
1950					break;
1951				error = (*ip6_fw_ctl_ptr)(optname, mp);
1952				m = *mp;
1953			    }
1954				break;
1955
1956			default:
1957				error = ENOPROTOOPT;
1958				break;
1959			}
1960			break;
1961
1962		case SOPT_GET:
1963			switch (optname) {
1964
1965			case IPV6_2292PKTOPTIONS:
1966#ifdef IPV6_PKTOPTIONS
1967			case IPV6_PKTOPTIONS:
1968#endif
1969				/*
1970				 * RFC3542 (effectively) deprecated the
1971				 * semantics of the 2292-style pktoptions.
1972				 * Since it was not reliable in nature (i.e.,
1973				 * applications had to expect the lack of some
1974				 * information after all), it would make sense
1975				 * to simplify this part by always returning
1976				 * empty data.
1977				 */
1978				sopt->sopt_valsize = 0;
1979				break;
1980
1981			case IPV6_RECVHOPOPTS:
1982			case IPV6_RECVDSTOPTS:
1983			case IPV6_RECVRTHDRDSTOPTS:
1984			case IPV6_UNICAST_HOPS:
1985			case IPV6_RECVPKTINFO:
1986			case IPV6_RECVHOPLIMIT:
1987			case IPV6_RECVRTHDR:
1988			case IPV6_RECVPATHMTU:
1989
1990			case IPV6_FAITH:
1991			case IPV6_V6ONLY:
1992			case IPV6_PORTRANGE:
1993			case IPV6_RECVTCLASS:
1994			case IPV6_AUTOFLOWLABEL:
1995				switch (optname) {
1996
1997				case IPV6_RECVHOPOPTS:
1998					optval = OPTBIT(IN6P_HOPOPTS);
1999					break;
2000
2001				case IPV6_RECVDSTOPTS:
2002					optval = OPTBIT(IN6P_DSTOPTS);
2003					break;
2004
2005				case IPV6_RECVRTHDRDSTOPTS:
2006					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
2007					break;
2008
2009				case IPV6_UNICAST_HOPS:
2010					optval = in6p->in6p_hops;
2011					break;
2012
2013				case IPV6_RECVPKTINFO:
2014					optval = OPTBIT(IN6P_PKTINFO);
2015					break;
2016
2017				case IPV6_RECVHOPLIMIT:
2018					optval = OPTBIT(IN6P_HOPLIMIT);
2019					break;
2020
2021				case IPV6_RECVRTHDR:
2022					optval = OPTBIT(IN6P_RTHDR);
2023					break;
2024
2025				case IPV6_RECVPATHMTU:
2026					optval = OPTBIT(IN6P_MTU);
2027					break;
2028
2029				case IPV6_FAITH:
2030					optval = OPTBIT(IN6P_FAITH);
2031					break;
2032
2033				case IPV6_V6ONLY:
2034					optval = OPTBIT(IN6P_IPV6_V6ONLY);
2035					break;
2036
2037				case IPV6_PORTRANGE:
2038				    {
2039					int flags;
2040					flags = in6p->in6p_flags;
2041					if (flags & IN6P_HIGHPORT)
2042						optval = IPV6_PORTRANGE_HIGH;
2043					else if (flags & IN6P_LOWPORT)
2044						optval = IPV6_PORTRANGE_LOW;
2045					else
2046						optval = 0;
2047					break;
2048				    }
2049				case IPV6_RECVTCLASS:
2050					optval = OPTBIT(IN6P_TCLASS);
2051					break;
2052
2053				case IPV6_AUTOFLOWLABEL:
2054					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2055					break;
2056				}
2057				if (error)
2058					break;
2059				error = sooptcopyout(sopt, &optval,
2060					sizeof optval);
2061				break;
2062
2063			case IPV6_PATHMTU:
2064			{
2065				u_long pmtu = 0;
2066				struct ip6_mtuinfo mtuinfo;
2067				struct route_in6 sro;
2068
2069				bzero(&sro, sizeof(sro));
2070
2071				if (!(so->so_state & SS_ISCONNECTED))
2072					return (ENOTCONN);
2073				/*
2074				 * XXX: we dot not consider the case of source
2075				 * routing, or optional information to specify
2076				 * the outgoing interface.
2077				 */
2078				error = ip6_getpmtu(&sro, NULL, NULL,
2079				    &in6p->in6p_faddr, &pmtu, NULL);
2080				if (sro.ro_rt)
2081					RTFREE(sro.ro_rt);
2082				if (error)
2083					break;
2084				if (pmtu > IPV6_MAXPACKET)
2085					pmtu = IPV6_MAXPACKET;
2086
2087				bzero(&mtuinfo, sizeof(mtuinfo));
2088				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2089				optdata = (void *)&mtuinfo;
2090				optdatalen = sizeof(mtuinfo);
2091				error = sooptcopyout(sopt, optdata,
2092				    optdatalen);
2093				break;
2094			}
2095
2096			case IPV6_2292PKTINFO:
2097			case IPV6_2292HOPLIMIT:
2098			case IPV6_2292HOPOPTS:
2099			case IPV6_2292RTHDR:
2100			case IPV6_2292DSTOPTS:
2101				switch (optname) {
2102				case IPV6_2292PKTINFO:
2103					optval = OPTBIT(IN6P_PKTINFO);
2104					break;
2105				case IPV6_2292HOPLIMIT:
2106					optval = OPTBIT(IN6P_HOPLIMIT);
2107					break;
2108				case IPV6_2292HOPOPTS:
2109					optval = OPTBIT(IN6P_HOPOPTS);
2110					break;
2111				case IPV6_2292RTHDR:
2112					optval = OPTBIT(IN6P_RTHDR);
2113					break;
2114				case IPV6_2292DSTOPTS:
2115					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2116					break;
2117				}
2118				error = sooptcopyout(sopt, &optval,
2119				    sizeof optval);
2120				break;
2121			case IPV6_PKTINFO:
2122			case IPV6_HOPOPTS:
2123			case IPV6_RTHDR:
2124			case IPV6_DSTOPTS:
2125			case IPV6_RTHDRDSTOPTS:
2126			case IPV6_NEXTHOP:
2127			case IPV6_TCLASS:
2128			case IPV6_DONTFRAG:
2129			case IPV6_USE_MIN_MTU:
2130			case IPV6_PREFER_TEMPADDR:
2131				error = ip6_getpcbopt(in6p->in6p_outputopts,
2132				    optname, sopt);
2133				break;
2134
2135			case IPV6_MULTICAST_IF:
2136			case IPV6_MULTICAST_HOPS:
2137			case IPV6_MULTICAST_LOOP:
2138			case IPV6_JOIN_GROUP:
2139			case IPV6_LEAVE_GROUP:
2140			    {
2141				struct mbuf *m;
2142				error = ip6_getmoptions(sopt->sopt_name,
2143				    in6p->in6p_moptions, &m);
2144				if (error == 0)
2145					error = sooptcopyout(sopt,
2146					    mtod(m, char *), m->m_len);
2147				m_freem(m);
2148			    }
2149				break;
2150
2151#if defined(IPSEC) || defined(FAST_IPSEC)
2152			case IPV6_IPSEC_POLICY:
2153			  {
2154				caddr_t req = NULL;
2155				size_t len = 0;
2156				struct mbuf *m = NULL;
2157				struct mbuf **mp = &m;
2158				size_t ovalsize = sopt->sopt_valsize;
2159				caddr_t oval = (caddr_t)sopt->sopt_val;
2160
2161				error = soopt_getm(sopt, &m); /* XXX */
2162				if (error != 0)
2163					break;
2164				error = soopt_mcopyin(sopt, m); /* XXX */
2165				if (error != 0)
2166					break;
2167				sopt->sopt_valsize = ovalsize;
2168				sopt->sopt_val = oval;
2169				if (m) {
2170					req = mtod(m, caddr_t);
2171					len = m->m_len;
2172				}
2173				error = ipsec6_get_policy(in6p, req, len, mp);
2174				if (error == 0)
2175					error = soopt_mcopyout(sopt, m); /* XXX */
2176				if (error == 0 && m)
2177					m_freem(m);
2178				break;
2179			  }
2180#endif /* KAME IPSEC */
2181
2182			case IPV6_FW_GET:
2183			  {
2184				struct mbuf *m;
2185				struct mbuf **mp = &m;
2186
2187				if (ip6_fw_ctl_ptr == NULL)
2188			        {
2189					return EINVAL;
2190				}
2191				error = (*ip6_fw_ctl_ptr)(optname, mp);
2192				if (error == 0)
2193					error = soopt_mcopyout(sopt, m); /* XXX */
2194				if (error == 0 && m)
2195					m_freem(m);
2196			  }
2197				break;
2198
2199			default:
2200				error = ENOPROTOOPT;
2201				break;
2202			}
2203			break;
2204		}
2205	} else {		/* level != IPPROTO_IPV6 */
2206		error = EINVAL;
2207	}
2208	return (error);
2209}
2210
2211int
2212ip6_raw_ctloutput(so, sopt)
2213	struct socket *so;
2214	struct sockopt *sopt;
2215{
2216	int error = 0, optval, optlen;
2217	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2218	struct in6pcb *in6p = sotoin6pcb(so);
2219	int level, op, optname;
2220
2221	if (sopt) {
2222		level = sopt->sopt_level;
2223		op = sopt->sopt_dir;
2224		optname = sopt->sopt_name;
2225		optlen = sopt->sopt_valsize;
2226	} else
2227		panic("ip6_raw_ctloutput: arg soopt is NULL");
2228
2229	if (level != IPPROTO_IPV6) {
2230		return (EINVAL);
2231	}
2232
2233	switch (optname) {
2234	case IPV6_CHECKSUM:
2235		/*
2236		 * For ICMPv6 sockets, no modification allowed for checksum
2237		 * offset, permit "no change" values to help existing apps.
2238		 *
2239		 * XXX 2292bis says: "An attempt to set IPV6_CHECKSUM
2240		 * for an ICMPv6 socket will fail."
2241		 * The current behavior does not meet 2292bis.
2242		 */
2243		switch (op) {
2244		case SOPT_SET:
2245			if (optlen != sizeof(int)) {
2246				error = EINVAL;
2247				break;
2248			}
2249			error = sooptcopyin(sopt, &optval, sizeof(optval),
2250					    sizeof(optval));
2251			if (error)
2252				break;
2253			if ((optval % 2) != 0) {
2254				/* the API assumes even offset values */
2255				error = EINVAL;
2256			} else if (so->so_proto->pr_protocol ==
2257			    IPPROTO_ICMPV6) {
2258				if (optval != icmp6off)
2259					error = EINVAL;
2260			} else
2261				in6p->in6p_cksum = optval;
2262			break;
2263
2264		case SOPT_GET:
2265			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2266				optval = icmp6off;
2267			else
2268				optval = in6p->in6p_cksum;
2269
2270			error = sooptcopyout(sopt, &optval, sizeof(optval));
2271			break;
2272
2273		default:
2274			error = EINVAL;
2275			break;
2276		}
2277		break;
2278
2279	default:
2280		error = ENOPROTOOPT;
2281		break;
2282	}
2283
2284	return (error);
2285}
2286
2287/*
2288 * Set up IP6 options in pcb for insertion in output packets or
2289 * specifying behavior of outgoing packets.
2290 */
2291static int
2292ip6_pcbopts(pktopt, m, so, sopt)
2293	struct ip6_pktopts **pktopt;
2294	struct mbuf *m;
2295	struct socket *so;
2296	struct sockopt *sopt;
2297{
2298	struct ip6_pktopts *opt = *pktopt;
2299	int error = 0;
2300	struct thread *td = sopt->sopt_td;
2301	int priv = 0;
2302
2303	/* turn off any old options. */
2304	if (opt) {
2305#ifdef DIAGNOSTIC
2306		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2307		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2308		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2309			printf("ip6_pcbopts: all specified options are cleared.\n");
2310#endif
2311		ip6_clearpktopts(opt, -1);
2312	} else
2313		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2314	*pktopt = NULL;
2315
2316	if (!m || m->m_len == 0) {
2317		/*
2318		 * Only turning off any previous options, regardless of
2319		 * whether the opt is just created or given.
2320		 */
2321		free(opt, M_IP6OPT);
2322		return (0);
2323	}
2324
2325	/*  set options specified by user. */
2326	if (td && !suser(td))
2327		priv = 1;
2328	if ((error = ip6_setpktoptions(m, opt, NULL, priv, 1,
2329	    so->so_proto->pr_protocol)) != 0) {
2330		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2331		free(opt, M_IP6OPT);
2332		return (error);
2333	}
2334	*pktopt = opt;
2335	return (0);
2336}
2337
2338/*
2339 * initialize ip6_pktopts.  beware that there are non-zero default values in
2340 * the struct.
2341 */
2342void
2343init_ip6pktopts(opt)
2344	struct ip6_pktopts *opt;
2345{
2346
2347	bzero(opt, sizeof(*opt));
2348	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2349	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2350	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2351	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2352}
2353
2354static int
2355ip6_pcbopt(optname, buf, len, pktopt, priv, uproto)
2356	int optname, len, priv;
2357	u_char *buf;
2358	struct ip6_pktopts **pktopt;
2359	int uproto;
2360{
2361	struct ip6_pktopts *opt;
2362
2363	if (*pktopt == NULL) {
2364		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2365		    M_WAITOK);
2366		init_ip6pktopts(*pktopt);
2367		(*pktopt)->needfree = 1;
2368	}
2369	opt = *pktopt;
2370
2371	return (ip6_setpktoption(optname, buf, len, opt, priv, 1, 0, uproto));
2372}
2373
2374static int
2375ip6_getpcbopt(pktopt, optname, sopt)
2376	struct ip6_pktopts *pktopt;
2377	struct sockopt *sopt;
2378	int optname;
2379{
2380	void *optdata = NULL;
2381	int optdatalen = 0;
2382	struct ip6_ext *ip6e;
2383	int error = 0;
2384	struct in6_pktinfo null_pktinfo;
2385	int deftclass = 0, on;
2386	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2387	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2388
2389	switch (optname) {
2390	case IPV6_PKTINFO:
2391		if (pktopt && pktopt->ip6po_pktinfo)
2392			optdata = (void *)pktopt->ip6po_pktinfo;
2393		else {
2394			/* XXX: we don't have to do this every time... */
2395			bzero(&null_pktinfo, sizeof(null_pktinfo));
2396			optdata = (void *)&null_pktinfo;
2397		}
2398		optdatalen = sizeof(struct in6_pktinfo);
2399		break;
2400	case IPV6_TCLASS:
2401		if (pktopt && pktopt->ip6po_tclass >= 0)
2402			optdata = (void *)&pktopt->ip6po_tclass;
2403		else
2404			optdata = (void *)&deftclass;
2405		optdatalen = sizeof(int);
2406		break;
2407	case IPV6_HOPOPTS:
2408		if (pktopt && pktopt->ip6po_hbh) {
2409			optdata = (void *)pktopt->ip6po_hbh;
2410			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2411			optdatalen = (ip6e->ip6e_len + 1) << 3;
2412		}
2413		break;
2414	case IPV6_RTHDR:
2415		if (pktopt && pktopt->ip6po_rthdr) {
2416			optdata = (void *)pktopt->ip6po_rthdr;
2417			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2418			optdatalen = (ip6e->ip6e_len + 1) << 3;
2419		}
2420		break;
2421	case IPV6_RTHDRDSTOPTS:
2422		if (pktopt && pktopt->ip6po_dest1) {
2423			optdata = (void *)pktopt->ip6po_dest1;
2424			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2425			optdatalen = (ip6e->ip6e_len + 1) << 3;
2426		}
2427		break;
2428	case IPV6_DSTOPTS:
2429		if (pktopt && pktopt->ip6po_dest2) {
2430			optdata = (void *)pktopt->ip6po_dest2;
2431			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2432			optdatalen = (ip6e->ip6e_len + 1) << 3;
2433		}
2434		break;
2435	case IPV6_NEXTHOP:
2436		if (pktopt && pktopt->ip6po_nexthop) {
2437			optdata = (void *)pktopt->ip6po_nexthop;
2438			optdatalen = pktopt->ip6po_nexthop->sa_len;
2439		}
2440		break;
2441	case IPV6_USE_MIN_MTU:
2442		if (pktopt)
2443			optdata = (void *)&pktopt->ip6po_minmtu;
2444		else
2445			optdata = (void *)&defminmtu;
2446		optdatalen = sizeof(int);
2447		break;
2448	case IPV6_DONTFRAG:
2449		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2450			on = 1;
2451		else
2452			on = 0;
2453		optdata = (void *)&on;
2454		optdatalen = sizeof(on);
2455		break;
2456	case IPV6_PREFER_TEMPADDR:
2457		if (pktopt)
2458			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2459		else
2460			optdata = (void *)&defpreftemp;
2461		optdatalen = sizeof(int);
2462		break;
2463	default:		/* should not happen */
2464#ifdef DIAGNOSTIC
2465		panic("ip6_getpcbopt: unexpected option\n");
2466#endif
2467		return (ENOPROTOOPT);
2468	}
2469
2470	error = sooptcopyout(sopt, optdata, optdatalen);
2471
2472	return (error);
2473}
2474
2475void
2476ip6_clearpktopts(pktopt, optname)
2477	struct ip6_pktopts *pktopt;
2478	int optname;
2479{
2480	int needfree;
2481
2482	if (pktopt == NULL)
2483		return;
2484
2485	needfree = pktopt->needfree;
2486
2487	if (optname == -1 || optname == IPV6_PKTINFO) {
2488		if (needfree && pktopt->ip6po_pktinfo)
2489			free(pktopt->ip6po_pktinfo, M_IP6OPT);
2490		pktopt->ip6po_pktinfo = NULL;
2491	}
2492	if (optname == -1 || optname == IPV6_HOPLIMIT)
2493		pktopt->ip6po_hlim = -1;
2494	if (optname == -1 || optname == IPV6_TCLASS)
2495		pktopt->ip6po_tclass = -1;
2496	if (optname == -1 || optname == IPV6_NEXTHOP) {
2497		if (pktopt->ip6po_nextroute.ro_rt) {
2498			RTFREE(pktopt->ip6po_nextroute.ro_rt);
2499			pktopt->ip6po_nextroute.ro_rt = NULL;
2500		}
2501		if (needfree && pktopt->ip6po_nexthop)
2502			free(pktopt->ip6po_nexthop, M_IP6OPT);
2503		pktopt->ip6po_nexthop = NULL;
2504	}
2505	if (optname == -1 || optname == IPV6_HOPOPTS) {
2506		if (needfree && pktopt->ip6po_hbh)
2507			free(pktopt->ip6po_hbh, M_IP6OPT);
2508		pktopt->ip6po_hbh = NULL;
2509	}
2510	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2511		if (needfree && pktopt->ip6po_dest1)
2512			free(pktopt->ip6po_dest1, M_IP6OPT);
2513		pktopt->ip6po_dest1 = NULL;
2514	}
2515	if (optname == -1 || optname == IPV6_RTHDR) {
2516		if (needfree && pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2517			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2518		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2519		if (pktopt->ip6po_route.ro_rt) {
2520			RTFREE(pktopt->ip6po_route.ro_rt);
2521			pktopt->ip6po_route.ro_rt = NULL;
2522		}
2523	}
2524	if (optname == -1 || optname == IPV6_DSTOPTS) {
2525		if (needfree && pktopt->ip6po_dest2)
2526			free(pktopt->ip6po_dest2, M_IP6OPT);
2527		pktopt->ip6po_dest2 = NULL;
2528	}
2529}
2530
2531#define PKTOPT_EXTHDRCPY(type) \
2532do {\
2533	if (src->type) {\
2534		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2535		dst->type = malloc(hlen, M_IP6OPT, canwait);\
2536		if (dst->type == NULL && canwait == M_NOWAIT)\
2537			goto bad;\
2538		bcopy(src->type, dst->type, hlen);\
2539	}\
2540} while (/*CONSTCOND*/ 0)
2541
2542struct ip6_pktopts *
2543ip6_copypktopts(src, canwait)
2544	struct ip6_pktopts *src;
2545	int canwait;
2546{
2547	struct ip6_pktopts *dst;
2548
2549	if (src == NULL) {
2550		printf("ip6_clearpktopts: invalid argument\n");
2551		return (NULL);
2552	}
2553
2554	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2555	if (dst == NULL && canwait == M_NOWAIT)
2556		return (NULL);
2557	bzero(dst, sizeof(*dst));
2558	dst->needfree = 1;
2559
2560	dst->ip6po_hlim = src->ip6po_hlim;
2561	dst->ip6po_tclass = src->ip6po_tclass;
2562	dst->ip6po_flags = src->ip6po_flags;
2563	if (src->ip6po_pktinfo) {
2564		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2565		    M_IP6OPT, canwait);
2566		if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2567			goto bad;
2568		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2569	}
2570	if (src->ip6po_nexthop) {
2571		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2572		    M_IP6OPT, canwait);
2573		if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
2574			goto bad;
2575		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2576		    src->ip6po_nexthop->sa_len);
2577	}
2578	PKTOPT_EXTHDRCPY(ip6po_hbh);
2579	PKTOPT_EXTHDRCPY(ip6po_dest1);
2580	PKTOPT_EXTHDRCPY(ip6po_dest2);
2581	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2582	return (dst);
2583
2584  bad:
2585	if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2586	if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2587	if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2588	if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2589	if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2590	if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2591	free(dst, M_IP6OPT);
2592	return (NULL);
2593}
2594#undef PKTOPT_EXTHDRCPY
2595
2596void
2597ip6_freepcbopts(pktopt)
2598	struct ip6_pktopts *pktopt;
2599{
2600	if (pktopt == NULL)
2601		return;
2602
2603	ip6_clearpktopts(pktopt, -1);
2604
2605	free(pktopt, M_IP6OPT);
2606}
2607
2608/*
2609 * Set the IP6 multicast options in response to user setsockopt().
2610 */
2611static int
2612ip6_setmoptions(optname, im6op, m)
2613	int optname;
2614	struct ip6_moptions **im6op;
2615	struct mbuf *m;
2616{
2617	int error = 0;
2618	u_int loop, ifindex;
2619	struct ipv6_mreq *mreq;
2620	struct ifnet *ifp;
2621	struct ip6_moptions *im6o = *im6op;
2622	struct route_in6 ro;
2623	struct sockaddr_in6 *dst;
2624	struct in6_multi_mship *imm;
2625	struct thread *td = curthread;
2626
2627	if (im6o == NULL) {
2628		/*
2629		 * No multicast option buffer attached to the pcb;
2630		 * allocate one and initialize to default values.
2631		 */
2632		im6o = (struct ip6_moptions *)
2633			malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
2634
2635		if (im6o == NULL)
2636			return (ENOBUFS);
2637		*im6op = im6o;
2638		im6o->im6o_multicast_ifp = NULL;
2639		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2640		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2641		LIST_INIT(&im6o->im6o_memberships);
2642	}
2643
2644	switch (optname) {
2645
2646	case IPV6_MULTICAST_IF:
2647		/*
2648		 * Select the interface for outgoing multicast packets.
2649		 */
2650		if (m == NULL || m->m_len != sizeof(u_int)) {
2651			error = EINVAL;
2652			break;
2653		}
2654		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2655		if (ifindex < 0 || if_index < ifindex) {
2656			error = ENXIO;	/* XXX EINVAL? */
2657			break;
2658		}
2659		ifp = ifnet_byindex(ifindex);
2660		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2661			error = EADDRNOTAVAIL;
2662			break;
2663		}
2664		im6o->im6o_multicast_ifp = ifp;
2665		break;
2666
2667	case IPV6_MULTICAST_HOPS:
2668	    {
2669		/*
2670		 * Set the IP6 hoplimit for outgoing multicast packets.
2671		 */
2672		int optval;
2673		if (m == NULL || m->m_len != sizeof(int)) {
2674			error = EINVAL;
2675			break;
2676		}
2677		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2678		if (optval < -1 || optval >= 256)
2679			error = EINVAL;
2680		else if (optval == -1)
2681			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2682		else
2683			im6o->im6o_multicast_hlim = optval;
2684		break;
2685	    }
2686
2687	case IPV6_MULTICAST_LOOP:
2688		/*
2689		 * Set the loopback flag for outgoing multicast packets.
2690		 * Must be zero or one.
2691		 */
2692		if (m == NULL || m->m_len != sizeof(u_int)) {
2693			error = EINVAL;
2694			break;
2695		}
2696		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2697		if (loop > 1) {
2698			error = EINVAL;
2699			break;
2700		}
2701		im6o->im6o_multicast_loop = loop;
2702		break;
2703
2704	case IPV6_JOIN_GROUP:
2705		/*
2706		 * Add a multicast group membership.
2707		 * Group must be a valid IP6 multicast address.
2708		 */
2709		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2710			error = EINVAL;
2711			break;
2712		}
2713		mreq = mtod(m, struct ipv6_mreq *);
2714		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2715			/*
2716			 * We use the unspecified address to specify to accept
2717			 * all multicast addresses. Only super user is allowed
2718			 * to do this.
2719			 */
2720			if (suser(td)) {
2721				error = EACCES;
2722				break;
2723			}
2724		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2725			error = EINVAL;
2726			break;
2727		}
2728
2729		/*
2730		 * If the interface is specified, validate it.
2731		 */
2732		if (mreq->ipv6mr_interface < 0 ||
2733		    if_index < mreq->ipv6mr_interface) {
2734			error = ENXIO;	/* XXX EINVAL? */
2735			break;
2736		}
2737		/*
2738		 * If no interface was explicitly specified, choose an
2739		 * appropriate one according to the given multicast address.
2740		 */
2741		if (mreq->ipv6mr_interface == 0) {
2742			/*
2743			 * If the multicast address is in node-local scope,
2744			 * the interface should be a loopback interface.
2745			 * Otherwise, look up the routing table for the
2746			 * address, and choose the outgoing interface.
2747			 *   XXX: is it a good approach?
2748			 */
2749			if (IN6_IS_ADDR_MC_INTFACELOCAL(&mreq->ipv6mr_multiaddr)) {
2750				ifp = &loif[0];
2751			} else {
2752				ro.ro_rt = NULL;
2753				dst = (struct sockaddr_in6 *)&ro.ro_dst;
2754				bzero(dst, sizeof(*dst));
2755				dst->sin6_len = sizeof(struct sockaddr_in6);
2756				dst->sin6_family = AF_INET6;
2757				dst->sin6_addr = mreq->ipv6mr_multiaddr;
2758				rtalloc((struct route *)&ro);
2759				if (ro.ro_rt == NULL) {
2760					error = EADDRNOTAVAIL;
2761					break;
2762				}
2763				ifp = ro.ro_rt->rt_ifp;
2764				RTFREE(ro.ro_rt);
2765			}
2766		} else
2767			ifp = ifnet_byindex(mreq->ipv6mr_interface);
2768
2769		/*
2770		 * See if we found an interface, and confirm that it
2771		 * supports multicast
2772		 */
2773		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2774			error = EADDRNOTAVAIL;
2775			break;
2776		}
2777		/*
2778		 * Put interface index into the multicast address,
2779		 * if the address has link-local scope.
2780		 */
2781		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2782			mreq->ipv6mr_multiaddr.s6_addr16[1] =
2783			    htons(ifp->if_index);
2784		}
2785		/*
2786		 * See if the membership already exists.
2787		 */
2788		for (imm = im6o->im6o_memberships.lh_first;
2789		     imm != NULL; imm = imm->i6mm_chain.le_next)
2790			if (imm->i6mm_maddr->in6m_ifp == ifp &&
2791			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2792					       &mreq->ipv6mr_multiaddr))
2793				break;
2794		if (imm != NULL) {
2795			error = EADDRINUSE;
2796			break;
2797		}
2798		/*
2799		 * Everything looks good; add a new record to the multicast
2800		 * address list for the given interface.
2801		 */
2802		imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK);
2803		if (imm == NULL) {
2804			error = ENOBUFS;
2805			break;
2806		}
2807		if ((imm->i6mm_maddr =
2808		     in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
2809			free(imm, M_IPMADDR);
2810			break;
2811		}
2812		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2813		break;
2814
2815	case IPV6_LEAVE_GROUP:
2816		/*
2817		 * Drop a multicast group membership.
2818		 * Group must be a valid IP6 multicast address.
2819		 */
2820		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2821			error = EINVAL;
2822			break;
2823		}
2824		mreq = mtod(m, struct ipv6_mreq *);
2825		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2826			if (suser(td)) {
2827				error = EACCES;
2828				break;
2829			}
2830		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2831			error = EINVAL;
2832			break;
2833		}
2834		/*
2835		 * If an interface address was specified, get a pointer
2836		 * to its ifnet structure.
2837		 */
2838		if (mreq->ipv6mr_interface < 0
2839		 || if_index < mreq->ipv6mr_interface) {
2840			error = ENXIO;	/* XXX EINVAL? */
2841			break;
2842		}
2843		ifp = ifnet_byindex(mreq->ipv6mr_interface);
2844		/*
2845		 * Put interface index into the multicast address,
2846		 * if the address has link-local scope.
2847		 */
2848		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2849			mreq->ipv6mr_multiaddr.s6_addr16[1]
2850				= htons(mreq->ipv6mr_interface);
2851		}
2852
2853		/*
2854		 * Find the membership in the membership list.
2855		 */
2856		for (imm = im6o->im6o_memberships.lh_first;
2857		     imm != NULL; imm = imm->i6mm_chain.le_next) {
2858			if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2859			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2860			    &mreq->ipv6mr_multiaddr))
2861				break;
2862		}
2863		if (imm == NULL) {
2864			/* Unable to resolve interface */
2865			error = EADDRNOTAVAIL;
2866			break;
2867		}
2868		/*
2869		 * Give up the multicast address record to which the
2870		 * membership points.
2871		 */
2872		LIST_REMOVE(imm, i6mm_chain);
2873		in6_delmulti(imm->i6mm_maddr);
2874		free(imm, M_IPMADDR);
2875		break;
2876
2877	default:
2878		error = EOPNOTSUPP;
2879		break;
2880	}
2881
2882	/*
2883	 * If all options have default values, no need to keep the mbuf.
2884	 */
2885	if (im6o->im6o_multicast_ifp == NULL &&
2886	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2887	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2888	    im6o->im6o_memberships.lh_first == NULL) {
2889		free(*im6op, M_IPMOPTS);
2890		*im6op = NULL;
2891	}
2892
2893	return (error);
2894}
2895
2896/*
2897 * Return the IP6 multicast options in response to user getsockopt().
2898 */
2899static int
2900ip6_getmoptions(optname, im6o, mp)
2901	int optname;
2902	struct ip6_moptions *im6o;
2903	struct mbuf **mp;
2904{
2905	u_int *hlim, *loop, *ifindex;
2906
2907	*mp = m_get(M_TRYWAIT, MT_HEADER);		/* XXX */
2908
2909	switch (optname) {
2910
2911	case IPV6_MULTICAST_IF:
2912		ifindex = mtod(*mp, u_int *);
2913		(*mp)->m_len = sizeof(u_int);
2914		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2915			*ifindex = 0;
2916		else
2917			*ifindex = im6o->im6o_multicast_ifp->if_index;
2918		return (0);
2919
2920	case IPV6_MULTICAST_HOPS:
2921		hlim = mtod(*mp, u_int *);
2922		(*mp)->m_len = sizeof(u_int);
2923		if (im6o == NULL)
2924			*hlim = ip6_defmcasthlim;
2925		else
2926			*hlim = im6o->im6o_multicast_hlim;
2927		return (0);
2928
2929	case IPV6_MULTICAST_LOOP:
2930		loop = mtod(*mp, u_int *);
2931		(*mp)->m_len = sizeof(u_int);
2932		if (im6o == NULL)
2933			*loop = ip6_defmcasthlim;
2934		else
2935			*loop = im6o->im6o_multicast_loop;
2936		return (0);
2937
2938	default:
2939		return (EOPNOTSUPP);
2940	}
2941}
2942
2943/*
2944 * Discard the IP6 multicast options.
2945 */
2946void
2947ip6_freemoptions(im6o)
2948	struct ip6_moptions *im6o;
2949{
2950	struct in6_multi_mship *imm;
2951
2952	if (im6o == NULL)
2953		return;
2954
2955	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2956		LIST_REMOVE(imm, i6mm_chain);
2957		if (imm->i6mm_maddr)
2958			in6_delmulti(imm->i6mm_maddr);
2959		free(imm, M_IPMADDR);
2960	}
2961	free(im6o, M_IPMOPTS);
2962}
2963
2964/*
2965 * Set IPv6 outgoing packet options based on advanced API.
2966 */
2967int
2968ip6_setpktoptions(control, opt, stickyopt, priv, needcopy, uproto)
2969	struct mbuf *control;
2970	struct ip6_pktopts *opt, *stickyopt;
2971	int priv, needcopy, uproto;
2972{
2973	struct cmsghdr *cm = 0;
2974
2975	if (control == 0 || opt == 0)
2976		return (EINVAL);
2977
2978	if (stickyopt) {
2979		/*
2980		 * If stickyopt is provided, make a local copy of the options
2981		 * for this particular packet, then override them by ancillary
2982		 * objects.
2983		 * XXX: need to gain a reference for the cached route of the
2984		 * next hop in case of the overriding.
2985		 */
2986		*opt = *stickyopt;
2987		if (opt->ip6po_nextroute.ro_rt) {
2988			RT_LOCK(opt->ip6po_nextroute.ro_rt);
2989			RT_ADDREF(opt->ip6po_nextroute.ro_rt);
2990			RT_UNLOCK(opt->ip6po_nextroute.ro_rt);
2991		}
2992	} else
2993		init_ip6pktopts(opt);
2994	opt->needfree = needcopy;
2995
2996	/*
2997	 * XXX: Currently, we assume all the optional information is stored
2998	 * in a single mbuf.
2999	 */
3000	if (control->m_next)
3001		return (EINVAL);
3002
3003	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
3004	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
3005		int error;
3006
3007		if (control->m_len < CMSG_LEN(0))
3008			return (EINVAL);
3009
3010		cm = mtod(control, struct cmsghdr *);
3011		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
3012			return (EINVAL);
3013		if (cm->cmsg_level != IPPROTO_IPV6)
3014			continue;
3015
3016		error = ip6_setpktoption(cm->cmsg_type, CMSG_DATA(cm),
3017		    cm->cmsg_len - CMSG_LEN(0), opt, priv, needcopy, 1, uproto);
3018		if (error)
3019			return (error);
3020	}
3021
3022	return (0);
3023}
3024
3025/*
3026 * Set a particular packet option, as a sticky option or an ancillary data
3027 * item.  "len" can be 0 only when it's a sticky option.
3028 * We have 4 cases of combination of "sticky" and "cmsg":
3029 * "sticky=0, cmsg=0": impossible
3030 * "sticky=0, cmsg=1": RFC2292 or rfc2292bis ancillary data
3031 * "sticky=1, cmsg=0": rfc2292bis socket option
3032 * "sticky=1, cmsg=1": RFC2292 socket option
3033 */
3034static int
3035ip6_setpktoption(optname, buf, len, opt, priv, sticky, cmsg, uproto)
3036	int optname, len, priv, sticky, cmsg, uproto;
3037	u_char *buf;
3038	struct ip6_pktopts *opt;
3039{
3040	int minmtupolicy, preftemp;
3041
3042	if (!sticky && !cmsg) {
3043#ifdef DIAGNOSTIC
3044		printf("ip6_setpktoption: impossible case\n");
3045#endif
3046		return (EINVAL);
3047	}
3048
3049	/*
3050	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3051	 * not be specified in the context of rfc2292bis.  Conversely,
3052	 * rfc2292bis types should not be specified in the context of RFC2292.
3053	 */
3054	if (!cmsg) {
3055		switch (optname) {
3056		case IPV6_2292PKTINFO:
3057		case IPV6_2292HOPLIMIT:
3058		case IPV6_2292NEXTHOP:
3059		case IPV6_2292HOPOPTS:
3060		case IPV6_2292DSTOPTS:
3061		case IPV6_2292RTHDR:
3062		case IPV6_2292PKTOPTIONS:
3063			return (ENOPROTOOPT);
3064		}
3065	}
3066	if (sticky && cmsg) {
3067		switch (optname) {
3068		case IPV6_PKTINFO:
3069		case IPV6_HOPLIMIT:
3070		case IPV6_NEXTHOP:
3071		case IPV6_HOPOPTS:
3072		case IPV6_DSTOPTS:
3073		case IPV6_RTHDRDSTOPTS:
3074		case IPV6_RTHDR:
3075		case IPV6_USE_MIN_MTU:
3076		case IPV6_DONTFRAG:
3077		case IPV6_TCLASS:
3078		case IPV6_PREFER_TEMPADDR: /* XXX: not an rfc2292bis option */
3079			return (ENOPROTOOPT);
3080		}
3081	}
3082
3083	switch (optname) {
3084	case IPV6_2292PKTINFO:
3085	case IPV6_PKTINFO:
3086	{
3087		struct ifnet *ifp = NULL;
3088		struct in6_pktinfo *pktinfo;
3089
3090		if (len != sizeof(struct in6_pktinfo))
3091			return (EINVAL);
3092
3093		pktinfo = (struct in6_pktinfo *)buf;
3094
3095		/*
3096		 * An application can clear any sticky IPV6_PKTINFO option by
3097		 * doing a "regular" setsockopt with ipi6_addr being
3098		 * in6addr_any and ipi6_ifindex being zero.
3099		 * [RFC 3542, Section 6]
3100		 */
3101		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3102		    pktinfo->ipi6_ifindex == 0 &&
3103		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3104			ip6_clearpktopts(opt, optname);
3105			break;
3106		}
3107
3108		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3109		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3110			return (EINVAL);
3111		}
3112
3113		/* validate the interface index if specified. */
3114		if (pktinfo->ipi6_ifindex > if_index ||
3115		    pktinfo->ipi6_ifindex < 0) {
3116			 return (ENXIO);
3117		}
3118		if (pktinfo->ipi6_ifindex) {
3119			ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
3120			if (ifp == NULL)
3121				return (ENXIO);
3122		}
3123
3124		/*
3125		 * We store the address anyway, and let in6_selectsrc()
3126		 * validate the specified address.  This is because ipi6_addr
3127		 * may not have enough information about its scope zone, and
3128		 * we may need additional information (such as outgoing
3129		 * interface or the scope zone of a destination address) to
3130		 * disambiguate the scope.
3131		 * XXX: the delay of the validation may confuse the
3132		 * application when it is used as a sticky option.
3133		 */
3134		if (sticky) {
3135			if (opt->ip6po_pktinfo == NULL) {
3136				opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
3137				    M_IP6OPT, M_WAITOK);
3138			}
3139			bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
3140		} else
3141			opt->ip6po_pktinfo = pktinfo;
3142		break;
3143	}
3144
3145	case IPV6_2292HOPLIMIT:
3146	case IPV6_HOPLIMIT:
3147	{
3148		int *hlimp;
3149
3150		/*
3151		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3152		 * to simplify the ordering among hoplimit options.
3153		 */
3154		if (optname == IPV6_HOPLIMIT && sticky)
3155			return (ENOPROTOOPT);
3156
3157		if (len != sizeof(int))
3158			return (EINVAL);
3159		hlimp = (int *)buf;
3160		if (*hlimp < -1 || *hlimp > 255)
3161			return (EINVAL);
3162
3163		opt->ip6po_hlim = *hlimp;
3164		break;
3165	}
3166
3167	case IPV6_TCLASS:
3168	{
3169		int tclass;
3170
3171		if (len != sizeof(int))
3172			return (EINVAL);
3173		tclass = *(int *)buf;
3174		if (tclass < -1 || tclass > 255)
3175			return (EINVAL);
3176
3177		opt->ip6po_tclass = tclass;
3178		break;
3179	}
3180
3181	case IPV6_2292NEXTHOP:
3182	case IPV6_NEXTHOP:
3183		if (!priv)
3184			return (EPERM);
3185
3186		if (len == 0) {	/* just remove the option */
3187			ip6_clearpktopts(opt, IPV6_NEXTHOP);
3188			break;
3189		}
3190
3191		/* check if cmsg_len is large enough for sa_len */
3192		if (len < sizeof(struct sockaddr) || len < *buf)
3193			return (EINVAL);
3194
3195		switch (((struct sockaddr *)buf)->sa_family) {
3196		case AF_INET6:
3197		{
3198			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3199#if 0
3200			int error;
3201#endif
3202
3203			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3204				return (EINVAL);
3205
3206			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3207			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3208				return (EINVAL);
3209			}
3210#if 0
3211			if ((error = scope6_check_id(sa6, ip6_use_defzone))
3212			    != 0) {
3213				return (error);
3214			}
3215#endif
3216			sa6->sin6_scope_id = 0; /* XXX */
3217			break;
3218		}
3219		case AF_LINK:	/* should eventually be supported */
3220		default:
3221			return (EAFNOSUPPORT);
3222		}
3223
3224		/* turn off the previous option, then set the new option. */
3225		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3226		if (sticky) {
3227			opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_WAITOK);
3228			bcopy(buf, opt->ip6po_nexthop, *buf);
3229		} else
3230			opt->ip6po_nexthop = (struct sockaddr *)buf;
3231		break;
3232
3233	case IPV6_2292HOPOPTS:
3234	case IPV6_HOPOPTS:
3235	{
3236		struct ip6_hbh *hbh;
3237		int hbhlen;
3238
3239		/*
3240		 * XXX: We don't allow a non-privileged user to set ANY HbH
3241		 * options, since per-option restriction has too much
3242		 * overhead.
3243		 */
3244		if (!priv)
3245			return (EPERM);
3246
3247		if (len == 0) {
3248			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3249			break;	/* just remove the option */
3250		}
3251
3252		/* message length validation */
3253		if (len < sizeof(struct ip6_hbh))
3254			return (EINVAL);
3255		hbh = (struct ip6_hbh *)buf;
3256		hbhlen = (hbh->ip6h_len + 1) << 3;
3257		if (len != hbhlen)
3258			return (EINVAL);
3259
3260		/* turn off the previous option, then set the new option. */
3261		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3262		if (sticky) {
3263			opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_WAITOK);
3264			bcopy(hbh, opt->ip6po_hbh, hbhlen);
3265		} else
3266			opt->ip6po_hbh = hbh;
3267
3268		break;
3269	}
3270
3271	case IPV6_2292DSTOPTS:
3272	case IPV6_DSTOPTS:
3273	case IPV6_RTHDRDSTOPTS:
3274	{
3275		struct ip6_dest *dest, **newdest = NULL;
3276		int destlen;
3277
3278		if (!priv)	/* XXX: see the comment for IPV6_HOPOPTS */
3279			return (EPERM);
3280
3281		if (len == 0) {
3282			ip6_clearpktopts(opt, optname);
3283			break;	/* just remove the option */
3284		}
3285
3286		/* message length validation */
3287		if (len < sizeof(struct ip6_dest))
3288			return (EINVAL);
3289		dest = (struct ip6_dest *)buf;
3290		destlen = (dest->ip6d_len + 1) << 3;
3291		if (len != destlen)
3292			return (EINVAL);
3293
3294		/*
3295		 * Determine the position that the destination options header
3296		 * should be inserted; before or after the routing header.
3297		 */
3298		switch (optname) {
3299		case IPV6_2292DSTOPTS:
3300			/*
3301			 * The old advacned API is ambiguous on this point.
3302			 * Our approach is to determine the position based
3303			 * according to the existence of a routing header.
3304			 * Note, however, that this depends on the order of the
3305			 * extension headers in the ancillary data; the 1st
3306			 * part of the destination options header must appear
3307			 * before the routing header in the ancillary data,
3308			 * too.
3309			 * RFC2292bis solved the ambiguity by introducing
3310			 * separate ancillary data or option types.
3311			 */
3312			if (opt->ip6po_rthdr == NULL)
3313				newdest = &opt->ip6po_dest1;
3314			else
3315				newdest = &opt->ip6po_dest2;
3316			break;
3317		case IPV6_RTHDRDSTOPTS:
3318			newdest = &opt->ip6po_dest1;
3319			break;
3320		case IPV6_DSTOPTS:
3321			newdest = &opt->ip6po_dest2;
3322			break;
3323		}
3324
3325		/* turn off the previous option, then set the new option. */
3326		ip6_clearpktopts(opt, optname);
3327		if (sticky) {
3328			*newdest = malloc(destlen, M_IP6OPT, M_WAITOK);
3329			bcopy(dest, *newdest, destlen);
3330		} else
3331			*newdest = dest;
3332
3333		break;
3334	}
3335
3336	case IPV6_2292RTHDR:
3337	case IPV6_RTHDR:
3338	{
3339		struct ip6_rthdr *rth;
3340		int rthlen;
3341
3342		if (len == 0) {
3343			ip6_clearpktopts(opt, IPV6_RTHDR);
3344			break;	/* just remove the option */
3345		}
3346
3347		/* message length validation */
3348		if (len < sizeof(struct ip6_rthdr))
3349			return (EINVAL);
3350		rth = (struct ip6_rthdr *)buf;
3351		rthlen = (rth->ip6r_len + 1) << 3;
3352		if (len != rthlen)
3353			return (EINVAL);
3354
3355		switch (rth->ip6r_type) {
3356		case IPV6_RTHDR_TYPE_0:
3357			if (rth->ip6r_len == 0)	/* must contain one addr */
3358				return (EINVAL);
3359			if (rth->ip6r_len % 2) /* length must be even */
3360				return (EINVAL);
3361			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3362				return (EINVAL);
3363			break;
3364		default:
3365			return (EINVAL);	/* not supported */
3366		}
3367
3368		/* turn off the previous option */
3369		ip6_clearpktopts(opt, IPV6_RTHDR);
3370		if (sticky) {
3371			opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_WAITOK);
3372			bcopy(rth, opt->ip6po_rthdr, rthlen);
3373		} else
3374			opt->ip6po_rthdr = rth;
3375
3376		break;
3377	}
3378
3379	case IPV6_USE_MIN_MTU:
3380		if (len != sizeof(int))
3381			return (EINVAL);
3382		minmtupolicy = *(int *)buf;
3383		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3384		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3385		    minmtupolicy != IP6PO_MINMTU_ALL) {
3386			return (EINVAL);
3387		}
3388		opt->ip6po_minmtu = minmtupolicy;
3389		break;
3390
3391	case IPV6_DONTFRAG:
3392		if (len != sizeof(int))
3393			return (EINVAL);
3394
3395		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3396			/*
3397			 * we ignore this option for TCP sockets.
3398			 * (rfc2292bis leaves this case unspecified.)
3399			 */
3400			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3401		} else
3402			opt->ip6po_flags |= IP6PO_DONTFRAG;
3403		break;
3404
3405	case IPV6_PREFER_TEMPADDR:
3406		if (len != sizeof(int))
3407			return (EINVAL);
3408		preftemp = *(int *)buf;
3409		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3410		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3411		    preftemp != IP6PO_TEMPADDR_PREFER) {
3412			return (EINVAL);
3413		}
3414		opt->ip6po_prefer_tempaddr = preftemp;
3415		break;
3416
3417	default:
3418		return (ENOPROTOOPT);
3419	} /* end of switch */
3420
3421	return (0);
3422}
3423
3424/*
3425 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3426 * packet to the input queue of a specified interface.  Note that this
3427 * calls the output routine of the loopback "driver", but with an interface
3428 * pointer that might NOT be &loif -- easier than replicating that code here.
3429 */
3430void
3431ip6_mloopback(ifp, m, dst)
3432	struct ifnet *ifp;
3433	struct mbuf *m;
3434	struct sockaddr_in6 *dst;
3435{
3436	struct mbuf *copym;
3437	struct ip6_hdr *ip6;
3438
3439	copym = m_copy(m, 0, M_COPYALL);
3440	if (copym == NULL)
3441		return;
3442
3443	/*
3444	 * Make sure to deep-copy IPv6 header portion in case the data
3445	 * is in an mbuf cluster, so that we can safely override the IPv6
3446	 * header portion later.
3447	 */
3448	if ((copym->m_flags & M_EXT) != 0 ||
3449	    copym->m_len < sizeof(struct ip6_hdr)) {
3450		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3451		if (copym == NULL)
3452			return;
3453	}
3454
3455#ifdef DIAGNOSTIC
3456	if (copym->m_len < sizeof(*ip6)) {
3457		m_freem(copym);
3458		return;
3459	}
3460#endif
3461
3462	ip6 = mtod(copym, struct ip6_hdr *);
3463	/*
3464	 * clear embedded scope identifiers if necessary.
3465	 * in6_clearscope will touch the addresses only when necessary.
3466	 */
3467	in6_clearscope(&ip6->ip6_src);
3468	in6_clearscope(&ip6->ip6_dst);
3469
3470	(void)if_simloop(ifp, copym, dst->sin6_family, 0);
3471}
3472
3473/*
3474 * Chop IPv6 header off from the payload.
3475 */
3476static int
3477ip6_splithdr(m, exthdrs)
3478	struct mbuf *m;
3479	struct ip6_exthdrs *exthdrs;
3480{
3481	struct mbuf *mh;
3482	struct ip6_hdr *ip6;
3483
3484	ip6 = mtod(m, struct ip6_hdr *);
3485	if (m->m_len > sizeof(*ip6)) {
3486		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3487		if (mh == 0) {
3488			m_freem(m);
3489			return ENOBUFS;
3490		}
3491		M_MOVE_PKTHDR(mh, m);
3492		MH_ALIGN(mh, sizeof(*ip6));
3493		m->m_len -= sizeof(*ip6);
3494		m->m_data += sizeof(*ip6);
3495		mh->m_next = m;
3496		m = mh;
3497		m->m_len = sizeof(*ip6);
3498		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3499	}
3500	exthdrs->ip6e_ip6 = m;
3501	return 0;
3502}
3503
3504/*
3505 * Compute IPv6 extension header length.
3506 */
3507int
3508ip6_optlen(in6p)
3509	struct in6pcb *in6p;
3510{
3511	int len;
3512
3513	if (!in6p->in6p_outputopts)
3514		return 0;
3515
3516	len = 0;
3517#define elen(x) \
3518    (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3519
3520	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3521	if (in6p->in6p_outputopts->ip6po_rthdr)
3522		/* dest1 is valid with rthdr only */
3523		len += elen(in6p->in6p_outputopts->ip6po_dest1);
3524	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3525	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3526	return len;
3527#undef elen
3528}
3529