ip6_output.c revision 160981
1/*	$FreeBSD: head/sys/netinet6/ip6_output.c 160981 2006-08-04 21:27:40Z brooks $	*/
2/*	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $	*/
3
4/*-
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*-
34 * Copyright (c) 1982, 1986, 1988, 1990, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 4. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
62 */
63
64#include "opt_inet.h"
65#include "opt_inet6.h"
66#include "opt_ipsec.h"
67
68#include <sys/param.h>
69#include <sys/malloc.h>
70#include <sys/mbuf.h>
71#include <sys/proc.h>
72#include <sys/errno.h>
73#include <sys/protosw.h>
74#include <sys/socket.h>
75#include <sys/socketvar.h>
76#include <sys/systm.h>
77#include <sys/kernel.h>
78
79#include <net/if.h>
80#include <net/netisr.h>
81#include <net/route.h>
82#include <net/pfil.h>
83
84#include <netinet/in.h>
85#include <netinet/in_var.h>
86#include <netinet6/in6_var.h>
87#include <netinet/ip6.h>
88#include <netinet/icmp6.h>
89#include <netinet6/ip6_var.h>
90#include <netinet/in_pcb.h>
91#include <netinet/tcp_var.h>
92#include <netinet6/nd6.h>
93
94#ifdef IPSEC
95#include <netinet6/ipsec.h>
96#ifdef INET6
97#include <netinet6/ipsec6.h>
98#endif
99#include <netkey/key.h>
100#endif /* IPSEC */
101
102#ifdef FAST_IPSEC
103#include <netipsec/ipsec.h>
104#include <netipsec/ipsec6.h>
105#include <netipsec/key.h>
106#endif /* FAST_IPSEC */
107
108#include <netinet6/ip6protosw.h>
109#include <netinet6/scope6_var.h>
110
/*
 * File-local malloc type, short name "ip6_moptions", described as
 * "internet multicast options".  Its users are outside this section.
 */
111static MALLOC_DEFINE(M_IP6MOPTS, "ip6_moptions", "internet multicast options");
112
/*
 * Set of mbufs, one per header piece that ip6_output() places in front of
 * the payload.  ip6_output() zeroes the structure first and then treats a
 * NULL member as "this header is absent".
 */
113struct ip6_exthdrs {
114	struct mbuf *ip6e_ip6;		/* IPv6 header; used as the packet head after ip6_splithdr() */
115	struct mbuf *ip6e_hbh;		/* Hop-by-Hop options header (from ip6po_hbh) */
116	struct mbuf *ip6e_dest1;	/* Destination options, 1st part; only built with a routing header */
117	struct mbuf *ip6e_rthdr;	/* Routing header (from ip6po_rthdr) */
118	struct mbuf *ip6e_dest2;	/* Destination options, 2nd part; chained right after the IPv6 header first */
119};
120
/*
 * Forward declarations of the file-local helpers.  All of them return int.
 * Their definitions are outside this section; the notes below are limited
 * to what the calls in ip6_output() show.
 */
/* Option helpers; presumably for socket/packet options, judging by the names only -- verify against the definitions. */
121static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
122			   int, int));
123static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
124	struct socket *, struct sockopt *));
125static int ip6_getpcbopt __P((struct ip6_pktopts *, int, struct sockopt *));
126static int ip6_setpktopt __P((int, u_char *, int, struct ip6_pktopts *, int,
127	int, int, int));
128
/* Multicast option set/get helpers (not called in the visible part of the file). */
129static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
130static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
/*
 * ip6_copyexthdr: called through MAKE_EXTHDR with the destination mbuf
 * slot, the source header and its length in bytes; a non-zero return makes
 * ip6_output() bail out.
 */
131static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));
132static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
133	struct ip6_frag **));
/* ip6_insert_jumboopt: called with the payload length once it exceeds IPV6_MAXPACKET. */
134static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));
/* ip6_splithdr: on success ip6_output() continues with exthdrs.ip6e_ip6 as the packet head. */
135static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));
/* ip6_getpmtu: the last two pointers receive the MTU and the "alwaysfrag" flag used by ip6_output(). */
136static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
137	struct ifnet *, struct in6_addr *, u_long *, int *));
138static int copypktopts __P((struct ip6_pktopts *, struct ip6_pktopts *, int));
139
140
141/*
142 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
143 * header (with pri, len, nxt, hlim, src, dst).
144 * This function may modify ver and hlim only.
145 * The mbuf chain containing the packet will be freed.
146 * The mbuf opt, if present, will not be freed.
147 *
148 * Type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
149 * nd_ifinfo.linkmtu is u_int32_t, so we use u_long, the type of
150 * rt_rmx.rmx_mtu, because it is the largest of the three.
151 */
152int
153ip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
154	struct mbuf *m0;
155	struct ip6_pktopts *opt;
156	struct route_in6 *ro;
157	int flags;
158	struct ip6_moptions *im6o;
159	struct ifnet **ifpp;		/* XXX: just for statistics */
160	struct inpcb *inp;
161{
162	struct ip6_hdr *ip6, *mhip6;
163	struct ifnet *ifp, *origifp;
164	struct mbuf *m = m0;
165	int hlen, tlen, len, off;
166	struct route_in6 ip6route;
167	struct rtentry *rt = NULL;
168	struct sockaddr_in6 *dst, src_sa, dst_sa;
169	struct in6_addr odst;
170	int error = 0;
171	struct in6_ifaddr *ia = NULL;
172	u_long mtu;
173	int alwaysfrag, dontfrag;
174	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
175	struct ip6_exthdrs exthdrs;
176	struct in6_addr finaldst, src0, dst0;
177	u_int32_t zone;
178	struct route_in6 *ro_pmtu = NULL;
179	int hdrsplit = 0;
180	int needipsec = 0;
181#if defined(IPSEC) || defined(FAST_IPSEC)
182	int needipsectun = 0;
183	struct secpolicy *sp = NULL;
184#endif /*IPSEC || FAST_IPSEC*/
185
186	ip6 = mtod(m, struct ip6_hdr *);
187	finaldst = ip6->ip6_dst;
188
189#define MAKE_EXTHDR(hp, mp)						\
190    do {								\
191	if (hp) {							\
192		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
193		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
194		    ((eh)->ip6e_len + 1) << 3);				\
195		if (error)						\
196			goto freehdrs;					\
197	}								\
198    } while (/*CONSTCOND*/ 0)
199
200	bzero(&exthdrs, sizeof(exthdrs));
201
202	if (opt) {
203		/* Hop-by-Hop options header */
204		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
205		/* Destination options header(1st part) */
206		if (opt->ip6po_rthdr) {
207			/*
208			 * Destination options header(1st part)
209			 * This only makes sence with a routing header.
210			 * See Section 9.2 of RFC 3542.
211			 * Disabling this part just for MIP6 convenience is
212			 * a bad idea.  We need to think carefully about a
213			 * way to make the advanced API coexist with MIP6
214			 * options, which might automatically be inserted in
215			 * the kernel.
216			 */
217			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
218		}
219		/* Routing header */
220		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
221		/* Destination options header(2nd part) */
222		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
223	}
224
225#ifdef IPSEC
226	/* get a security policy for this packet */
227	if (inp == NULL)
228		sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
229	else
230		sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
231
232	if (sp == NULL) {
233		ipsec6stat.out_inval++;
234		goto freehdrs;
235	}
236
237	error = 0;
238
239	/* check policy */
240	switch (sp->policy) {
241	case IPSEC_POLICY_DISCARD:
242		/*
243		 * This packet is just discarded.
244		 */
245		ipsec6stat.out_polvio++;
246		goto freehdrs;
247
248	case IPSEC_POLICY_BYPASS:
249	case IPSEC_POLICY_NONE:
250		/* no need to do IPsec. */
251		needipsec = 0;
252		break;
253
254	case IPSEC_POLICY_IPSEC:
255		if (sp->req == NULL) {
256			/* acquire a policy */
257			error = key_spdacquire(sp);
258			goto freehdrs;
259		}
260		needipsec = 1;
261		break;
262
263	case IPSEC_POLICY_ENTRUST:
264	default:
265		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
266	}
267#endif /* IPSEC */
268#ifdef FAST_IPSEC
269	/* get a security policy for this packet */
270	if (inp == NULL)
271		sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
272	else
273		sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
274
275	if (sp == NULL) {
276		newipsecstat.ips_out_inval++;
277		goto freehdrs;
278	}
279
280	error = 0;
281
282	/* check policy */
283	switch (sp->policy) {
284	case IPSEC_POLICY_DISCARD:
285		/*
286		 * This packet is just discarded.
287		 */
288		newipsecstat.ips_out_polvio++;
289		goto freehdrs;
290
291	case IPSEC_POLICY_BYPASS:
292	case IPSEC_POLICY_NONE:
293		/* no need to do IPsec. */
294		needipsec = 0;
295		break;
296
297	case IPSEC_POLICY_IPSEC:
298		if (sp->req == NULL) {
299			/* acquire a policy */
300			error = key_spdacquire(sp);
301			goto freehdrs;
302		}
303		needipsec = 1;
304		break;
305
306	case IPSEC_POLICY_ENTRUST:
307	default:
308		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
309	}
310#endif /* FAST_IPSEC */
311
312	/*
313	 * Calculate the total length of the extension header chain.
314	 * Keep the length of the unfragmentable part for fragmentation.
315	 */
316	optlen = 0;
317	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
318	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
319	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
320	unfragpartlen = optlen + sizeof(struct ip6_hdr);
321	/* NOTE: we don't add AH/ESP length here. do that later. */
322	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
323
324	/*
325	 * If we need IPsec, or there is at least one extension header,
326	 * separate IP6 header from the payload.
327	 */
328	if ((needipsec || optlen) && !hdrsplit) {
329		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
330			m = NULL;
331			goto freehdrs;
332		}
333		m = exthdrs.ip6e_ip6;
334		hdrsplit++;
335	}
336
337	/* adjust pointer */
338	ip6 = mtod(m, struct ip6_hdr *);
339
340	/* adjust mbuf packet header length */
341	m->m_pkthdr.len += optlen;
342	plen = m->m_pkthdr.len - sizeof(*ip6);
343
344	/* If this is a jumbo payload, insert a jumbo payload option. */
345	if (plen > IPV6_MAXPACKET) {
346		if (!hdrsplit) {
347			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
348				m = NULL;
349				goto freehdrs;
350			}
351			m = exthdrs.ip6e_ip6;
352			hdrsplit++;
353		}
354		/* adjust pointer */
355		ip6 = mtod(m, struct ip6_hdr *);
356		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
357			goto freehdrs;
358		ip6->ip6_plen = 0;
359	} else
360		ip6->ip6_plen = htons(plen);
361
362	/*
363	 * Concatenate headers and fill in next header fields.
364	 * Here we have, on "m"
365	 *	IPv6 payload
366	 * and we insert headers accordingly.  Finally, we should be getting:
367	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
368	 *
369	 * during the header composing process, "m" points to IPv6 header.
370	 * "mprev" points to an extension header prior to esp.
371	 */
372	{
373		u_char *nexthdrp = &ip6->ip6_nxt;
374		struct mbuf *mprev = m;
375
376		/*
377		 * we treat dest2 specially.  this makes IPsec processing
378		 * much easier.  the goal here is to make mprev point the
379		 * mbuf prior to dest2.
380		 *
381		 * result: IPv6 dest2 payload
382		 * m and mprev will point to IPv6 header.
383		 */
384		if (exthdrs.ip6e_dest2) {
385			if (!hdrsplit)
386				panic("assumption failed: hdr not split");
387			exthdrs.ip6e_dest2->m_next = m->m_next;
388			m->m_next = exthdrs.ip6e_dest2;
389			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
390			ip6->ip6_nxt = IPPROTO_DSTOPTS;
391		}
392
393#define MAKE_CHAIN(m, mp, p, i)\
394    do {\
395	if (m) {\
396		if (!hdrsplit) \
397			panic("assumption failed: hdr not split"); \
398		*mtod((m), u_char *) = *(p);\
399		*(p) = (i);\
400		p = mtod((m), u_char *);\
401		(m)->m_next = (mp)->m_next;\
402		(mp)->m_next = (m);\
403		(mp) = (m);\
404	}\
405    } while (/*CONSTCOND*/ 0)
406		/*
407		 * result: IPv6 hbh dest1 rthdr dest2 payload
408		 * m will point to IPv6 header.  mprev will point to the
409		 * extension header prior to dest2 (rthdr in the above case).
410		 */
411		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
412		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
413		    IPPROTO_DSTOPTS);
414		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
415		    IPPROTO_ROUTING);
416
417#if defined(IPSEC) || defined(FAST_IPSEC)
418		if (!needipsec)
419			goto skip_ipsec2;
420
421		/*
422		 * pointers after IPsec headers are not valid any more.
423		 * other pointers need a great care too.
424		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
425		 */
426		exthdrs.ip6e_dest2 = NULL;
427
428	    {
429		struct ip6_rthdr *rh = NULL;
430		int segleft_org = 0;
431		struct ipsec_output_state state;
432
433		if (exthdrs.ip6e_rthdr) {
434			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
435			segleft_org = rh->ip6r_segleft;
436			rh->ip6r_segleft = 0;
437		}
438
439		bzero(&state, sizeof(state));
440		state.m = m;
441		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
442		    &needipsectun);
443		m = state.m;
444		if (error) {
445			/* mbuf is already reclaimed in ipsec6_output_trans. */
446			m = NULL;
447			switch (error) {
448			case EHOSTUNREACH:
449			case ENETUNREACH:
450			case EMSGSIZE:
451			case ENOBUFS:
452			case ENOMEM:
453				break;
454			default:
455				printf("ip6_output (ipsec): error code %d\n", error);
456				/* FALLTHROUGH */
457			case ENOENT:
458				/* don't show these error codes to the user */
459				error = 0;
460				break;
461			}
462			goto bad;
463		}
464		if (exthdrs.ip6e_rthdr) {
465			/* ah6_output doesn't modify mbuf chain */
466			rh->ip6r_segleft = segleft_org;
467		}
468	    }
469skip_ipsec2:;
470#endif
471	}
472
473	/*
474	 * If there is a routing header, replace the destination address field
475	 * with the first hop of the routing header.
476	 */
477	if (exthdrs.ip6e_rthdr) {
478		struct ip6_rthdr *rh =
479			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
480						  struct ip6_rthdr *));
481		struct ip6_rthdr0 *rh0;
482		struct in6_addr *addr;
483		struct sockaddr_in6 sa;
484
485		switch (rh->ip6r_type) {
486		case IPV6_RTHDR_TYPE_0:
487			 rh0 = (struct ip6_rthdr0 *)rh;
488			 addr = (struct in6_addr *)(rh0 + 1);
489
490			 /*
491			  * construct a sockaddr_in6 form of
492			  * the first hop.
493			  *
494			  * XXX: we may not have enough
495			  * information about its scope zone;
496			  * there is no standard API to pass
497			  * the information from the
498			  * application.
499			  */
500			 bzero(&sa, sizeof(sa));
501			 sa.sin6_family = AF_INET6;
502			 sa.sin6_len = sizeof(sa);
503			 sa.sin6_addr = addr[0];
504			 if ((error = sa6_embedscope(&sa,
505			     ip6_use_defzone)) != 0) {
506				 goto bad;
507			 }
508			 ip6->ip6_dst = sa.sin6_addr;
509			 bcopy(&addr[1], &addr[0], sizeof(struct in6_addr)
510			     * (rh0->ip6r0_segleft - 1));
511			 addr[rh0->ip6r0_segleft - 1] = finaldst;
512			 /* XXX */
513			 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
514			 break;
515		default:	/* is it possible? */
516			 error = EINVAL;
517			 goto bad;
518		}
519	}
520
521	/* Source address validation */
522	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
523	    (flags & IPV6_UNSPECSRC) == 0) {
524		error = EOPNOTSUPP;
525		ip6stat.ip6s_badscope++;
526		goto bad;
527	}
528	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
529		error = EOPNOTSUPP;
530		ip6stat.ip6s_badscope++;
531		goto bad;
532	}
533
534	ip6stat.ip6s_localout++;
535
536	/*
537	 * Route packet.
538	 */
539	if (ro == 0) {
540		ro = &ip6route;
541		bzero((caddr_t)ro, sizeof(*ro));
542	}
543	ro_pmtu = ro;
544	if (opt && opt->ip6po_rthdr)
545		ro = &opt->ip6po_route;
546	dst = (struct sockaddr_in6 *)&ro->ro_dst;
547
548again:
549 	/*
550	 * if specified, try to fill in the traffic class field.
551	 * do not override if a non-zero value is already set.
552	 * we check the diffserv field and the ecn field separately.
553	 */
554	if (opt && opt->ip6po_tclass >= 0) {
555		int mask = 0;
556
557		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
558			mask |= 0xfc;
559		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
560			mask |= 0x03;
561		if (mask != 0)
562			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
563	}
564
565	/* fill in or override the hop limit field, if necessary. */
566	if (opt && opt->ip6po_hlim != -1)
567		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
568	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
569		if (im6o != NULL)
570			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
571		else
572			ip6->ip6_hlim = ip6_defmcasthlim;
573	}
574
575#if defined(IPSEC) || defined(FAST_IPSEC)
576	if (needipsec && needipsectun) {
577		struct ipsec_output_state state;
578
579		/*
580		 * All the extension headers will become inaccessible
581		 * (since they can be encrypted).
582		 * Don't panic, we need no more updates to extension headers
583		 * on inner IPv6 packet (since they are now encapsulated).
584		 *
585		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
586		 */
587		bzero(&exthdrs, sizeof(exthdrs));
588		exthdrs.ip6e_ip6 = m;
589
590		bzero(&state, sizeof(state));
591		state.m = m;
592		state.ro = (struct route *)ro;
593		state.dst = (struct sockaddr *)dst;
594
595		error = ipsec6_output_tunnel(&state, sp, flags);
596
597		m = state.m;
598		ro = (struct route_in6 *)state.ro;
599		dst = (struct sockaddr_in6 *)state.dst;
600		if (error) {
601			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
602			m0 = m = NULL;
603			m = NULL;
604			switch (error) {
605			case EHOSTUNREACH:
606			case ENETUNREACH:
607			case EMSGSIZE:
608			case ENOBUFS:
609			case ENOMEM:
610				break;
611			default:
612				printf("ip6_output (ipsec): error code %d\n", error);
613				/* FALLTHROUGH */
614			case ENOENT:
615				/* don't show these error codes to the user */
616				error = 0;
617				break;
618			}
619			goto bad;
620		}
621
622		exthdrs.ip6e_ip6 = m;
623	}
624#endif /* IPSEC */
625
626	/* adjust pointer */
627	ip6 = mtod(m, struct ip6_hdr *);
628
629	bzero(&dst_sa, sizeof(dst_sa));
630	dst_sa.sin6_family = AF_INET6;
631	dst_sa.sin6_len = sizeof(dst_sa);
632	dst_sa.sin6_addr = ip6->ip6_dst;
633	if ((error = in6_selectroute(&dst_sa, opt, im6o, ro,
634	    &ifp, &rt, 0)) != 0) {
635		switch (error) {
636		case EHOSTUNREACH:
637			ip6stat.ip6s_noroute++;
638			break;
639		case EADDRNOTAVAIL:
640		default:
641			break; /* XXX statistics? */
642		}
643		if (ifp != NULL)
644			in6_ifstat_inc(ifp, ifs6_out_discard);
645		goto bad;
646	}
647	if (rt == NULL) {
648		/*
649		 * If in6_selectroute() does not return a route entry,
650		 * dst may not have been updated.
651		 */
652		*dst = dst_sa;	/* XXX */
653	}
654
655	/*
656	 * then rt (for unicast) and ifp must be non-NULL valid values.
657	 */
658	if ((flags & IPV6_FORWARDING) == 0) {
659		/* XXX: the FORWARDING flag can be set for mrouting. */
660		in6_ifstat_inc(ifp, ifs6_out_request);
661	}
662	if (rt != NULL) {
663		ia = (struct in6_ifaddr *)(rt->rt_ifa);
664		rt->rt_use++;
665	}
666
667	/*
668	 * The outgoing interface must be in the zone of source and
669	 * destination addresses.  We should use ia_ifp to support the
670	 * case of sending packets to an address of our own.
671	 */
672	if (ia != NULL && ia->ia_ifp)
673		origifp = ia->ia_ifp;
674	else
675		origifp = ifp;
676
677	src0 = ip6->ip6_src;
678	if (in6_setscope(&src0, origifp, &zone))
679		goto badscope;
680	bzero(&src_sa, sizeof(src_sa));
681	src_sa.sin6_family = AF_INET6;
682	src_sa.sin6_len = sizeof(src_sa);
683	src_sa.sin6_addr = ip6->ip6_src;
684	if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
685		goto badscope;
686
687	dst0 = ip6->ip6_dst;
688	if (in6_setscope(&dst0, origifp, &zone))
689		goto badscope;
690	/* re-initialize to be sure */
691	bzero(&dst_sa, sizeof(dst_sa));
692	dst_sa.sin6_family = AF_INET6;
693	dst_sa.sin6_len = sizeof(dst_sa);
694	dst_sa.sin6_addr = ip6->ip6_dst;
695	if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
696		goto badscope;
697	}
698
699	/* scope check is done. */
700	goto routefound;
701
702  badscope:
703	ip6stat.ip6s_badscope++;
704	in6_ifstat_inc(origifp, ifs6_out_discard);
705	if (error == 0)
706		error = EHOSTUNREACH; /* XXX */
707	goto bad;
708
709  routefound:
710	if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
711		if (opt && opt->ip6po_nextroute.ro_rt) {
712			/*
713			 * The nexthop is explicitly specified by the
714			 * application.  We assume the next hop is an IPv6
715			 * address.
716			 */
717			dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
718		}
719		else if ((rt->rt_flags & RTF_GATEWAY))
720			dst = (struct sockaddr_in6 *)rt->rt_gateway;
721	}
722
723	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
724		m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
725	} else {
726		struct	in6_multi *in6m;
727
728		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
729
730		in6_ifstat_inc(ifp, ifs6_out_mcast);
731
732		/*
733		 * Confirm that the outgoing interface supports multicast.
734		 */
735		if (!(ifp->if_flags & IFF_MULTICAST)) {
736			ip6stat.ip6s_noroute++;
737			in6_ifstat_inc(ifp, ifs6_out_discard);
738			error = ENETUNREACH;
739			goto bad;
740		}
741		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
742		if (in6m != NULL &&
743		   (im6o == NULL || im6o->im6o_multicast_loop)) {
744			/*
745			 * If we belong to the destination multicast group
746			 * on the outgoing interface, and the caller did not
747			 * forbid loopback, loop back a copy.
748			 */
749			ip6_mloopback(ifp, m, dst);
750		} else {
751			/*
752			 * If we are acting as a multicast router, perform
753			 * multicast forwarding as if the packet had just
754			 * arrived on the interface to which we are about
755			 * to send.  The multicast forwarding function
756			 * recursively calls this function, using the
757			 * IPV6_FORWARDING flag to prevent infinite recursion.
758			 *
759			 * Multicasts that are looped back by ip6_mloopback(),
760			 * above, will be forwarded by the ip6_input() routine,
761			 * if necessary.
762			 */
763			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
764				/*
765				 * XXX: ip6_mforward expects that rcvif is NULL
766				 * when it is called from the originating path.
767				 * However, it is not always the case, since
768				 * some versions of MGETHDR() does not
769				 * initialize the field.
770				 */
771				m->m_pkthdr.rcvif = NULL;
772				if (ip6_mforward(ip6, ifp, m) != 0) {
773					m_freem(m);
774					goto done;
775				}
776			}
777		}
778		/*
779		 * Multicasts with a hoplimit of zero may be looped back,
780		 * above, but must not be transmitted on a network.
781		 * Also, multicasts addressed to the loopback interface
782		 * are not sent -- the above call to ip6_mloopback() will
783		 * loop back a copy if this host actually belongs to the
784		 * destination group on the loopback interface.
785		 */
786		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
787		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
788			m_freem(m);
789			goto done;
790		}
791	}
792
793	/*
794	 * Fill the outgoing inteface to tell the upper layer
795	 * to increment per-interface statistics.
796	 */
797	if (ifpp)
798		*ifpp = ifp;
799
800	/* Determine path MTU. */
801	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
802	    &alwaysfrag)) != 0)
803		goto bad;
804
805	/*
806	 * The caller of this function may specify to use the minimum MTU
807	 * in some cases.
808	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
809	 * setting.  The logic is a bit complicated; by default, unicast
810	 * packets will follow path MTU while multicast packets will be sent at
811	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
812	 * including unicast ones will be sent at the minimum MTU.  Multicast
813	 * packets will always be sent at the minimum MTU unless
814	 * IP6PO_MINMTU_DISABLE is explicitly specified.
815	 * See RFC 3542 for more details.
816	 */
817	if (mtu > IPV6_MMTU) {
818		if ((flags & IPV6_MINMTU))
819			mtu = IPV6_MMTU;
820		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
821			mtu = IPV6_MMTU;
822		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
823			 (opt == NULL ||
824			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
825			mtu = IPV6_MMTU;
826		}
827	}
828
829	/*
830	 * clear embedded scope identifiers if necessary.
831	 * in6_clearscope will touch the addresses only when necessary.
832	 */
833	in6_clearscope(&ip6->ip6_src);
834	in6_clearscope(&ip6->ip6_dst);
835
836	/*
837	 * If the outgoing packet contains a hop-by-hop options header,
838	 * it must be examined and processed even by the source node.
839	 * (RFC 2460, section 4.)
840	 */
841	if (exthdrs.ip6e_hbh) {
842		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
843		u_int32_t dummy; /* XXX unused */
844		u_int32_t plen = 0; /* XXX: ip6_process will check the value */
845
846#ifdef DIAGNOSTIC
847		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
848			panic("ip6e_hbh is not continuous");
849#endif
850		/*
851		 *  XXX: if we have to send an ICMPv6 error to the sender,
852		 *       we need the M_LOOP flag since icmp6_error() expects
853		 *       the IPv6 and the hop-by-hop options header are
854		 *       continuous unless the flag is set.
855		 */
856		m->m_flags |= M_LOOP;
857		m->m_pkthdr.rcvif = ifp;
858		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
859		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
860		    &dummy, &plen) < 0) {
861			/* m was already freed at this point */
862			error = EINVAL;/* better error? */
863			goto done;
864		}
865		m->m_flags &= ~M_LOOP; /* XXX */
866		m->m_pkthdr.rcvif = NULL;
867	}
868
869	/* Jump over all PFIL processing if hooks are not active. */
870	if (!PFIL_HOOKED(&inet6_pfil_hook))
871		goto passout;
872
873	odst = ip6->ip6_dst;
874	/* Run through list of hooks for output packets. */
875	error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
876	if (error != 0 || m == NULL)
877		goto done;
878	ip6 = mtod(m, struct ip6_hdr *);
879
880	/* See if destination IP address was changed by packet filter. */
881	if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
882		m->m_flags |= M_SKIP_FIREWALL;
883		/* If destination is now ourself drop to ip6_input(). */
884		if (in6_localaddr(&ip6->ip6_dst)) {
885			if (m->m_pkthdr.rcvif == NULL)
886				m->m_pkthdr.rcvif = loif;
887			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
888				m->m_pkthdr.csum_flags |=
889				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
890				m->m_pkthdr.csum_data = 0xffff;
891			}
892			m->m_pkthdr.csum_flags |=
893			    CSUM_IP_CHECKED | CSUM_IP_VALID;
894			error = netisr_queue(NETISR_IPV6, m);
895			goto done;
896		} else
897			goto again;	/* Redo the routing table lookup. */
898	}
899
900	/* XXX: IPFIREWALL_FORWARD */
901
902passout:
903	/*
904	 * Send the packet to the outgoing interface.
905	 * If necessary, do IPv6 fragmentation before sending.
906	 *
907	 * the logic here is rather complex:
908	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
909	 * 1-a:	send as is if tlen <= path mtu
910	 * 1-b:	fragment if tlen > path mtu
911	 *
912	 * 2: if user asks us not to fragment (dontfrag == 1)
913	 * 2-a:	send as is if tlen <= interface mtu
914	 * 2-b:	error if tlen > interface mtu
915	 *
916	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
917	 *	always fragment
918	 *
919	 * 4: if dontfrag == 1 && alwaysfrag == 1
920	 *	error, as we cannot handle this conflicting request
921	 */
922	tlen = m->m_pkthdr.len;
923
924	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
925		dontfrag = 1;
926	else
927		dontfrag = 0;
928	if (dontfrag && alwaysfrag) {	/* case 4 */
929		/* conflicting request - can't transmit */
930		error = EMSGSIZE;
931		goto bad;
932	}
933	if (dontfrag && tlen > IN6_LINKMTU(ifp)) {	/* case 2-b */
934		/*
935		 * Even if the DONTFRAG option is specified, we cannot send the
936		 * packet when the data length is larger than the MTU of the
937		 * outgoing interface.
938		 * Notify the error by sending IPV6_PATHMTU ancillary data as
939		 * well as returning an error code (the latter is not described
940		 * in the API spec.)
941		 */
942		u_int32_t mtu32;
943		struct ip6ctlparam ip6cp;
944
945		mtu32 = (u_int32_t)mtu;
946		bzero(&ip6cp, sizeof(ip6cp));
947		ip6cp.ip6c_cmdarg = (void *)&mtu32;
948		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
949		    (void *)&ip6cp);
950
951		error = EMSGSIZE;
952		goto bad;
953	}
954
955	/*
956	 * transmit packet without fragmentation
957	 */
958	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
959		struct in6_ifaddr *ia6;
960
961		ip6 = mtod(m, struct ip6_hdr *);
962		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
963		if (ia6) {
964			/* Record statistics for this interface address. */
965			ia6->ia_ifa.if_opackets++;
966			ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
967		}
968#ifdef IPSEC
969		/* clean ipsec history once it goes out of the node */
970		ipsec_delaux(m);
971#endif
972		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
973		goto done;
974	}
975
976	/*
977	 * try to fragment the packet.  case 1-b and 3
978	 */
979	if (mtu < IPV6_MMTU) {
980		/* path MTU cannot be less than IPV6_MMTU */
981		error = EMSGSIZE;
982		in6_ifstat_inc(ifp, ifs6_out_fragfail);
983		goto bad;
984	} else if (ip6->ip6_plen == 0) {
985		/* jumbo payload cannot be fragmented */
986		error = EMSGSIZE;
987		in6_ifstat_inc(ifp, ifs6_out_fragfail);
988		goto bad;
989	} else {
990		struct mbuf **mnext, *m_frgpart;
991		struct ip6_frag *ip6f;
992		u_int32_t id = htonl(ip6_randomid());
993		u_char nextproto;
994#if 0
995		struct ip6ctlparam ip6cp;
996		u_int32_t mtu32;
997#endif
998		int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
999
1000		/*
1001		 * Too large for the destination or interface;
1002		 * fragment if possible.
1003		 * Must be able to put at least 8 bytes per fragment.
1004		 */
1005		hlen = unfragpartlen;
1006		if (mtu > IPV6_MAXPACKET)
1007			mtu = IPV6_MAXPACKET;
1008
1009#if 0
1010		/*
1011		 * It is believed this code is a leftover from the
1012		 * development of the IPV6_RECVPATHMTU sockopt and
1013		 * associated work to implement RFC3542.
1014		 * It's not entirely clear what the intent of the API
1015		 * is at this point, so disable this code for now.
1016		 * The IPV6_RECVPATHMTU sockopt and/or IPV6_DONTFRAG
1017		 * will send notifications if the application requests.
1018		 */
1019
1020		/* Notify a proper path MTU to applications. */
1021		mtu32 = (u_int32_t)mtu;
1022		bzero(&ip6cp, sizeof(ip6cp));
1023		ip6cp.ip6c_cmdarg = (void *)&mtu32;
1024		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1025		    (void *)&ip6cp);
1026#endif
1027
1028		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1029		if (len < 8) {
1030			error = EMSGSIZE;
1031			in6_ifstat_inc(ifp, ifs6_out_fragfail);
1032			goto bad;
1033		}
1034
1035		/*
1036		 * Verify that we have any chance at all of being able to queue
1037		 *      the packet or packet fragments
1038		 */
1039		if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
1040		    < tlen  /* - hlen */)) {
1041			error = ENOBUFS;
1042			ip6stat.ip6s_odropped++;
1043			goto bad;
1044		}
1045
1046		mnext = &m->m_nextpkt;
1047
1048		/*
1049		 * Change the next header field of the last header in the
1050		 * unfragmentable part.
1051		 */
1052		if (exthdrs.ip6e_rthdr) {
1053			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1054			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1055		} else if (exthdrs.ip6e_dest1) {
1056			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1057			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1058		} else if (exthdrs.ip6e_hbh) {
1059			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1060			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1061		} else {
1062			nextproto = ip6->ip6_nxt;
1063			ip6->ip6_nxt = IPPROTO_FRAGMENT;
1064		}
1065
1066		/*
1067		 * Loop through length of segment after first fragment,
1068		 * make new header and copy data of each part and link onto
1069		 * chain.
1070		 */
1071		m0 = m;
1072		for (off = hlen; off < tlen; off += len) {
1073			MGETHDR(m, M_DONTWAIT, MT_HEADER);
1074			if (!m) {
1075				error = ENOBUFS;
1076				ip6stat.ip6s_odropped++;
1077				goto sendorfree;
1078			}
1079			m->m_pkthdr.rcvif = NULL;
1080			m->m_flags = m0->m_flags & M_COPYFLAGS;
1081			*mnext = m;
1082			mnext = &m->m_nextpkt;
1083			m->m_data += max_linkhdr;
1084			mhip6 = mtod(m, struct ip6_hdr *);
1085			*mhip6 = *ip6;
1086			m->m_len = sizeof(*mhip6);
1087			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1088			if (error) {
1089				ip6stat.ip6s_odropped++;
1090				goto sendorfree;
1091			}
1092			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1093			if (off + len >= tlen)
1094				len = tlen - off;
1095			else
1096				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1097			mhip6->ip6_plen = htons((u_short)(len + hlen +
1098			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1099			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1100				error = ENOBUFS;
1101				ip6stat.ip6s_odropped++;
1102				goto sendorfree;
1103			}
1104			m_cat(m, m_frgpart);
1105			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1106			m->m_pkthdr.rcvif = NULL;
1107			ip6f->ip6f_reserved = 0;
1108			ip6f->ip6f_ident = id;
1109			ip6f->ip6f_nxt = nextproto;
1110			ip6stat.ip6s_ofragments++;
1111			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1112		}
1113
1114		in6_ifstat_inc(ifp, ifs6_out_fragok);
1115	}
1116
1117	/*
1118	 * Remove leading garbages.
1119	 */
1120sendorfree:
1121	m = m0->m_nextpkt;
1122	m0->m_nextpkt = 0;
1123	m_freem(m0);
1124	for (m0 = m; m; m = m0) {
1125		m0 = m->m_nextpkt;
1126		m->m_nextpkt = 0;
1127		if (error == 0) {
1128 			/* Record statistics for this interface address. */
1129 			if (ia) {
1130 				ia->ia_ifa.if_opackets++;
1131 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1132 			}
1133#ifdef IPSEC
1134			/* clean ipsec history once it goes out of the node */
1135			ipsec_delaux(m);
1136#endif
1137			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1138		} else
1139			m_freem(m);
1140	}
1141
1142	if (error == 0)
1143		ip6stat.ip6s_fragmented++;
1144
1145done:
1146	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1147		RTFREE(ro->ro_rt);
1148	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1149		RTFREE(ro_pmtu->ro_rt);
1150	}
1151
1152#ifdef IPSEC
1153	if (sp != NULL)
1154		key_freesp(sp);
1155#endif /* IPSEC */
1156#ifdef FAST_IPSEC
1157	if (sp != NULL)
1158		KEY_FREESP(&sp);
1159#endif /* FAST_IPSEC */
1160
1161	return (error);
1162
1163freehdrs:
1164	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1165	m_freem(exthdrs.ip6e_dest1);
1166	m_freem(exthdrs.ip6e_rthdr);
1167	m_freem(exthdrs.ip6e_dest2);
1168	/* FALLTHROUGH */
1169bad:
1170	m_freem(m);
1171	goto done;
1172}
1173
1174static int
1175ip6_copyexthdr(mp, hdr, hlen)
1176	struct mbuf **mp;
1177	caddr_t hdr;
1178	int hlen;
1179{
1180	struct mbuf *m;
1181
1182	if (hlen > MCLBYTES)
1183		return (ENOBUFS); /* XXX */
1184
1185	MGET(m, M_DONTWAIT, MT_DATA);
1186	if (!m)
1187		return (ENOBUFS);
1188
1189	if (hlen > MLEN) {
1190		MCLGET(m, M_DONTWAIT);
1191		if ((m->m_flags & M_EXT) == 0) {
1192			m_free(m);
1193			return (ENOBUFS);
1194		}
1195	}
1196	m->m_len = hlen;
1197	if (hdr)
1198		bcopy(hdr, mtod(m, caddr_t), hlen);
1199
1200	*mp = m;
1201	return (0);
1202}
1203
1204/*
1205 * Insert jumbo payload option.
1206 */
1207static int
1208ip6_insert_jumboopt(exthdrs, plen)
1209	struct ip6_exthdrs *exthdrs;
1210	u_int32_t plen;
1211{
1212	struct mbuf *mopt;
1213	u_char *optbuf;
1214	u_int32_t v;
1215
1216#define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1217
1218	/*
1219	 * If there is no hop-by-hop options header, allocate new one.
1220	 * If there is one but it doesn't have enough space to store the
1221	 * jumbo payload option, allocate a cluster to store the whole options.
1222	 * Otherwise, use it to store the options.
1223	 */
1224	if (exthdrs->ip6e_hbh == 0) {
1225		MGET(mopt, M_DONTWAIT, MT_DATA);
1226		if (mopt == 0)
1227			return (ENOBUFS);
1228		mopt->m_len = JUMBOOPTLEN;
1229		optbuf = mtod(mopt, u_char *);
1230		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1231		exthdrs->ip6e_hbh = mopt;
1232	} else {
1233		struct ip6_hbh *hbh;
1234
1235		mopt = exthdrs->ip6e_hbh;
1236		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1237			/*
1238			 * XXX assumption:
1239			 * - exthdrs->ip6e_hbh is not referenced from places
1240			 *   other than exthdrs.
1241			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1242			 */
1243			int oldoptlen = mopt->m_len;
1244			struct mbuf *n;
1245
1246			/*
1247			 * XXX: give up if the whole (new) hbh header does
1248			 * not fit even in an mbuf cluster.
1249			 */
1250			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1251				return (ENOBUFS);
1252
1253			/*
1254			 * As a consequence, we must always prepare a cluster
1255			 * at this point.
1256			 */
1257			MGET(n, M_DONTWAIT, MT_DATA);
1258			if (n) {
1259				MCLGET(n, M_DONTWAIT);
1260				if ((n->m_flags & M_EXT) == 0) {
1261					m_freem(n);
1262					n = NULL;
1263				}
1264			}
1265			if (!n)
1266				return (ENOBUFS);
1267			n->m_len = oldoptlen + JUMBOOPTLEN;
1268			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1269			    oldoptlen);
1270			optbuf = mtod(n, caddr_t) + oldoptlen;
1271			m_freem(mopt);
1272			mopt = exthdrs->ip6e_hbh = n;
1273		} else {
1274			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1275			mopt->m_len += JUMBOOPTLEN;
1276		}
1277		optbuf[0] = IP6OPT_PADN;
1278		optbuf[1] = 1;
1279
1280		/*
1281		 * Adjust the header length according to the pad and
1282		 * the jumbo payload option.
1283		 */
1284		hbh = mtod(mopt, struct ip6_hbh *);
1285		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1286	}
1287
1288	/* fill in the option. */
1289	optbuf[2] = IP6OPT_JUMBO;
1290	optbuf[3] = 4;
1291	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1292	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1293
1294	/* finally, adjust the packet header length */
1295	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1296
1297	return (0);
1298#undef JUMBOOPTLEN
1299}
1300
1301/*
1302 * Insert fragment header and copy unfragmentable header portions.
1303 */
1304static int
1305ip6_insertfraghdr(m0, m, hlen, frghdrp)
1306	struct mbuf *m0, *m;
1307	int hlen;
1308	struct ip6_frag **frghdrp;
1309{
1310	struct mbuf *n, *mlast;
1311
1312	if (hlen > sizeof(struct ip6_hdr)) {
1313		n = m_copym(m0, sizeof(struct ip6_hdr),
1314		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1315		if (n == 0)
1316			return (ENOBUFS);
1317		m->m_next = n;
1318	} else
1319		n = m;
1320
1321	/* Search for the last mbuf of unfragmentable part. */
1322	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1323		;
1324
1325	if ((mlast->m_flags & M_EXT) == 0 &&
1326	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1327		/* use the trailing space of the last mbuf for the fragment hdr */
1328		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1329		    mlast->m_len);
1330		mlast->m_len += sizeof(struct ip6_frag);
1331		m->m_pkthdr.len += sizeof(struct ip6_frag);
1332	} else {
1333		/* allocate a new mbuf for the fragment header */
1334		struct mbuf *mfrg;
1335
1336		MGET(mfrg, M_DONTWAIT, MT_DATA);
1337		if (mfrg == 0)
1338			return (ENOBUFS);
1339		mfrg->m_len = sizeof(struct ip6_frag);
1340		*frghdrp = mtod(mfrg, struct ip6_frag *);
1341		mlast->m_next = mfrg;
1342	}
1343
1344	return (0);
1345}
1346
1347static int
1348ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
1349	struct route_in6 *ro_pmtu, *ro;
1350	struct ifnet *ifp;
1351	struct in6_addr *dst;
1352	u_long *mtup;
1353	int *alwaysfragp;
1354{
1355	u_int32_t mtu = 0;
1356	int alwaysfrag = 0;
1357	int error = 0;
1358
1359	if (ro_pmtu != ro) {
1360		/* The first hop and the final destination may differ. */
1361		struct sockaddr_in6 *sa6_dst =
1362		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1363		if (ro_pmtu->ro_rt &&
1364		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1365		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1366			RTFREE(ro_pmtu->ro_rt);
1367			ro_pmtu->ro_rt = (struct rtentry *)NULL;
1368		}
1369		if (ro_pmtu->ro_rt == NULL) {
1370			bzero(sa6_dst, sizeof(*sa6_dst));
1371			sa6_dst->sin6_family = AF_INET6;
1372			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1373			sa6_dst->sin6_addr = *dst;
1374
1375			rtalloc((struct route *)ro_pmtu);
1376		}
1377	}
1378	if (ro_pmtu->ro_rt) {
1379		u_int32_t ifmtu;
1380		struct in_conninfo inc;
1381
1382		bzero(&inc, sizeof(inc));
1383		inc.inc_flags = 1; /* IPv6 */
1384		inc.inc6_faddr = *dst;
1385
1386		if (ifp == NULL)
1387			ifp = ro_pmtu->ro_rt->rt_ifp;
1388		ifmtu = IN6_LINKMTU(ifp);
1389		mtu = tcp_hc_getmtu(&inc);
1390		if (mtu)
1391			mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
1392		else
1393			mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1394		if (mtu == 0)
1395			mtu = ifmtu;
1396		else if (mtu < IPV6_MMTU) {
1397			/*
1398			 * RFC2460 section 5, last paragraph:
1399			 * if we record ICMPv6 too big message with
1400			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1401			 * or smaller, with framgent header attached.
1402			 * (fragment header is needed regardless from the
1403			 * packet size, for translators to identify packets)
1404			 */
1405			alwaysfrag = 1;
1406			mtu = IPV6_MMTU;
1407		} else if (mtu > ifmtu) {
1408			/*
1409			 * The MTU on the route is larger than the MTU on
1410			 * the interface!  This shouldn't happen, unless the
1411			 * MTU of the interface has been changed after the
1412			 * interface was brought up.  Change the MTU in the
1413			 * route to match the interface MTU (as long as the
1414			 * field isn't locked).
1415			 */
1416			mtu = ifmtu;
1417			ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1418		}
1419	} else if (ifp) {
1420		mtu = IN6_LINKMTU(ifp);
1421	} else
1422		error = EHOSTUNREACH; /* XXX */
1423
1424	*mtup = mtu;
1425	if (alwaysfragp)
1426		*alwaysfragp = alwaysfrag;
1427	return (error);
1428}
1429
1430/*
1431 * IP6 socket option processing.
1432 */
1433int
1434ip6_ctloutput(so, sopt)
1435	struct socket *so;
1436	struct sockopt *sopt;
1437{
1438	int privileged, optdatalen, uproto;
1439	void *optdata;
1440	struct inpcb *in6p = sotoinpcb(so);
1441	int error, optval;
1442	int level, op, optname;
1443	int optlen;
1444	struct thread *td;
1445
1446	if (sopt) {
1447		level = sopt->sopt_level;
1448		op = sopt->sopt_dir;
1449		optname = sopt->sopt_name;
1450		optlen = sopt->sopt_valsize;
1451		td = sopt->sopt_td;
1452	} else {
1453		panic("ip6_ctloutput: arg soopt is NULL");
1454	}
1455	error = optval = 0;
1456
1457	privileged = (td == 0 || suser(td)) ? 0 : 1;
1458	uproto = (int)so->so_proto->pr_protocol;
1459
1460	if (level == IPPROTO_IPV6) {
1461		switch (op) {
1462
1463		case SOPT_SET:
1464			switch (optname) {
1465			case IPV6_2292PKTOPTIONS:
1466#ifdef IPV6_PKTOPTIONS
1467			case IPV6_PKTOPTIONS:
1468#endif
1469			{
1470				struct mbuf *m;
1471
1472				error = soopt_getm(sopt, &m); /* XXX */
1473				if (error != 0)
1474					break;
1475				error = soopt_mcopyin(sopt, m); /* XXX */
1476				if (error != 0)
1477					break;
1478				error = ip6_pcbopts(&in6p->in6p_outputopts,
1479						    m, so, sopt);
1480				m_freem(m); /* XXX */
1481				break;
1482			}
1483
1484			/*
1485			 * Use of some Hop-by-Hop options or some
1486			 * Destination options, might require special
1487			 * privilege.  That is, normal applications
1488			 * (without special privilege) might be forbidden
1489			 * from setting certain options in outgoing packets,
1490			 * and might never see certain options in received
1491			 * packets. [RFC 2292 Section 6]
1492			 * KAME specific note:
1493			 *  KAME prevents non-privileged users from sending or
1494			 *  receiving ANY hbh/dst options in order to avoid
1495			 *  overhead of parsing options in the kernel.
1496			 */
1497			case IPV6_RECVHOPOPTS:
1498			case IPV6_RECVDSTOPTS:
1499			case IPV6_RECVRTHDRDSTOPTS:
1500				if (!privileged) {
1501					error = EPERM;
1502					break;
1503				}
1504				/* FALLTHROUGH */
1505			case IPV6_UNICAST_HOPS:
1506			case IPV6_HOPLIMIT:
1507			case IPV6_FAITH:
1508
1509			case IPV6_RECVPKTINFO:
1510			case IPV6_RECVHOPLIMIT:
1511			case IPV6_RECVRTHDR:
1512			case IPV6_RECVPATHMTU:
1513			case IPV6_RECVTCLASS:
1514			case IPV6_V6ONLY:
1515			case IPV6_AUTOFLOWLABEL:
1516				if (optlen != sizeof(int)) {
1517					error = EINVAL;
1518					break;
1519				}
1520				error = sooptcopyin(sopt, &optval,
1521					sizeof optval, sizeof optval);
1522				if (error)
1523					break;
1524				switch (optname) {
1525
1526				case IPV6_UNICAST_HOPS:
1527					if (optval < -1 || optval >= 256)
1528						error = EINVAL;
1529					else {
1530						/* -1 = kernel default */
1531						in6p->in6p_hops = optval;
1532						if ((in6p->in6p_vflag &
1533						     INP_IPV4) != 0)
1534							in6p->inp_ip_ttl = optval;
1535					}
1536					break;
1537#define OPTSET(bit) \
1538do { \
1539	if (optval) \
1540		in6p->in6p_flags |= (bit); \
1541	else \
1542		in6p->in6p_flags &= ~(bit); \
1543} while (/*CONSTCOND*/ 0)
1544#define OPTSET2292(bit) \
1545do { \
1546	in6p->in6p_flags |= IN6P_RFC2292; \
1547	if (optval) \
1548		in6p->in6p_flags |= (bit); \
1549	else \
1550		in6p->in6p_flags &= ~(bit); \
1551} while (/*CONSTCOND*/ 0)
1552#define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1553
1554				case IPV6_RECVPKTINFO:
1555					/* cannot mix with RFC2292 */
1556					if (OPTBIT(IN6P_RFC2292)) {
1557						error = EINVAL;
1558						break;
1559					}
1560					OPTSET(IN6P_PKTINFO);
1561					break;
1562
1563				case IPV6_HOPLIMIT:
1564				{
1565					struct ip6_pktopts **optp;
1566
1567					/* cannot mix with RFC2292 */
1568					if (OPTBIT(IN6P_RFC2292)) {
1569						error = EINVAL;
1570						break;
1571					}
1572					optp = &in6p->in6p_outputopts;
1573					error = ip6_pcbopt(IPV6_HOPLIMIT,
1574							   (u_char *)&optval,
1575							   sizeof(optval),
1576							   optp,
1577							   privileged, uproto);
1578					break;
1579				}
1580
1581				case IPV6_RECVHOPLIMIT:
1582					/* cannot mix with RFC2292 */
1583					if (OPTBIT(IN6P_RFC2292)) {
1584						error = EINVAL;
1585						break;
1586					}
1587					OPTSET(IN6P_HOPLIMIT);
1588					break;
1589
1590				case IPV6_RECVHOPOPTS:
1591					/* cannot mix with RFC2292 */
1592					if (OPTBIT(IN6P_RFC2292)) {
1593						error = EINVAL;
1594						break;
1595					}
1596					OPTSET(IN6P_HOPOPTS);
1597					break;
1598
1599				case IPV6_RECVDSTOPTS:
1600					/* cannot mix with RFC2292 */
1601					if (OPTBIT(IN6P_RFC2292)) {
1602						error = EINVAL;
1603						break;
1604					}
1605					OPTSET(IN6P_DSTOPTS);
1606					break;
1607
1608				case IPV6_RECVRTHDRDSTOPTS:
1609					/* cannot mix with RFC2292 */
1610					if (OPTBIT(IN6P_RFC2292)) {
1611						error = EINVAL;
1612						break;
1613					}
1614					OPTSET(IN6P_RTHDRDSTOPTS);
1615					break;
1616
1617				case IPV6_RECVRTHDR:
1618					/* cannot mix with RFC2292 */
1619					if (OPTBIT(IN6P_RFC2292)) {
1620						error = EINVAL;
1621						break;
1622					}
1623					OPTSET(IN6P_RTHDR);
1624					break;
1625
1626				case IPV6_FAITH:
1627					OPTSET(IN6P_FAITH);
1628					break;
1629
1630				case IPV6_RECVPATHMTU:
1631					/*
1632					 * We ignore this option for TCP
1633					 * sockets.
1634					 * (RFC3542 leaves this case
1635					 * unspecified.)
1636					 */
1637					if (uproto != IPPROTO_TCP)
1638						OPTSET(IN6P_MTU);
1639					break;
1640
1641				case IPV6_V6ONLY:
1642					/*
1643					 * make setsockopt(IPV6_V6ONLY)
1644					 * available only prior to bind(2).
1645					 * see ipng mailing list, Jun 22 2001.
1646					 */
1647					if (in6p->in6p_lport ||
1648					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1649						error = EINVAL;
1650						break;
1651					}
1652					OPTSET(IN6P_IPV6_V6ONLY);
1653					if (optval)
1654						in6p->in6p_vflag &= ~INP_IPV4;
1655					else
1656						in6p->in6p_vflag |= INP_IPV4;
1657					break;
1658				case IPV6_RECVTCLASS:
1659					/* cannot mix with RFC2292 XXX */
1660					if (OPTBIT(IN6P_RFC2292)) {
1661						error = EINVAL;
1662						break;
1663					}
1664					OPTSET(IN6P_TCLASS);
1665					break;
1666				case IPV6_AUTOFLOWLABEL:
1667					OPTSET(IN6P_AUTOFLOWLABEL);
1668					break;
1669
1670				}
1671				break;
1672
1673			case IPV6_TCLASS:
1674			case IPV6_DONTFRAG:
1675			case IPV6_USE_MIN_MTU:
1676			case IPV6_PREFER_TEMPADDR:
1677				if (optlen != sizeof(optval)) {
1678					error = EINVAL;
1679					break;
1680				}
1681				error = sooptcopyin(sopt, &optval,
1682					sizeof optval, sizeof optval);
1683				if (error)
1684					break;
1685				{
1686					struct ip6_pktopts **optp;
1687					optp = &in6p->in6p_outputopts;
1688					error = ip6_pcbopt(optname,
1689							   (u_char *)&optval,
1690							   sizeof(optval),
1691							   optp,
1692							   privileged, uproto);
1693					break;
1694				}
1695
1696			case IPV6_2292PKTINFO:
1697			case IPV6_2292HOPLIMIT:
1698			case IPV6_2292HOPOPTS:
1699			case IPV6_2292DSTOPTS:
1700			case IPV6_2292RTHDR:
1701				/* RFC 2292 */
1702				if (optlen != sizeof(int)) {
1703					error = EINVAL;
1704					break;
1705				}
1706				error = sooptcopyin(sopt, &optval,
1707					sizeof optval, sizeof optval);
1708				if (error)
1709					break;
1710				switch (optname) {
1711				case IPV6_2292PKTINFO:
1712					OPTSET2292(IN6P_PKTINFO);
1713					break;
1714				case IPV6_2292HOPLIMIT:
1715					OPTSET2292(IN6P_HOPLIMIT);
1716					break;
1717				case IPV6_2292HOPOPTS:
1718					/*
1719					 * Check super-user privilege.
1720					 * See comments for IPV6_RECVHOPOPTS.
1721					 */
1722					if (!privileged)
1723						return (EPERM);
1724					OPTSET2292(IN6P_HOPOPTS);
1725					break;
1726				case IPV6_2292DSTOPTS:
1727					if (!privileged)
1728						return (EPERM);
1729					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1730					break;
1731				case IPV6_2292RTHDR:
1732					OPTSET2292(IN6P_RTHDR);
1733					break;
1734				}
1735				break;
1736			case IPV6_PKTINFO:
1737			case IPV6_HOPOPTS:
1738			case IPV6_RTHDR:
1739			case IPV6_DSTOPTS:
1740			case IPV6_RTHDRDSTOPTS:
1741			case IPV6_NEXTHOP:
1742			{
1743				/* new advanced API (RFC3542) */
1744				u_char *optbuf;
1745				u_char optbuf_storage[MCLBYTES];
1746				int optlen;
1747				struct ip6_pktopts **optp;
1748
1749				/* cannot mix with RFC2292 */
1750				if (OPTBIT(IN6P_RFC2292)) {
1751					error = EINVAL;
1752					break;
1753				}
1754
1755				/*
1756				 * We only ensure valsize is not too large
1757				 * here.  Further validation will be done
1758				 * later.
1759				 */
1760				error = sooptcopyin(sopt, optbuf_storage,
1761				    sizeof(optbuf_storage), 0);
1762				if (error)
1763					break;
1764				optlen = sopt->sopt_valsize;
1765				optbuf = optbuf_storage;
1766				optp = &in6p->in6p_outputopts;
1767				error = ip6_pcbopt(optname,
1768						   optbuf, optlen,
1769						   optp, privileged, uproto);
1770				break;
1771			}
1772#undef OPTSET
1773
1774			case IPV6_MULTICAST_IF:
1775			case IPV6_MULTICAST_HOPS:
1776			case IPV6_MULTICAST_LOOP:
1777			case IPV6_JOIN_GROUP:
1778			case IPV6_LEAVE_GROUP:
1779			    {
1780				if (sopt->sopt_valsize > MLEN) {
1781					error = EMSGSIZE;
1782					break;
1783				}
1784				/* XXX */
1785			    }
1786			    /* FALLTHROUGH */
1787			    {
1788				struct mbuf *m;
1789
1790				if (sopt->sopt_valsize > MCLBYTES) {
1791					error = EMSGSIZE;
1792					break;
1793				}
1794				/* XXX */
1795				MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
1796				if (m == 0) {
1797					error = ENOBUFS;
1798					break;
1799				}
1800				if (sopt->sopt_valsize > MLEN) {
1801					MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
1802					if ((m->m_flags & M_EXT) == 0) {
1803						m_free(m);
1804						error = ENOBUFS;
1805						break;
1806					}
1807				}
1808				m->m_len = sopt->sopt_valsize;
1809				error = sooptcopyin(sopt, mtod(m, char *),
1810						    m->m_len, m->m_len);
1811				if (error) {
1812					(void)m_free(m);
1813					break;
1814				}
1815				error =	ip6_setmoptions(sopt->sopt_name,
1816							&in6p->in6p_moptions,
1817							m);
1818				(void)m_free(m);
1819			    }
1820				break;
1821
1822			case IPV6_PORTRANGE:
1823				error = sooptcopyin(sopt, &optval,
1824				    sizeof optval, sizeof optval);
1825				if (error)
1826					break;
1827
1828				switch (optval) {
1829				case IPV6_PORTRANGE_DEFAULT:
1830					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1831					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1832					break;
1833
1834				case IPV6_PORTRANGE_HIGH:
1835					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1836					in6p->in6p_flags |= IN6P_HIGHPORT;
1837					break;
1838
1839				case IPV6_PORTRANGE_LOW:
1840					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1841					in6p->in6p_flags |= IN6P_LOWPORT;
1842					break;
1843
1844				default:
1845					error = EINVAL;
1846					break;
1847				}
1848				break;
1849
1850#if defined(IPSEC) || defined(FAST_IPSEC)
1851			case IPV6_IPSEC_POLICY:
1852			    {
1853				caddr_t req = NULL;
1854				size_t len = 0;
1855				struct mbuf *m;
1856
1857				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1858					break;
1859				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1860					break;
1861				if (m) {
1862					req = mtod(m, caddr_t);
1863					len = m->m_len;
1864				}
1865				error = ipsec6_set_policy(in6p, optname, req,
1866							  len, privileged);
1867				m_freem(m);
1868			    }
1869				break;
1870#endif /* KAME IPSEC */
1871
1872			default:
1873				error = ENOPROTOOPT;
1874				break;
1875			}
1876			break;
1877
1878		case SOPT_GET:
1879			switch (optname) {
1880
1881			case IPV6_2292PKTOPTIONS:
1882#ifdef IPV6_PKTOPTIONS
1883			case IPV6_PKTOPTIONS:
1884#endif
1885				/*
1886				 * RFC3542 (effectively) deprecated the
1887				 * semantics of the 2292-style pktoptions.
1888				 * Since it was not reliable in nature (i.e.,
1889				 * applications had to expect the lack of some
1890				 * information after all), it would make sense
1891				 * to simplify this part by always returning
1892				 * empty data.
1893				 */
1894				sopt->sopt_valsize = 0;
1895				break;
1896
1897			case IPV6_RECVHOPOPTS:
1898			case IPV6_RECVDSTOPTS:
1899			case IPV6_RECVRTHDRDSTOPTS:
1900			case IPV6_UNICAST_HOPS:
1901			case IPV6_RECVPKTINFO:
1902			case IPV6_RECVHOPLIMIT:
1903			case IPV6_RECVRTHDR:
1904			case IPV6_RECVPATHMTU:
1905
1906			case IPV6_FAITH:
1907			case IPV6_V6ONLY:
1908			case IPV6_PORTRANGE:
1909			case IPV6_RECVTCLASS:
1910			case IPV6_AUTOFLOWLABEL:
1911				switch (optname) {
1912
1913				case IPV6_RECVHOPOPTS:
1914					optval = OPTBIT(IN6P_HOPOPTS);
1915					break;
1916
1917				case IPV6_RECVDSTOPTS:
1918					optval = OPTBIT(IN6P_DSTOPTS);
1919					break;
1920
1921				case IPV6_RECVRTHDRDSTOPTS:
1922					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1923					break;
1924
1925				case IPV6_UNICAST_HOPS:
1926					optval = in6p->in6p_hops;
1927					break;
1928
1929				case IPV6_RECVPKTINFO:
1930					optval = OPTBIT(IN6P_PKTINFO);
1931					break;
1932
1933				case IPV6_RECVHOPLIMIT:
1934					optval = OPTBIT(IN6P_HOPLIMIT);
1935					break;
1936
1937				case IPV6_RECVRTHDR:
1938					optval = OPTBIT(IN6P_RTHDR);
1939					break;
1940
1941				case IPV6_RECVPATHMTU:
1942					optval = OPTBIT(IN6P_MTU);
1943					break;
1944
1945				case IPV6_FAITH:
1946					optval = OPTBIT(IN6P_FAITH);
1947					break;
1948
1949				case IPV6_V6ONLY:
1950					optval = OPTBIT(IN6P_IPV6_V6ONLY);
1951					break;
1952
1953				case IPV6_PORTRANGE:
1954				    {
1955					int flags;
1956					flags = in6p->in6p_flags;
1957					if (flags & IN6P_HIGHPORT)
1958						optval = IPV6_PORTRANGE_HIGH;
1959					else if (flags & IN6P_LOWPORT)
1960						optval = IPV6_PORTRANGE_LOW;
1961					else
1962						optval = 0;
1963					break;
1964				    }
1965				case IPV6_RECVTCLASS:
1966					optval = OPTBIT(IN6P_TCLASS);
1967					break;
1968
1969				case IPV6_AUTOFLOWLABEL:
1970					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
1971					break;
1972				}
1973				if (error)
1974					break;
1975				error = sooptcopyout(sopt, &optval,
1976					sizeof optval);
1977				break;
1978
1979			case IPV6_PATHMTU:
1980			{
1981				u_long pmtu = 0;
1982				struct ip6_mtuinfo mtuinfo;
1983				struct route_in6 sro;
1984
1985				bzero(&sro, sizeof(sro));
1986
1987				if (!(so->so_state & SS_ISCONNECTED))
1988					return (ENOTCONN);
1989				/*
1990				 * XXX: we dot not consider the case of source
1991				 * routing, or optional information to specify
1992				 * the outgoing interface.
1993				 */
1994				error = ip6_getpmtu(&sro, NULL, NULL,
1995				    &in6p->in6p_faddr, &pmtu, NULL);
1996				if (sro.ro_rt)
1997					RTFREE(sro.ro_rt);
1998				if (error)
1999					break;
2000				if (pmtu > IPV6_MAXPACKET)
2001					pmtu = IPV6_MAXPACKET;
2002
2003				bzero(&mtuinfo, sizeof(mtuinfo));
2004				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2005				optdata = (void *)&mtuinfo;
2006				optdatalen = sizeof(mtuinfo);
2007				error = sooptcopyout(sopt, optdata,
2008				    optdatalen);
2009				break;
2010			}
2011
2012			case IPV6_2292PKTINFO:
2013			case IPV6_2292HOPLIMIT:
2014			case IPV6_2292HOPOPTS:
2015			case IPV6_2292RTHDR:
2016			case IPV6_2292DSTOPTS:
2017				switch (optname) {
2018				case IPV6_2292PKTINFO:
2019					optval = OPTBIT(IN6P_PKTINFO);
2020					break;
2021				case IPV6_2292HOPLIMIT:
2022					optval = OPTBIT(IN6P_HOPLIMIT);
2023					break;
2024				case IPV6_2292HOPOPTS:
2025					optval = OPTBIT(IN6P_HOPOPTS);
2026					break;
2027				case IPV6_2292RTHDR:
2028					optval = OPTBIT(IN6P_RTHDR);
2029					break;
2030				case IPV6_2292DSTOPTS:
2031					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2032					break;
2033				}
2034				error = sooptcopyout(sopt, &optval,
2035				    sizeof optval);
2036				break;
2037			case IPV6_PKTINFO:
2038			case IPV6_HOPOPTS:
2039			case IPV6_RTHDR:
2040			case IPV6_DSTOPTS:
2041			case IPV6_RTHDRDSTOPTS:
2042			case IPV6_NEXTHOP:
2043			case IPV6_TCLASS:
2044			case IPV6_DONTFRAG:
2045			case IPV6_USE_MIN_MTU:
2046			case IPV6_PREFER_TEMPADDR:
2047				error = ip6_getpcbopt(in6p->in6p_outputopts,
2048				    optname, sopt);
2049				break;
2050
2051			case IPV6_MULTICAST_IF:
2052			case IPV6_MULTICAST_HOPS:
2053			case IPV6_MULTICAST_LOOP:
2054			case IPV6_JOIN_GROUP:
2055			case IPV6_LEAVE_GROUP:
2056			    {
2057				struct mbuf *m;
2058				error = ip6_getmoptions(sopt->sopt_name,
2059				    in6p->in6p_moptions, &m);
2060				if (error == 0)
2061					error = sooptcopyout(sopt,
2062					    mtod(m, char *), m->m_len);
2063				m_freem(m);
2064			    }
2065				break;
2066
2067#if defined(IPSEC) || defined(FAST_IPSEC)
2068			case IPV6_IPSEC_POLICY:
2069			  {
2070				caddr_t req = NULL;
2071				size_t len = 0;
2072				struct mbuf *m = NULL;
2073				struct mbuf **mp = &m;
2074				size_t ovalsize = sopt->sopt_valsize;
2075				caddr_t oval = (caddr_t)sopt->sopt_val;
2076
2077				error = soopt_getm(sopt, &m); /* XXX */
2078				if (error != 0)
2079					break;
2080				error = soopt_mcopyin(sopt, m); /* XXX */
2081				if (error != 0)
2082					break;
2083				sopt->sopt_valsize = ovalsize;
2084				sopt->sopt_val = oval;
2085				if (m) {
2086					req = mtod(m, caddr_t);
2087					len = m->m_len;
2088				}
2089				error = ipsec6_get_policy(in6p, req, len, mp);
2090				if (error == 0)
2091					error = soopt_mcopyout(sopt, m); /* XXX */
2092				if (error == 0 && m)
2093					m_freem(m);
2094				break;
2095			  }
2096#endif /* KAME IPSEC */
2097
2098			default:
2099				error = ENOPROTOOPT;
2100				break;
2101			}
2102			break;
2103		}
2104	} else {		/* level != IPPROTO_IPV6 */
2105		error = EINVAL;
2106	}
2107	return (error);
2108}
2109
2110int
2111ip6_raw_ctloutput(so, sopt)
2112	struct socket *so;
2113	struct sockopt *sopt;
2114{
2115	int error = 0, optval, optlen;
2116	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2117	struct in6pcb *in6p = sotoin6pcb(so);
2118	int level, op, optname;
2119
2120	if (sopt) {
2121		level = sopt->sopt_level;
2122		op = sopt->sopt_dir;
2123		optname = sopt->sopt_name;
2124		optlen = sopt->sopt_valsize;
2125	} else
2126		panic("ip6_raw_ctloutput: arg soopt is NULL");
2127
2128	if (level != IPPROTO_IPV6) {
2129		return (EINVAL);
2130	}
2131
2132	switch (optname) {
2133	case IPV6_CHECKSUM:
2134		/*
2135		 * For ICMPv6 sockets, no modification allowed for checksum
2136		 * offset, permit "no change" values to help existing apps.
2137		 *
2138		 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2139		 * for an ICMPv6 socket will fail."
2140		 * The current behavior does not meet RFC3542.
2141		 */
2142		switch (op) {
2143		case SOPT_SET:
2144			if (optlen != sizeof(int)) {
2145				error = EINVAL;
2146				break;
2147			}
2148			error = sooptcopyin(sopt, &optval, sizeof(optval),
2149					    sizeof(optval));
2150			if (error)
2151				break;
2152			if ((optval % 2) != 0) {
2153				/* the API assumes even offset values */
2154				error = EINVAL;
2155			} else if (so->so_proto->pr_protocol ==
2156			    IPPROTO_ICMPV6) {
2157				if (optval != icmp6off)
2158					error = EINVAL;
2159			} else
2160				in6p->in6p_cksum = optval;
2161			break;
2162
2163		case SOPT_GET:
2164			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2165				optval = icmp6off;
2166			else
2167				optval = in6p->in6p_cksum;
2168
2169			error = sooptcopyout(sopt, &optval, sizeof(optval));
2170			break;
2171
2172		default:
2173			error = EINVAL;
2174			break;
2175		}
2176		break;
2177
2178	default:
2179		error = ENOPROTOOPT;
2180		break;
2181	}
2182
2183	return (error);
2184}
2185
2186/*
2187 * Set up IP6 options in pcb for insertion in output packets or
2188 * specifying behavior of outgoing packets.
2189 */
2190static int
2191ip6_pcbopts(pktopt, m, so, sopt)
2192	struct ip6_pktopts **pktopt;
2193	struct mbuf *m;
2194	struct socket *so;
2195	struct sockopt *sopt;
2196{
2197	struct ip6_pktopts *opt = *pktopt;
2198	int error = 0;
2199	struct thread *td = sopt->sopt_td;
2200	int priv = 0;
2201
2202	/* turn off any old options. */
2203	if (opt) {
2204#ifdef DIAGNOSTIC
2205		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2206		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2207		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2208			printf("ip6_pcbopts: all specified options are cleared.\n");
2209#endif
2210		ip6_clearpktopts(opt, -1);
2211	} else
2212		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2213	*pktopt = NULL;
2214
2215	if (!m || m->m_len == 0) {
2216		/*
2217		 * Only turning off any previous options, regardless of
2218		 * whether the opt is just created or given.
2219		 */
2220		free(opt, M_IP6OPT);
2221		return (0);
2222	}
2223
2224	/*  set options specified by user. */
2225	if (td && !suser(td))
2226		priv = 1;
2227	if ((error = ip6_setpktopts(m, opt, NULL, priv,
2228	    so->so_proto->pr_protocol)) != 0) {
2229		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2230		free(opt, M_IP6OPT);
2231		return (error);
2232	}
2233	*pktopt = opt;
2234	return (0);
2235}
2236
2237/*
2238 * initialize ip6_pktopts.  beware that there are non-zero default values in
2239 * the struct.
2240 */
2241void
2242ip6_initpktopts(opt)
2243	struct ip6_pktopts *opt;
2244{
2245
2246	bzero(opt, sizeof(*opt));
2247	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2248	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2249	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2250	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2251}
2252
2253static int
2254ip6_pcbopt(optname, buf, len, pktopt, priv, uproto)
2255	int optname, len, priv;
2256	u_char *buf;
2257	struct ip6_pktopts **pktopt;
2258	int uproto;
2259{
2260	struct ip6_pktopts *opt;
2261
2262	if (*pktopt == NULL) {
2263		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2264		    M_WAITOK);
2265		ip6_initpktopts(*pktopt);
2266	}
2267	opt = *pktopt;
2268
2269	return (ip6_setpktopt(optname, buf, len, opt, priv, 1, 0, uproto));
2270}
2271
2272static int
2273ip6_getpcbopt(pktopt, optname, sopt)
2274	struct ip6_pktopts *pktopt;
2275	struct sockopt *sopt;
2276	int optname;
2277{
2278	void *optdata = NULL;
2279	int optdatalen = 0;
2280	struct ip6_ext *ip6e;
2281	int error = 0;
2282	struct in6_pktinfo null_pktinfo;
2283	int deftclass = 0, on;
2284	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2285	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2286
2287	switch (optname) {
2288	case IPV6_PKTINFO:
2289		if (pktopt && pktopt->ip6po_pktinfo)
2290			optdata = (void *)pktopt->ip6po_pktinfo;
2291		else {
2292			/* XXX: we don't have to do this every time... */
2293			bzero(&null_pktinfo, sizeof(null_pktinfo));
2294			optdata = (void *)&null_pktinfo;
2295		}
2296		optdatalen = sizeof(struct in6_pktinfo);
2297		break;
2298	case IPV6_TCLASS:
2299		if (pktopt && pktopt->ip6po_tclass >= 0)
2300			optdata = (void *)&pktopt->ip6po_tclass;
2301		else
2302			optdata = (void *)&deftclass;
2303		optdatalen = sizeof(int);
2304		break;
2305	case IPV6_HOPOPTS:
2306		if (pktopt && pktopt->ip6po_hbh) {
2307			optdata = (void *)pktopt->ip6po_hbh;
2308			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2309			optdatalen = (ip6e->ip6e_len + 1) << 3;
2310		}
2311		break;
2312	case IPV6_RTHDR:
2313		if (pktopt && pktopt->ip6po_rthdr) {
2314			optdata = (void *)pktopt->ip6po_rthdr;
2315			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2316			optdatalen = (ip6e->ip6e_len + 1) << 3;
2317		}
2318		break;
2319	case IPV6_RTHDRDSTOPTS:
2320		if (pktopt && pktopt->ip6po_dest1) {
2321			optdata = (void *)pktopt->ip6po_dest1;
2322			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2323			optdatalen = (ip6e->ip6e_len + 1) << 3;
2324		}
2325		break;
2326	case IPV6_DSTOPTS:
2327		if (pktopt && pktopt->ip6po_dest2) {
2328			optdata = (void *)pktopt->ip6po_dest2;
2329			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2330			optdatalen = (ip6e->ip6e_len + 1) << 3;
2331		}
2332		break;
2333	case IPV6_NEXTHOP:
2334		if (pktopt && pktopt->ip6po_nexthop) {
2335			optdata = (void *)pktopt->ip6po_nexthop;
2336			optdatalen = pktopt->ip6po_nexthop->sa_len;
2337		}
2338		break;
2339	case IPV6_USE_MIN_MTU:
2340		if (pktopt)
2341			optdata = (void *)&pktopt->ip6po_minmtu;
2342		else
2343			optdata = (void *)&defminmtu;
2344		optdatalen = sizeof(int);
2345		break;
2346	case IPV6_DONTFRAG:
2347		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2348			on = 1;
2349		else
2350			on = 0;
2351		optdata = (void *)&on;
2352		optdatalen = sizeof(on);
2353		break;
2354	case IPV6_PREFER_TEMPADDR:
2355		if (pktopt)
2356			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2357		else
2358			optdata = (void *)&defpreftemp;
2359		optdatalen = sizeof(int);
2360		break;
2361	default:		/* should not happen */
2362#ifdef DIAGNOSTIC
2363		panic("ip6_getpcbopt: unexpected option\n");
2364#endif
2365		return (ENOPROTOOPT);
2366	}
2367
2368	error = sooptcopyout(sopt, optdata, optdatalen);
2369
2370	return (error);
2371}
2372
2373void
2374ip6_clearpktopts(pktopt, optname)
2375	struct ip6_pktopts *pktopt;
2376	int optname;
2377{
2378	if (pktopt == NULL)
2379		return;
2380
2381	if (optname == -1 || optname == IPV6_PKTINFO) {
2382		if (pktopt->ip6po_pktinfo)
2383			free(pktopt->ip6po_pktinfo, M_IP6OPT);
2384		pktopt->ip6po_pktinfo = NULL;
2385	}
2386	if (optname == -1 || optname == IPV6_HOPLIMIT)
2387		pktopt->ip6po_hlim = -1;
2388	if (optname == -1 || optname == IPV6_TCLASS)
2389		pktopt->ip6po_tclass = -1;
2390	if (optname == -1 || optname == IPV6_NEXTHOP) {
2391		if (pktopt->ip6po_nextroute.ro_rt) {
2392			RTFREE(pktopt->ip6po_nextroute.ro_rt);
2393			pktopt->ip6po_nextroute.ro_rt = NULL;
2394		}
2395		if (pktopt->ip6po_nexthop)
2396			free(pktopt->ip6po_nexthop, M_IP6OPT);
2397		pktopt->ip6po_nexthop = NULL;
2398	}
2399	if (optname == -1 || optname == IPV6_HOPOPTS) {
2400		if (pktopt->ip6po_hbh)
2401			free(pktopt->ip6po_hbh, M_IP6OPT);
2402		pktopt->ip6po_hbh = NULL;
2403	}
2404	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2405		if (pktopt->ip6po_dest1)
2406			free(pktopt->ip6po_dest1, M_IP6OPT);
2407		pktopt->ip6po_dest1 = NULL;
2408	}
2409	if (optname == -1 || optname == IPV6_RTHDR) {
2410		if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2411			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2412		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2413		if (pktopt->ip6po_route.ro_rt) {
2414			RTFREE(pktopt->ip6po_route.ro_rt);
2415			pktopt->ip6po_route.ro_rt = NULL;
2416		}
2417	}
2418	if (optname == -1 || optname == IPV6_DSTOPTS) {
2419		if (pktopt->ip6po_dest2)
2420			free(pktopt->ip6po_dest2, M_IP6OPT);
2421		pktopt->ip6po_dest2 = NULL;
2422	}
2423}
2424
2425#define PKTOPT_EXTHDRCPY(type) \
2426do {\
2427	if (src->type) {\
2428		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2429		dst->type = malloc(hlen, M_IP6OPT, canwait);\
2430		if (dst->type == NULL && canwait == M_NOWAIT)\
2431			goto bad;\
2432		bcopy(src->type, dst->type, hlen);\
2433	}\
2434} while (/*CONSTCOND*/ 0)
2435
2436static int
2437copypktopts(dst, src, canwait)
2438	struct ip6_pktopts *dst, *src;
2439	int canwait;
2440{
2441	if (dst == NULL || src == NULL)  {
2442		printf("ip6_clearpktopts: invalid argument\n");
2443		return (EINVAL);
2444	}
2445
2446	dst->ip6po_hlim = src->ip6po_hlim;
2447	dst->ip6po_tclass = src->ip6po_tclass;
2448	dst->ip6po_flags = src->ip6po_flags;
2449	if (src->ip6po_pktinfo) {
2450		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2451		    M_IP6OPT, canwait);
2452		if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2453			goto bad;
2454		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2455	}
2456	if (src->ip6po_nexthop) {
2457		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2458		    M_IP6OPT, canwait);
2459		if (dst->ip6po_nexthop == NULL)
2460			goto bad;
2461		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2462		    src->ip6po_nexthop->sa_len);
2463	}
2464	PKTOPT_EXTHDRCPY(ip6po_hbh);
2465	PKTOPT_EXTHDRCPY(ip6po_dest1);
2466	PKTOPT_EXTHDRCPY(ip6po_dest2);
2467	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2468	return (0);
2469
2470  bad:
2471	if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2472	if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2473	if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2474	if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2475	if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2476	if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2477	return (ENOBUFS);
2478}
2479#undef PKTOPT_EXTHDRCPY
2480
2481struct ip6_pktopts *
2482ip6_copypktopts(src, canwait)
2483	struct ip6_pktopts *src;
2484	int canwait;
2485{
2486	int error;
2487	struct ip6_pktopts *dst;
2488
2489	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2490	if (dst == NULL && canwait == M_NOWAIT)
2491		return (NULL);
2492	ip6_initpktopts(dst);
2493
2494	if ((error = copypktopts(dst, src, canwait)) != 0) {
2495		free(dst, M_IP6OPT);
2496		return (NULL);
2497	}
2498
2499	return (dst);
2500}
2501
2502void
2503ip6_freepcbopts(pktopt)
2504	struct ip6_pktopts *pktopt;
2505{
2506	if (pktopt == NULL)
2507		return;
2508
2509	ip6_clearpktopts(pktopt, -1);
2510
2511	free(pktopt, M_IP6OPT);
2512}
2513
2514/*
2515 * Set the IP6 multicast options in response to user setsockopt().
2516 */
2517static int
2518ip6_setmoptions(optname, im6op, m)
2519	int optname;
2520	struct ip6_moptions **im6op;
2521	struct mbuf *m;
2522{
2523	int error = 0;
2524	u_int loop, ifindex;
2525	struct ipv6_mreq *mreq;
2526	struct ifnet *ifp;
2527	struct ip6_moptions *im6o = *im6op;
2528	struct route_in6 ro;
2529	struct in6_multi_mship *imm;
2530	struct thread *td = curthread;
2531
2532	if (im6o == NULL) {
2533		/*
2534		 * No multicast option buffer attached to the pcb;
2535		 * allocate one and initialize to default values.
2536		 */
2537		im6o = (struct ip6_moptions *)
2538			malloc(sizeof(*im6o), M_IP6MOPTS, M_WAITOK);
2539
2540		if (im6o == NULL)
2541			return (ENOBUFS);
2542		*im6op = im6o;
2543		im6o->im6o_multicast_ifp = NULL;
2544		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2545		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2546		LIST_INIT(&im6o->im6o_memberships);
2547	}
2548
2549	switch (optname) {
2550
2551	case IPV6_MULTICAST_IF:
2552		/*
2553		 * Select the interface for outgoing multicast packets.
2554		 */
2555		if (m == NULL || m->m_len != sizeof(u_int)) {
2556			error = EINVAL;
2557			break;
2558		}
2559		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2560		if (ifindex < 0 || if_index < ifindex) {
2561			error = ENXIO;	/* XXX EINVAL? */
2562			break;
2563		}
2564		ifp = ifnet_byindex(ifindex);
2565		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2566			error = EADDRNOTAVAIL;
2567			break;
2568		}
2569		im6o->im6o_multicast_ifp = ifp;
2570		break;
2571
2572	case IPV6_MULTICAST_HOPS:
2573	    {
2574		/*
2575		 * Set the IP6 hoplimit for outgoing multicast packets.
2576		 */
2577		int optval;
2578		if (m == NULL || m->m_len != sizeof(int)) {
2579			error = EINVAL;
2580			break;
2581		}
2582		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2583		if (optval < -1 || optval >= 256)
2584			error = EINVAL;
2585		else if (optval == -1)
2586			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2587		else
2588			im6o->im6o_multicast_hlim = optval;
2589		break;
2590	    }
2591
2592	case IPV6_MULTICAST_LOOP:
2593		/*
2594		 * Set the loopback flag for outgoing multicast packets.
2595		 * Must be zero or one.
2596		 */
2597		if (m == NULL || m->m_len != sizeof(u_int)) {
2598			error = EINVAL;
2599			break;
2600		}
2601		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2602		if (loop > 1) {
2603			error = EINVAL;
2604			break;
2605		}
2606		im6o->im6o_multicast_loop = loop;
2607		break;
2608
2609	case IPV6_JOIN_GROUP:
2610		/*
2611		 * Add a multicast group membership.
2612		 * Group must be a valid IP6 multicast address.
2613		 */
2614		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2615			error = EINVAL;
2616			break;
2617		}
2618		mreq = mtod(m, struct ipv6_mreq *);
2619
2620		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2621			/*
2622			 * We use the unspecified address to specify to accept
2623			 * all multicast addresses. Only super user is allowed
2624			 * to do this.
2625			 */
2626			if (suser(td)) {
2627				error = EACCES;
2628				break;
2629			}
2630		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2631			error = EINVAL;
2632			break;
2633		}
2634
2635		/*
2636		 * If no interface was explicitly specified, choose an
2637		 * appropriate one according to the given multicast address.
2638		 */
2639		if (mreq->ipv6mr_interface == 0) {
2640			struct sockaddr_in6 *dst;
2641
2642			/*
2643			 * Look up the routing table for the
2644			 * address, and choose the outgoing interface.
2645			 *   XXX: is it a good approach?
2646			 */
2647			ro.ro_rt = NULL;
2648			dst = (struct sockaddr_in6 *)&ro.ro_dst;
2649			bzero(dst, sizeof(*dst));
2650			dst->sin6_family = AF_INET6;
2651			dst->sin6_len = sizeof(*dst);
2652			dst->sin6_addr = mreq->ipv6mr_multiaddr;
2653			rtalloc((struct route *)&ro);
2654			if (ro.ro_rt == NULL) {
2655				error = EADDRNOTAVAIL;
2656				break;
2657			}
2658			ifp = ro.ro_rt->rt_ifp;
2659			RTFREE(ro.ro_rt);
2660		} else {
2661			/*
2662			 * If the interface is specified, validate it.
2663			 */
2664			if (mreq->ipv6mr_interface < 0 ||
2665			    if_index < mreq->ipv6mr_interface) {
2666				error = ENXIO;	/* XXX EINVAL? */
2667				break;
2668			}
2669			ifp = ifnet_byindex(mreq->ipv6mr_interface);
2670			if (!ifp) {
2671				error = ENXIO;	/* XXX EINVAL? */
2672				break;
2673			}
2674		}
2675
2676		/*
2677		 * See if we found an interface, and confirm that it
2678		 * supports multicast
2679		 */
2680		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2681			error = EADDRNOTAVAIL;
2682			break;
2683		}
2684
2685		if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
2686			error = EADDRNOTAVAIL; /* XXX: should not happen */
2687			break;
2688		}
2689
2690		/*
2691		 * See if the membership already exists.
2692		 */
2693		for (imm = im6o->im6o_memberships.lh_first;
2694		     imm != NULL; imm = imm->i6mm_chain.le_next)
2695			if (imm->i6mm_maddr->in6m_ifp == ifp &&
2696			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2697					       &mreq->ipv6mr_multiaddr))
2698				break;
2699		if (imm != NULL) {
2700			error = EADDRINUSE;
2701			break;
2702		}
2703		/*
2704		 * Everything looks good; add a new record to the multicast
2705		 * address list for the given interface.
2706		 */
2707		imm = in6_joingroup(ifp, &mreq->ipv6mr_multiaddr,  &error, 0);
2708		if (imm == NULL)
2709			break;
2710		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2711		break;
2712
2713	case IPV6_LEAVE_GROUP:
2714		/*
2715		 * Drop a multicast group membership.
2716		 * Group must be a valid IP6 multicast address.
2717		 */
2718		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2719			error = EINVAL;
2720			break;
2721		}
2722		mreq = mtod(m, struct ipv6_mreq *);
2723
2724		/*
2725		 * If an interface address was specified, get a pointer
2726		 * to its ifnet structure.
2727		 */
2728		if (mreq->ipv6mr_interface < 0 ||
2729		    if_index < mreq->ipv6mr_interface) {
2730			error = ENXIO;	/* XXX EINVAL? */
2731			break;
2732		}
2733		if (mreq->ipv6mr_interface == 0)
2734			ifp = NULL;
2735		else
2736			ifp = ifnet_byindex(mreq->ipv6mr_interface);
2737
2738		/* Fill in the scope zone ID */
2739		if (ifp) {
2740			if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
2741				/* XXX: should not happen */
2742				error = EADDRNOTAVAIL;
2743				break;
2744			}
2745		} else if (mreq->ipv6mr_interface != 0) {
2746			/*
2747			 * This case happens when the (positive) index is in
2748			 * the valid range, but the corresponding interface has
2749			 * been detached dynamically (XXX).
2750			 */
2751			error = EADDRNOTAVAIL;
2752			break;
2753		} else {	/* ipv6mr_interface == 0 */
2754			struct sockaddr_in6 sa6_mc;
2755
2756			/*
2757			 * The API spec says as follows:
2758			 *  If the interface index is specified as 0, the
2759			 *  system may choose a multicast group membership to
2760			 *  drop by matching the multicast address only.
2761			 * On the other hand, we cannot disambiguate the scope
2762			 * zone unless an interface is provided.  Thus, we
2763			 * check if there's ambiguity with the default scope
2764			 * zone as the last resort.
2765			 */
2766			bzero(&sa6_mc, sizeof(sa6_mc));
2767			sa6_mc.sin6_family = AF_INET6;
2768			sa6_mc.sin6_len = sizeof(sa6_mc);
2769			sa6_mc.sin6_addr = mreq->ipv6mr_multiaddr;
2770			error = sa6_embedscope(&sa6_mc, ip6_use_defzone);
2771			if (error != 0)
2772				break;
2773			mreq->ipv6mr_multiaddr = sa6_mc.sin6_addr;
2774		}
2775
2776		/*
2777		 * Find the membership in the membership list.
2778		 */
2779		for (imm = im6o->im6o_memberships.lh_first;
2780		     imm != NULL; imm = imm->i6mm_chain.le_next) {
2781			if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2782			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2783			    &mreq->ipv6mr_multiaddr))
2784				break;
2785		}
2786		if (imm == NULL) {
2787			/* Unable to resolve interface */
2788			error = EADDRNOTAVAIL;
2789			break;
2790		}
2791		/*
2792		 * Give up the multicast address record to which the
2793		 * membership points.
2794		 */
2795		LIST_REMOVE(imm, i6mm_chain);
2796		in6_delmulti(imm->i6mm_maddr);
2797		free(imm, M_IP6MADDR);
2798		break;
2799
2800	default:
2801		error = EOPNOTSUPP;
2802		break;
2803	}
2804
2805	/*
2806	 * If all options have default values, no need to keep the mbuf.
2807	 */
2808	if (im6o->im6o_multicast_ifp == NULL &&
2809	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2810	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2811	    im6o->im6o_memberships.lh_first == NULL) {
2812		free(*im6op, M_IP6MOPTS);
2813		*im6op = NULL;
2814	}
2815
2816	return (error);
2817}
2818
2819/*
2820 * Return the IP6 multicast options in response to user getsockopt().
2821 */
2822static int
2823ip6_getmoptions(optname, im6o, mp)
2824	int optname;
2825	struct ip6_moptions *im6o;
2826	struct mbuf **mp;
2827{
2828	u_int *hlim, *loop, *ifindex;
2829
2830	*mp = m_get(M_TRYWAIT, MT_HEADER);		/* XXX */
2831
2832	switch (optname) {
2833
2834	case IPV6_MULTICAST_IF:
2835		ifindex = mtod(*mp, u_int *);
2836		(*mp)->m_len = sizeof(u_int);
2837		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2838			*ifindex = 0;
2839		else
2840			*ifindex = im6o->im6o_multicast_ifp->if_index;
2841		return (0);
2842
2843	case IPV6_MULTICAST_HOPS:
2844		hlim = mtod(*mp, u_int *);
2845		(*mp)->m_len = sizeof(u_int);
2846		if (im6o == NULL)
2847			*hlim = ip6_defmcasthlim;
2848		else
2849			*hlim = im6o->im6o_multicast_hlim;
2850		return (0);
2851
2852	case IPV6_MULTICAST_LOOP:
2853		loop = mtod(*mp, u_int *);
2854		(*mp)->m_len = sizeof(u_int);
2855		if (im6o == NULL)
2856			*loop = ip6_defmcasthlim;
2857		else
2858			*loop = im6o->im6o_multicast_loop;
2859		return (0);
2860
2861	default:
2862		return (EOPNOTSUPP);
2863	}
2864}
2865
2866/*
2867 * Discard the IP6 multicast options.
2868 */
2869void
2870ip6_freemoptions(im6o)
2871	struct ip6_moptions *im6o;
2872{
2873	struct in6_multi_mship *imm;
2874
2875	if (im6o == NULL)
2876		return;
2877
2878	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2879		LIST_REMOVE(imm, i6mm_chain);
2880		if (imm->i6mm_maddr)
2881			in6_delmulti(imm->i6mm_maddr);
2882		free(imm, M_IP6MADDR);
2883	}
2884	free(im6o, M_IP6MOPTS);
2885}
2886
2887/*
2888 * Set IPv6 outgoing packet options based on advanced API.
2889 */
2890int
2891ip6_setpktopts(control, opt, stickyopt, priv, uproto)
2892	struct mbuf *control;
2893	struct ip6_pktopts *opt, *stickyopt;
2894	int priv, uproto;
2895{
2896	struct cmsghdr *cm = 0;
2897
2898	if (control == NULL || opt == NULL)
2899		return (EINVAL);
2900
2901	ip6_initpktopts(opt);
2902	if (stickyopt) {
2903		int error;
2904
2905		/*
2906		 * If stickyopt is provided, make a local copy of the options
2907		 * for this particular packet, then override them by ancillary
2908		 * objects.
2909		 * XXX: copypktopts() does not copy the cached route to a next
2910		 * hop (if any).  This is not very good in terms of efficiency,
2911		 * but we can allow this since this option should be rarely
2912		 * used.
2913		 */
2914		if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2915			return (error);
2916	}
2917
2918	/*
2919	 * XXX: Currently, we assume all the optional information is stored
2920	 * in a single mbuf.
2921	 */
2922	if (control->m_next)
2923		return (EINVAL);
2924
2925	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2926	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2927		int error;
2928
2929		if (control->m_len < CMSG_LEN(0))
2930			return (EINVAL);
2931
2932		cm = mtod(control, struct cmsghdr *);
2933		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2934			return (EINVAL);
2935		if (cm->cmsg_level != IPPROTO_IPV6)
2936			continue;
2937
2938		error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2939		    cm->cmsg_len - CMSG_LEN(0), opt, priv, 0, 1, uproto);
2940		if (error)
2941			return (error);
2942	}
2943
2944	return (0);
2945}
2946
2947/*
2948 * Set a particular packet option, as a sticky option or an ancillary data
2949 * item.  "len" can be 0 only when it's a sticky option.
2950 * We have 4 cases of combination of "sticky" and "cmsg":
2951 * "sticky=0, cmsg=0": impossible
2952 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2953 * "sticky=1, cmsg=0": RFC3542 socket option
2954 * "sticky=1, cmsg=1": RFC2292 socket option
2955 */
2956static int
2957ip6_setpktopt(optname, buf, len, opt, priv, sticky, cmsg, uproto)
2958	int optname, len, priv, sticky, cmsg, uproto;
2959	u_char *buf;
2960	struct ip6_pktopts *opt;
2961{
2962	int minmtupolicy, preftemp;
2963
2964	if (!sticky && !cmsg) {
2965#ifdef DIAGNOSTIC
2966		printf("ip6_setpktopt: impossible case\n");
2967#endif
2968		return (EINVAL);
2969	}
2970
2971	/*
2972	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2973	 * not be specified in the context of RFC3542.  Conversely,
2974	 * RFC3542 types should not be specified in the context of RFC2292.
2975	 */
2976	if (!cmsg) {
2977		switch (optname) {
2978		case IPV6_2292PKTINFO:
2979		case IPV6_2292HOPLIMIT:
2980		case IPV6_2292NEXTHOP:
2981		case IPV6_2292HOPOPTS:
2982		case IPV6_2292DSTOPTS:
2983		case IPV6_2292RTHDR:
2984		case IPV6_2292PKTOPTIONS:
2985			return (ENOPROTOOPT);
2986		}
2987	}
2988	if (sticky && cmsg) {
2989		switch (optname) {
2990		case IPV6_PKTINFO:
2991		case IPV6_HOPLIMIT:
2992		case IPV6_NEXTHOP:
2993		case IPV6_HOPOPTS:
2994		case IPV6_DSTOPTS:
2995		case IPV6_RTHDRDSTOPTS:
2996		case IPV6_RTHDR:
2997		case IPV6_USE_MIN_MTU:
2998		case IPV6_DONTFRAG:
2999		case IPV6_TCLASS:
3000		case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
3001			return (ENOPROTOOPT);
3002		}
3003	}
3004
3005	switch (optname) {
3006	case IPV6_2292PKTINFO:
3007	case IPV6_PKTINFO:
3008	{
3009		struct ifnet *ifp = NULL;
3010		struct in6_pktinfo *pktinfo;
3011
3012		if (len != sizeof(struct in6_pktinfo))
3013			return (EINVAL);
3014
3015		pktinfo = (struct in6_pktinfo *)buf;
3016
3017		/*
3018		 * An application can clear any sticky IPV6_PKTINFO option by
3019		 * doing a "regular" setsockopt with ipi6_addr being
3020		 * in6addr_any and ipi6_ifindex being zero.
3021		 * [RFC 3542, Section 6]
3022		 */
3023		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3024		    pktinfo->ipi6_ifindex == 0 &&
3025		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3026			ip6_clearpktopts(opt, optname);
3027			break;
3028		}
3029
3030		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3031		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3032			return (EINVAL);
3033		}
3034
3035		/* validate the interface index if specified. */
3036		if (pktinfo->ipi6_ifindex > if_index ||
3037		    pktinfo->ipi6_ifindex < 0) {
3038			 return (ENXIO);
3039		}
3040		if (pktinfo->ipi6_ifindex) {
3041			ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
3042			if (ifp == NULL)
3043				return (ENXIO);
3044		}
3045
3046		/*
3047		 * We store the address anyway, and let in6_selectsrc()
3048		 * validate the specified address.  This is because ipi6_addr
3049		 * may not have enough information about its scope zone, and
3050		 * we may need additional information (such as outgoing
3051		 * interface or the scope zone of a destination address) to
3052		 * disambiguate the scope.
3053		 * XXX: the delay of the validation may confuse the
3054		 * application when it is used as a sticky option.
3055		 */
3056		if (opt->ip6po_pktinfo == NULL) {
3057			opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
3058			    M_IP6OPT, M_NOWAIT);
3059			if (opt->ip6po_pktinfo == NULL)
3060				return (ENOBUFS);
3061		}
3062		bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
3063		break;
3064	}
3065
3066	case IPV6_2292HOPLIMIT:
3067	case IPV6_HOPLIMIT:
3068	{
3069		int *hlimp;
3070
3071		/*
3072		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3073		 * to simplify the ordering among hoplimit options.
3074		 */
3075		if (optname == IPV6_HOPLIMIT && sticky)
3076			return (ENOPROTOOPT);
3077
3078		if (len != sizeof(int))
3079			return (EINVAL);
3080		hlimp = (int *)buf;
3081		if (*hlimp < -1 || *hlimp > 255)
3082			return (EINVAL);
3083
3084		opt->ip6po_hlim = *hlimp;
3085		break;
3086	}
3087
3088	case IPV6_TCLASS:
3089	{
3090		int tclass;
3091
3092		if (len != sizeof(int))
3093			return (EINVAL);
3094		tclass = *(int *)buf;
3095		if (tclass < -1 || tclass > 255)
3096			return (EINVAL);
3097
3098		opt->ip6po_tclass = tclass;
3099		break;
3100	}
3101
3102	case IPV6_2292NEXTHOP:
3103	case IPV6_NEXTHOP:
3104		if (!priv)
3105			return (EPERM);
3106
3107		if (len == 0) {	/* just remove the option */
3108			ip6_clearpktopts(opt, IPV6_NEXTHOP);
3109			break;
3110		}
3111
3112		/* check if cmsg_len is large enough for sa_len */
3113		if (len < sizeof(struct sockaddr) || len < *buf)
3114			return (EINVAL);
3115
3116		switch (((struct sockaddr *)buf)->sa_family) {
3117		case AF_INET6:
3118		{
3119			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3120			int error;
3121
3122			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3123				return (EINVAL);
3124
3125			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3126			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3127				return (EINVAL);
3128			}
3129			if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3130			    != 0) {
3131				return (error);
3132			}
3133			break;
3134		}
3135		case AF_LINK:	/* should eventually be supported */
3136		default:
3137			return (EAFNOSUPPORT);
3138		}
3139
3140		/* turn off the previous option, then set the new option. */
3141		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3142		opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
3143		if (opt->ip6po_nexthop == NULL)
3144			return (ENOBUFS);
3145		bcopy(buf, opt->ip6po_nexthop, *buf);
3146		break;
3147
3148	case IPV6_2292HOPOPTS:
3149	case IPV6_HOPOPTS:
3150	{
3151		struct ip6_hbh *hbh;
3152		int hbhlen;
3153
3154		/*
3155		 * XXX: We don't allow a non-privileged user to set ANY HbH
3156		 * options, since per-option restriction has too much
3157		 * overhead.
3158		 */
3159		if (!priv)
3160			return (EPERM);
3161
3162		if (len == 0) {
3163			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3164			break;	/* just remove the option */
3165		}
3166
3167		/* message length validation */
3168		if (len < sizeof(struct ip6_hbh))
3169			return (EINVAL);
3170		hbh = (struct ip6_hbh *)buf;
3171		hbhlen = (hbh->ip6h_len + 1) << 3;
3172		if (len != hbhlen)
3173			return (EINVAL);
3174
3175		/* turn off the previous option, then set the new option. */
3176		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3177		opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
3178		if (opt->ip6po_hbh == NULL)
3179			return (ENOBUFS);
3180		bcopy(hbh, opt->ip6po_hbh, hbhlen);
3181
3182		break;
3183	}
3184
3185	case IPV6_2292DSTOPTS:
3186	case IPV6_DSTOPTS:
3187	case IPV6_RTHDRDSTOPTS:
3188	{
3189		struct ip6_dest *dest, **newdest = NULL;
3190		int destlen;
3191
3192		if (!priv)	/* XXX: see the comment for IPV6_HOPOPTS */
3193			return (EPERM);
3194
3195		if (len == 0) {
3196			ip6_clearpktopts(opt, optname);
3197			break;	/* just remove the option */
3198		}
3199
3200		/* message length validation */
3201		if (len < sizeof(struct ip6_dest))
3202			return (EINVAL);
3203		dest = (struct ip6_dest *)buf;
3204		destlen = (dest->ip6d_len + 1) << 3;
3205		if (len != destlen)
3206			return (EINVAL);
3207
3208		/*
3209		 * Determine the position that the destination options header
3210		 * should be inserted; before or after the routing header.
3211		 */
3212		switch (optname) {
3213		case IPV6_2292DSTOPTS:
3214			/*
3215			 * The old advacned API is ambiguous on this point.
3216			 * Our approach is to determine the position based
3217			 * according to the existence of a routing header.
3218			 * Note, however, that this depends on the order of the
3219			 * extension headers in the ancillary data; the 1st
3220			 * part of the destination options header must appear
3221			 * before the routing header in the ancillary data,
3222			 * too.
3223			 * RFC3542 solved the ambiguity by introducing
3224			 * separate ancillary data or option types.
3225			 */
3226			if (opt->ip6po_rthdr == NULL)
3227				newdest = &opt->ip6po_dest1;
3228			else
3229				newdest = &opt->ip6po_dest2;
3230			break;
3231		case IPV6_RTHDRDSTOPTS:
3232			newdest = &opt->ip6po_dest1;
3233			break;
3234		case IPV6_DSTOPTS:
3235			newdest = &opt->ip6po_dest2;
3236			break;
3237		}
3238
3239		/* turn off the previous option, then set the new option. */
3240		ip6_clearpktopts(opt, optname);
3241		*newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
3242		if (*newdest == NULL)
3243			return (ENOBUFS);
3244		bcopy(dest, *newdest, destlen);
3245
3246		break;
3247	}
3248
3249	case IPV6_2292RTHDR:
3250	case IPV6_RTHDR:
3251	{
3252		struct ip6_rthdr *rth;
3253		int rthlen;
3254
3255		if (len == 0) {
3256			ip6_clearpktopts(opt, IPV6_RTHDR);
3257			break;	/* just remove the option */
3258		}
3259
3260		/* message length validation */
3261		if (len < sizeof(struct ip6_rthdr))
3262			return (EINVAL);
3263		rth = (struct ip6_rthdr *)buf;
3264		rthlen = (rth->ip6r_len + 1) << 3;
3265		if (len != rthlen)
3266			return (EINVAL);
3267
3268		switch (rth->ip6r_type) {
3269		case IPV6_RTHDR_TYPE_0:
3270			if (rth->ip6r_len == 0)	/* must contain one addr */
3271				return (EINVAL);
3272			if (rth->ip6r_len % 2) /* length must be even */
3273				return (EINVAL);
3274			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3275				return (EINVAL);
3276			break;
3277		default:
3278			return (EINVAL);	/* not supported */
3279		}
3280
3281		/* turn off the previous option */
3282		ip6_clearpktopts(opt, IPV6_RTHDR);
3283		opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
3284		if (opt->ip6po_rthdr == NULL)
3285			return (ENOBUFS);
3286		bcopy(rth, opt->ip6po_rthdr, rthlen);
3287
3288		break;
3289	}
3290
3291	case IPV6_USE_MIN_MTU:
3292		if (len != sizeof(int))
3293			return (EINVAL);
3294		minmtupolicy = *(int *)buf;
3295		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3296		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3297		    minmtupolicy != IP6PO_MINMTU_ALL) {
3298			return (EINVAL);
3299		}
3300		opt->ip6po_minmtu = minmtupolicy;
3301		break;
3302
3303	case IPV6_DONTFRAG:
3304		if (len != sizeof(int))
3305			return (EINVAL);
3306
3307		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3308			/*
3309			 * we ignore this option for TCP sockets.
3310			 * (RFC3542 leaves this case unspecified.)
3311			 */
3312			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3313		} else
3314			opt->ip6po_flags |= IP6PO_DONTFRAG;
3315		break;
3316
3317	case IPV6_PREFER_TEMPADDR:
3318		if (len != sizeof(int))
3319			return (EINVAL);
3320		preftemp = *(int *)buf;
3321		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3322		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3323		    preftemp != IP6PO_TEMPADDR_PREFER) {
3324			return (EINVAL);
3325		}
3326		opt->ip6po_prefer_tempaddr = preftemp;
3327		break;
3328
3329	default:
3330		return (ENOPROTOOPT);
3331	} /* end of switch */
3332
3333	return (0);
3334}
3335
3336/*
3337 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3338 * packet to the input queue of a specified interface.  Note that this
3339 * calls the output routine of the loopback "driver", but with an interface
3340 * pointer that might NOT be &loif -- easier than replicating that code here.
3341 */
3342void
3343ip6_mloopback(ifp, m, dst)
3344	struct ifnet *ifp;
3345	struct mbuf *m;
3346	struct sockaddr_in6 *dst;
3347{
3348	struct mbuf *copym;
3349	struct ip6_hdr *ip6;
3350
3351	copym = m_copy(m, 0, M_COPYALL);
3352	if (copym == NULL)
3353		return;
3354
3355	/*
3356	 * Make sure to deep-copy IPv6 header portion in case the data
3357	 * is in an mbuf cluster, so that we can safely override the IPv6
3358	 * header portion later.
3359	 */
3360	if ((copym->m_flags & M_EXT) != 0 ||
3361	    copym->m_len < sizeof(struct ip6_hdr)) {
3362		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3363		if (copym == NULL)
3364			return;
3365	}
3366
3367#ifdef DIAGNOSTIC
3368	if (copym->m_len < sizeof(*ip6)) {
3369		m_freem(copym);
3370		return;
3371	}
3372#endif
3373
3374	ip6 = mtod(copym, struct ip6_hdr *);
3375	/*
3376	 * clear embedded scope identifiers if necessary.
3377	 * in6_clearscope will touch the addresses only when necessary.
3378	 */
3379	in6_clearscope(&ip6->ip6_src);
3380	in6_clearscope(&ip6->ip6_dst);
3381
3382	(void)if_simloop(ifp, copym, dst->sin6_family, 0);
3383}
3384
3385/*
3386 * Chop IPv6 header off from the payload.
3387 */
3388static int
3389ip6_splithdr(m, exthdrs)
3390	struct mbuf *m;
3391	struct ip6_exthdrs *exthdrs;
3392{
3393	struct mbuf *mh;
3394	struct ip6_hdr *ip6;
3395
3396	ip6 = mtod(m, struct ip6_hdr *);
3397	if (m->m_len > sizeof(*ip6)) {
3398		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3399		if (mh == 0) {
3400			m_freem(m);
3401			return ENOBUFS;
3402		}
3403		M_MOVE_PKTHDR(mh, m);
3404		MH_ALIGN(mh, sizeof(*ip6));
3405		m->m_len -= sizeof(*ip6);
3406		m->m_data += sizeof(*ip6);
3407		mh->m_next = m;
3408		m = mh;
3409		m->m_len = sizeof(*ip6);
3410		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3411	}
3412	exthdrs->ip6e_ip6 = m;
3413	return 0;
3414}
3415
3416/*
3417 * Compute IPv6 extension header length.
3418 */
3419int
3420ip6_optlen(in6p)
3421	struct in6pcb *in6p;
3422{
3423	int len;
3424
3425	if (!in6p->in6p_outputopts)
3426		return 0;
3427
3428	len = 0;
3429#define elen(x) \
3430    (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3431
3432	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3433	if (in6p->in6p_outputopts->ip6po_rthdr)
3434		/* dest1 is valid with rthdr only */
3435		len += elen(in6p->in6p_outputopts->ip6po_dest1);
3436	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3437	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3438	return len;
3439#undef elen
3440}
3441