ip6_output.c revision 149635
1/*	$FreeBSD: head/sys/netinet6/ip6_output.c 149635 2005-08-30 16:35:27Z andre $	*/
2/*	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $	*/
3
4/*-
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*-
34 * Copyright (c) 1982, 1986, 1988, 1990, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 4. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
62 */
63
64#include "opt_ip6fw.h"
65#include "opt_inet.h"
66#include "opt_inet6.h"
67#include "opt_ipsec.h"
68
69#include <sys/param.h>
70#include <sys/malloc.h>
71#include <sys/mbuf.h>
72#include <sys/proc.h>
73#include <sys/errno.h>
74#include <sys/protosw.h>
75#include <sys/socket.h>
76#include <sys/socketvar.h>
77#include <sys/systm.h>
78#include <sys/kernel.h>
79
80#include <net/if.h>
81#include <net/netisr.h>
82#include <net/route.h>
83#include <net/pfil.h>
84
85#include <netinet/in.h>
86#include <netinet/in_var.h>
87#include <netinet6/in6_var.h>
88#include <netinet/ip6.h>
89#include <netinet/icmp6.h>
90#include <netinet6/ip6_var.h>
91#include <netinet/in_pcb.h>
92#include <netinet/tcp_var.h>
93#include <netinet6/nd6.h>
94
95#ifdef IPSEC
96#include <netinet6/ipsec.h>
97#ifdef INET6
98#include <netinet6/ipsec6.h>
99#endif
100#include <netkey/key.h>
101#endif /* IPSEC */
102
103#ifdef FAST_IPSEC
104#include <netipsec/ipsec.h>
105#include <netipsec/ipsec6.h>
106#include <netipsec/key.h>
107#endif /* FAST_IPSEC */
108
109#include <netinet6/ip6_fw.h>
110
111#include <net/net_osdep.h>
112
113#include <netinet6/ip6protosw.h>
114#include <netinet6/scope6_var.h>
115
116static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");	/* file-local (static) malloc type, described as "internet multicast options" */
117
118struct ip6_exthdrs {	/* one mbuf per header of an outgoing packet; ip6_output() zeroes it first, so NULL means "header absent" */
119	struct mbuf *ip6e_ip6;	/* mbuf carrying the IPv6 header itself */
120	struct mbuf *ip6e_hbh;	/* Hop-by-Hop options header */
121	struct mbuf *ip6e_dest1;	/* Destination options header, 1st part (chained before the routing header) */
122	struct mbuf *ip6e_rthdr;	/* Routing header */
123	struct mbuf *ip6e_dest2;	/* Destination options header, 2nd part (chained right after the IPv6/unfragmentable headers, before the payload) */
124};
125
126static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
127			   int, int));
128static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
129	struct socket *, struct sockopt *));
130static int ip6_getpcbopt __P((struct ip6_pktopts *, int, struct sockopt *));
131static int ip6_setpktopt __P((int, u_char *, int, struct ip6_pktopts *, int,
132	int, int, int));
133
134static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
135static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
136static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));	/* as called from MAKE_EXTHDR: (mbuf slot to fill, header to copy, header length in bytes); nonzero return is an error */
137static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
138	struct ip6_frag **));
139static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));	/* called by ip6_output() when plen > IPV6_MAXPACKET; 2nd arg is that payload length */
140static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));	/* separates the IPv6 header from the payload; on success ip6_output() continues with exthdrs.ip6e_ip6 */
141static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
142	struct ifnet *, struct in6_addr *, u_long *, int *));	/* path MTU lookup; ip6_output() passes &mtu and &alwaysfrag as the last two arguments */
143static int copypktopts __P((struct ip6_pktopts *, struct ip6_pktopts *, int));
144
145
146/*
147 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
148 * header (with pri, len, nxt, hlim, src, dst).
149 * This function may modify ver and hlim only.
150 * The mbuf chain containing the packet will be freed.
151 * The mbuf opt, if present, will not be freed.
152 *
153 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
154 * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
155 * which is rt_rmx.rmx_mtu.
156 */
157int
158ip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
159	struct mbuf *m0;
160	struct ip6_pktopts *opt;
161	struct route_in6 *ro;
162	int flags;
163	struct ip6_moptions *im6o;
164	struct ifnet **ifpp;		/* XXX: just for statistics */
165	struct inpcb *inp;
166{
167	struct ip6_hdr *ip6, *mhip6;
168	struct ifnet *ifp, *origifp;
169	struct mbuf *m = m0;
170	int hlen, tlen, len, off;
171	struct route_in6 ip6route;
172	struct rtentry *rt = NULL;
173	struct sockaddr_in6 *dst, src_sa, dst_sa;
174	struct in6_addr odst;
175	int error = 0;
176	struct in6_ifaddr *ia = NULL;
177	u_long mtu;
178	int alwaysfrag, dontfrag;
179	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
180	struct ip6_exthdrs exthdrs;
181	struct in6_addr finaldst, src0, dst0;
182	u_int32_t zone;
183	struct route_in6 *ro_pmtu = NULL;
184	int hdrsplit = 0;
185	int needipsec = 0;
186#if defined(IPSEC) || defined(FAST_IPSEC)
187	int needipsectun = 0;
188	struct secpolicy *sp = NULL;
189#endif /*IPSEC || FAST_IPSEC*/
190
191	ip6 = mtod(m, struct ip6_hdr *);
192	finaldst = ip6->ip6_dst;
193
194#define MAKE_EXTHDR(hp, mp)						\
195    do {								\
196	if (hp) {							\
197		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
198		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
199		    ((eh)->ip6e_len + 1) << 3);				\
200		if (error)						\
201			goto freehdrs;					\
202	}								\
203    } while (/*CONSTCOND*/ 0)
204
205	bzero(&exthdrs, sizeof(exthdrs));
206
207	if (opt) {
208		/* Hop-by-Hop options header */
209		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
210		/* Destination options header(1st part) */
211		if (opt->ip6po_rthdr) {
212			/*
213			 * Destination options header(1st part)
214			 * This only makes sense with a routing header.
215			 * See Section 9.2 of RFC 3542.
216			 * Disabling this part just for MIP6 convenience is
217			 * a bad idea.  We need to think carefully about a
218			 * way to make the advanced API coexist with MIP6
219			 * options, which might automatically be inserted in
220			 * the kernel.
221			 */
222			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
223		}
224		/* Routing header */
225		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
226		/* Destination options header(2nd part) */
227		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
228	}
229
230#ifdef IPSEC
231	/* get a security policy for this packet */
232	if (inp == NULL)
233		sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
234	else
235		sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
236
237	if (sp == NULL) {
238		ipsec6stat.out_inval++;
239		goto freehdrs;
240	}
241
242	error = 0;
243
244	/* check policy */
245	switch (sp->policy) {
246	case IPSEC_POLICY_DISCARD:
247		/*
248		 * This packet is just discarded.
249		 */
250		ipsec6stat.out_polvio++;
251		goto freehdrs;
252
253	case IPSEC_POLICY_BYPASS:
254	case IPSEC_POLICY_NONE:
255		/* no need to do IPsec. */
256		needipsec = 0;
257		break;
258
259	case IPSEC_POLICY_IPSEC:
260		if (sp->req == NULL) {
261			/* acquire a policy */
262			error = key_spdacquire(sp);
263			goto freehdrs;
264		}
265		needipsec = 1;
266		break;
267
268	case IPSEC_POLICY_ENTRUST:
269	default:
270		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
271	}
272#endif /* IPSEC */
273#ifdef FAST_IPSEC
274	/* get a security policy for this packet */
275	if (inp == NULL)
276		sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
277	else
278		sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
279
280	if (sp == NULL) {
281		newipsecstat.ips_out_inval++;
282		goto freehdrs;
283	}
284
285	error = 0;
286
287	/* check policy */
288	switch (sp->policy) {
289	case IPSEC_POLICY_DISCARD:
290		/*
291		 * This packet is just discarded.
292		 */
293		newipsecstat.ips_out_polvio++;
294		goto freehdrs;
295
296	case IPSEC_POLICY_BYPASS:
297	case IPSEC_POLICY_NONE:
298		/* no need to do IPsec. */
299		needipsec = 0;
300		break;
301
302	case IPSEC_POLICY_IPSEC:
303		if (sp->req == NULL) {
304			/* acquire a policy */
305			error = key_spdacquire(sp);
306			goto freehdrs;
307		}
308		needipsec = 1;
309		break;
310
311	case IPSEC_POLICY_ENTRUST:
312	default:
313		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
314	}
315#endif /* FAST_IPSEC */
316
317	/*
318	 * Calculate the total length of the extension header chain.
319	 * Keep the length of the unfragmentable part for fragmentation.
320	 */
321	optlen = 0;
322	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
323	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
324	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
325	unfragpartlen = optlen + sizeof(struct ip6_hdr);
326	/* NOTE: we don't add AH/ESP length here. do that later. */
327	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
328
329	/*
330	 * If we need IPsec, or there is at least one extension header,
331	 * separate IP6 header from the payload.
332	 */
333	if ((needipsec || optlen) && !hdrsplit) {
334		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
335			m = NULL;
336			goto freehdrs;
337		}
338		m = exthdrs.ip6e_ip6;
339		hdrsplit++;
340	}
341
342	/* adjust pointer */
343	ip6 = mtod(m, struct ip6_hdr *);
344
345	/* adjust mbuf packet header length */
346	m->m_pkthdr.len += optlen;
347	plen = m->m_pkthdr.len - sizeof(*ip6);
348
349	/* If this is a jumbo payload, insert a jumbo payload option. */
350	if (plen > IPV6_MAXPACKET) {
351		if (!hdrsplit) {
352			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
353				m = NULL;
354				goto freehdrs;
355			}
356			m = exthdrs.ip6e_ip6;
357			hdrsplit++;
358		}
359		/* adjust pointer */
360		ip6 = mtod(m, struct ip6_hdr *);
361		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
362			goto freehdrs;
363		ip6->ip6_plen = 0;
364	} else
365		ip6->ip6_plen = htons(plen);
366
367	/*
368	 * Concatenate headers and fill in next header fields.
369	 * Here we have, on "m"
370	 *	IPv6 payload
371	 * and we insert headers accordingly.  Finally, we should be getting:
372	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
373	 *
374	 * during the header composing process, "m" points to IPv6 header.
375	 * "mprev" points to an extension header prior to esp.
376	 */
377	{
378		u_char *nexthdrp = &ip6->ip6_nxt;
379		struct mbuf *mprev = m;
380
381		/*
382		 * we treat dest2 specially.  this makes IPsec processing
383		 * much easier.  the goal here is to make mprev point the
384		 * mbuf prior to dest2.
385		 *
386		 * result: IPv6 dest2 payload
387		 * m and mprev will point to IPv6 header.
388		 */
389		if (exthdrs.ip6e_dest2) {
390			if (!hdrsplit)
391				panic("assumption failed: hdr not split");
392			exthdrs.ip6e_dest2->m_next = m->m_next;
393			m->m_next = exthdrs.ip6e_dest2;
394			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
395			ip6->ip6_nxt = IPPROTO_DSTOPTS;
396		}
397
398#define MAKE_CHAIN(m, mp, p, i)\
399    do {\
400	if (m) {\
401		if (!hdrsplit) \
402			panic("assumption failed: hdr not split"); \
403		*mtod((m), u_char *) = *(p);\
404		*(p) = (i);\
405		p = mtod((m), u_char *);\
406		(m)->m_next = (mp)->m_next;\
407		(mp)->m_next = (m);\
408		(mp) = (m);\
409	}\
410    } while (/*CONSTCOND*/ 0)
411		/*
412		 * result: IPv6 hbh dest1 rthdr dest2 payload
413		 * m will point to IPv6 header.  mprev will point to the
414		 * extension header prior to dest2 (rthdr in the above case).
415		 */
416		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
417		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
418		    IPPROTO_DSTOPTS);
419		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
420		    IPPROTO_ROUTING);
421
422#if defined(IPSEC) || defined(FAST_IPSEC)
423		if (!needipsec)
424			goto skip_ipsec2;
425
426		/*
427		 * pointers after IPsec headers are not valid any more.
428		 * other pointers need a great care too.
429		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
430		 */
431		exthdrs.ip6e_dest2 = NULL;
432
433	    {
434		struct ip6_rthdr *rh = NULL;
435		int segleft_org = 0;
436		struct ipsec_output_state state;
437
438		if (exthdrs.ip6e_rthdr) {
439			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
440			segleft_org = rh->ip6r_segleft;
441			rh->ip6r_segleft = 0;
442		}
443
444		bzero(&state, sizeof(state));
445		state.m = m;
446		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
447		    &needipsectun);
448		m = state.m;
449		if (error) {
450			/* mbuf is already reclaimed in ipsec6_output_trans. */
451			m = NULL;
452			switch (error) {
453			case EHOSTUNREACH:
454			case ENETUNREACH:
455			case EMSGSIZE:
456			case ENOBUFS:
457			case ENOMEM:
458				break;
459			default:
460				printf("ip6_output (ipsec): error code %d\n", error);
461				/* FALLTHROUGH */
462			case ENOENT:
463				/* don't show these error codes to the user */
464				error = 0;
465				break;
466			}
467			goto bad;
468		}
469		if (exthdrs.ip6e_rthdr) {
470			/* ah6_output doesn't modify mbuf chain */
471			rh->ip6r_segleft = segleft_org;
472		}
473	    }
474skip_ipsec2:;
475#endif
476	}
477
478	/*
479	 * If there is a routing header, replace the destination address field
480	 * with the first hop of the routing header.
481	 */
482	if (exthdrs.ip6e_rthdr) {
483		struct ip6_rthdr *rh =
484			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
485						  struct ip6_rthdr *));
486		struct ip6_rthdr0 *rh0;
487		struct in6_addr *addr;
488		struct sockaddr_in6 sa;
489
490		switch (rh->ip6r_type) {
491		case IPV6_RTHDR_TYPE_0:
492			 rh0 = (struct ip6_rthdr0 *)rh;
493			 addr = (struct in6_addr *)(rh0 + 1);
494
495			 /*
496			  * construct a sockaddr_in6 form of
497			  * the first hop.
498			  *
499			  * XXX: we may not have enough
500			  * information about its scope zone;
501			  * there is no standard API to pass
502			  * the information from the
503			  * application.
504			  */
505			 bzero(&sa, sizeof(sa));
506			 sa.sin6_family = AF_INET6;
507			 sa.sin6_len = sizeof(sa);
508			 sa.sin6_addr = addr[0];
509			 if ((error = sa6_embedscope(&sa,
510			     ip6_use_defzone)) != 0) {
511				 goto bad;
512			 }
513			 ip6->ip6_dst = sa.sin6_addr;
514			 bcopy(&addr[1], &addr[0], sizeof(struct in6_addr)
515			     * (rh0->ip6r0_segleft - 1));
516			 addr[rh0->ip6r0_segleft - 1] = finaldst;
517			 /* XXX */
518			 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
519			 break;
520		default:	/* is it possible? */
521			 error = EINVAL;
522			 goto bad;
523		}
524	}
525
526	/* Source address validation */
527	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
528	    (flags & IPV6_DADOUTPUT) == 0) {
529		error = EOPNOTSUPP;
530		ip6stat.ip6s_badscope++;
531		goto bad;
532	}
533	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
534		error = EOPNOTSUPP;
535		ip6stat.ip6s_badscope++;
536		goto bad;
537	}
538
539	ip6stat.ip6s_localout++;
540
541	/*
542	 * Route packet.
543	 */
544	if (ro == 0) {
545		ro = &ip6route;
546		bzero((caddr_t)ro, sizeof(*ro));
547	}
548	ro_pmtu = ro;
549	if (opt && opt->ip6po_rthdr)
550		ro = &opt->ip6po_route;
551	dst = (struct sockaddr_in6 *)&ro->ro_dst;
552
553again:
554 	/*
555	 * if specified, try to fill in the traffic class field.
556	 * do not override if a non-zero value is already set.
557	 * we check the diffserv field and the ecn field separately.
558	 */
559	if (opt && opt->ip6po_tclass >= 0) {
560		int mask = 0;
561
562		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
563			mask |= 0xfc;
564		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
565			mask |= 0x03;
566		if (mask != 0)
567			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
568	}
569
570	/* fill in or override the hop limit field, if necessary. */
571	if (opt && opt->ip6po_hlim != -1)
572		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
573	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
574		if (im6o != NULL)
575			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
576		else
577			ip6->ip6_hlim = ip6_defmcasthlim;
578	}
579
580#if defined(IPSEC) || defined(FAST_IPSEC)
581	if (needipsec && needipsectun) {
582		struct ipsec_output_state state;
583
584		/*
585		 * All the extension headers will become inaccessible
586		 * (since they can be encrypted).
587		 * Don't panic, we need no more updates to extension headers
588		 * on inner IPv6 packet (since they are now encapsulated).
589		 *
590		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
591		 */
592		bzero(&exthdrs, sizeof(exthdrs));
593		exthdrs.ip6e_ip6 = m;
594
595		bzero(&state, sizeof(state));
596		state.m = m;
597		state.ro = (struct route *)ro;
598		state.dst = (struct sockaddr *)dst;
599
600		error = ipsec6_output_tunnel(&state, sp, flags);
601
602		m = state.m;
603		ro = (struct route_in6 *)state.ro;
604		dst = (struct sockaddr_in6 *)state.dst;
605		if (error) {
606			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
607			m0 = m = NULL;
608			m = NULL;
609			switch (error) {
610			case EHOSTUNREACH:
611			case ENETUNREACH:
612			case EMSGSIZE:
613			case ENOBUFS:
614			case ENOMEM:
615				break;
616			default:
617				printf("ip6_output (ipsec): error code %d\n", error);
618				/* FALLTHROUGH */
619			case ENOENT:
620				/* don't show these error codes to the user */
621				error = 0;
622				break;
623			}
624			goto bad;
625		}
626
627		exthdrs.ip6e_ip6 = m;
628	}
629#endif /* IPSEC */
630
631	/* adjust pointer */
632	ip6 = mtod(m, struct ip6_hdr *);
633
634	bzero(&dst_sa, sizeof(dst_sa));
635	dst_sa.sin6_family = AF_INET6;
636	dst_sa.sin6_len = sizeof(dst_sa);
637	dst_sa.sin6_addr = ip6->ip6_dst;
638	if ((error = in6_selectroute(&dst_sa, opt, im6o, ro,
639	    &ifp, &rt, 0)) != 0) {
640		switch (error) {
641		case EHOSTUNREACH:
642			ip6stat.ip6s_noroute++;
643			break;
644		case EADDRNOTAVAIL:
645		default:
646			break; /* XXX statistics? */
647		}
648		if (ifp != NULL)
649			in6_ifstat_inc(ifp, ifs6_out_discard);
650		goto bad;
651	}
652	if (rt == NULL) {
653		/*
654		 * If in6_selectroute() does not return a route entry,
655		 * dst may not have been updated.
656		 */
657		*dst = dst_sa;	/* XXX */
658	}
659
660	/*
661	 * then rt (for unicast) and ifp must be non-NULL valid values.
662	 */
663	if ((flags & IPV6_FORWARDING) == 0) {
664		/* XXX: the FORWARDING flag can be set for mrouting. */
665		in6_ifstat_inc(ifp, ifs6_out_request);
666	}
667	if (rt != NULL) {
668		ia = (struct in6_ifaddr *)(rt->rt_ifa);
669		rt->rt_use++;
670	}
671
672	/*
673	 * The outgoing interface must be in the zone of source and
674	 * destination addresses.  We should use ia_ifp to support the
675	 * case of sending packets to an address of our own.
676	 */
677	if (ia != NULL && ia->ia_ifp)
678		origifp = ia->ia_ifp;
679	else
680		origifp = ifp;
681
682	src0 = ip6->ip6_src;
683	if (in6_setscope(&src0, origifp, &zone))
684		goto badscope;
685	bzero(&src_sa, sizeof(src_sa));
686	src_sa.sin6_family = AF_INET6;
687	src_sa.sin6_len = sizeof(src_sa);
688	src_sa.sin6_addr = ip6->ip6_src;
689	if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
690		goto badscope;
691
692	dst0 = ip6->ip6_dst;
693	if (in6_setscope(&dst0, origifp, &zone))
694		goto badscope;
695	/* re-initialize to be sure */
696	bzero(&dst_sa, sizeof(dst_sa));
697	dst_sa.sin6_family = AF_INET6;
698	dst_sa.sin6_len = sizeof(dst_sa);
699	dst_sa.sin6_addr = ip6->ip6_dst;
700	if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
701		goto badscope;
702	}
703
704	/* scope check is done. */
705	goto routefound;
706
707  badscope:
708	ip6stat.ip6s_badscope++;
709	in6_ifstat_inc(origifp, ifs6_out_discard);
710	if (error == 0)
711		error = EHOSTUNREACH; /* XXX */
712	goto bad;
713
714  routefound:
715	if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
716		if (opt && opt->ip6po_nextroute.ro_rt) {
717			/*
718			 * The nexthop is explicitly specified by the
719			 * application.  We assume the next hop is an IPv6
720			 * address.
721			 */
722			dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
723		}
724		else if ((rt->rt_flags & RTF_GATEWAY))
725			dst = (struct sockaddr_in6 *)rt->rt_gateway;
726	}
727
728	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
729		m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
730	} else {
731		struct	in6_multi *in6m;
732
733		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
734
735		in6_ifstat_inc(ifp, ifs6_out_mcast);
736
737		/*
738		 * Confirm that the outgoing interface supports multicast.
739		 */
740		if (!(ifp->if_flags & IFF_MULTICAST)) {
741			ip6stat.ip6s_noroute++;
742			in6_ifstat_inc(ifp, ifs6_out_discard);
743			error = ENETUNREACH;
744			goto bad;
745		}
746		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
747		if (in6m != NULL &&
748		   (im6o == NULL || im6o->im6o_multicast_loop)) {
749			/*
750			 * If we belong to the destination multicast group
751			 * on the outgoing interface, and the caller did not
752			 * forbid loopback, loop back a copy.
753			 */
754			ip6_mloopback(ifp, m, dst);
755		} else {
756			/*
757			 * If we are acting as a multicast router, perform
758			 * multicast forwarding as if the packet had just
759			 * arrived on the interface to which we are about
760			 * to send.  The multicast forwarding function
761			 * recursively calls this function, using the
762			 * IPV6_FORWARDING flag to prevent infinite recursion.
763			 *
764			 * Multicasts that are looped back by ip6_mloopback(),
765			 * above, will be forwarded by the ip6_input() routine,
766			 * if necessary.
767			 */
768			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
769				/*
770				 * XXX: ip6_mforward expects that rcvif is NULL
771				 * when it is called from the originating path.
772				 * However, it is not always the case, since
773				 * some versions of MGETHDR() do not
774				 * initialize the field.
775				 */
776				m->m_pkthdr.rcvif = NULL;
777				if (ip6_mforward(ip6, ifp, m) != 0) {
778					m_freem(m);
779					goto done;
780				}
781			}
782		}
783		/*
784		 * Multicasts with a hoplimit of zero may be looped back,
785		 * above, but must not be transmitted on a network.
786		 * Also, multicasts addressed to the loopback interface
787		 * are not sent -- the above call to ip6_mloopback() will
788		 * loop back a copy if this host actually belongs to the
789		 * destination group on the loopback interface.
790		 */
791		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
792		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
793			m_freem(m);
794			goto done;
795		}
796	}
797
798	/*
799	 * Fill the outgoing interface to tell the upper layer
800	 * to increment per-interface statistics.
801	 */
802	if (ifpp)
803		*ifpp = ifp;
804
805	/* Determine path MTU. */
806	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
807	    &alwaysfrag)) != 0)
808		goto bad;
809
810	/*
811	 * The caller of this function may specify to use the minimum MTU
812	 * in some cases.
813	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
814	 * setting.  The logic is a bit complicated; by default, unicast
815	 * packets will follow path MTU while multicast packets will be sent at
816	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
817	 * including unicast ones will be sent at the minimum MTU.  Multicast
818	 * packets will always be sent at the minimum MTU unless
819	 * IP6PO_MINMTU_DISABLE is explicitly specified.
820	 * See RFC 3542 for more details.
821	 */
822	if (mtu > IPV6_MMTU) {
823		if ((flags & IPV6_MINMTU))
824			mtu = IPV6_MMTU;
825		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
826			mtu = IPV6_MMTU;
827		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
828			 (opt == NULL ||
829			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
830			mtu = IPV6_MMTU;
831		}
832	}
833
834	/*
835	 * clear embedded scope identifiers if necessary.
836	 * in6_clearscope will touch the addresses only when necessary.
837	 */
838	in6_clearscope(&ip6->ip6_src);
839	in6_clearscope(&ip6->ip6_dst);
840
841	/*
842	 * Check with the firewall...
843	 */
844	if (ip6_fw_enable && ip6_fw_chk_ptr) {
845		u_short port = 0;
846		m->m_pkthdr.rcvif = NULL;	/* XXX */
847		/* If ipfw says divert, we have to just drop packet */
848		if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
849			m_freem(m);
850			goto done;
851		}
852		if (!m) {
853			error = EACCES;
854			goto done;
855		}
856	}
857
858	/*
859	 * If the outgoing packet contains a hop-by-hop options header,
860	 * it must be examined and processed even by the source node.
861	 * (RFC 2460, section 4.)
862	 */
863	if (exthdrs.ip6e_hbh) {
864		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
865		u_int32_t dummy; /* XXX unused */
866		u_int32_t plen = 0; /* XXX: ip6_process will check the value */
867
868#ifdef DIAGNOSTIC
869		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
870			panic("ip6e_hbh is not continuous");
871#endif
872		/*
873		 *  XXX: if we have to send an ICMPv6 error to the sender,
874		 *       we need the M_LOOP flag since icmp6_error() expects
875		 *       the IPv6 and the hop-by-hop options header are
876		 *       continuous unless the flag is set.
877		 */
878		m->m_flags |= M_LOOP;
879		m->m_pkthdr.rcvif = ifp;
880		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
881		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
882		    &dummy, &plen) < 0) {
883			/* m was already freed at this point */
884			error = EINVAL;/* better error? */
885			goto done;
886		}
887		m->m_flags &= ~M_LOOP; /* XXX */
888		m->m_pkthdr.rcvif = NULL;
889	}
890
891	/* Jump over all PFIL processing if hooks are not active. */
892	if (inet6_pfil_hook.ph_busy_count == -1)
893		goto passout;
894
895	odst = ip6->ip6_dst;
896	/* Run through list of hooks for output packets. */
897	error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
898	if (error != 0 || m == NULL)
899		goto done;
900	ip6 = mtod(m, struct ip6_hdr *);
901
902	/* See if destination IP address was changed by packet filter. */
903	if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
904		m->m_flags |= M_SKIP_FIREWALL;
905		/* If destination is now ourself drop to ip6_input(). */
906		if (in6_localaddr(&ip6->ip6_dst)) {
907			if (m->m_pkthdr.rcvif == NULL)
908				m->m_pkthdr.rcvif = loif;
909			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
910				m->m_pkthdr.csum_flags |=
911				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
912				m->m_pkthdr.csum_data = 0xffff;
913			}
914			m->m_pkthdr.csum_flags |=
915			    CSUM_IP_CHECKED | CSUM_IP_VALID;
916			error = netisr_queue(NETISR_IPV6, m);
917			goto done;
918		} else
919			goto again;	/* Redo the routing table lookup. */
920	}
921
922	/* XXX: IPFIREWALL_FORWARD */
923
924passout:
925	/*
926	 * Send the packet to the outgoing interface.
927	 * If necessary, do IPv6 fragmentation before sending.
928	 *
929	 * the logic here is rather complex:
930	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
931	 * 1-a:	send as is if tlen <= path mtu
932	 * 1-b:	fragment if tlen > path mtu
933	 *
934	 * 2: if user asks us not to fragment (dontfrag == 1)
935	 * 2-a:	send as is if tlen <= interface mtu
936	 * 2-b:	error if tlen > interface mtu
937	 *
938	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
939	 *	always fragment
940	 *
941	 * 4: if dontfrag == 1 && alwaysfrag == 1
942	 *	error, as we cannot handle this conflicting request
943	 */
944	tlen = m->m_pkthdr.len;
945
946	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
947		dontfrag = 1;
948	else
949		dontfrag = 0;
950	if (dontfrag && alwaysfrag) {	/* case 4 */
951		/* conflicting request - can't transmit */
952		error = EMSGSIZE;
953		goto bad;
954	}
955	if (dontfrag && tlen > IN6_LINKMTU(ifp)) {	/* case 2-b */
956		/*
957		 * Even if the DONTFRAG option is specified, we cannot send the
958		 * packet when the data length is larger than the MTU of the
959		 * outgoing interface.
960		 * Notify the error by sending IPV6_PATHMTU ancillary data as
961		 * well as returning an error code (the latter is not described
962		 * in the API spec.)
963		 */
964		u_int32_t mtu32;
965		struct ip6ctlparam ip6cp;
966
967		mtu32 = (u_int32_t)mtu;
968		bzero(&ip6cp, sizeof(ip6cp));
969		ip6cp.ip6c_cmdarg = (void *)&mtu32;
970		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
971		    (void *)&ip6cp);
972
973		error = EMSGSIZE;
974		goto bad;
975	}
976
977	/*
978	 * transmit packet without fragmentation
979	 */
980	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
981		struct in6_ifaddr *ia6;
982
983		ip6 = mtod(m, struct ip6_hdr *);
984		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
985		if (ia6) {
986			/* Record statistics for this interface address. */
987			ia6->ia_ifa.if_opackets++;
988			ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
989		}
990#ifdef IPSEC
991		/* clean ipsec history once it goes out of the node */
992		ipsec_delaux(m);
993#endif
994		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
995		goto done;
996	}
997
998	/*
999	 * try to fragment the packet.  case 1-b and 3
1000	 */
1001	if (mtu < IPV6_MMTU) {
1002		/* path MTU cannot be less than IPV6_MMTU */
1003		error = EMSGSIZE;
1004		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1005		goto bad;
1006	} else if (ip6->ip6_plen == 0) {
1007		/* jumbo payload cannot be fragmented */
1008		error = EMSGSIZE;
1009		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1010		goto bad;
1011	} else {
1012		struct mbuf **mnext, *m_frgpart;
1013		struct ip6_frag *ip6f;
1014		u_int32_t id = htonl(ip6_randomid());
1015		u_char nextproto;
1016#if 0
1017		struct ip6ctlparam ip6cp;
1018		u_int32_t mtu32;
1019#endif
1020		int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
1021
1022		/*
1023		 * Too large for the destination or interface;
1024		 * fragment if possible.
1025		 * Must be able to put at least 8 bytes per fragment.
1026		 */
1027		hlen = unfragpartlen;
1028		if (mtu > IPV6_MAXPACKET)
1029			mtu = IPV6_MAXPACKET;
1030
1031#if 0
1032		/*
1033		 * It is believed this code is a leftover from the
1034		 * development of the IPV6_RECVPATHMTU sockopt and
1035		 * associated work to implement RFC3542.
1036		 * It's not entirely clear what the intent of the API
1037		 * is at this point, so disable this code for now.
1038		 * The IPV6_RECVPATHMTU sockopt and/or IPV6_DONTFRAG
1039		 * will send notifications if the application requests.
1040		 */
1041
1042		/* Notify a proper path MTU to applications. */
1043		mtu32 = (u_int32_t)mtu;
1044		bzero(&ip6cp, sizeof(ip6cp));
1045		ip6cp.ip6c_cmdarg = (void *)&mtu32;
1046		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1047		    (void *)&ip6cp);
1048#endif
1049
1050		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1051		if (len < 8) {
1052			error = EMSGSIZE;
1053			in6_ifstat_inc(ifp, ifs6_out_fragfail);
1054			goto bad;
1055		}
1056
1057		/*
1058		 * Verify that we have any chance at all of being able to queue
1059		 *      the packet or packet fragments
1060		 */
1061		if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
1062		    < tlen  /* - hlen */)) {
1063			error = ENOBUFS;
1064			ip6stat.ip6s_odropped++;
1065			goto bad;
1066		}
1067
1068		mnext = &m->m_nextpkt;
1069
1070		/*
1071		 * Change the next header field of the last header in the
1072		 * unfragmentable part.
1073		 */
1074		if (exthdrs.ip6e_rthdr) {
1075			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1076			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1077		} else if (exthdrs.ip6e_dest1) {
1078			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1079			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1080		} else if (exthdrs.ip6e_hbh) {
1081			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1082			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1083		} else {
1084			nextproto = ip6->ip6_nxt;
1085			ip6->ip6_nxt = IPPROTO_FRAGMENT;
1086		}
1087
1088		/*
1089		 * Loop through length of segment after first fragment,
1090		 * make new header and copy data of each part and link onto
1091		 * chain.
1092		 */
1093		m0 = m;
1094		for (off = hlen; off < tlen; off += len) {
1095			MGETHDR(m, M_DONTWAIT, MT_HEADER);
1096			if (!m) {
1097				error = ENOBUFS;
1098				ip6stat.ip6s_odropped++;
1099				goto sendorfree;
1100			}
1101			m->m_pkthdr.rcvif = NULL;
1102			m->m_flags = m0->m_flags & M_COPYFLAGS;
1103			*mnext = m;
1104			mnext = &m->m_nextpkt;
1105			m->m_data += max_linkhdr;
1106			mhip6 = mtod(m, struct ip6_hdr *);
1107			*mhip6 = *ip6;
1108			m->m_len = sizeof(*mhip6);
1109			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1110			if (error) {
1111				ip6stat.ip6s_odropped++;
1112				goto sendorfree;
1113			}
1114			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1115			if (off + len >= tlen)
1116				len = tlen - off;
1117			else
1118				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1119			mhip6->ip6_plen = htons((u_short)(len + hlen +
1120			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1121			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1122				error = ENOBUFS;
1123				ip6stat.ip6s_odropped++;
1124				goto sendorfree;
1125			}
1126			m_cat(m, m_frgpart);
1127			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1128			m->m_pkthdr.rcvif = NULL;
1129			ip6f->ip6f_reserved = 0;
1130			ip6f->ip6f_ident = id;
1131			ip6f->ip6f_nxt = nextproto;
1132			ip6stat.ip6s_ofragments++;
1133			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1134		}
1135
1136		in6_ifstat_inc(ifp, ifs6_out_fragok);
1137	}
1138
1139	/*
1140	 * Remove leading garbages.
1141	 */
1142sendorfree:
1143	m = m0->m_nextpkt;
1144	m0->m_nextpkt = 0;
1145	m_freem(m0);
1146	for (m0 = m; m; m = m0) {
1147		m0 = m->m_nextpkt;
1148		m->m_nextpkt = 0;
1149		if (error == 0) {
1150 			/* Record statistics for this interface address. */
1151 			if (ia) {
1152 				ia->ia_ifa.if_opackets++;
1153 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1154 			}
1155#ifdef IPSEC
1156			/* clean ipsec history once it goes out of the node */
1157			ipsec_delaux(m);
1158#endif
1159			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1160		} else
1161			m_freem(m);
1162	}
1163
1164	if (error == 0)
1165		ip6stat.ip6s_fragmented++;
1166
1167done:
1168	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1169		RTFREE(ro->ro_rt);
1170	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1171		RTFREE(ro_pmtu->ro_rt);
1172	}
1173
1174#ifdef IPSEC
1175	if (sp != NULL)
1176		key_freesp(sp);
1177#endif /* IPSEC */
1178#ifdef FAST_IPSEC
1179	if (sp != NULL)
1180		KEY_FREESP(&sp);
1181#endif /* FAST_IPSEC */
1182
1183	return (error);
1184
1185freehdrs:
1186	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1187	m_freem(exthdrs.ip6e_dest1);
1188	m_freem(exthdrs.ip6e_rthdr);
1189	m_freem(exthdrs.ip6e_dest2);
1190	/* FALLTHROUGH */
1191bad:
1192	m_freem(m);
1193	goto done;
1194}
1195
1196static int
1197ip6_copyexthdr(mp, hdr, hlen)
1198	struct mbuf **mp;
1199	caddr_t hdr;
1200	int hlen;
1201{
1202	struct mbuf *m;
1203
1204	if (hlen > MCLBYTES)
1205		return (ENOBUFS); /* XXX */
1206
1207	MGET(m, M_DONTWAIT, MT_DATA);
1208	if (!m)
1209		return (ENOBUFS);
1210
1211	if (hlen > MLEN) {
1212		MCLGET(m, M_DONTWAIT);
1213		if ((m->m_flags & M_EXT) == 0) {
1214			m_free(m);
1215			return (ENOBUFS);
1216		}
1217	}
1218	m->m_len = hlen;
1219	if (hdr)
1220		bcopy(hdr, mtod(m, caddr_t), hlen);
1221
1222	*mp = m;
1223	return (0);
1224}
1225
1226/*
1227 * Insert jumbo payload option.
1228 */
1229static int
1230ip6_insert_jumboopt(exthdrs, plen)
1231	struct ip6_exthdrs *exthdrs;
1232	u_int32_t plen;
1233{
1234	struct mbuf *mopt;
1235	u_char *optbuf;
1236	u_int32_t v;
1237
1238#define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1239
1240	/*
1241	 * If there is no hop-by-hop options header, allocate new one.
1242	 * If there is one but it doesn't have enough space to store the
1243	 * jumbo payload option, allocate a cluster to store the whole options.
1244	 * Otherwise, use it to store the options.
1245	 */
1246	if (exthdrs->ip6e_hbh == 0) {
1247		MGET(mopt, M_DONTWAIT, MT_DATA);
1248		if (mopt == 0)
1249			return (ENOBUFS);
1250		mopt->m_len = JUMBOOPTLEN;
1251		optbuf = mtod(mopt, u_char *);
1252		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1253		exthdrs->ip6e_hbh = mopt;
1254	} else {
1255		struct ip6_hbh *hbh;
1256
1257		mopt = exthdrs->ip6e_hbh;
1258		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1259			/*
1260			 * XXX assumption:
1261			 * - exthdrs->ip6e_hbh is not referenced from places
1262			 *   other than exthdrs.
1263			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1264			 */
1265			int oldoptlen = mopt->m_len;
1266			struct mbuf *n;
1267
1268			/*
1269			 * XXX: give up if the whole (new) hbh header does
1270			 * not fit even in an mbuf cluster.
1271			 */
1272			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1273				return (ENOBUFS);
1274
1275			/*
1276			 * As a consequence, we must always prepare a cluster
1277			 * at this point.
1278			 */
1279			MGET(n, M_DONTWAIT, MT_DATA);
1280			if (n) {
1281				MCLGET(n, M_DONTWAIT);
1282				if ((n->m_flags & M_EXT) == 0) {
1283					m_freem(n);
1284					n = NULL;
1285				}
1286			}
1287			if (!n)
1288				return (ENOBUFS);
1289			n->m_len = oldoptlen + JUMBOOPTLEN;
1290			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1291			    oldoptlen);
1292			optbuf = mtod(n, caddr_t) + oldoptlen;
1293			m_freem(mopt);
1294			mopt = exthdrs->ip6e_hbh = n;
1295		} else {
1296			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1297			mopt->m_len += JUMBOOPTLEN;
1298		}
1299		optbuf[0] = IP6OPT_PADN;
1300		optbuf[1] = 1;
1301
1302		/*
1303		 * Adjust the header length according to the pad and
1304		 * the jumbo payload option.
1305		 */
1306		hbh = mtod(mopt, struct ip6_hbh *);
1307		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1308	}
1309
1310	/* fill in the option. */
1311	optbuf[2] = IP6OPT_JUMBO;
1312	optbuf[3] = 4;
1313	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1314	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1315
1316	/* finally, adjust the packet header length */
1317	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1318
1319	return (0);
1320#undef JUMBOOPTLEN
1321}
1322
1323/*
1324 * Insert fragment header and copy unfragmentable header portions.
1325 */
1326static int
1327ip6_insertfraghdr(m0, m, hlen, frghdrp)
1328	struct mbuf *m0, *m;
1329	int hlen;
1330	struct ip6_frag **frghdrp;
1331{
1332	struct mbuf *n, *mlast;
1333
1334	if (hlen > sizeof(struct ip6_hdr)) {
1335		n = m_copym(m0, sizeof(struct ip6_hdr),
1336		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1337		if (n == 0)
1338			return (ENOBUFS);
1339		m->m_next = n;
1340	} else
1341		n = m;
1342
1343	/* Search for the last mbuf of unfragmentable part. */
1344	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1345		;
1346
1347	if ((mlast->m_flags & M_EXT) == 0 &&
1348	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1349		/* use the trailing space of the last mbuf for the fragment hdr */
1350		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1351		    mlast->m_len);
1352		mlast->m_len += sizeof(struct ip6_frag);
1353		m->m_pkthdr.len += sizeof(struct ip6_frag);
1354	} else {
1355		/* allocate a new mbuf for the fragment header */
1356		struct mbuf *mfrg;
1357
1358		MGET(mfrg, M_DONTWAIT, MT_DATA);
1359		if (mfrg == 0)
1360			return (ENOBUFS);
1361		mfrg->m_len = sizeof(struct ip6_frag);
1362		*frghdrp = mtod(mfrg, struct ip6_frag *);
1363		mlast->m_next = mfrg;
1364	}
1365
1366	return (0);
1367}
1368
1369static int
1370ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
1371	struct route_in6 *ro_pmtu, *ro;
1372	struct ifnet *ifp;
1373	struct in6_addr *dst;
1374	u_long *mtup;
1375	int *alwaysfragp;
1376{
1377	u_int32_t mtu = 0;
1378	int alwaysfrag = 0;
1379	int error = 0;
1380
1381	if (ro_pmtu != ro) {
1382		/* The first hop and the final destination may differ. */
1383		struct sockaddr_in6 *sa6_dst =
1384		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1385		if (ro_pmtu->ro_rt &&
1386		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1387		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1388			RTFREE(ro_pmtu->ro_rt);
1389			ro_pmtu->ro_rt = (struct rtentry *)NULL;
1390		}
1391		if (ro_pmtu->ro_rt == NULL) {
1392			bzero(sa6_dst, sizeof(*sa6_dst));
1393			sa6_dst->sin6_family = AF_INET6;
1394			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1395			sa6_dst->sin6_addr = *dst;
1396
1397			rtalloc((struct route *)ro_pmtu);
1398		}
1399	}
1400	if (ro_pmtu->ro_rt) {
1401		u_int32_t ifmtu;
1402		struct in_conninfo inc;
1403
1404		bzero(&inc, sizeof(inc));
1405		inc.inc_flags = 1; /* IPv6 */
1406		inc.inc6_faddr = *dst;
1407
1408		if (ifp == NULL)
1409			ifp = ro_pmtu->ro_rt->rt_ifp;
1410		ifmtu = IN6_LINKMTU(ifp);
1411		mtu = tcp_hc_getmtu(&inc);
1412		if (mtu)
1413			mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
1414		else
1415			mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1416		if (mtu == 0)
1417			mtu = ifmtu;
1418		else if (mtu < IPV6_MMTU) {
1419			/*
1420			 * RFC2460 section 5, last paragraph:
1421			 * if we record ICMPv6 too big message with
1422			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1423			 * or smaller, with framgent header attached.
1424			 * (fragment header is needed regardless from the
1425			 * packet size, for translators to identify packets)
1426			 */
1427			alwaysfrag = 1;
1428			mtu = IPV6_MMTU;
1429		} else if (mtu > ifmtu) {
1430			/*
1431			 * The MTU on the route is larger than the MTU on
1432			 * the interface!  This shouldn't happen, unless the
1433			 * MTU of the interface has been changed after the
1434			 * interface was brought up.  Change the MTU in the
1435			 * route to match the interface MTU (as long as the
1436			 * field isn't locked).
1437			 */
1438			mtu = ifmtu;
1439			ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1440		}
1441	} else if (ifp) {
1442		mtu = IN6_LINKMTU(ifp);
1443	} else
1444		error = EHOSTUNREACH; /* XXX */
1445
1446	*mtup = mtu;
1447	if (alwaysfragp)
1448		*alwaysfragp = alwaysfrag;
1449	return (error);
1450}
1451
/*
 * IP6 socket option processing.
 *
 * Handles get (SOPT_GET) and set (SOPT_SET) requests at level
 * IPPROTO_IPV6 for the socket so.  sopt describes the request (level,
 * direction, option name, value buffer and requesting thread) and must
 * not be NULL; a NULL sopt panics.
 *
 * Returns 0 on success or an errno value: EINVAL for any level other
 * than IPPROTO_IPV6, ENOPROTOOPT for an option name that is not handled
 * in the requested direction, or the error produced by the handling of
 * the individual option.  A direction that is neither SOPT_SET nor
 * SOPT_GET falls through both cases and returns 0.
 */
int
ip6_ctloutput(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int privileged, optdatalen, uproto;
	void *optdata;
	struct inpcb *in6p = sotoinpcb(so);
	int error, optval;
	int level, op, optname;
	int optlen;
	struct thread *td;

	if (sopt) {
		level = sopt->sopt_level;
		op = sopt->sopt_dir;
		optname = sopt->sopt_name;
		optlen = sopt->sopt_valsize;
		td = sopt->sopt_td;
	} else {
		panic("ip6_ctloutput: arg soopt is NULL");
	}
	error = optval = 0;

	/*
	 * privileged is 1 only when a thread is attached to the request
	 * and suser() returns 0 for it.
	 */
	privileged = (td == 0 || suser(td)) ? 0 : 1;
	uproto = (int)so->so_proto->pr_protocol;

	if (level == IPPROTO_IPV6) {
		switch (op) {

		case SOPT_SET:
			switch (optname) {
			case IPV6_2292PKTOPTIONS:
#ifdef IPV6_PKTOPTIONS
			case IPV6_PKTOPTIONS:
#endif
			{
				struct mbuf *m;

				/*
				 * Copy the user-supplied options into an
				 * mbuf and let ip6_pcbopts() install them
				 * as the socket's output options.
				 */
				error = soopt_getm(sopt, &m); /* XXX */
				if (error != 0)
					break;
				error = soopt_mcopyin(sopt, m); /* XXX */
				if (error != 0)
					break;
				error = ip6_pcbopts(&in6p->in6p_outputopts,
						    m, so, sopt);
				m_freem(m); /* XXX */
				break;
			}

			/*
			 * Use of some Hop-by-Hop options or some
			 * Destination options, might require special
			 * privilege.  That is, normal applications
			 * (without special privilege) might be forbidden
			 * from setting certain options in outgoing packets,
			 * and might never see certain options in received
			 * packets. [RFC 2292 Section 6]
			 * KAME specific note:
			 *  KAME prevents non-privileged users from sending or
			 *  receiving ANY hbh/dst options in order to avoid
			 *  overhead of parsing options in the kernel.
			 */
			case IPV6_RECVHOPOPTS:
			case IPV6_RECVDSTOPTS:
			case IPV6_RECVRTHDRDSTOPTS:
				if (!privileged) {
					error = EPERM;
					break;
				}
				/* FALLTHROUGH */
			case IPV6_UNICAST_HOPS:
			case IPV6_HOPLIMIT:
			case IPV6_FAITH:

			case IPV6_RECVPKTINFO:
			case IPV6_RECVHOPLIMIT:
			case IPV6_RECVRTHDR:
			case IPV6_RECVPATHMTU:
			case IPV6_RECVTCLASS:
			case IPV6_V6ONLY:
			case IPV6_AUTOFLOWLABEL:
				/* All of these take exactly one int. */
				if (optlen != sizeof(int)) {
					error = EINVAL;
					break;
				}
				error = sooptcopyin(sopt, &optval,
					sizeof optval, sizeof optval);
				if (error)
					break;
				switch (optname) {

				case IPV6_UNICAST_HOPS:
					if (optval < -1 || optval >= 256)
						error = EINVAL;
					else {
						/* -1 = kernel default */
						in6p->in6p_hops = optval;
						if ((in6p->in6p_vflag &
						     INP_IPV4) != 0)
							in6p->inp_ip_ttl = optval;
					}
					break;
/*
 * Helpers for the flag-style options below.  OPTSET sets or clears
 * (bit) in in6p_flags according to optval; OPTSET2292 does the same and
 * additionally marks the pcb with IN6P_RFC2292; OPTBIT yields 1 when
 * any bit of (bit) is set and 0 otherwise.
 */
#define OPTSET(bit) \
do { \
	if (optval) \
		in6p->in6p_flags |= (bit); \
	else \
		in6p->in6p_flags &= ~(bit); \
} while (/*CONSTCOND*/ 0)
#define OPTSET2292(bit) \
do { \
	in6p->in6p_flags |= IN6P_RFC2292; \
	if (optval) \
		in6p->in6p_flags |= (bit); \
	else \
		in6p->in6p_flags &= ~(bit); \
} while (/*CONSTCOND*/ 0)
#define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)

				case IPV6_RECVPKTINFO:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_PKTINFO);
					break;

				case IPV6_HOPLIMIT:
				{
					struct ip6_pktopts **optp;

					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					optp = &in6p->in6p_outputopts;
					error = ip6_pcbopt(IPV6_HOPLIMIT,
							   (u_char *)&optval,
							   sizeof(optval),
							   optp,
							   privileged, uproto);
					break;
				}

				case IPV6_RECVHOPLIMIT:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_HOPLIMIT);
					break;

				case IPV6_RECVHOPOPTS:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_HOPOPTS);
					break;

				case IPV6_RECVDSTOPTS:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_DSTOPTS);
					break;

				case IPV6_RECVRTHDRDSTOPTS:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_RTHDRDSTOPTS);
					break;

				case IPV6_RECVRTHDR:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_RTHDR);
					break;

				case IPV6_FAITH:
					OPTSET(IN6P_FAITH);
					break;

				case IPV6_RECVPATHMTU:
					/*
					 * We ignore this option for TCP
					 * sockets.
					 * (RFC3542 leaves this case
					 * unspecified.)
					 */
					if (uproto != IPPROTO_TCP)
						OPTSET(IN6P_MTU);
					break;

				case IPV6_V6ONLY:
					/*
					 * make setsockopt(IPV6_V6ONLY)
					 * available only prior to bind(2).
					 * see ipng mailing list, Jun 22 2001.
					 */
					if (in6p->in6p_lport ||
					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_IPV6_V6ONLY);
					/* Keep the pcb's IPv4 flag in step. */
					if (optval)
						in6p->in6p_vflag &= ~INP_IPV4;
					else
						in6p->in6p_vflag |= INP_IPV4;
					break;
				case IPV6_RECVTCLASS:
					/* cannot mix with RFC2292 XXX */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_TCLASS);
					break;
				case IPV6_AUTOFLOWLABEL:
					OPTSET(IN6P_AUTOFLOWLABEL);
					break;

				}
				break;

			case IPV6_TCLASS:
			case IPV6_DONTFRAG:
			case IPV6_USE_MIN_MTU:
			case IPV6_PREFER_TEMPADDR:
				/* int-valued options stored via ip6_pcbopt(). */
				if (optlen != sizeof(optval)) {
					error = EINVAL;
					break;
				}
				error = sooptcopyin(sopt, &optval,
					sizeof optval, sizeof optval);
				if (error)
					break;
				{
					struct ip6_pktopts **optp;
					optp = &in6p->in6p_outputopts;
					error = ip6_pcbopt(optname,
							   (u_char *)&optval,
							   sizeof(optval),
							   optp,
							   privileged, uproto);
					break;
				}

			case IPV6_2292PKTINFO:
			case IPV6_2292HOPLIMIT:
			case IPV6_2292HOPOPTS:
			case IPV6_2292DSTOPTS:
			case IPV6_2292RTHDR:
				/* RFC 2292 */
				if (optlen != sizeof(int)) {
					error = EINVAL;
					break;
				}
				error = sooptcopyin(sopt, &optval,
					sizeof optval, sizeof optval);
				if (error)
					break;
				switch (optname) {
				case IPV6_2292PKTINFO:
					OPTSET2292(IN6P_PKTINFO);
					break;
				case IPV6_2292HOPLIMIT:
					OPTSET2292(IN6P_HOPLIMIT);
					break;
				case IPV6_2292HOPOPTS:
					/*
					 * Check super-user privilege.
					 * See comments for IPV6_RECVHOPOPTS.
					 */
					if (!privileged)
						return (EPERM);
					OPTSET2292(IN6P_HOPOPTS);
					break;
				case IPV6_2292DSTOPTS:
					if (!privileged)
						return (EPERM);
					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
					break;
				case IPV6_2292RTHDR:
					OPTSET2292(IN6P_RTHDR);
					break;
				}
				break;
			case IPV6_PKTINFO:
			case IPV6_HOPOPTS:
			case IPV6_RTHDR:
			case IPV6_DSTOPTS:
			case IPV6_RTHDRDSTOPTS:
			case IPV6_NEXTHOP:
			{
				/* new advanced API (RFC3542) */
				u_char *optbuf;
				u_char optbuf_storage[MCLBYTES];
				/* NOTE: shadows the function-level optlen. */
				int optlen;
				struct ip6_pktopts **optp;

				/* cannot mix with RFC2292 */
				if (OPTBIT(IN6P_RFC2292)) {
					error = EINVAL;
					break;
				}

				/*
				 * We only ensure valsize is not too large
				 * here.  Further validation will be done
				 * later.
				 */
				error = sooptcopyin(sopt, optbuf_storage,
				    sizeof(optbuf_storage), 0);
				if (error)
					break;
				optlen = sopt->sopt_valsize;
				optbuf = optbuf_storage;
				optp = &in6p->in6p_outputopts;
				error = ip6_pcbopt(optname,
						   optbuf, optlen,
						   optp, privileged, uproto);
				break;
			}
/*
 * Only OPTSET is retired here; OPTSET2292 and OPTBIT stay defined, and
 * OPTBIT is used again by the SOPT_GET cases below.
 */
#undef OPTSET

			case IPV6_MULTICAST_IF:
			case IPV6_MULTICAST_HOPS:
			case IPV6_MULTICAST_LOOP:
			case IPV6_JOIN_GROUP:
			case IPV6_LEAVE_GROUP:
			    {
				if (sopt->sopt_valsize > MLEN) {
					error = EMSGSIZE;
					break;
				}
				/* XXX */
			    }
			    /* FALLTHROUGH */
			    {
				struct mbuf *m;

				/*
				 * NOTE(review): values larger than MLEN were
				 * already rejected just above, so the
				 * MCLGET() branch below cannot be reached,
				 * and the MCLBYTES check looks unreachable
				 * too (assuming MLEN <= MCLBYTES).
				 */
				if (sopt->sopt_valsize > MCLBYTES) {
					error = EMSGSIZE;
					break;
				}
				/* XXX */
				MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
				if (m == 0) {
					error = ENOBUFS;
					break;
				}
				if (sopt->sopt_valsize > MLEN) {
					MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
					if ((m->m_flags & M_EXT) == 0) {
						m_free(m);
						error = ENOBUFS;
						break;
					}
				}
				m->m_len = sopt->sopt_valsize;
				error = sooptcopyin(sopt, mtod(m, char *),
						    m->m_len, m->m_len);
				if (error) {
					(void)m_free(m);
					break;
				}
				error =	ip6_setmoptions(sopt->sopt_name,
							&in6p->in6p_moptions,
							m);
				(void)m_free(m);
			    }
				break;

			case IPV6_PORTRANGE:
				error = sooptcopyin(sopt, &optval,
				    sizeof optval, sizeof optval);
				if (error)
					break;

				/* IN6P_LOWPORT and IN6P_HIGHPORT are never both set. */
				switch (optval) {
				case IPV6_PORTRANGE_DEFAULT:
					in6p->in6p_flags &= ~(IN6P_LOWPORT);
					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
					break;

				case IPV6_PORTRANGE_HIGH:
					in6p->in6p_flags &= ~(IN6P_LOWPORT);
					in6p->in6p_flags |= IN6P_HIGHPORT;
					break;

				case IPV6_PORTRANGE_LOW:
					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
					in6p->in6p_flags |= IN6P_LOWPORT;
					break;

				default:
					error = EINVAL;
					break;
				}
				break;

#if defined(IPSEC) || defined(FAST_IPSEC)
			case IPV6_IPSEC_POLICY:
			    {
				caddr_t req = NULL;
				size_t len = 0;
				struct mbuf *m;

				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
					break;
				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
					break;
				if (m) {
					req = mtod(m, caddr_t);
					len = m->m_len;
				}
				error = ipsec6_set_policy(in6p, optname, req,
							  len, privileged);
				m_freem(m);
			    }
				break;
#endif /* KAME IPSEC */

			case IPV6_FW_ADD:
			case IPV6_FW_DEL:
			case IPV6_FW_FLUSH:
			case IPV6_FW_ZERO:
			    {
				struct mbuf *m;
				struct mbuf **mp = &m;

				/* No firewall control hook is registered. */
				if (ip6_fw_ctl_ptr == NULL)
					return EINVAL;
				/* XXX */
				if ((error = soopt_getm(sopt, &m)) != 0)
					break;
				/* XXX */
				if ((error = soopt_mcopyin(sopt, m)) != 0)
					break;
				/*
				 * NOTE(review): the mbuf is not freed here;
				 * presumably the hook takes ownership of it
				 * -- confirm against the ip6fw code.
				 */
				error = (*ip6_fw_ctl_ptr)(optname, mp);
				m = *mp;
			    }
				break;

			default:
				error = ENOPROTOOPT;
				break;
			}
			break;

		case SOPT_GET:
			switch (optname) {

			case IPV6_2292PKTOPTIONS:
#ifdef IPV6_PKTOPTIONS
			case IPV6_PKTOPTIONS:
#endif
				/*
				 * RFC3542 (effectively) deprecated the
				 * semantics of the 2292-style pktoptions.
				 * Since it was not reliable in nature (i.e.,
				 * applications had to expect the lack of some
				 * information after all), it would make sense
				 * to simplify this part by always returning
				 * empty data.
				 */
				sopt->sopt_valsize = 0;
				break;

			case IPV6_RECVHOPOPTS:
			case IPV6_RECVDSTOPTS:
			case IPV6_RECVRTHDRDSTOPTS:
			case IPV6_UNICAST_HOPS:
			case IPV6_RECVPKTINFO:
			case IPV6_RECVHOPLIMIT:
			case IPV6_RECVRTHDR:
			case IPV6_RECVPATHMTU:

			case IPV6_FAITH:
			case IPV6_V6ONLY:
			case IPV6_PORTRANGE:
			case IPV6_RECVTCLASS:
			case IPV6_AUTOFLOWLABEL:
				/* Compute an int in optval, then copy it out. */
				switch (optname) {

				case IPV6_RECVHOPOPTS:
					optval = OPTBIT(IN6P_HOPOPTS);
					break;

				case IPV6_RECVDSTOPTS:
					optval = OPTBIT(IN6P_DSTOPTS);
					break;

				case IPV6_RECVRTHDRDSTOPTS:
					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
					break;

				case IPV6_UNICAST_HOPS:
					optval = in6p->in6p_hops;
					break;

				case IPV6_RECVPKTINFO:
					optval = OPTBIT(IN6P_PKTINFO);
					break;

				case IPV6_RECVHOPLIMIT:
					optval = OPTBIT(IN6P_HOPLIMIT);
					break;

				case IPV6_RECVRTHDR:
					optval = OPTBIT(IN6P_RTHDR);
					break;

				case IPV6_RECVPATHMTU:
					optval = OPTBIT(IN6P_MTU);
					break;

				case IPV6_FAITH:
					optval = OPTBIT(IN6P_FAITH);
					break;

				case IPV6_V6ONLY:
					optval = OPTBIT(IN6P_IPV6_V6ONLY);
					break;

				case IPV6_PORTRANGE:
				    {
					int flags;
					flags = in6p->in6p_flags;
					if (flags & IN6P_HIGHPORT)
						optval = IPV6_PORTRANGE_HIGH;
					else if (flags & IN6P_LOWPORT)
						optval = IPV6_PORTRANGE_LOW;
					else
						optval = 0;
					break;
				    }
				case IPV6_RECVTCLASS:
					optval = OPTBIT(IN6P_TCLASS);
					break;

				case IPV6_AUTOFLOWLABEL:
					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
					break;
				}
				/*
				 * NOTE(review): nothing in the switch above
				 * assigns error, so this check looks
				 * redundant.
				 */
				if (error)
					break;
				error = sooptcopyout(sopt, &optval,
					sizeof optval);
				break;

			case IPV6_PATHMTU:
			{
				u_long pmtu = 0;
				struct ip6_mtuinfo mtuinfo;
				struct route_in6 sro;

				bzero(&sro, sizeof(sro));

				if (!(so->so_state & SS_ISCONNECTED))
					return (ENOTCONN);
				/*
				 * XXX: we dot not consider the case of source
				 * routing, or optional information to specify
				 * the outgoing interface.
				 */
				error = ip6_getpmtu(&sro, NULL, NULL,
				    &in6p->in6p_faddr, &pmtu, NULL);
				/* Release the route the lookup may have left in sro. */
				if (sro.ro_rt)
					RTFREE(sro.ro_rt);
				if (error)
					break;
				if (pmtu > IPV6_MAXPACKET)
					pmtu = IPV6_MAXPACKET;

				/* Only ip6m_mtu is filled in; the rest stays zero. */
				bzero(&mtuinfo, sizeof(mtuinfo));
				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
				optdata = (void *)&mtuinfo;
				optdatalen = sizeof(mtuinfo);
				error = sooptcopyout(sopt, optdata,
				    optdatalen);
				break;
			}

			case IPV6_2292PKTINFO:
			case IPV6_2292HOPLIMIT:
			case IPV6_2292HOPOPTS:
			case IPV6_2292RTHDR:
			case IPV6_2292DSTOPTS:
				switch (optname) {
				case IPV6_2292PKTINFO:
					optval = OPTBIT(IN6P_PKTINFO);
					break;
				case IPV6_2292HOPLIMIT:
					optval = OPTBIT(IN6P_HOPLIMIT);
					break;
				case IPV6_2292HOPOPTS:
					optval = OPTBIT(IN6P_HOPOPTS);
					break;
				case IPV6_2292RTHDR:
					optval = OPTBIT(IN6P_RTHDR);
					break;
				case IPV6_2292DSTOPTS:
					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
					break;
				}
				error = sooptcopyout(sopt, &optval,
				    sizeof optval);
				break;
			case IPV6_PKTINFO:
			case IPV6_HOPOPTS:
			case IPV6_RTHDR:
			case IPV6_DSTOPTS:
			case IPV6_RTHDRDSTOPTS:
			case IPV6_NEXTHOP:
			case IPV6_TCLASS:
			case IPV6_DONTFRAG:
			case IPV6_USE_MIN_MTU:
			case IPV6_PREFER_TEMPADDR:
				error = ip6_getpcbopt(in6p->in6p_outputopts,
				    optname, sopt);
				break;

			case IPV6_MULTICAST_IF:
			case IPV6_MULTICAST_HOPS:
			case IPV6_MULTICAST_LOOP:
			case IPV6_JOIN_GROUP:
			case IPV6_LEAVE_GROUP:
			    {
				struct mbuf *m;
				/*
				 * NOTE(review): m is freed even when
				 * ip6_getmoptions() fails, so this relies on
				 * that function always storing a valid (or
				 * NULL) mbuf pointer in m -- confirm.
				 */
				error = ip6_getmoptions(sopt->sopt_name,
				    in6p->in6p_moptions, &m);
				if (error == 0)
					error = sooptcopyout(sopt,
					    mtod(m, char *), m->m_len);
				m_freem(m);
			    }
				break;

#if defined(IPSEC) || defined(FAST_IPSEC)
			case IPV6_IPSEC_POLICY:
			  {
				caddr_t req = NULL;
				size_t len = 0;
				struct mbuf *m = NULL;
				struct mbuf **mp = &m;
				/*
				 * Remember the caller's buffer so it can be
				 * restored after the copy-in below and then
				 * reused for the copy-out.
				 */
				size_t ovalsize = sopt->sopt_valsize;
				caddr_t oval = (caddr_t)sopt->sopt_val;

				error = soopt_getm(sopt, &m); /* XXX */
				if (error != 0)
					break;
				error = soopt_mcopyin(sopt, m); /* XXX */
				if (error != 0)
					break;
				sopt->sopt_valsize = ovalsize;
				sopt->sopt_val = oval;
				if (m) {
					req = mtod(m, caddr_t);
					len = m->m_len;
				}
				error = ipsec6_get_policy(in6p, req, len, mp);
				if (error == 0)
					error = soopt_mcopyout(sopt, m); /* XXX */
				if (error == 0 && m)
					m_freem(m);
				break;
			  }
#endif /* KAME IPSEC */

			case IPV6_FW_GET:
			  {
				struct mbuf *m;
				struct mbuf **mp = &m;

				/* No firewall control hook is registered. */
				if (ip6_fw_ctl_ptr == NULL)
			        {
					return EINVAL;
				}
				error = (*ip6_fw_ctl_ptr)(optname, mp);
				if (error == 0)
					error = soopt_mcopyout(sopt, m); /* XXX */
				if (error == 0 && m)
					m_freem(m);
			  }
				break;

			default:
				error = ENOPROTOOPT;
				break;
			}
			break;
		}
	} else {		/* level != IPPROTO_IPV6 */
		error = EINVAL;
	}
	return (error);
}
2169
2170int
2171ip6_raw_ctloutput(so, sopt)
2172	struct socket *so;
2173	struct sockopt *sopt;
2174{
2175	int error = 0, optval, optlen;
2176	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2177	struct in6pcb *in6p = sotoin6pcb(so);
2178	int level, op, optname;
2179
2180	if (sopt) {
2181		level = sopt->sopt_level;
2182		op = sopt->sopt_dir;
2183		optname = sopt->sopt_name;
2184		optlen = sopt->sopt_valsize;
2185	} else
2186		panic("ip6_raw_ctloutput: arg soopt is NULL");
2187
2188	if (level != IPPROTO_IPV6) {
2189		return (EINVAL);
2190	}
2191
2192	switch (optname) {
2193	case IPV6_CHECKSUM:
2194		/*
2195		 * For ICMPv6 sockets, no modification allowed for checksum
2196		 * offset, permit "no change" values to help existing apps.
2197		 *
2198		 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2199		 * for an ICMPv6 socket will fail."
2200		 * The current behavior does not meet RFC3542.
2201		 */
2202		switch (op) {
2203		case SOPT_SET:
2204			if (optlen != sizeof(int)) {
2205				error = EINVAL;
2206				break;
2207			}
2208			error = sooptcopyin(sopt, &optval, sizeof(optval),
2209					    sizeof(optval));
2210			if (error)
2211				break;
2212			if ((optval % 2) != 0) {
2213				/* the API assumes even offset values */
2214				error = EINVAL;
2215			} else if (so->so_proto->pr_protocol ==
2216			    IPPROTO_ICMPV6) {
2217				if (optval != icmp6off)
2218					error = EINVAL;
2219			} else
2220				in6p->in6p_cksum = optval;
2221			break;
2222
2223		case SOPT_GET:
2224			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2225				optval = icmp6off;
2226			else
2227				optval = in6p->in6p_cksum;
2228
2229			error = sooptcopyout(sopt, &optval, sizeof(optval));
2230			break;
2231
2232		default:
2233			error = EINVAL;
2234			break;
2235		}
2236		break;
2237
2238	default:
2239		error = ENOPROTOOPT;
2240		break;
2241	}
2242
2243	return (error);
2244}
2245
2246/*
2247 * Set up IP6 options in pcb for insertion in output packets or
2248 * specifying behavior of outgoing packets.
2249 */
2250static int
2251ip6_pcbopts(pktopt, m, so, sopt)
2252	struct ip6_pktopts **pktopt;
2253	struct mbuf *m;
2254	struct socket *so;
2255	struct sockopt *sopt;
2256{
2257	struct ip6_pktopts *opt = *pktopt;
2258	int error = 0;
2259	struct thread *td = sopt->sopt_td;
2260	int priv = 0;
2261
2262	/* turn off any old options. */
2263	if (opt) {
2264#ifdef DIAGNOSTIC
2265		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2266		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2267		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2268			printf("ip6_pcbopts: all specified options are cleared.\n");
2269#endif
2270		ip6_clearpktopts(opt, -1);
2271	} else
2272		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2273	*pktopt = NULL;
2274
2275	if (!m || m->m_len == 0) {
2276		/*
2277		 * Only turning off any previous options, regardless of
2278		 * whether the opt is just created or given.
2279		 */
2280		free(opt, M_IP6OPT);
2281		return (0);
2282	}
2283
2284	/*  set options specified by user. */
2285	if (td && !suser(td))
2286		priv = 1;
2287	if ((error = ip6_setpktopts(m, opt, NULL, priv,
2288	    so->so_proto->pr_protocol)) != 0) {
2289		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2290		free(opt, M_IP6OPT);
2291		return (error);
2292	}
2293	*pktopt = opt;
2294	return (0);
2295}
2296
2297/*
2298 * initialize ip6_pktopts.  beware that there are non-zero default values in
2299 * the struct.
2300 */
2301void
2302ip6_initpktopts(opt)
2303	struct ip6_pktopts *opt;
2304{
2305
2306	bzero(opt, sizeof(*opt));
2307	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2308	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2309	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2310	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2311}
2312
2313static int
2314ip6_pcbopt(optname, buf, len, pktopt, priv, uproto)
2315	int optname, len, priv;
2316	u_char *buf;
2317	struct ip6_pktopts **pktopt;
2318	int uproto;
2319{
2320	struct ip6_pktopts *opt;
2321
2322	if (*pktopt == NULL) {
2323		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2324		    M_WAITOK);
2325		ip6_initpktopts(*pktopt);
2326	}
2327	opt = *pktopt;
2328
2329	return (ip6_setpktopt(optname, buf, len, opt, priv, 1, 0, uproto));
2330}
2331
2332static int
2333ip6_getpcbopt(pktopt, optname, sopt)
2334	struct ip6_pktopts *pktopt;
2335	struct sockopt *sopt;
2336	int optname;
2337{
2338	void *optdata = NULL;
2339	int optdatalen = 0;
2340	struct ip6_ext *ip6e;
2341	int error = 0;
2342	struct in6_pktinfo null_pktinfo;
2343	int deftclass = 0, on;
2344	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2345	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2346
2347	switch (optname) {
2348	case IPV6_PKTINFO:
2349		if (pktopt && pktopt->ip6po_pktinfo)
2350			optdata = (void *)pktopt->ip6po_pktinfo;
2351		else {
2352			/* XXX: we don't have to do this every time... */
2353			bzero(&null_pktinfo, sizeof(null_pktinfo));
2354			optdata = (void *)&null_pktinfo;
2355		}
2356		optdatalen = sizeof(struct in6_pktinfo);
2357		break;
2358	case IPV6_TCLASS:
2359		if (pktopt && pktopt->ip6po_tclass >= 0)
2360			optdata = (void *)&pktopt->ip6po_tclass;
2361		else
2362			optdata = (void *)&deftclass;
2363		optdatalen = sizeof(int);
2364		break;
2365	case IPV6_HOPOPTS:
2366		if (pktopt && pktopt->ip6po_hbh) {
2367			optdata = (void *)pktopt->ip6po_hbh;
2368			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2369			optdatalen = (ip6e->ip6e_len + 1) << 3;
2370		}
2371		break;
2372	case IPV6_RTHDR:
2373		if (pktopt && pktopt->ip6po_rthdr) {
2374			optdata = (void *)pktopt->ip6po_rthdr;
2375			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2376			optdatalen = (ip6e->ip6e_len + 1) << 3;
2377		}
2378		break;
2379	case IPV6_RTHDRDSTOPTS:
2380		if (pktopt && pktopt->ip6po_dest1) {
2381			optdata = (void *)pktopt->ip6po_dest1;
2382			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2383			optdatalen = (ip6e->ip6e_len + 1) << 3;
2384		}
2385		break;
2386	case IPV6_DSTOPTS:
2387		if (pktopt && pktopt->ip6po_dest2) {
2388			optdata = (void *)pktopt->ip6po_dest2;
2389			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2390			optdatalen = (ip6e->ip6e_len + 1) << 3;
2391		}
2392		break;
2393	case IPV6_NEXTHOP:
2394		if (pktopt && pktopt->ip6po_nexthop) {
2395			optdata = (void *)pktopt->ip6po_nexthop;
2396			optdatalen = pktopt->ip6po_nexthop->sa_len;
2397		}
2398		break;
2399	case IPV6_USE_MIN_MTU:
2400		if (pktopt)
2401			optdata = (void *)&pktopt->ip6po_minmtu;
2402		else
2403			optdata = (void *)&defminmtu;
2404		optdatalen = sizeof(int);
2405		break;
2406	case IPV6_DONTFRAG:
2407		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2408			on = 1;
2409		else
2410			on = 0;
2411		optdata = (void *)&on;
2412		optdatalen = sizeof(on);
2413		break;
2414	case IPV6_PREFER_TEMPADDR:
2415		if (pktopt)
2416			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2417		else
2418			optdata = (void *)&defpreftemp;
2419		optdatalen = sizeof(int);
2420		break;
2421	default:		/* should not happen */
2422#ifdef DIAGNOSTIC
2423		panic("ip6_getpcbopt: unexpected option\n");
2424#endif
2425		return (ENOPROTOOPT);
2426	}
2427
2428	error = sooptcopyout(sopt, optdata, optdatalen);
2429
2430	return (error);
2431}
2432
2433void
2434ip6_clearpktopts(pktopt, optname)
2435	struct ip6_pktopts *pktopt;
2436	int optname;
2437{
2438	if (pktopt == NULL)
2439		return;
2440
2441	if (optname == -1 || optname == IPV6_PKTINFO) {
2442		if (pktopt->ip6po_pktinfo)
2443			free(pktopt->ip6po_pktinfo, M_IP6OPT);
2444		pktopt->ip6po_pktinfo = NULL;
2445	}
2446	if (optname == -1 || optname == IPV6_HOPLIMIT)
2447		pktopt->ip6po_hlim = -1;
2448	if (optname == -1 || optname == IPV6_TCLASS)
2449		pktopt->ip6po_tclass = -1;
2450	if (optname == -1 || optname == IPV6_NEXTHOP) {
2451		if (pktopt->ip6po_nextroute.ro_rt) {
2452			RTFREE(pktopt->ip6po_nextroute.ro_rt);
2453			pktopt->ip6po_nextroute.ro_rt = NULL;
2454		}
2455		if (pktopt->ip6po_nexthop)
2456			free(pktopt->ip6po_nexthop, M_IP6OPT);
2457		pktopt->ip6po_nexthop = NULL;
2458	}
2459	if (optname == -1 || optname == IPV6_HOPOPTS) {
2460		if (pktopt->ip6po_hbh)
2461			free(pktopt->ip6po_hbh, M_IP6OPT);
2462		pktopt->ip6po_hbh = NULL;
2463	}
2464	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2465		if (pktopt->ip6po_dest1)
2466			free(pktopt->ip6po_dest1, M_IP6OPT);
2467		pktopt->ip6po_dest1 = NULL;
2468	}
2469	if (optname == -1 || optname == IPV6_RTHDR) {
2470		if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2471			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2472		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2473		if (pktopt->ip6po_route.ro_rt) {
2474			RTFREE(pktopt->ip6po_route.ro_rt);
2475			pktopt->ip6po_route.ro_rt = NULL;
2476		}
2477	}
2478	if (optname == -1 || optname == IPV6_DSTOPTS) {
2479		if (pktopt->ip6po_dest2)
2480			free(pktopt->ip6po_dest2, M_IP6OPT);
2481		pktopt->ip6po_dest2 = NULL;
2482	}
2483}
2484
2485#define PKTOPT_EXTHDRCPY(type) \
2486do {\
2487	if (src->type) {\
2488		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2489		dst->type = malloc(hlen, M_IP6OPT, canwait);\
2490		if (dst->type == NULL && canwait == M_NOWAIT)\
2491			goto bad;\
2492		bcopy(src->type, dst->type, hlen);\
2493	}\
2494} while (/*CONSTCOND*/ 0)
2495
2496static int
2497copypktopts(dst, src, canwait)
2498	struct ip6_pktopts *dst, *src;
2499	int canwait;
2500{
2501	if (dst == NULL || src == NULL)  {
2502		printf("ip6_clearpktopts: invalid argument\n");
2503		return (EINVAL);
2504	}
2505
2506	dst->ip6po_hlim = src->ip6po_hlim;
2507	dst->ip6po_tclass = src->ip6po_tclass;
2508	dst->ip6po_flags = src->ip6po_flags;
2509	if (src->ip6po_pktinfo) {
2510		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2511		    M_IP6OPT, canwait);
2512		if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2513			goto bad;
2514		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2515	}
2516	if (src->ip6po_nexthop) {
2517		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2518		    M_IP6OPT, canwait);
2519		if (dst->ip6po_nexthop == NULL)
2520			goto bad;
2521		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2522		    src->ip6po_nexthop->sa_len);
2523	}
2524	PKTOPT_EXTHDRCPY(ip6po_hbh);
2525	PKTOPT_EXTHDRCPY(ip6po_dest1);
2526	PKTOPT_EXTHDRCPY(ip6po_dest2);
2527	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2528	return (0);
2529
2530  bad:
2531	if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2532	if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2533	if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2534	if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2535	if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2536	if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2537	return (ENOBUFS);
2538}
2539#undef PKTOPT_EXTHDRCPY
2540
2541struct ip6_pktopts *
2542ip6_copypktopts(src, canwait)
2543	struct ip6_pktopts *src;
2544	int canwait;
2545{
2546	int error;
2547	struct ip6_pktopts *dst;
2548
2549	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2550	if (dst == NULL && canwait == M_NOWAIT)
2551		return (NULL);
2552	ip6_initpktopts(dst);
2553
2554	if ((error = copypktopts(dst, src, canwait)) != 0) {
2555		free(dst, M_IP6OPT);
2556		return (NULL);
2557	}
2558
2559	return (dst);
2560}
2561
2562void
2563ip6_freepcbopts(pktopt)
2564	struct ip6_pktopts *pktopt;
2565{
2566	if (pktopt == NULL)
2567		return;
2568
2569	ip6_clearpktopts(pktopt, -1);
2570
2571	free(pktopt, M_IP6OPT);
2572}
2573
2574/*
2575 * Set the IP6 multicast options in response to user setsockopt().
2576 */
2577static int
2578ip6_setmoptions(optname, im6op, m)
2579	int optname;
2580	struct ip6_moptions **im6op;
2581	struct mbuf *m;
2582{
2583	int error = 0;
2584	u_int loop, ifindex;
2585	struct ipv6_mreq *mreq;
2586	struct ifnet *ifp;
2587	struct ip6_moptions *im6o = *im6op;
2588	struct route_in6 ro;
2589	struct in6_multi_mship *imm;
2590	struct thread *td = curthread;
2591
2592	if (im6o == NULL) {
2593		/*
2594		 * No multicast option buffer attached to the pcb;
2595		 * allocate one and initialize to default values.
2596		 */
2597		im6o = (struct ip6_moptions *)
2598			malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
2599
2600		if (im6o == NULL)
2601			return (ENOBUFS);
2602		*im6op = im6o;
2603		im6o->im6o_multicast_ifp = NULL;
2604		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2605		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2606		LIST_INIT(&im6o->im6o_memberships);
2607	}
2608
2609	switch (optname) {
2610
2611	case IPV6_MULTICAST_IF:
2612		/*
2613		 * Select the interface for outgoing multicast packets.
2614		 */
2615		if (m == NULL || m->m_len != sizeof(u_int)) {
2616			error = EINVAL;
2617			break;
2618		}
2619		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2620		if (ifindex < 0 || if_index < ifindex) {
2621			error = ENXIO;	/* XXX EINVAL? */
2622			break;
2623		}
2624		ifp = ifnet_byindex(ifindex);
2625		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2626			error = EADDRNOTAVAIL;
2627			break;
2628		}
2629		im6o->im6o_multicast_ifp = ifp;
2630		break;
2631
2632	case IPV6_MULTICAST_HOPS:
2633	    {
2634		/*
2635		 * Set the IP6 hoplimit for outgoing multicast packets.
2636		 */
2637		int optval;
2638		if (m == NULL || m->m_len != sizeof(int)) {
2639			error = EINVAL;
2640			break;
2641		}
2642		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2643		if (optval < -1 || optval >= 256)
2644			error = EINVAL;
2645		else if (optval == -1)
2646			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2647		else
2648			im6o->im6o_multicast_hlim = optval;
2649		break;
2650	    }
2651
2652	case IPV6_MULTICAST_LOOP:
2653		/*
2654		 * Set the loopback flag for outgoing multicast packets.
2655		 * Must be zero or one.
2656		 */
2657		if (m == NULL || m->m_len != sizeof(u_int)) {
2658			error = EINVAL;
2659			break;
2660		}
2661		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2662		if (loop > 1) {
2663			error = EINVAL;
2664			break;
2665		}
2666		im6o->im6o_multicast_loop = loop;
2667		break;
2668
2669	case IPV6_JOIN_GROUP:
2670		/*
2671		 * Add a multicast group membership.
2672		 * Group must be a valid IP6 multicast address.
2673		 */
2674		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2675			error = EINVAL;
2676			break;
2677		}
2678		mreq = mtod(m, struct ipv6_mreq *);
2679
2680		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2681			/*
2682			 * We use the unspecified address to specify to accept
2683			 * all multicast addresses. Only super user is allowed
2684			 * to do this.
2685			 */
2686			if (suser(td)) {
2687				error = EACCES;
2688				break;
2689			}
2690		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2691			error = EINVAL;
2692			break;
2693		}
2694
2695		/*
2696		 * If no interface was explicitly specified, choose an
2697		 * appropriate one according to the given multicast address.
2698		 */
2699		if (mreq->ipv6mr_interface == 0) {
2700			struct sockaddr_in6 *dst;
2701
2702			/*
2703			 * Look up the routing table for the
2704			 * address, and choose the outgoing interface.
2705			 *   XXX: is it a good approach?
2706			 */
2707			ro.ro_rt = NULL;
2708			dst = (struct sockaddr_in6 *)&ro.ro_dst;
2709			bzero(dst, sizeof(*dst));
2710			dst->sin6_family = AF_INET6;
2711			dst->sin6_len = sizeof(*dst);
2712			dst->sin6_addr = mreq->ipv6mr_multiaddr;
2713			rtalloc((struct route *)&ro);
2714			if (ro.ro_rt == NULL) {
2715				error = EADDRNOTAVAIL;
2716				break;
2717			}
2718			ifp = ro.ro_rt->rt_ifp;
2719			RTFREE(ro.ro_rt);
2720		} else {
2721			/*
2722			 * If the interface is specified, validate it.
2723			 */
2724			if (mreq->ipv6mr_interface < 0 ||
2725			    if_index < mreq->ipv6mr_interface) {
2726				error = ENXIO;	/* XXX EINVAL? */
2727				break;
2728			}
2729			ifp = ifnet_byindex(mreq->ipv6mr_interface);
2730			if (!ifp) {
2731				error = ENXIO;	/* XXX EINVAL? */
2732				break;
2733			}
2734		}
2735
2736		/*
2737		 * See if we found an interface, and confirm that it
2738		 * supports multicast
2739		 */
2740		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2741			error = EADDRNOTAVAIL;
2742			break;
2743		}
2744
2745		if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
2746			error = EADDRNOTAVAIL; /* XXX: should not happen */
2747			break;
2748		}
2749
2750		/*
2751		 * See if the membership already exists.
2752		 */
2753		for (imm = im6o->im6o_memberships.lh_first;
2754		     imm != NULL; imm = imm->i6mm_chain.le_next)
2755			if (imm->i6mm_maddr->in6m_ifp == ifp &&
2756			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2757					       &mreq->ipv6mr_multiaddr))
2758				break;
2759		if (imm != NULL) {
2760			error = EADDRINUSE;
2761			break;
2762		}
2763		/*
2764		 * Everything looks good; add a new record to the multicast
2765		 * address list for the given interface.
2766		 */
2767		imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK);
2768		if (imm == NULL) {
2769			error = ENOBUFS;
2770			break;
2771		}
2772		if ((imm->i6mm_maddr =
2773		     in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
2774			free(imm, M_IPMADDR);
2775			break;
2776		}
2777		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2778		break;
2779
2780	case IPV6_LEAVE_GROUP:
2781		/*
2782		 * Drop a multicast group membership.
2783		 * Group must be a valid IP6 multicast address.
2784		 */
2785		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2786			error = EINVAL;
2787			break;
2788		}
2789		mreq = mtod(m, struct ipv6_mreq *);
2790
2791		/*
2792		 * If an interface address was specified, get a pointer
2793		 * to its ifnet structure.
2794		 */
2795		if (mreq->ipv6mr_interface < 0 ||
2796		    if_index < mreq->ipv6mr_interface) {
2797			error = ENXIO;	/* XXX EINVAL? */
2798			break;
2799		}
2800		if (mreq->ipv6mr_interface == 0)
2801			ifp = NULL;
2802		else
2803			ifp = ifnet_byindex(mreq->ipv6mr_interface);
2804
2805		/* Fill in the scope zone ID */
2806		if (ifp) {
2807			if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
2808				/* XXX: should not happen */
2809				error = EADDRNOTAVAIL;
2810				break;
2811			}
2812		} else if (mreq->ipv6mr_interface != 0) {
2813			/*
2814			 * This case happens when the (positive) index is in
2815			 * the valid range, but the corresponding interface has
2816			 * been detached dynamically (XXX).
2817			 */
2818			error = EADDRNOTAVAIL;
2819			break;
2820		} else {	/* ipv6mr_interface == 0 */
2821			struct sockaddr_in6 sa6_mc;
2822
2823			/*
2824			 * The API spec says as follows:
2825			 *  If the interface index is specified as 0, the
2826			 *  system may choose a multicast group membership to
2827			 *  drop by matching the multicast address only.
2828			 * On the other hand, we cannot disambiguate the scope
2829			 * zone unless an interface is provided.  Thus, we
2830			 * check if there's ambiguity with the default scope
2831			 * zone as the last resort.
2832			 */
2833			bzero(&sa6_mc, sizeof(sa6_mc));
2834			sa6_mc.sin6_family = AF_INET6;
2835			sa6_mc.sin6_len = sizeof(sa6_mc);
2836			sa6_mc.sin6_addr = mreq->ipv6mr_multiaddr;
2837			error = sa6_embedscope(&sa6_mc, ip6_use_defzone);
2838			if (error != 0)
2839				break;
2840			mreq->ipv6mr_multiaddr = sa6_mc.sin6_addr;
2841		}
2842
2843		/*
2844		 * Find the membership in the membership list.
2845		 */
2846		for (imm = im6o->im6o_memberships.lh_first;
2847		     imm != NULL; imm = imm->i6mm_chain.le_next) {
2848			if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2849			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2850			    &mreq->ipv6mr_multiaddr))
2851				break;
2852		}
2853		if (imm == NULL) {
2854			/* Unable to resolve interface */
2855			error = EADDRNOTAVAIL;
2856			break;
2857		}
2858		/*
2859		 * Give up the multicast address record to which the
2860		 * membership points.
2861		 */
2862		LIST_REMOVE(imm, i6mm_chain);
2863		in6_delmulti(imm->i6mm_maddr);
2864		free(imm, M_IPMADDR);
2865		break;
2866
2867	default:
2868		error = EOPNOTSUPP;
2869		break;
2870	}
2871
2872	/*
2873	 * If all options have default values, no need to keep the mbuf.
2874	 */
2875	if (im6o->im6o_multicast_ifp == NULL &&
2876	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2877	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2878	    im6o->im6o_memberships.lh_first == NULL) {
2879		free(*im6op, M_IPMOPTS);
2880		*im6op = NULL;
2881	}
2882
2883	return (error);
2884}
2885
2886/*
2887 * Return the IP6 multicast options in response to user getsockopt().
2888 */
2889static int
2890ip6_getmoptions(optname, im6o, mp)
2891	int optname;
2892	struct ip6_moptions *im6o;
2893	struct mbuf **mp;
2894{
2895	u_int *hlim, *loop, *ifindex;
2896
2897	*mp = m_get(M_TRYWAIT, MT_HEADER);		/* XXX */
2898
2899	switch (optname) {
2900
2901	case IPV6_MULTICAST_IF:
2902		ifindex = mtod(*mp, u_int *);
2903		(*mp)->m_len = sizeof(u_int);
2904		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2905			*ifindex = 0;
2906		else
2907			*ifindex = im6o->im6o_multicast_ifp->if_index;
2908		return (0);
2909
2910	case IPV6_MULTICAST_HOPS:
2911		hlim = mtod(*mp, u_int *);
2912		(*mp)->m_len = sizeof(u_int);
2913		if (im6o == NULL)
2914			*hlim = ip6_defmcasthlim;
2915		else
2916			*hlim = im6o->im6o_multicast_hlim;
2917		return (0);
2918
2919	case IPV6_MULTICAST_LOOP:
2920		loop = mtod(*mp, u_int *);
2921		(*mp)->m_len = sizeof(u_int);
2922		if (im6o == NULL)
2923			*loop = ip6_defmcasthlim;
2924		else
2925			*loop = im6o->im6o_multicast_loop;
2926		return (0);
2927
2928	default:
2929		return (EOPNOTSUPP);
2930	}
2931}
2932
2933/*
2934 * Discard the IP6 multicast options.
2935 */
2936void
2937ip6_freemoptions(im6o)
2938	struct ip6_moptions *im6o;
2939{
2940	struct in6_multi_mship *imm;
2941
2942	if (im6o == NULL)
2943		return;
2944
2945	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2946		LIST_REMOVE(imm, i6mm_chain);
2947		if (imm->i6mm_maddr)
2948			in6_delmulti(imm->i6mm_maddr);
2949		free(imm, M_IPMADDR);
2950	}
2951	free(im6o, M_IPMOPTS);
2952}
2953
2954/*
2955 * Set IPv6 outgoing packet options based on advanced API.
2956 */
2957int
2958ip6_setpktopts(control, opt, stickyopt, priv, uproto)
2959	struct mbuf *control;
2960	struct ip6_pktopts *opt, *stickyopt;
2961	int priv, uproto;
2962{
2963	struct cmsghdr *cm = 0;
2964
2965	if (control == NULL || opt == NULL)
2966		return (EINVAL);
2967
2968	ip6_initpktopts(opt);
2969	if (stickyopt) {
2970		int error;
2971
2972		/*
2973		 * If stickyopt is provided, make a local copy of the options
2974		 * for this particular packet, then override them by ancillary
2975		 * objects.
2976		 * XXX: copypktopts() does not copy the cached route to a next
2977		 * hop (if any).  This is not very good in terms of efficiency,
2978		 * but we can allow this since this option should be rarely
2979		 * used.
2980		 */
2981		if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2982			return (error);
2983	}
2984
2985	/*
2986	 * XXX: Currently, we assume all the optional information is stored
2987	 * in a single mbuf.
2988	 */
2989	if (control->m_next)
2990		return (EINVAL);
2991
2992	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2993	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2994		int error;
2995
2996		if (control->m_len < CMSG_LEN(0))
2997			return (EINVAL);
2998
2999		cm = mtod(control, struct cmsghdr *);
3000		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
3001			return (EINVAL);
3002		if (cm->cmsg_level != IPPROTO_IPV6)
3003			continue;
3004
3005		error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
3006		    cm->cmsg_len - CMSG_LEN(0), opt, priv, 0, 1, uproto);
3007		if (error)
3008			return (error);
3009	}
3010
3011	return (0);
3012}
3013
3014/*
3015 * Set a particular packet option, as a sticky option or an ancillary data
3016 * item.  "len" can be 0 only when it's a sticky option.
3017 * We have 4 cases of combination of "sticky" and "cmsg":
3018 * "sticky=0, cmsg=0": impossible
3019 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
3020 * "sticky=1, cmsg=0": RFC3542 socket option
3021 * "sticky=1, cmsg=1": RFC2292 socket option
3022 */
3023static int
3024ip6_setpktopt(optname, buf, len, opt, priv, sticky, cmsg, uproto)
3025	int optname, len, priv, sticky, cmsg, uproto;
3026	u_char *buf;
3027	struct ip6_pktopts *opt;
3028{
3029	int minmtupolicy, preftemp;
3030
3031	if (!sticky && !cmsg) {
3032#ifdef DIAGNOSTIC
3033		printf("ip6_setpktopt: impossible case\n");
3034#endif
3035		return (EINVAL);
3036	}
3037
3038	/*
3039	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3040	 * not be specified in the context of RFC3542.  Conversely,
3041	 * RFC3542 types should not be specified in the context of RFC2292.
3042	 */
3043	if (!cmsg) {
3044		switch (optname) {
3045		case IPV6_2292PKTINFO:
3046		case IPV6_2292HOPLIMIT:
3047		case IPV6_2292NEXTHOP:
3048		case IPV6_2292HOPOPTS:
3049		case IPV6_2292DSTOPTS:
3050		case IPV6_2292RTHDR:
3051		case IPV6_2292PKTOPTIONS:
3052			return (ENOPROTOOPT);
3053		}
3054	}
3055	if (sticky && cmsg) {
3056		switch (optname) {
3057		case IPV6_PKTINFO:
3058		case IPV6_HOPLIMIT:
3059		case IPV6_NEXTHOP:
3060		case IPV6_HOPOPTS:
3061		case IPV6_DSTOPTS:
3062		case IPV6_RTHDRDSTOPTS:
3063		case IPV6_RTHDR:
3064		case IPV6_USE_MIN_MTU:
3065		case IPV6_DONTFRAG:
3066		case IPV6_TCLASS:
3067		case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
3068			return (ENOPROTOOPT);
3069		}
3070	}
3071
3072	switch (optname) {
3073	case IPV6_2292PKTINFO:
3074	case IPV6_PKTINFO:
3075	{
3076		struct ifnet *ifp = NULL;
3077		struct in6_pktinfo *pktinfo;
3078
3079		if (len != sizeof(struct in6_pktinfo))
3080			return (EINVAL);
3081
3082		pktinfo = (struct in6_pktinfo *)buf;
3083
3084		/*
3085		 * An application can clear any sticky IPV6_PKTINFO option by
3086		 * doing a "regular" setsockopt with ipi6_addr being
3087		 * in6addr_any and ipi6_ifindex being zero.
3088		 * [RFC 3542, Section 6]
3089		 */
3090		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3091		    pktinfo->ipi6_ifindex == 0 &&
3092		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3093			ip6_clearpktopts(opt, optname);
3094			break;
3095		}
3096
3097		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3098		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3099			return (EINVAL);
3100		}
3101
3102		/* validate the interface index if specified. */
3103		if (pktinfo->ipi6_ifindex > if_index ||
3104		    pktinfo->ipi6_ifindex < 0) {
3105			 return (ENXIO);
3106		}
3107		if (pktinfo->ipi6_ifindex) {
3108			ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
3109			if (ifp == NULL)
3110				return (ENXIO);
3111		}
3112
3113		/*
3114		 * We store the address anyway, and let in6_selectsrc()
3115		 * validate the specified address.  This is because ipi6_addr
3116		 * may not have enough information about its scope zone, and
3117		 * we may need additional information (such as outgoing
3118		 * interface or the scope zone of a destination address) to
3119		 * disambiguate the scope.
3120		 * XXX: the delay of the validation may confuse the
3121		 * application when it is used as a sticky option.
3122		 */
3123		if (opt->ip6po_pktinfo == NULL) {
3124			opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
3125			    M_IP6OPT, M_NOWAIT);
3126			if (opt->ip6po_pktinfo == NULL)
3127				return (ENOBUFS);
3128		}
3129		bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
3130		break;
3131	}
3132
3133	case IPV6_2292HOPLIMIT:
3134	case IPV6_HOPLIMIT:
3135	{
3136		int *hlimp;
3137
3138		/*
3139		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3140		 * to simplify the ordering among hoplimit options.
3141		 */
3142		if (optname == IPV6_HOPLIMIT && sticky)
3143			return (ENOPROTOOPT);
3144
3145		if (len != sizeof(int))
3146			return (EINVAL);
3147		hlimp = (int *)buf;
3148		if (*hlimp < -1 || *hlimp > 255)
3149			return (EINVAL);
3150
3151		opt->ip6po_hlim = *hlimp;
3152		break;
3153	}
3154
3155	case IPV6_TCLASS:
3156	{
3157		int tclass;
3158
3159		if (len != sizeof(int))
3160			return (EINVAL);
3161		tclass = *(int *)buf;
3162		if (tclass < -1 || tclass > 255)
3163			return (EINVAL);
3164
3165		opt->ip6po_tclass = tclass;
3166		break;
3167	}
3168
3169	case IPV6_2292NEXTHOP:
3170	case IPV6_NEXTHOP:
3171		if (!priv)
3172			return (EPERM);
3173
3174		if (len == 0) {	/* just remove the option */
3175			ip6_clearpktopts(opt, IPV6_NEXTHOP);
3176			break;
3177		}
3178
3179		/* check if cmsg_len is large enough for sa_len */
3180		if (len < sizeof(struct sockaddr) || len < *buf)
3181			return (EINVAL);
3182
3183		switch (((struct sockaddr *)buf)->sa_family) {
3184		case AF_INET6:
3185		{
3186			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3187			int error;
3188
3189			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3190				return (EINVAL);
3191
3192			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3193			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3194				return (EINVAL);
3195			}
3196			if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3197			    != 0) {
3198				return (error);
3199			}
3200			break;
3201		}
3202		case AF_LINK:	/* should eventually be supported */
3203		default:
3204			return (EAFNOSUPPORT);
3205		}
3206
3207		/* turn off the previous option, then set the new option. */
3208		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3209		opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_WAITOK);
3210		bcopy(buf, opt->ip6po_nexthop, *buf);
3211		break;
3212
3213	case IPV6_2292HOPOPTS:
3214	case IPV6_HOPOPTS:
3215	{
3216		struct ip6_hbh *hbh;
3217		int hbhlen;
3218
3219		/*
3220		 * XXX: We don't allow a non-privileged user to set ANY HbH
3221		 * options, since per-option restriction has too much
3222		 * overhead.
3223		 */
3224		if (!priv)
3225			return (EPERM);
3226
3227		if (len == 0) {
3228			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3229			break;	/* just remove the option */
3230		}
3231
3232		/* message length validation */
3233		if (len < sizeof(struct ip6_hbh))
3234			return (EINVAL);
3235		hbh = (struct ip6_hbh *)buf;
3236		hbhlen = (hbh->ip6h_len + 1) << 3;
3237		if (len != hbhlen)
3238			return (EINVAL);
3239
3240		/* turn off the previous option, then set the new option. */
3241		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3242		opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_WAITOK);
3243		bcopy(hbh, opt->ip6po_hbh, hbhlen);
3244
3245		break;
3246	}
3247
3248	case IPV6_2292DSTOPTS:
3249	case IPV6_DSTOPTS:
3250	case IPV6_RTHDRDSTOPTS:
3251	{
3252		struct ip6_dest *dest, **newdest = NULL;
3253		int destlen;
3254
3255		if (!priv)	/* XXX: see the comment for IPV6_HOPOPTS */
3256			return (EPERM);
3257
3258		if (len == 0) {
3259			ip6_clearpktopts(opt, optname);
3260			break;	/* just remove the option */
3261		}
3262
3263		/* message length validation */
3264		if (len < sizeof(struct ip6_dest))
3265			return (EINVAL);
3266		dest = (struct ip6_dest *)buf;
3267		destlen = (dest->ip6d_len + 1) << 3;
3268		if (len != destlen)
3269			return (EINVAL);
3270
3271		/*
3272		 * Determine the position that the destination options header
3273		 * should be inserted; before or after the routing header.
3274		 */
3275		switch (optname) {
3276		case IPV6_2292DSTOPTS:
3277			/*
3278			 * The old advacned API is ambiguous on this point.
3279			 * Our approach is to determine the position based
3280			 * according to the existence of a routing header.
3281			 * Note, however, that this depends on the order of the
3282			 * extension headers in the ancillary data; the 1st
3283			 * part of the destination options header must appear
3284			 * before the routing header in the ancillary data,
3285			 * too.
3286			 * RFC3542 solved the ambiguity by introducing
3287			 * separate ancillary data or option types.
3288			 */
3289			if (opt->ip6po_rthdr == NULL)
3290				newdest = &opt->ip6po_dest1;
3291			else
3292				newdest = &opt->ip6po_dest2;
3293			break;
3294		case IPV6_RTHDRDSTOPTS:
3295			newdest = &opt->ip6po_dest1;
3296			break;
3297		case IPV6_DSTOPTS:
3298			newdest = &opt->ip6po_dest2;
3299			break;
3300		}
3301
3302		/* turn off the previous option, then set the new option. */
3303		ip6_clearpktopts(opt, optname);
3304		*newdest = malloc(destlen, M_IP6OPT, M_WAITOK);
3305		bcopy(dest, *newdest, destlen);
3306
3307		break;
3308	}
3309
3310	case IPV6_2292RTHDR:
3311	case IPV6_RTHDR:
3312	{
3313		struct ip6_rthdr *rth;
3314		int rthlen;
3315
3316		if (len == 0) {
3317			ip6_clearpktopts(opt, IPV6_RTHDR);
3318			break;	/* just remove the option */
3319		}
3320
3321		/* message length validation */
3322		if (len < sizeof(struct ip6_rthdr))
3323			return (EINVAL);
3324		rth = (struct ip6_rthdr *)buf;
3325		rthlen = (rth->ip6r_len + 1) << 3;
3326		if (len != rthlen)
3327			return (EINVAL);
3328
3329		switch (rth->ip6r_type) {
3330		case IPV6_RTHDR_TYPE_0:
3331			if (rth->ip6r_len == 0)	/* must contain one addr */
3332				return (EINVAL);
3333			if (rth->ip6r_len % 2) /* length must be even */
3334				return (EINVAL);
3335			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3336				return (EINVAL);
3337			break;
3338		default:
3339			return (EINVAL);	/* not supported */
3340		}
3341
3342		/* turn off the previous option */
3343		ip6_clearpktopts(opt, IPV6_RTHDR);
3344		opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_WAITOK);
3345		bcopy(rth, opt->ip6po_rthdr, rthlen);
3346
3347		break;
3348	}
3349
3350	case IPV6_USE_MIN_MTU:
3351		if (len != sizeof(int))
3352			return (EINVAL);
3353		minmtupolicy = *(int *)buf;
3354		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3355		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3356		    minmtupolicy != IP6PO_MINMTU_ALL) {
3357			return (EINVAL);
3358		}
3359		opt->ip6po_minmtu = minmtupolicy;
3360		break;
3361
3362	case IPV6_DONTFRAG:
3363		if (len != sizeof(int))
3364			return (EINVAL);
3365
3366		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3367			/*
3368			 * we ignore this option for TCP sockets.
3369			 * (RFC3542 leaves this case unspecified.)
3370			 */
3371			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3372		} else
3373			opt->ip6po_flags |= IP6PO_DONTFRAG;
3374		break;
3375
3376	case IPV6_PREFER_TEMPADDR:
3377		if (len != sizeof(int))
3378			return (EINVAL);
3379		preftemp = *(int *)buf;
3380		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3381		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3382		    preftemp != IP6PO_TEMPADDR_PREFER) {
3383			return (EINVAL);
3384		}
3385		opt->ip6po_prefer_tempaddr = preftemp;
3386		break;
3387
3388	default:
3389		return (ENOPROTOOPT);
3390	} /* end of switch */
3391
3392	return (0);
3393}
3394
3395/*
3396 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3397 * packet to the input queue of a specified interface.  Note that this
3398 * calls the output routine of the loopback "driver", but with an interface
3399 * pointer that might NOT be &loif -- easier than replicating that code here.
3400 */
3401void
3402ip6_mloopback(ifp, m, dst)
3403	struct ifnet *ifp;
3404	struct mbuf *m;
3405	struct sockaddr_in6 *dst;
3406{
3407	struct mbuf *copym;
3408	struct ip6_hdr *ip6;
3409
3410	copym = m_copy(m, 0, M_COPYALL);
3411	if (copym == NULL)
3412		return;
3413
3414	/*
3415	 * Make sure to deep-copy IPv6 header portion in case the data
3416	 * is in an mbuf cluster, so that we can safely override the IPv6
3417	 * header portion later.
3418	 */
3419	if ((copym->m_flags & M_EXT) != 0 ||
3420	    copym->m_len < sizeof(struct ip6_hdr)) {
3421		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3422		if (copym == NULL)
3423			return;
3424	}
3425
3426#ifdef DIAGNOSTIC
3427	if (copym->m_len < sizeof(*ip6)) {
3428		m_freem(copym);
3429		return;
3430	}
3431#endif
3432
3433	ip6 = mtod(copym, struct ip6_hdr *);
3434	/*
3435	 * clear embedded scope identifiers if necessary.
3436	 * in6_clearscope will touch the addresses only when necessary.
3437	 */
3438	in6_clearscope(&ip6->ip6_src);
3439	in6_clearscope(&ip6->ip6_dst);
3440
3441	(void)if_simloop(ifp, copym, dst->sin6_family, 0);
3442}
3443
3444/*
3445 * Chop IPv6 header off from the payload.
3446 */
3447static int
3448ip6_splithdr(m, exthdrs)
3449	struct mbuf *m;
3450	struct ip6_exthdrs *exthdrs;
3451{
3452	struct mbuf *mh;
3453	struct ip6_hdr *ip6;
3454
3455	ip6 = mtod(m, struct ip6_hdr *);
3456	if (m->m_len > sizeof(*ip6)) {
3457		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3458		if (mh == 0) {
3459			m_freem(m);
3460			return ENOBUFS;
3461		}
3462		M_MOVE_PKTHDR(mh, m);
3463		MH_ALIGN(mh, sizeof(*ip6));
3464		m->m_len -= sizeof(*ip6);
3465		m->m_data += sizeof(*ip6);
3466		mh->m_next = m;
3467		m = mh;
3468		m->m_len = sizeof(*ip6);
3469		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3470	}
3471	exthdrs->ip6e_ip6 = m;
3472	return 0;
3473}
3474
3475/*
3476 * Compute IPv6 extension header length.
3477 */
3478int
3479ip6_optlen(in6p)
3480	struct in6pcb *in6p;
3481{
3482	int len;
3483
3484	if (!in6p->in6p_outputopts)
3485		return 0;
3486
3487	len = 0;
3488#define elen(x) \
3489    (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3490
3491	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3492	if (in6p->in6p_outputopts->ip6po_rthdr)
3493		/* dest1 is valid with rthdr only */
3494		len += elen(in6p->in6p_outputopts->ip6po_dest1);
3495	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3496	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3497	return len;
3498#undef elen
3499}
3500