ip6_output.c revision 78064
1/*	$FreeBSD: head/sys/netinet6/ip6_output.c 78064 2001-06-11 12:39:29Z ume $	*/
2/*	$KAME: ip6_output.c,v 1.180 2001/05/21 05:37:50 jinmei Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1988, 1990, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 *    must display the following acknowledgement:
47 *	This product includes software developed by the University of
48 *	California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 *    may be used to endorse or promote products derived from this software
51 *    without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
66 */
67
68#include "opt_ip6fw.h"
69#include "opt_inet.h"
70#include "opt_inet6.h"
71#include "opt_ipsec.h"
72#include "opt_pfil_hooks.h"
73
74#include <sys/param.h>
75#include <sys/malloc.h>
76#include <sys/mbuf.h>
77#include <sys/errno.h>
78#include <sys/protosw.h>
79#include <sys/socket.h>
80#include <sys/socketvar.h>
81#include <sys/systm.h>
82#include <sys/kernel.h>
83
84#include <net/if.h>
85#include <net/route.h>
86#ifdef PFIL_HOOKS
87#include <net/pfil.h>
88#endif
89
90#include <netinet/in.h>
91#include <netinet/in_var.h>
92#include <netinet6/in6_var.h>
93#include <netinet/ip6.h>
94#include <netinet/icmp6.h>
95#include <netinet6/ip6_var.h>
96#include <netinet/in_pcb.h>
97#include <netinet6/nd6.h>
98
99#ifdef IPSEC
100#include <netinet6/ipsec.h>
101#ifdef INET6
102#include <netinet6/ipsec6.h>
103#endif
104#include <netkey/key.h>
105#endif /* IPSEC */
106
107#include <netinet6/ip6_fw.h>
108
109#include <net/net_osdep.h>
110
111#include <netinet6/ip6protosw.h>
112
/*
 * File-local malloc type M_IPMOPTS, registered under the short name
 * "ip6_moptions" with the description "internet multicast options".
 * NOTE(review): presumably tags the ip6_moptions allocations made by the
 * multicast option helpers; the allocation sites are outside this chunk.
 */
113static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
114
/*
 * Per-packet set of header mbufs that ip6_output() assembles into the
 * outgoing chain.  ip6_output() zeroes the structure before use, so a
 * NULL member means the corresponding header is not present.
 */
115struct ip6_exthdrs {
116	struct mbuf *ip6e_ip6;		/* IPv6 header, once split from the payload */
117	struct mbuf *ip6e_hbh;		/* hop-by-hop options header */
118	struct mbuf *ip6e_dest1;	/* destination options header (1st part) */
119	struct mbuf *ip6e_rthdr;	/* routing header */
120	struct mbuf *ip6e_dest2;	/* destination options header (2nd part) */
121};
122
/*
 * Prototypes for the file-local helpers.  Each returns an int; the visible
 * callers in ip6_output() treat a nonzero return as an error.
 */

/* Not called in the visible part of this file; body is outside this chunk. */
123static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
124			    struct socket *, struct sockopt *sopt));
/* Multicast option set/get helpers; bodies are outside this chunk. */
125static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
126static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
/*
 * Used by ip6_output() (MAKE_EXTHDR) with the destination mbuf pointer,
 * the source extension header and its length in bytes.
 */
127static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));
/*
 * Called once per fragment with the original packet, the new fragment
 * mbuf, the unfragmentable-part length and an out pointer that receives
 * the fragment header location.
 */
128static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
129				  struct ip6_frag **));
/* Called with the payload length when it exceeds IPV6_MAXPACKET. */
130static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));
/*
 * Called before extension headers are chained in; on success ip6_output()
 * continues with exthdrs->ip6e_ip6 as the head of the packet, on failure
 * it no longer uses the mbuf it passed in.
 */
131static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));
132
/*
 * Defined elsewhere.  ip6_output() indexes inet6sw[] with
 * ip6_protox[IPPROTO_IPV6] to find the output packet-filter hook list
 * (pr_pfh) when PFIL_HOOKS is enabled.
 */
133extern struct ip6protosw inet6sw[];
134extern u_char ip6_protox[IPPROTO_MAX];
135
136/*
137 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
138 * header (with pri, len, nxt, hlim, src, dst).
139 * This function may modify ver and hlim only.
140 * The mbuf chain containing the packet will be freed.
141 * The mbuf opt, if present, will not be freed.
142 *
143 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
144 * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
145 * which is rt_rmx.rmx_mtu.
146 *
147 * If MIP6 is active it will have to add a Home Address option to DH1 if
148 * the mobile node is roaming, or a Routing Header type 0 if there exists
149 * a Binding Cache entry for the destination node, or a BU option to DH2
150 * if the mobile node initiates communication and no BUL entry exists.
151 * The only way to do this is to allocate new memory, copy the user data
152 * to the new buffer and then add the Home Address option, BU option and
153 * routing header type 0 respectively. MIP6 will set two flags in "struct
154 * pktopts" to restore the original contents once ip6_output is completed.
155 * To make this work, make sure that function exit is made through label
156 * alldone.
157 *
158 */
159int
160ip6_output(m0, opt, ro, flags, im6o, ifpp)
161	struct mbuf *m0;
162	struct ip6_pktopts *opt;
163	struct route_in6 *ro;
164	int flags;
165	struct ip6_moptions *im6o;
166	struct ifnet **ifpp;		/* XXX: just for statistics */
167{
168	struct ip6_hdr *ip6, *mhip6;
169	struct ifnet *ifp, *origifp;
170	struct mbuf *m = m0;
171	int hlen, tlen, len, off;
172	struct route_in6 ip6route;
173	struct sockaddr_in6 *dst;
174	int error = 0;
175	struct in6_ifaddr *ia = NULL;
176	u_long mtu;
177	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
178	struct ip6_exthdrs exthdrs;
179	struct in6_addr finaldst;
180	struct route_in6 *ro_pmtu = NULL;
181	int hdrsplit = 0;
182	int needipsec = 0;
183#ifdef PFIL_HOOKS
184	struct packet_filter_hook *pfh;
185	struct mbuf *m1;
186	int rv;
187#endif /* PFIL_HOOKS */
188#ifdef IPSEC
189	int needipsectun = 0;
190	struct socket *so;
191	struct secpolicy *sp = NULL;
192
193	/* for AH processing. stupid to have "socket" variable in IP layer... */
194	so = ipsec_getsocket(m);
195	(void)ipsec_setsocket(m, NULL);
196	ip6 = mtod(m, struct ip6_hdr *);
197#endif /* IPSEC */
198
199#define MAKE_EXTHDR(hp, mp)						\
200    do {								\
201	if (hp) {							\
202		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
203		error = ip6_copyexthdr((mp), (caddr_t)(hp), 		\
204				       ((eh)->ip6e_len + 1) << 3);	\
205		if (error)						\
206			goto freehdrs;					\
207	}								\
208    } while (0)
209
210	bzero(&exthdrs, sizeof(exthdrs));
211
212	if (opt) {
213		/* Hop-by-Hop options header */
214		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
215		/* Destination options header(1st part) */
216		MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
217		/* Routing header */
218		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
219		/* Destination options header(2nd part) */
220		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
221	}
222
223#ifdef IPSEC
224	/* get a security policy for this packet */
225	if (so == NULL)
226		sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
227	else
228		sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
229
230	if (sp == NULL) {
231		ipsec6stat.out_inval++;
232		goto freehdrs;
233	}
234
235	error = 0;
236
237	/* check policy */
238	switch (sp->policy) {
239	case IPSEC_POLICY_DISCARD:
240		/*
241		 * This packet is just discarded.
242		 */
243		ipsec6stat.out_polvio++;
244		goto freehdrs;
245
246	case IPSEC_POLICY_BYPASS:
247	case IPSEC_POLICY_NONE:
248		/* no need to do IPsec. */
249		needipsec = 0;
250		break;
251
252	case IPSEC_POLICY_IPSEC:
253		if (sp->req == NULL) {
254			/* acquire a policy */
255			error = key_spdacquire(sp);
256			goto freehdrs;
257		}
258		needipsec = 1;
259		break;
260
261	case IPSEC_POLICY_ENTRUST:
262	default:
263		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
264	}
265#endif /* IPSEC */
266
267	/*
268	 * Calculate the total length of the extension header chain.
269	 * Keep the length of the unfragmentable part for fragmentation.
270	 */
271	optlen = 0;
272	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
273	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
274	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
275	unfragpartlen = optlen + sizeof(struct ip6_hdr);
276	/* NOTE: we don't add AH/ESP length here. do that later. */
277	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
278
279	/*
280	 * If we need IPsec, or there is at least one extension header,
281	 * separate IP6 header from the payload.
282	 */
283	if ((needipsec || optlen) && !hdrsplit) {
284		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
285			m = NULL;
286			goto freehdrs;
287		}
288		m = exthdrs.ip6e_ip6;
289		hdrsplit++;
290	}
291
292	/* adjust pointer */
293	ip6 = mtod(m, struct ip6_hdr *);
294
295	/* adjust mbuf packet header length */
296	m->m_pkthdr.len += optlen;
297	plen = m->m_pkthdr.len - sizeof(*ip6);
298
299	/* If this is a jumbo payload, insert a jumbo payload option. */
300	if (plen > IPV6_MAXPACKET) {
301		if (!hdrsplit) {
302			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
303				m = NULL;
304				goto freehdrs;
305			}
306			m = exthdrs.ip6e_ip6;
307			hdrsplit++;
308		}
309		/* adjust pointer */
310		ip6 = mtod(m, struct ip6_hdr *);
311		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
312			goto freehdrs;
313		ip6->ip6_plen = 0;
314	} else
315		ip6->ip6_plen = htons(plen);
316
317	/*
318	 * Concatenate headers and fill in next header fields.
319	 * Here we have, on "m"
320	 *	IPv6 payload
321	 * and we insert headers accordingly.  Finally, we should be getting:
322	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
323	 *
324	 * during the header composing process, "m" points to IPv6 header.
325	 * "mprev" points to an extension header prior to esp.
326	 */
327	{
328		u_char *nexthdrp = &ip6->ip6_nxt;
329		struct mbuf *mprev = m;
330
331		/*
332		 * we treat dest2 specially.  this makes IPsec processing
333		 * much easier.
334		 *
335		 * result: IPv6 dest2 payload
336		 * m and mprev will point to IPv6 header.
337		 */
338		if (exthdrs.ip6e_dest2) {
339			if (!hdrsplit)
340				panic("assumption failed: hdr not split");
341			exthdrs.ip6e_dest2->m_next = m->m_next;
342			m->m_next = exthdrs.ip6e_dest2;
343			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
344			ip6->ip6_nxt = IPPROTO_DSTOPTS;
345		}
346
347#define MAKE_CHAIN(m, mp, p, i)\
348    do {\
349	if (m) {\
350		if (!hdrsplit) \
351			panic("assumption failed: hdr not split"); \
352		*mtod((m), u_char *) = *(p);\
353		*(p) = (i);\
354		p = mtod((m), u_char *);\
355		(m)->m_next = (mp)->m_next;\
356		(mp)->m_next = (m);\
357		(mp) = (m);\
358	}\
359    } while (0)
360		/*
361		 * result: IPv6 hbh dest1 rthdr dest2 payload
362		 * m will point to IPv6 header.  mprev will point to the
363		 * extension header prior to dest2 (rthdr in the above case).
364		 */
365		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev,
366			   nexthdrp, IPPROTO_HOPOPTS);
367		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev,
368			   nexthdrp, IPPROTO_DSTOPTS);
369		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev,
370			   nexthdrp, IPPROTO_ROUTING);
371
372#ifdef IPSEC
373		if (!needipsec)
374			goto skip_ipsec2;
375
376		/*
377		 * pointers after IPsec headers are not valid any more.
378		 * other pointers need a great care too.
379		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
380		 */
381		exthdrs.ip6e_dest2 = NULL;
382
383	    {
384		struct ip6_rthdr *rh = NULL;
385		int segleft_org = 0;
386		struct ipsec_output_state state;
387
388		if (exthdrs.ip6e_rthdr) {
389			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
390			segleft_org = rh->ip6r_segleft;
391			rh->ip6r_segleft = 0;
392		}
393
394		bzero(&state, sizeof(state));
395		state.m = m;
396		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
397			&needipsectun);
398		m = state.m;
399		if (error) {
400			/* mbuf is already reclaimed in ipsec6_output_trans. */
401			m = NULL;
402			switch (error) {
403			case EHOSTUNREACH:
404			case ENETUNREACH:
405			case EMSGSIZE:
406			case ENOBUFS:
407			case ENOMEM:
408				break;
409			default:
410				printf("ip6_output (ipsec): error code %d\n", error);
411				/*fall through*/
412			case ENOENT:
413				/* don't show these error codes to the user */
414				error = 0;
415				break;
416			}
417			goto bad;
418		}
419		if (exthdrs.ip6e_rthdr) {
420			/* ah6_output doesn't modify mbuf chain */
421			rh->ip6r_segleft = segleft_org;
422		}
423	    }
424skip_ipsec2:;
425#endif
426	}
427
428	/*
429	 * If there is a routing header, replace destination address field
430	 * with the first hop of the routing header.
431	 */
432	if (exthdrs.ip6e_rthdr) {
433		struct ip6_rthdr *rh =
434			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
435						  struct ip6_rthdr *));
436		struct ip6_rthdr0 *rh0;
437
438		finaldst = ip6->ip6_dst;
439		switch (rh->ip6r_type) {
440		case IPV6_RTHDR_TYPE_0:
441			 rh0 = (struct ip6_rthdr0 *)rh;
442			 ip6->ip6_dst = rh0->ip6r0_addr[0];
443			 bcopy((caddr_t)&rh0->ip6r0_addr[1],
444			       (caddr_t)&rh0->ip6r0_addr[0],
445			       sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1)
446				 );
447			 rh0->ip6r0_addr[rh0->ip6r0_segleft - 1] = finaldst;
448			 break;
449		default:	/* is it possible? */
450			 error = EINVAL;
451			 goto bad;
452		}
453	}
454
455	/* Source address validation */
456	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
457	    (flags & IPV6_DADOUTPUT) == 0) {
458		error = EOPNOTSUPP;
459		ip6stat.ip6s_badscope++;
460		goto bad;
461	}
462	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
463		error = EOPNOTSUPP;
464		ip6stat.ip6s_badscope++;
465		goto bad;
466	}
467
468	ip6stat.ip6s_localout++;
469
470	/*
471	 * Route packet.
472	 */
473	if (ro == 0) {
474		ro = &ip6route;
475		bzero((caddr_t)ro, sizeof(*ro));
476	}
477	ro_pmtu = ro;
478	if (opt && opt->ip6po_rthdr)
479		ro = &opt->ip6po_route;
480	dst = (struct sockaddr_in6 *)&ro->ro_dst;
481	/*
482	 * If there is a cached route,
483	 * check that it is to the same destination
484	 * and is still up. If not, free it and try again.
485	 */
486	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
487			 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
488		RTFREE(ro->ro_rt);
489		ro->ro_rt = (struct rtentry *)0;
490	}
491	if (ro->ro_rt == 0) {
492		bzero(dst, sizeof(*dst));
493		dst->sin6_family = AF_INET6;
494		dst->sin6_len = sizeof(struct sockaddr_in6);
495		dst->sin6_addr = ip6->ip6_dst;
496#ifdef SCOPEDROUTING
497		/* XXX: sin6_scope_id should already be fixed at this point */
498		if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
499			dst->sin6_scope_id = ntohs(dst->sin6_addr.s6_addr16[1]);
500#endif
501	}
502#ifdef IPSEC
503	if (needipsec && needipsectun) {
504		struct ipsec_output_state state;
505
506		/*
507		 * All the extension headers will become inaccessible
508		 * (since they can be encrypted).
509		 * Don't panic, we need no more updates to extension headers
510		 * on inner IPv6 packet (since they are now encapsulated).
511		 *
512		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
513		 */
514		bzero(&exthdrs, sizeof(exthdrs));
515		exthdrs.ip6e_ip6 = m;
516
517		bzero(&state, sizeof(state));
518		state.m = m;
519		state.ro = (struct route *)ro;
520		state.dst = (struct sockaddr *)dst;
521
522		error = ipsec6_output_tunnel(&state, sp, flags);
523
524		m = state.m;
525		ro = (struct route_in6 *)state.ro;
526		dst = (struct sockaddr_in6 *)state.dst;
527		if (error) {
528			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
529			m0 = m = NULL;
530			m = NULL;
531			switch (error) {
532			case EHOSTUNREACH:
533			case ENETUNREACH:
534			case EMSGSIZE:
535			case ENOBUFS:
536			case ENOMEM:
537				break;
538			default:
539				printf("ip6_output (ipsec): error code %d\n", error);
540				/*fall through*/
541			case ENOENT:
542				/* don't show these error codes to the user */
543				error = 0;
544				break;
545			}
546			goto bad;
547		}
548
549		exthdrs.ip6e_ip6 = m;
550	}
551#endif /*IPSEC*/
552
553	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
554		/* Unicast */
555
556#define ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
557#define sin6tosa(sin6)	((struct sockaddr *)(sin6))
558		/* xxx
559		 * interface selection comes here
560		 * if an interface is specified from an upper layer,
561		 * ifp must point it.
562		 */
563		if (ro->ro_rt == 0) {
564			/*
565			 * non-bsdi always clone routes, if parent is
566			 * PRF_CLONING.
567			 */
568			rtalloc((struct route *)ro);
569		}
570		if (ro->ro_rt == 0) {
571			ip6stat.ip6s_noroute++;
572			error = EHOSTUNREACH;
573			/* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
574			goto bad;
575		}
576		ia = ifatoia6(ro->ro_rt->rt_ifa);
577		ifp = ro->ro_rt->rt_ifp;
578		ro->ro_rt->rt_use++;
579		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
580			dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
581		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
582
583		in6_ifstat_inc(ifp, ifs6_out_request);
584
585		/*
586		 * Check if the outgoing interface conflicts with
587		 * the interface specified by ifi6_ifindex (if specified).
588		 * Note that loopback interface is always okay.
589		 * (this may happen when we are sending a packet to one of
590		 *  our own addresses.)
591		 */
592		if (opt && opt->ip6po_pktinfo
593		 && opt->ip6po_pktinfo->ipi6_ifindex) {
594			if (!(ifp->if_flags & IFF_LOOPBACK)
595			 && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
596				ip6stat.ip6s_noroute++;
597				in6_ifstat_inc(ifp, ifs6_out_discard);
598				error = EHOSTUNREACH;
599				goto bad;
600			}
601		}
602
603		if (opt && opt->ip6po_hlim != -1)
604			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
605	} else {
606		/* Multicast */
607		struct	in6_multi *in6m;
608
609		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
610
611		/*
612		 * See if the caller provided any multicast options
613		 */
614		ifp = NULL;
615		if (im6o != NULL) {
616			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
617			if (im6o->im6o_multicast_ifp != NULL)
618				ifp = im6o->im6o_multicast_ifp;
619		} else
620			ip6->ip6_hlim = ip6_defmcasthlim;
621
622		/*
623		 * See if the caller provided the outgoing interface
624		 * as an ancillary data.
625		 * Boundary check for ifindex is assumed to be already done.
626		 */
627		if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex)
628			ifp = ifindex2ifnet[opt->ip6po_pktinfo->ipi6_ifindex];
629
630		/*
631		 * If the destination is a node-local scope multicast,
632		 * the packet should be loop-backed only.
633		 */
634		if (IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst)) {
635			/*
636			 * If the outgoing interface is already specified,
637			 * it should be a loopback interface.
638			 */
639			if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
640				ip6stat.ip6s_badscope++;
641				error = ENETUNREACH; /* XXX: better error? */
642				/* XXX correct ifp? */
643				in6_ifstat_inc(ifp, ifs6_out_discard);
644				goto bad;
645			} else {
646				ifp = &loif[0];
647			}
648		}
649
650		if (opt && opt->ip6po_hlim != -1)
651			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
652
653		/*
654		 * If caller did not provide an interface lookup a
655		 * default in the routing table.  This is either a
656		 * default for the specified group (i.e. a host
657		 * route), or a multicast default (a route for the
658		 * ``net'' ff00::/8).
659		 */
660		if (ifp == NULL) {
661			if (ro->ro_rt == 0) {
662				ro->ro_rt = rtalloc1((struct sockaddr *)
663						&ro->ro_dst, 0, 0UL);
664			}
665			if (ro->ro_rt == 0) {
666				ip6stat.ip6s_noroute++;
667				error = EHOSTUNREACH;
668				/* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
669				goto bad;
670			}
671			ia = ifatoia6(ro->ro_rt->rt_ifa);
672			ifp = ro->ro_rt->rt_ifp;
673			ro->ro_rt->rt_use++;
674		}
675
676		if ((flags & IPV6_FORWARDING) == 0)
677			in6_ifstat_inc(ifp, ifs6_out_request);
678		in6_ifstat_inc(ifp, ifs6_out_mcast);
679
680		/*
681		 * Confirm that the outgoing interface supports multicast.
682		 */
683		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
684			ip6stat.ip6s_noroute++;
685			in6_ifstat_inc(ifp, ifs6_out_discard);
686			error = ENETUNREACH;
687			goto bad;
688		}
689		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
690		if (in6m != NULL &&
691		   (im6o == NULL || im6o->im6o_multicast_loop)) {
692			/*
693			 * If we belong to the destination multicast group
694			 * on the outgoing interface, and the caller did not
695			 * forbid loopback, loop back a copy.
696			 */
697			ip6_mloopback(ifp, m, dst);
698		} else {
699			/*
700			 * If we are acting as a multicast router, perform
701			 * multicast forwarding as if the packet had just
702			 * arrived on the interface to which we are about
703			 * to send.  The multicast forwarding function
704			 * recursively calls this function, using the
705			 * IPV6_FORWARDING flag to prevent infinite recursion.
706			 *
707			 * Multicasts that are looped back by ip6_mloopback(),
708			 * above, will be forwarded by the ip6_input() routine,
709			 * if necessary.
710			 */
711			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
712				if (ip6_mforward(ip6, ifp, m) != 0) {
713					m_freem(m);
714					goto done;
715				}
716			}
717		}
718		/*
719		 * Multicasts with a hoplimit of zero may be looped back,
720		 * above, but must not be transmitted on a network.
721		 * Also, multicasts addressed to the loopback interface
722		 * are not sent -- the above call to ip6_mloopback() will
723		 * loop back a copy if this host actually belongs to the
724		 * destination group on the loopback interface.
725		 */
726		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK)) {
727			m_freem(m);
728			goto done;
729		}
730	}
731
732	/*
733	 * Fill the outgoing interface to tell the upper layer
734	 * to increment per-interface statistics.
735	 */
736	if (ifpp)
737		*ifpp = ifp;
738
739	/*
740	 * Determine path MTU.
741	 */
742	if (ro_pmtu != ro) {
743		/* The first hop and the final destination may differ. */
744		struct sockaddr_in6 *sin6_fin =
745			(struct sockaddr_in6 *)&ro_pmtu->ro_dst;
746		if (ro_pmtu->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
747				       !IN6_ARE_ADDR_EQUAL(&sin6_fin->sin6_addr,
748							   &finaldst))) {
749			RTFREE(ro_pmtu->ro_rt);
750			ro_pmtu->ro_rt = (struct rtentry *)0;
751		}
752		if (ro_pmtu->ro_rt == 0) {
753			bzero(sin6_fin, sizeof(*sin6_fin));
754			sin6_fin->sin6_family = AF_INET6;
755			sin6_fin->sin6_len = sizeof(struct sockaddr_in6);
756			sin6_fin->sin6_addr = finaldst;
757
758			rtalloc((struct route *)ro_pmtu);
759		}
760	}
761	if (ro_pmtu->ro_rt != NULL) {
762		u_int32_t ifmtu = nd_ifinfo[ifp->if_index].linkmtu;
763
764		mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
765		if (mtu > ifmtu || mtu == 0) {
766			/*
767			 * The MTU on the route is larger than the MTU on
768			 * the interface!  This shouldn't happen, unless the
769			 * MTU of the interface has been changed after the
770			 * interface was brought up.  Change the MTU in the
771			 * route to match the interface MTU (as long as the
772			 * field isn't locked).
773			 *
774			 * if MTU on the route is 0, we need to fix the MTU.
775			 * this case happens with path MTU discovery timeouts.
776			 */
777			 mtu = ifmtu;
778			 if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
779				 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
780		}
781	} else {
782		mtu = nd_ifinfo[ifp->if_index].linkmtu;
783	}
784
785	/*
786	 * advanced API (IPV6_USE_MIN_MTU) overrides mtu setting
787	 */
788	if ((flags & IPV6_MINMTU) != 0 && mtu > IPV6_MMTU)
789		mtu = IPV6_MMTU;
790
791	/* Fake scoped addresses */
792	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
793		/*
794		 * If source or destination address is a scoped address, and
795		 * the packet is going to be sent to a loopback interface,
796		 * we should keep the original interface.
797		 */
798
799		/*
800		 * XXX: this is a very experimental and temporary solution.
801		 * We eventually have sockaddr_in6 and use the sin6_scope_id
802		 * field of the structure here.
803		 * We rely on the consistency between two scope zone ids
804		 * of source and destination, which should already be assured.
805		 * Larger scopes than link will be supported in the near
806		 * future.
807		 */
808		origifp = NULL;
809		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
810			origifp = ifindex2ifnet[ntohs(ip6->ip6_src.s6_addr16[1])];
811		else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
812			origifp = ifindex2ifnet[ntohs(ip6->ip6_dst.s6_addr16[1])];
813		/*
814		 * XXX: origifp can be NULL even in those two cases above.
815		 * For example, if we remove the (only) link-local address
816		 * from the loopback interface, and try to send a link-local
817		 * address without link-id information.  Then the source
818		 * address is ::1, and the destination address is the
819		 * link-local address with its s6_addr16[1] being zero.
820		 * What is worse, if the packet goes to the loopback interface
821		 * by a default rejected route, the null pointer would be
822		 * passed to looutput, and the kernel would hang.
823		 * The following last resort would prevent such disaster.
824		 */
825		if (origifp == NULL)
826			origifp = ifp;
827	}
828	else
829		origifp = ifp;
830#ifndef SCOPEDROUTING
831	/*
832	 * clear embedded scope identifiers if necessary.
833	 * in6_clearscope will touch the addresses only when necessary.
834	 */
835	in6_clearscope(&ip6->ip6_src);
836	in6_clearscope(&ip6->ip6_dst);
837#endif
838
839	/*
840	 * Check with the firewall...
841	 */
842        if (ip6_fw_enable && ip6_fw_chk_ptr) {
843		u_short port = 0;
844		m->m_pkthdr.rcvif = NULL;	/*XXX*/
845		/* If ipfw says divert, we have to just drop packet */
846		if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
847			m_freem(m);
848			goto done;
849		}
850		if (!m) {
851			error = EACCES;
852			goto done;
853		}
854	}
855
856	/*
857	 * If the outgoing packet contains a hop-by-hop options header,
858	 * it must be examined and processed even by the source node.
859	 * (RFC 2460, section 4.)
860	 */
861	if (exthdrs.ip6e_hbh) {
862		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
863		u_int32_t dummy1; /* XXX unused */
864		u_int32_t dummy2; /* XXX unused */
865
866#ifdef DIAGNOSTIC
867		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
868			panic("ip6e_hbh is not continuous");
869#endif
870		/*
871		 *  XXX: if we have to send an ICMPv6 error to the sender,
872		 *       we need the M_LOOP flag since icmp6_error() expects
873		 *       the IPv6 and the hop-by-hop options header are
874		 *       continuous unless the flag is set.
875		 */
876		m->m_flags |= M_LOOP;
877		m->m_pkthdr.rcvif = ifp;
878		if (ip6_process_hopopts(m,
879					(u_int8_t *)(hbh + 1),
880					((hbh->ip6h_len + 1) << 3) -
881					sizeof(struct ip6_hbh),
882					&dummy1, &dummy2) < 0) {
883			/* m was already freed at this point */
884			error = EINVAL;/* better error? */
885			goto done;
886		}
887		m->m_flags &= ~M_LOOP; /* XXX */
888		m->m_pkthdr.rcvif = NULL;
889	}
890
891#ifdef PFIL_HOOKS
892	/*
893	 * Run through list of hooks for output packets.
894	 */
895	m1 = m;
896	pfh = pfil_hook_get(PFIL_OUT, &inet6sw[ip6_protox[IPPROTO_IPV6]].pr_pfh);
897	for (; pfh; pfh = pfh->pfil_link.tqe_next)
898		if (pfh->pfil_func) {
899			rv = pfh->pfil_func(ip6, sizeof(*ip6), ifp, 1, &m1);
900			if (rv) {
901				error = EHOSTUNREACH;
902				goto done;
903			}
904			m = m1;
905			if (m == NULL)
906				goto done;
907			ip6 = mtod(m, struct ip6_hdr *);
908		}
909#endif /* PFIL_HOOKS */
910	/*
911	 * Send the packet to the outgoing interface.
912	 * If necessary, do IPv6 fragmentation before sending.
913	 */
914	tlen = m->m_pkthdr.len;
915	if (tlen <= mtu
916#ifdef notyet
917	    /*
918	     * On any link that cannot convey a 1280-octet packet in one piece,
919	     * link-specific fragmentation and reassembly must be provided at
920	     * a layer below IPv6. [RFC 2460, sec.5]
921	     * Thus if the interface has ability of link-level fragmentation,
922	     * we can just send the packet even if the packet size is
923	     * larger than the link's MTU.
924	     * XXX: IFF_FRAGMENTABLE (or such) flag has not been defined yet...
925	     */
926
927	    || ifp->if_flags & IFF_FRAGMENTABLE
928#endif
929	    )
930	{
931 		/* Record statistics for this interface address. */
932 		if (ia && !(flags & IPV6_FORWARDING)) {
933 			ia->ia_ifa.if_opackets++;
934 			ia->ia_ifa.if_obytes += m->m_pkthdr.len;
935 		}
936#ifdef IPSEC
937		/* clean ipsec history once it goes out of the node */
938		ipsec_delaux(m);
939#endif
940		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
941		goto done;
942	} else if (mtu < IPV6_MMTU) {
943		/*
944		 * note that path MTU is never less than IPV6_MMTU
945		 * (see icmp6_input).
946		 */
947		error = EMSGSIZE;
948		in6_ifstat_inc(ifp, ifs6_out_fragfail);
949		goto bad;
950	} else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */
951		error = EMSGSIZE;
952		in6_ifstat_inc(ifp, ifs6_out_fragfail);
953		goto bad;
954	} else {
955		struct mbuf **mnext, *m_frgpart;
956		struct ip6_frag *ip6f;
957		u_int32_t id = htonl(ip6_id++);
958		u_char nextproto;
959
960		/*
961		 * Too large for the destination or interface;
962		 * fragment if possible.
963		 * Must be able to put at least 8 bytes per fragment.
964		 */
965		hlen = unfragpartlen;
966		if (mtu > IPV6_MAXPACKET)
967			mtu = IPV6_MAXPACKET;
968
969		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
970		if (len < 8) {
971			error = EMSGSIZE;
972			in6_ifstat_inc(ifp, ifs6_out_fragfail);
973			goto bad;
974		}
975
976		mnext = &m->m_nextpkt;
977
978		/*
979		 * Change the next header field of the last header in the
980		 * unfragmentable part.
981		 */
982		if (exthdrs.ip6e_rthdr) {
983			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
984			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
985		} else if (exthdrs.ip6e_dest1) {
986			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
987			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
988		} else if (exthdrs.ip6e_hbh) {
989			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
990			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
991		} else {
992			nextproto = ip6->ip6_nxt;
993			ip6->ip6_nxt = IPPROTO_FRAGMENT;
994		}
995
996		/*
997		 * Loop through length of segment after first fragment,
998		 * make new header and copy data of each part and link onto chain.
999		 */
1000		m0 = m;
1001		for (off = hlen; off < tlen; off += len) {
1002			MGETHDR(m, M_DONTWAIT, MT_HEADER);
1003			if (!m) {
1004				error = ENOBUFS;
1005				ip6stat.ip6s_odropped++;
1006				goto sendorfree;
1007			}
1008			m->m_pkthdr.rcvif = NULL;
1009			m->m_flags = m0->m_flags & M_COPYFLAGS;
1010			*mnext = m;
1011			mnext = &m->m_nextpkt;
1012			m->m_data += max_linkhdr;
1013			mhip6 = mtod(m, struct ip6_hdr *);
1014			*mhip6 = *ip6;
1015			m->m_len = sizeof(*mhip6);
1016 			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1017 			if (error) {
1018				ip6stat.ip6s_odropped++;
1019				goto sendorfree;
1020			}
1021			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1022			if (off + len >= tlen)
1023				len = tlen - off;
1024			else
1025				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1026			mhip6->ip6_plen = htons((u_short)(len + hlen +
1027							  sizeof(*ip6f) -
1028							  sizeof(struct ip6_hdr)));
1029			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1030				error = ENOBUFS;
1031				ip6stat.ip6s_odropped++;
1032				goto sendorfree;
1033			}
1034			m_cat(m, m_frgpart);
1035			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1036			m->m_pkthdr.rcvif = (struct ifnet *)0;
1037			ip6f->ip6f_reserved = 0;
1038			ip6f->ip6f_ident = id;
1039			ip6f->ip6f_nxt = nextproto;
1040			ip6stat.ip6s_ofragments++;
1041			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1042		}
1043
1044		in6_ifstat_inc(ifp, ifs6_out_fragok);
1045	}
1046
1047	/*
1048	 * Remove leading garbages.
1049	 */
1050sendorfree:
1051	m = m0->m_nextpkt;
1052	m0->m_nextpkt = 0;
1053	m_freem(m0);
1054	for (m0 = m; m; m = m0) {
1055		m0 = m->m_nextpkt;
1056		m->m_nextpkt = 0;
1057		if (error == 0) {
1058 			/* Record statistics for this interface address. */
1059 			if (ia) {
1060 				ia->ia_ifa.if_opackets++;
1061 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1062 			}
1063#ifdef IPSEC
1064			/* clean ipsec history once it goes out of the node */
1065			ipsec_delaux(m);
1066#endif
1067			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1068		} else
1069			m_freem(m);
1070	}
1071
1072	if (error == 0)
1073		ip6stat.ip6s_fragmented++;
1074
1075done:
1076	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1077		RTFREE(ro->ro_rt);
1078	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1079		RTFREE(ro_pmtu->ro_rt);
1080	}
1081
1082#ifdef IPSEC
1083	if (sp != NULL)
1084		key_freesp(sp);
1085#endif /* IPSEC */
1086
1087	return(error);
1088
1089freehdrs:
1090	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1091	m_freem(exthdrs.ip6e_dest1);
1092	m_freem(exthdrs.ip6e_rthdr);
1093	m_freem(exthdrs.ip6e_dest2);
1094	/* fall through */
1095bad:
1096	m_freem(m);
1097	goto done;
1098}
1099
1100static int
1101ip6_copyexthdr(mp, hdr, hlen)
1102	struct mbuf **mp;
1103	caddr_t hdr;
1104	int hlen;
1105{
1106	struct mbuf *m;
1107
1108	if (hlen > MCLBYTES)
1109		return(ENOBUFS); /* XXX */
1110
1111	MGET(m, M_DONTWAIT, MT_DATA);
1112	if (!m)
1113		return(ENOBUFS);
1114
1115	if (hlen > MLEN) {
1116		MCLGET(m, M_DONTWAIT);
1117		if ((m->m_flags & M_EXT) == 0) {
1118			m_free(m);
1119			return(ENOBUFS);
1120		}
1121	}
1122	m->m_len = hlen;
1123	if (hdr)
1124		bcopy(hdr, mtod(m, caddr_t), hlen);
1125
1126	*mp = m;
1127	return(0);
1128}
1129
1130/*
1131 * Insert jumbo payload option.
1132 */
1133static int
1134ip6_insert_jumboopt(exthdrs, plen)
1135	struct ip6_exthdrs *exthdrs;
1136	u_int32_t plen;
1137{
1138	struct mbuf *mopt;
1139	u_char *optbuf;
1140	u_int32_t v;
1141
1142#define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1143
1144	/*
1145	 * If there is no hop-by-hop options header, allocate new one.
1146	 * If there is one but it doesn't have enough space to store the
1147	 * jumbo payload option, allocate a cluster to store the whole options.
1148	 * Otherwise, use it to store the options.
1149	 */
1150	if (exthdrs->ip6e_hbh == 0) {
1151		MGET(mopt, M_DONTWAIT, MT_DATA);
1152		if (mopt == 0)
1153			return(ENOBUFS);
1154		mopt->m_len = JUMBOOPTLEN;
1155		optbuf = mtod(mopt, u_char *);
1156		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1157		exthdrs->ip6e_hbh = mopt;
1158	} else {
1159		struct ip6_hbh *hbh;
1160
1161		mopt = exthdrs->ip6e_hbh;
1162		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1163			/*
1164			 * XXX assumption:
1165			 * - exthdrs->ip6e_hbh is not referenced from places
1166			 *   other than exthdrs.
1167			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1168			 */
1169			int oldoptlen = mopt->m_len;
1170			struct mbuf *n;
1171
1172			/*
1173			 * XXX: give up if the whole (new) hbh header does
1174			 * not fit even in an mbuf cluster.
1175			 */
1176			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1177				return(ENOBUFS);
1178
1179			/*
1180			 * As a consequence, we must always prepare a cluster
1181			 * at this point.
1182			 */
1183			MGET(n, M_DONTWAIT, MT_DATA);
1184			if (n) {
1185				MCLGET(n, M_DONTWAIT);
1186				if ((n->m_flags & M_EXT) == 0) {
1187					m_freem(n);
1188					n = NULL;
1189				}
1190			}
1191			if (!n)
1192				return(ENOBUFS);
1193			n->m_len = oldoptlen + JUMBOOPTLEN;
1194			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1195			      oldoptlen);
1196			optbuf = mtod(n, caddr_t) + oldoptlen;
1197			m_freem(mopt);
1198			mopt = exthdrs->ip6e_hbh = n;
1199		} else {
1200			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1201			mopt->m_len += JUMBOOPTLEN;
1202		}
1203		optbuf[0] = IP6OPT_PADN;
1204		optbuf[1] = 1;
1205
1206		/*
1207		 * Adjust the header length according to the pad and
1208		 * the jumbo payload option.
1209		 */
1210		hbh = mtod(mopt, struct ip6_hbh *);
1211		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1212	}
1213
1214	/* fill in the option. */
1215	optbuf[2] = IP6OPT_JUMBO;
1216	optbuf[3] = 4;
1217	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1218	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1219
1220	/* finally, adjust the packet header length */
1221	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1222
1223	return(0);
1224#undef JUMBOOPTLEN
1225}
1226
1227/*
1228 * Insert fragment header and copy unfragmentable header portions.
1229 */
1230static int
1231ip6_insertfraghdr(m0, m, hlen, frghdrp)
1232	struct mbuf *m0, *m;
1233	int hlen;
1234	struct ip6_frag **frghdrp;
1235{
1236	struct mbuf *n, *mlast;
1237
1238	if (hlen > sizeof(struct ip6_hdr)) {
1239		n = m_copym(m0, sizeof(struct ip6_hdr),
1240			    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1241		if (n == 0)
1242			return(ENOBUFS);
1243		m->m_next = n;
1244	} else
1245		n = m;
1246
1247	/* Search for the last mbuf of unfragmentable part. */
1248	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1249		;
1250
1251	if ((mlast->m_flags & M_EXT) == 0 &&
1252	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1253		/* use the trailing space of the last mbuf for the fragment hdr */
1254		*frghdrp =
1255			(struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len);
1256		mlast->m_len += sizeof(struct ip6_frag);
1257		m->m_pkthdr.len += sizeof(struct ip6_frag);
1258	} else {
1259		/* allocate a new mbuf for the fragment header */
1260		struct mbuf *mfrg;
1261
1262		MGET(mfrg, M_DONTWAIT, MT_DATA);
1263		if (mfrg == 0)
1264			return(ENOBUFS);
1265		mfrg->m_len = sizeof(struct ip6_frag);
1266		*frghdrp = mtod(mfrg, struct ip6_frag *);
1267		mlast->m_next = mfrg;
1268	}
1269
1270	return(0);
1271}
1272
1273/*
1274 * IP6 socket option processing.
1275 */
1276int
1277ip6_ctloutput(so, sopt)
1278	struct socket *so;
1279	struct sockopt *sopt;
1280{
1281	int privileged;
1282	struct inpcb *in6p = sotoinpcb(so);
1283	int error, optval;
1284	int level, op, optname;
1285	int optlen;
1286	struct proc *p;
1287
1288	if (sopt) {
1289		level = sopt->sopt_level;
1290		op = sopt->sopt_dir;
1291		optname = sopt->sopt_name;
1292		optlen = sopt->sopt_valsize;
1293		p = sopt->sopt_p;
1294	} else {
1295		panic("ip6_ctloutput: arg soopt is NULL");
1296	}
1297	error = optval = 0;
1298
1299	privileged = (p == 0 || suser(p)) ? 0 : 1;
1300
1301	if (level == IPPROTO_IPV6) {
1302		switch (op) {
1303
1304		case SOPT_SET:
1305			switch (optname) {
1306			case IPV6_PKTOPTIONS:
1307			{
1308				struct mbuf *m;
1309
1310				error = soopt_getm(sopt, &m); /* XXX */
1311				if (error != NULL)
1312					break;
1313				error = soopt_mcopyin(sopt, m); /* XXX */
1314				if (error != NULL)
1315					break;
1316				error = ip6_pcbopts(&in6p->in6p_outputopts,
1317						    m, so, sopt);
1318				m_freem(m); /* XXX */
1319				break;
1320			}
1321
1322			/*
1323			 * Use of some Hop-by-Hop options or some
1324			 * Destination options, might require special
1325			 * privilege.  That is, normal applications
1326			 * (without special privilege) might be forbidden
1327			 * from setting certain options in outgoing packets,
1328			 * and might never see certain options in received
1329			 * packets. [RFC 2292 Section 6]
1330			 * KAME specific note:
1331			 *  KAME prevents non-privileged users from sending or
1332			 *  receiving ANY hbh/dst options in order to avoid
1333			 *  overhead of parsing options in the kernel.
1334			 */
1335			case IPV6_UNICAST_HOPS:
1336			case IPV6_CHECKSUM:
1337			case IPV6_FAITH:
1338
1339			case IPV6_V6ONLY:
1340				if (optlen != sizeof(int)) {
1341					error = EINVAL;
1342					break;
1343				}
1344				error = sooptcopyin(sopt, &optval,
1345					sizeof optval, sizeof optval);
1346				if (error)
1347					break;
1348				switch (optname) {
1349
1350				case IPV6_UNICAST_HOPS:
1351					if (optval < -1 || optval >= 256)
1352						error = EINVAL;
1353					else {
1354						/* -1 = kernel default */
1355						in6p->in6p_hops = optval;
1356
1357						if ((in6p->in6p_vflag &
1358						     INP_IPV4) != 0)
1359							in6p->inp_ip_ttl = optval;
1360					}
1361					break;
1362#define OPTSET(bit) \
1363do { \
1364	if (optval) \
1365		in6p->in6p_flags |= (bit); \
1366	else \
1367		in6p->in6p_flags &= ~(bit); \
1368} while (0)
1369#define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1370
1371				case IPV6_CHECKSUM:
1372					in6p->in6p_cksum = optval;
1373					break;
1374
1375				case IPV6_FAITH:
1376					OPTSET(IN6P_FAITH);
1377					break;
1378
1379				case IPV6_V6ONLY:
1380					/*
1381					 * XXX: BINDV6ONLY should be integrated
1382					 * into V6ONLY.
1383					 */
1384					OPTSET(IN6P_BINDV6ONLY);
1385					OPTSET(IN6P_IPV6_V6ONLY);
1386					break;
1387				}
1388				break;
1389
1390			case IPV6_PKTINFO:
1391			case IPV6_HOPLIMIT:
1392			case IPV6_HOPOPTS:
1393			case IPV6_DSTOPTS:
1394			case IPV6_RTHDR:
1395				/* RFC 2292 */
1396				if (optlen != sizeof(int)) {
1397					error = EINVAL;
1398					break;
1399				}
1400				error = sooptcopyin(sopt, &optval,
1401					sizeof optval, sizeof optval);
1402				if (error)
1403					break;
1404				switch (optname) {
1405				case IPV6_PKTINFO:
1406					OPTSET(IN6P_PKTINFO);
1407					break;
1408				case IPV6_HOPLIMIT:
1409					OPTSET(IN6P_HOPLIMIT);
1410					break;
1411				case IPV6_HOPOPTS:
1412					/*
1413					 * Check super-user privilege.
1414					 * See comments for IPV6_RECVHOPOPTS.
1415					 */
1416					if (!privileged)
1417						return(EPERM);
1418					OPTSET(IN6P_HOPOPTS);
1419					break;
1420				case IPV6_DSTOPTS:
1421					if (!privileged)
1422						return(EPERM);
1423					OPTSET(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1424					break;
1425				case IPV6_RTHDR:
1426					OPTSET(IN6P_RTHDR);
1427					break;
1428				}
1429				break;
1430#undef OPTSET
1431
1432			case IPV6_MULTICAST_IF:
1433			case IPV6_MULTICAST_HOPS:
1434			case IPV6_MULTICAST_LOOP:
1435			case IPV6_JOIN_GROUP:
1436			case IPV6_LEAVE_GROUP:
1437			    {
1438				struct mbuf *m;
1439				if (sopt->sopt_valsize > MLEN) {
1440					error = EMSGSIZE;
1441					break;
1442				}
1443				/* XXX */
1444				MGET(m, sopt->sopt_p ? M_TRYWAIT : M_DONTWAIT, MT_HEADER);
1445				if (m == 0) {
1446					error = ENOBUFS;
1447					break;
1448				}
1449				m->m_len = sopt->sopt_valsize;
1450				error = sooptcopyin(sopt, mtod(m, char *),
1451						    m->m_len, m->m_len);
1452				error =	ip6_setmoptions(sopt->sopt_name,
1453							&in6p->in6p_moptions,
1454							m);
1455				(void)m_free(m);
1456			    }
1457				break;
1458
1459			case IPV6_PORTRANGE:
1460				error = sooptcopyin(sopt, &optval,
1461				    sizeof optval, sizeof optval);
1462				if (error)
1463					break;
1464
1465				switch (optval) {
1466				case IPV6_PORTRANGE_DEFAULT:
1467					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1468					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1469					break;
1470
1471				case IPV6_PORTRANGE_HIGH:
1472					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1473					in6p->in6p_flags |= IN6P_HIGHPORT;
1474					break;
1475
1476				case IPV6_PORTRANGE_LOW:
1477					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1478					in6p->in6p_flags |= IN6P_LOWPORT;
1479					break;
1480
1481				default:
1482					error = EINVAL;
1483					break;
1484				}
1485				break;
1486
1487#ifdef IPSEC
1488			case IPV6_IPSEC_POLICY:
1489			    {
1490				caddr_t req = NULL;
1491				size_t len = 0;
1492				struct mbuf *m;
1493
1494				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1495					break;
1496				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1497					break;
1498				if (m) {
1499					req = mtod(m, caddr_t);
1500					len = m->m_len;
1501				}
1502				error = ipsec6_set_policy(in6p, optname, req,
1503				                          len, privileged);
1504				m_freem(m);
1505			    }
1506				break;
1507#endif /* KAME IPSEC */
1508
1509			case IPV6_FW_ADD:
1510			case IPV6_FW_DEL:
1511			case IPV6_FW_FLUSH:
1512			case IPV6_FW_ZERO:
1513			    {
1514				struct mbuf *m;
1515				struct mbuf **mp = &m;
1516
1517				if (ip6_fw_ctl_ptr == NULL)
1518					return EINVAL;
1519				if (error = soopt_getm(sopt, &m)) /* XXX */
1520					break;
1521				if (error = soopt_mcopyin(sopt, m)) /* XXX */
1522					break;
1523				error = (*ip6_fw_ctl_ptr)(optname, mp);
1524				m = *mp;
1525			    }
1526				break;
1527
1528			default:
1529				error = ENOPROTOOPT;
1530				break;
1531			}
1532			break;
1533
1534		case SOPT_GET:
1535			switch (optname) {
1536
1537			case IPV6_PKTOPTIONS:
1538				if (in6p->in6p_options) {
1539					error = soopt_mcopyout(sopt,
1540							       in6p->in6p_options);
1541				} else
1542					sopt->sopt_valsize = 0;
1543				break;
1544
1545			case IPV6_UNICAST_HOPS:
1546			case IPV6_CHECKSUM:
1547
1548			case IPV6_FAITH:
1549			case IPV6_V6ONLY:
1550			case IPV6_PORTRANGE:
1551				switch (optname) {
1552
1553				case IPV6_UNICAST_HOPS:
1554					optval = in6p->in6p_hops;
1555					break;
1556
1557				case IPV6_CHECKSUM:
1558					optval = in6p->in6p_cksum;
1559					break;
1560
1561				case IPV6_FAITH:
1562					optval = OPTBIT(IN6P_FAITH);
1563					break;
1564
1565				case IPV6_V6ONLY:
1566					/* XXX: see the setopt case. */
1567					optval = OPTBIT(IN6P_BINDV6ONLY);
1568					break;
1569
1570				case IPV6_PORTRANGE:
1571				    {
1572					int flags;
1573					flags = in6p->in6p_flags;
1574					if (flags & IN6P_HIGHPORT)
1575						optval = IPV6_PORTRANGE_HIGH;
1576					else if (flags & IN6P_LOWPORT)
1577						optval = IPV6_PORTRANGE_LOW;
1578					else
1579						optval = 0;
1580					break;
1581				    }
1582				}
1583				error = sooptcopyout(sopt, &optval,
1584					sizeof optval);
1585				break;
1586
1587			case IPV6_PKTINFO:
1588			case IPV6_HOPLIMIT:
1589			case IPV6_HOPOPTS:
1590			case IPV6_RTHDR:
1591			case IPV6_DSTOPTS:
1592				if (optname == IPV6_HOPOPTS ||
1593				    optname == IPV6_DSTOPTS ||
1594				    !privileged)
1595					return(EPERM);
1596				switch (optname) {
1597				case IPV6_PKTINFO:
1598					optval = OPTBIT(IN6P_PKTINFO);
1599					break;
1600				case IPV6_HOPLIMIT:
1601					optval = OPTBIT(IN6P_HOPLIMIT);
1602					break;
1603				case IPV6_HOPOPTS:
1604					if (!privileged)
1605						return(EPERM);
1606					optval = OPTBIT(IN6P_HOPOPTS);
1607					break;
1608				case IPV6_RTHDR:
1609					optval = OPTBIT(IN6P_RTHDR);
1610					break;
1611				case IPV6_DSTOPTS:
1612					if (!privileged)
1613						return(EPERM);
1614					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
1615					break;
1616				}
1617				error = sooptcopyout(sopt, &optval,
1618					sizeof optval);
1619				break;
1620
1621			case IPV6_MULTICAST_IF:
1622			case IPV6_MULTICAST_HOPS:
1623			case IPV6_MULTICAST_LOOP:
1624			case IPV6_JOIN_GROUP:
1625			case IPV6_LEAVE_GROUP:
1626			    {
1627				struct mbuf *m;
1628				error = ip6_getmoptions(sopt->sopt_name,
1629						in6p->in6p_moptions, &m);
1630				if (error == 0)
1631					error = sooptcopyout(sopt,
1632						mtod(m, char *), m->m_len);
1633				m_freem(m);
1634			    }
1635				break;
1636
1637#ifdef IPSEC
1638			case IPV6_IPSEC_POLICY:
1639			  {
1640				caddr_t req = NULL;
1641				size_t len = 0;
1642				struct mbuf *m = NULL;
1643				struct mbuf **mp = &m;
1644
1645				error = soopt_getm(sopt, &m); /* XXX */
1646				if (error != NULL)
1647					break;
1648				error = soopt_mcopyin(sopt, m); /* XXX */
1649				if (error != NULL)
1650					break;
1651				if (m) {
1652					req = mtod(m, caddr_t);
1653					len = m->m_len;
1654				}
1655				error = ipsec6_get_policy(in6p, req, len, mp);
1656				if (error == 0)
1657					error = soopt_mcopyout(sopt, m); /*XXX*/
1658				if (error == 0 && m)
1659					m_freem(m);
1660				break;
1661			  }
1662#endif /* KAME IPSEC */
1663
1664			case IPV6_FW_GET:
1665			  {
1666				struct mbuf *m;
1667				struct mbuf **mp = &m;
1668
1669				if (ip6_fw_ctl_ptr == NULL)
1670			        {
1671					if (m)
1672						(void)m_free(m);
1673					return EINVAL;
1674				}
1675				error = (*ip6_fw_ctl_ptr)(optname, mp);
1676				if (error == 0)
1677					error = soopt_mcopyout(sopt, m); /* XXX */
1678				if (error == 0 && m)
1679					m_freem(m);
1680			  }
1681				break;
1682
1683			default:
1684				error = ENOPROTOOPT;
1685				break;
1686			}
1687			break;
1688		}
1689	} else {
1690		error = EINVAL;
1691	}
1692	return(error);
1693}
1694
1695/*
1696 * Set up IP6 options in pcb for insertion in output packets or
1697 * specifying behavior of outgoing packets.
1698 */
1699static int
1700ip6_pcbopts(pktopt, m, so, sopt)
1701	struct ip6_pktopts **pktopt;
1702	struct mbuf *m;
1703	struct socket *so;
1704	struct sockopt *sopt;
1705{
1706	struct ip6_pktopts *opt = *pktopt;
1707	int error = 0;
1708	struct proc *p = sopt->sopt_p;
1709	int priv = 0;
1710
1711	/* turn off any old options. */
1712	if (opt) {
1713#ifdef DIAGNOSTIC
1714		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
1715		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
1716		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
1717			printf("ip6_pcbopts: all specified options are cleared.\n");
1718#endif
1719		ip6_clearpktopts(opt, 1, -1);
1720	} else
1721		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
1722	*pktopt = NULL;
1723
1724	if (!m || m->m_len == 0) {
1725		/*
1726		 * Only turning off any previous options.
1727		 */
1728		if (opt)
1729			free(opt, M_IP6OPT);
1730		return(0);
1731	}
1732
1733	/*  set options specified by user. */
1734	if (p && !suser(p))
1735		priv = 1;
1736	if ((error = ip6_setpktoptions(m, opt, priv, 1)) != 0) {
1737		ip6_clearpktopts(opt, 1, -1); /* XXX: discard all options */
1738		return(error);
1739	}
1740	*pktopt = opt;
1741	return(0);
1742}
1743
1744/*
1745 * initialize ip6_pktopts.  beware that there are non-zero default values in
1746 * the struct.
1747 */
1748void
1749init_ip6pktopts(opt)
1750	struct ip6_pktopts *opt;
1751{
1752
1753	bzero(opt, sizeof(*opt));
1754	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
1755}
1756
1757void
1758ip6_clearpktopts(pktopt, needfree, optname)
1759	struct ip6_pktopts *pktopt;
1760	int needfree, optname;
1761{
1762	if (pktopt == NULL)
1763		return;
1764
1765	if (optname == -1) {
1766		if (needfree && pktopt->ip6po_pktinfo)
1767			free(pktopt->ip6po_pktinfo, M_IP6OPT);
1768		pktopt->ip6po_pktinfo = NULL;
1769	}
1770	if (optname == -1)
1771		pktopt->ip6po_hlim = -1;
1772	if (optname == -1) {
1773		if (needfree && pktopt->ip6po_nexthop)
1774			free(pktopt->ip6po_nexthop, M_IP6OPT);
1775		pktopt->ip6po_nexthop = NULL;
1776	}
1777	if (optname == -1) {
1778		if (needfree && pktopt->ip6po_hbh)
1779			free(pktopt->ip6po_hbh, M_IP6OPT);
1780		pktopt->ip6po_hbh = NULL;
1781	}
1782	if (optname == -1) {
1783		if (needfree && pktopt->ip6po_dest1)
1784			free(pktopt->ip6po_dest1, M_IP6OPT);
1785		pktopt->ip6po_dest1 = NULL;
1786	}
1787	if (optname == -1) {
1788		if (needfree && pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
1789			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
1790		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
1791		if (pktopt->ip6po_route.ro_rt) {
1792			RTFREE(pktopt->ip6po_route.ro_rt);
1793			pktopt->ip6po_route.ro_rt = NULL;
1794		}
1795	}
1796	if (optname == -1) {
1797		if (needfree && pktopt->ip6po_dest2)
1798			free(pktopt->ip6po_dest2, M_IP6OPT);
1799		pktopt->ip6po_dest2 = NULL;
1800	}
1801}
1802
1803#define PKTOPT_EXTHDRCPY(type) \
1804do {\
1805	if (src->type) {\
1806		int hlen =\
1807			(((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
1808		dst->type = malloc(hlen, M_IP6OPT, canwait);\
1809		if (dst->type == NULL && canwait == M_NOWAIT)\
1810			goto bad;\
1811		bcopy(src->type, dst->type, hlen);\
1812	}\
1813} while (0)
1814
1815struct ip6_pktopts *
1816ip6_copypktopts(src, canwait)
1817	struct ip6_pktopts *src;
1818	int canwait;
1819{
1820	struct ip6_pktopts *dst;
1821
1822	if (src == NULL) {
1823		printf("ip6_clearpktopts: invalid argument\n");
1824		return(NULL);
1825	}
1826
1827	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
1828	if (dst == NULL && canwait == M_NOWAIT)
1829		goto bad;
1830	bzero(dst, sizeof(*dst));
1831
1832	dst->ip6po_hlim = src->ip6po_hlim;
1833	if (src->ip6po_pktinfo) {
1834		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
1835					    M_IP6OPT, canwait);
1836		if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
1837			goto bad;
1838		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
1839	}
1840	if (src->ip6po_nexthop) {
1841		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
1842					    M_IP6OPT, canwait);
1843		if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
1844			goto bad;
1845		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
1846		      src->ip6po_nexthop->sa_len);
1847	}
1848	PKTOPT_EXTHDRCPY(ip6po_hbh);
1849	PKTOPT_EXTHDRCPY(ip6po_dest1);
1850	PKTOPT_EXTHDRCPY(ip6po_dest2);
1851	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
1852	return(dst);
1853
1854  bad:
1855	printf("ip6_copypktopts: copy failed");
1856	if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
1857	if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
1858	if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
1859	if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
1860	if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
1861	if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
1862	return(NULL);
1863}
1864#undef PKTOPT_EXTHDRCPY
1865
1866void
1867ip6_freepcbopts(pktopt)
1868	struct ip6_pktopts *pktopt;
1869{
1870	if (pktopt == NULL)
1871		return;
1872
1873	ip6_clearpktopts(pktopt, 1, -1);
1874
1875	free(pktopt, M_IP6OPT);
1876}
1877
1878/*
1879 * Set the IP6 multicast options in response to user setsockopt().
1880 */
1881static int
1882ip6_setmoptions(optname, im6op, m)
1883	int optname;
1884	struct ip6_moptions **im6op;
1885	struct mbuf *m;
1886{
1887	int error = 0;
1888	u_int loop, ifindex;
1889	struct ipv6_mreq *mreq;
1890	struct ifnet *ifp;
1891	struct ip6_moptions *im6o = *im6op;
1892	struct route_in6 ro;
1893	struct sockaddr_in6 *dst;
1894	struct in6_multi_mship *imm;
1895	struct proc *p = curproc;	/* XXX */
1896
1897	if (im6o == NULL) {
1898		/*
1899		 * No multicast option buffer attached to the pcb;
1900		 * allocate one and initialize to default values.
1901		 */
1902		im6o = (struct ip6_moptions *)
1903			malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
1904
1905		if (im6o == NULL)
1906			return(ENOBUFS);
1907		*im6op = im6o;
1908		im6o->im6o_multicast_ifp = NULL;
1909		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
1910		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
1911		LIST_INIT(&im6o->im6o_memberships);
1912	}
1913
1914	switch (optname) {
1915
1916	case IPV6_MULTICAST_IF:
1917		/*
1918		 * Select the interface for outgoing multicast packets.
1919		 */
1920		if (m == NULL || m->m_len != sizeof(u_int)) {
1921			error = EINVAL;
1922			break;
1923		}
1924		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
1925		if (ifindex < 0 || if_index < ifindex) {
1926			error = ENXIO;	/* XXX EINVAL? */
1927			break;
1928		}
1929		ifp = ifindex2ifnet[ifindex];
1930		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1931			error = EADDRNOTAVAIL;
1932			break;
1933		}
1934		im6o->im6o_multicast_ifp = ifp;
1935		break;
1936
1937	case IPV6_MULTICAST_HOPS:
1938	    {
1939		/*
1940		 * Set the IP6 hoplimit for outgoing multicast packets.
1941		 */
1942		int optval;
1943		if (m == NULL || m->m_len != sizeof(int)) {
1944			error = EINVAL;
1945			break;
1946		}
1947		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
1948		if (optval < -1 || optval >= 256)
1949			error = EINVAL;
1950		else if (optval == -1)
1951			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
1952		else
1953			im6o->im6o_multicast_hlim = optval;
1954		break;
1955	    }
1956
1957	case IPV6_MULTICAST_LOOP:
1958		/*
1959		 * Set the loopback flag for outgoing multicast packets.
1960		 * Must be zero or one.
1961		 */
1962		if (m == NULL || m->m_len != sizeof(u_int)) {
1963			error = EINVAL;
1964			break;
1965		}
1966		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
1967		if (loop > 1) {
1968			error = EINVAL;
1969			break;
1970		}
1971		im6o->im6o_multicast_loop = loop;
1972		break;
1973
1974	case IPV6_JOIN_GROUP:
1975		/*
1976		 * Add a multicast group membership.
1977		 * Group must be a valid IP6 multicast address.
1978		 */
1979		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
1980			error = EINVAL;
1981			break;
1982		}
1983		mreq = mtod(m, struct ipv6_mreq *);
1984		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
1985			/*
1986			 * We use the unspecified address to specify to accept
1987			 * all multicast addresses. Only super user is allowed
1988			 * to do this.
1989			 */
1990			if (suser(p))
1991			{
1992				error = EACCES;
1993				break;
1994			}
1995		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
1996			error = EINVAL;
1997			break;
1998		}
1999
2000		/*
2001		 * If the interface is specified, validate it.
2002		 */
2003		if (mreq->ipv6mr_interface < 0
2004		 || if_index < mreq->ipv6mr_interface) {
2005			error = ENXIO;	/* XXX EINVAL? */
2006			break;
2007		}
2008		/*
2009		 * If no interface was explicitly specified, choose an
2010		 * appropriate one according to the given multicast address.
2011		 */
2012		if (mreq->ipv6mr_interface == 0) {
2013			/*
2014			 * If the multicast address is in node-local scope,
2015			 * the interface should be a loopback interface.
2016			 * Otherwise, look up the routing table for the
2017			 * address, and choose the outgoing interface.
2018			 *   XXX: is it a good approach?
2019			 */
2020			if (IN6_IS_ADDR_MC_NODELOCAL(&mreq->ipv6mr_multiaddr)) {
2021				ifp = &loif[0];
2022			} else {
2023				ro.ro_rt = NULL;
2024				dst = (struct sockaddr_in6 *)&ro.ro_dst;
2025				bzero(dst, sizeof(*dst));
2026				dst->sin6_len = sizeof(struct sockaddr_in6);
2027				dst->sin6_family = AF_INET6;
2028				dst->sin6_addr = mreq->ipv6mr_multiaddr;
2029				rtalloc((struct route *)&ro);
2030				if (ro.ro_rt == NULL) {
2031					error = EADDRNOTAVAIL;
2032					break;
2033				}
2034				ifp = ro.ro_rt->rt_ifp;
2035				rtfree(ro.ro_rt);
2036			}
2037		} else
2038			ifp = ifindex2ifnet[mreq->ipv6mr_interface];
2039
2040		/*
2041		 * See if we found an interface, and confirm that it
2042		 * supports multicast
2043		 */
2044		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2045			error = EADDRNOTAVAIL;
2046			break;
2047		}
2048		/*
2049		 * Put interface index into the multicast address,
2050		 * if the address has link-local scope.
2051		 */
2052		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2053			mreq->ipv6mr_multiaddr.s6_addr16[1]
2054				= htons(mreq->ipv6mr_interface);
2055		}
2056		/*
2057		 * See if the membership already exists.
2058		 */
2059		for (imm = im6o->im6o_memberships.lh_first;
2060		     imm != NULL; imm = imm->i6mm_chain.le_next)
2061			if (imm->i6mm_maddr->in6m_ifp == ifp &&
2062			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2063					       &mreq->ipv6mr_multiaddr))
2064				break;
2065		if (imm != NULL) {
2066			error = EADDRINUSE;
2067			break;
2068		}
2069		/*
2070		 * Everything looks good; add a new record to the multicast
2071		 * address list for the given interface.
2072		 */
2073		imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK);
2074		if (imm == NULL) {
2075			error = ENOBUFS;
2076			break;
2077		}
2078		if ((imm->i6mm_maddr =
2079		     in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
2080			free(imm, M_IPMADDR);
2081			break;
2082		}
2083		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2084		break;
2085
2086	case IPV6_LEAVE_GROUP:
2087		/*
2088		 * Drop a multicast group membership.
2089		 * Group must be a valid IP6 multicast address.
2090		 */
2091		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2092			error = EINVAL;
2093			break;
2094		}
2095		mreq = mtod(m, struct ipv6_mreq *);
2096		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2097			if (suser(p)) {
2098				error = EACCES;
2099				break;
2100			}
2101		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2102			error = EINVAL;
2103			break;
2104		}
2105		/*
2106		 * If an interface address was specified, get a pointer
2107		 * to its ifnet structure.
2108		 */
2109		if (mreq->ipv6mr_interface < 0
2110		 || if_index < mreq->ipv6mr_interface) {
2111			error = ENXIO;	/* XXX EINVAL? */
2112			break;
2113		}
2114		ifp = ifindex2ifnet[mreq->ipv6mr_interface];
2115		/*
2116		 * Put interface index into the multicast address,
2117		 * if the address has link-local scope.
2118		 */
2119		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2120			mreq->ipv6mr_multiaddr.s6_addr16[1]
2121				= htons(mreq->ipv6mr_interface);
2122		}
2123		/*
2124		 * Find the membership in the membership list.
2125		 */
2126		for (imm = im6o->im6o_memberships.lh_first;
2127		     imm != NULL; imm = imm->i6mm_chain.le_next) {
2128			if ((ifp == NULL ||
2129			     imm->i6mm_maddr->in6m_ifp == ifp) &&
2130			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2131					       &mreq->ipv6mr_multiaddr))
2132				break;
2133		}
2134		if (imm == NULL) {
2135			/* Unable to resolve interface */
2136			error = EADDRNOTAVAIL;
2137			break;
2138		}
2139		/*
2140		 * Give up the multicast address record to which the
2141		 * membership points.
2142		 */
2143		LIST_REMOVE(imm, i6mm_chain);
2144		in6_delmulti(imm->i6mm_maddr);
2145		free(imm, M_IPMADDR);
2146		break;
2147
2148	default:
2149		error = EOPNOTSUPP;
2150		break;
2151	}
2152
2153	/*
2154	 * If all options have default values, no need to keep the mbuf.
2155	 */
2156	if (im6o->im6o_multicast_ifp == NULL &&
2157	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2158	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2159	    im6o->im6o_memberships.lh_first == NULL) {
2160		free(*im6op, M_IPMOPTS);
2161		*im6op = NULL;
2162	}
2163
2164	return(error);
2165}
2166
2167/*
2168 * Return the IP6 multicast options in response to user getsockopt().
2169 */
2170static int
2171ip6_getmoptions(optname, im6o, mp)
2172	int optname;
2173	struct ip6_moptions *im6o;
2174	struct mbuf **mp;
2175{
2176	u_int *hlim, *loop, *ifindex;
2177
2178	*mp = m_get(M_TRYWAIT, MT_HEADER);		/*XXX*/
2179
2180	switch (optname) {
2181
2182	case IPV6_MULTICAST_IF:
2183		ifindex = mtod(*mp, u_int *);
2184		(*mp)->m_len = sizeof(u_int);
2185		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2186			*ifindex = 0;
2187		else
2188			*ifindex = im6o->im6o_multicast_ifp->if_index;
2189		return(0);
2190
2191	case IPV6_MULTICAST_HOPS:
2192		hlim = mtod(*mp, u_int *);
2193		(*mp)->m_len = sizeof(u_int);
2194		if (im6o == NULL)
2195			*hlim = ip6_defmcasthlim;
2196		else
2197			*hlim = im6o->im6o_multicast_hlim;
2198		return(0);
2199
2200	case IPV6_MULTICAST_LOOP:
2201		loop = mtod(*mp, u_int *);
2202		(*mp)->m_len = sizeof(u_int);
2203		if (im6o == NULL)
2204			*loop = ip6_defmcasthlim;
2205		else
2206			*loop = im6o->im6o_multicast_loop;
2207		return(0);
2208
2209	default:
2210		return(EOPNOTSUPP);
2211	}
2212}
2213
2214/*
2215 * Discard the IP6 multicast options.
2216 */
2217void
2218ip6_freemoptions(im6o)
2219	struct ip6_moptions *im6o;
2220{
2221	struct in6_multi_mship *imm;
2222
2223	if (im6o == NULL)
2224		return;
2225
2226	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2227		LIST_REMOVE(imm, i6mm_chain);
2228		if (imm->i6mm_maddr)
2229			in6_delmulti(imm->i6mm_maddr);
2230		free(imm, M_IPMADDR);
2231	}
2232	free(im6o, M_IPMOPTS);
2233}
2234
2235/*
2236 * Set IPv6 outgoing packet options based on advanced API.
2237 */
2238int
2239ip6_setpktoptions(control, opt, priv, needcopy)
2240	struct mbuf *control;
2241	struct ip6_pktopts *opt;
2242	int priv, needcopy;
2243{
2244	struct cmsghdr *cm = 0;
2245
2246	if (control == 0 || opt == 0)
2247		return(EINVAL);
2248
2249	init_ip6pktopts(opt);
2250
2251	/*
2252	 * XXX: Currently, we assume all the optional information is stored
2253	 * in a single mbuf.
2254	 */
2255	if (control->m_next)
2256		return(EINVAL);
2257
2258	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2259		     control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2260		cm = mtod(control, struct cmsghdr *);
2261		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2262			return(EINVAL);
2263		if (cm->cmsg_level != IPPROTO_IPV6)
2264			continue;
2265
2266		/*
2267		 * XXX should check if RFC2292 API is mixed with 2292bis API
2268		 */
2269		switch (cm->cmsg_type) {
2270		case IPV6_PKTINFO:
2271			if (cm->cmsg_len != CMSG_LEN(sizeof(struct in6_pktinfo)))
2272				return(EINVAL);
2273			if (needcopy) {
2274				/* XXX: Is it really WAITOK? */
2275				opt->ip6po_pktinfo =
2276					malloc(sizeof(struct in6_pktinfo),
2277					       M_IP6OPT, M_WAITOK);
2278				bcopy(CMSG_DATA(cm), opt->ip6po_pktinfo,
2279				    sizeof(struct in6_pktinfo));
2280			} else
2281				opt->ip6po_pktinfo =
2282					(struct in6_pktinfo *)CMSG_DATA(cm);
2283			if (opt->ip6po_pktinfo->ipi6_ifindex &&
2284			    IN6_IS_ADDR_LINKLOCAL(&opt->ip6po_pktinfo->ipi6_addr))
2285				opt->ip6po_pktinfo->ipi6_addr.s6_addr16[1] =
2286					htons(opt->ip6po_pktinfo->ipi6_ifindex);
2287
2288			if (opt->ip6po_pktinfo->ipi6_ifindex > if_index
2289			 || opt->ip6po_pktinfo->ipi6_ifindex < 0) {
2290				return(ENXIO);
2291			}
2292
2293			/*
2294			 * Check if the requested source address is indeed a
2295			 * unicast address assigned to the node, and can be
2296			 * used as the packet's source address.
2297			 */
2298			if (!IN6_IS_ADDR_UNSPECIFIED(&opt->ip6po_pktinfo->ipi6_addr)) {
2299				struct in6_ifaddr *ia6;
2300				struct sockaddr_in6 sin6;
2301
2302				bzero(&sin6, sizeof(sin6));
2303				sin6.sin6_len = sizeof(sin6);
2304				sin6.sin6_family = AF_INET6;
2305				sin6.sin6_addr =
2306					opt->ip6po_pktinfo->ipi6_addr;
2307				ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(sin6tosa(&sin6));
2308				if (ia6 == NULL ||
2309				    (ia6->ia6_flags & (IN6_IFF_ANYCAST |
2310						       IN6_IFF_NOTREADY)) != 0)
2311					return(EADDRNOTAVAIL);
2312			}
2313			break;
2314
2315		case IPV6_HOPLIMIT:
2316			if (cm->cmsg_len != CMSG_LEN(sizeof(int)))
2317				return(EINVAL);
2318
2319			opt->ip6po_hlim = *(int *)CMSG_DATA(cm);
2320			if (opt->ip6po_hlim < -1 || opt->ip6po_hlim > 255)
2321				return(EINVAL);
2322			break;
2323
2324		case IPV6_NEXTHOP:
2325			if (!priv)
2326				return(EPERM);
2327
2328			if (cm->cmsg_len < sizeof(u_char) ||
2329			    /* check if cmsg_len is large enough for sa_len */
2330			    cm->cmsg_len < CMSG_LEN(*CMSG_DATA(cm)))
2331				return(EINVAL);
2332
2333			if (needcopy) {
2334				opt->ip6po_nexthop =
2335					malloc(*CMSG_DATA(cm),
2336					       M_IP6OPT, M_WAITOK);
2337				bcopy(CMSG_DATA(cm),
2338				      opt->ip6po_nexthop,
2339				      *CMSG_DATA(cm));
2340			} else
2341				opt->ip6po_nexthop =
2342					(struct sockaddr *)CMSG_DATA(cm);
2343			break;
2344
2345		case IPV6_HOPOPTS:
2346		{
2347			struct ip6_hbh *hbh;
2348			int hbhlen;
2349
2350			if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_hbh)))
2351				return(EINVAL);
2352			hbh = (struct ip6_hbh *)CMSG_DATA(cm);
2353			hbhlen = (hbh->ip6h_len + 1) << 3;
2354			if (cm->cmsg_len != CMSG_LEN(hbhlen))
2355				return(EINVAL);
2356
2357			if (needcopy) {
2358				opt->ip6po_hbh =
2359					malloc(hbhlen, M_IP6OPT, M_WAITOK);
2360				bcopy(hbh, opt->ip6po_hbh, hbhlen);
2361			} else
2362				opt->ip6po_hbh = hbh;
2363			break;
2364		}
2365
2366		case IPV6_DSTOPTS:
2367		{
2368			struct ip6_dest *dest, **newdest;
2369			int destlen;
2370
2371			if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_dest)))
2372				return(EINVAL);
2373			dest = (struct ip6_dest *)CMSG_DATA(cm);
2374			destlen = (dest->ip6d_len + 1) << 3;
2375			if (cm->cmsg_len != CMSG_LEN(destlen))
2376				return(EINVAL);
2377
2378			/*
2379			 * The old advacned API is ambiguous on this
2380			 * point. Our approach is to determine the
2381			 * position based according to the existence
2382			 * of a routing header. Note, however, that
2383			 * this depends on the order of the extension
2384			 * headers in the ancillary data; the 1st part
2385			 * of the destination options header must
2386			 * appear before the routing header in the
2387			 * ancillary data, too.
2388			 * RFC2292bis solved the ambiguity by
2389			 * introducing separate cmsg types.
2390			 */
2391			if (opt->ip6po_rthdr == NULL)
2392				newdest = &opt->ip6po_dest1;
2393			else
2394				newdest = &opt->ip6po_dest2;
2395
2396			if (needcopy) {
2397				*newdest = malloc(destlen, M_IP6OPT, M_WAITOK);
2398				bcopy(dest, *newdest, destlen);
2399			} else
2400				*newdest = dest;
2401
2402			break;
2403		}
2404
2405		case IPV6_RTHDR:
2406		{
2407			struct ip6_rthdr *rth;
2408			int rthlen;
2409
2410			if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_rthdr)))
2411				return(EINVAL);
2412			rth = (struct ip6_rthdr *)CMSG_DATA(cm);
2413			rthlen = (rth->ip6r_len + 1) << 3;
2414			if (cm->cmsg_len != CMSG_LEN(rthlen))
2415				return(EINVAL);
2416
2417			switch (rth->ip6r_type) {
2418			case IPV6_RTHDR_TYPE_0:
2419				/* must contain one addr */
2420				if (rth->ip6r_len == 0)
2421					return(EINVAL);
2422				/* length must be even */
2423				if (rth->ip6r_len % 2)
2424					return(EINVAL);
2425				if (rth->ip6r_len / 2 != rth->ip6r_segleft)
2426					return(EINVAL);
2427				break;
2428			default:
2429				return(EINVAL);	/* not supported */
2430			}
2431
2432			if (needcopy) {
2433				opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT,
2434							  M_WAITOK);
2435				bcopy(rth, opt->ip6po_rthdr, rthlen);
2436			} else
2437				opt->ip6po_rthdr = rth;
2438
2439			break;
2440		}
2441
2442		default:
2443			return(ENOPROTOOPT);
2444		}
2445	}
2446
2447	return(0);
2448}
2449
2450/*
2451 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
2452 * packet to the input queue of a specified interface.  Note that this
2453 * calls the output routine of the loopback "driver", but with an interface
2454 * pointer that might NOT be &loif -- easier than replicating that code here.
2455 */
2456void
2457ip6_mloopback(ifp, m, dst)
2458	struct ifnet *ifp;
2459	struct mbuf *m;
2460	struct sockaddr_in6 *dst;
2461{
2462	struct mbuf *copym;
2463	struct ip6_hdr *ip6;
2464
2465	copym = m_copy(m, 0, M_COPYALL);
2466	if (copym == NULL)
2467		return;
2468
2469	/*
2470	 * Make sure to deep-copy IPv6 header portion in case the data
2471	 * is in an mbuf cluster, so that we can safely override the IPv6
2472	 * header portion later.
2473	 */
2474	if ((copym->m_flags & M_EXT) != 0 ||
2475	    copym->m_len < sizeof(struct ip6_hdr)) {
2476		copym = m_pullup(copym, sizeof(struct ip6_hdr));
2477		if (copym == NULL)
2478			return;
2479	}
2480
2481#ifdef DIAGNOSTIC
2482	if (copym->m_len < sizeof(*ip6)) {
2483		m_freem(copym);
2484		return;
2485	}
2486#endif
2487
2488	ip6 = mtod(copym, struct ip6_hdr *);
2489#ifndef SCOPEDROUTING
2490	/*
2491	 * clear embedded scope identifiers if necessary.
2492	 * in6_clearscope will touch the addresses only when necessary.
2493	 */
2494	in6_clearscope(&ip6->ip6_src);
2495	in6_clearscope(&ip6->ip6_dst);
2496#endif
2497
2498	(void)if_simloop(ifp, copym, dst->sin6_family, NULL);
2499}
2500
2501/*
2502 * Chop IPv6 header off from the payload.
2503 */
2504static int
2505ip6_splithdr(m, exthdrs)
2506	struct mbuf *m;
2507	struct ip6_exthdrs *exthdrs;
2508{
2509	struct mbuf *mh;
2510	struct ip6_hdr *ip6;
2511
2512	ip6 = mtod(m, struct ip6_hdr *);
2513	if (m->m_len > sizeof(*ip6)) {
2514		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
2515		if (mh == 0) {
2516			m_freem(m);
2517			return ENOBUFS;
2518		}
2519		M_COPY_PKTHDR(mh, m);
2520		MH_ALIGN(mh, sizeof(*ip6));
2521		m->m_flags &= ~M_PKTHDR;
2522		m->m_len -= sizeof(*ip6);
2523		m->m_data += sizeof(*ip6);
2524		mh->m_next = m;
2525		m = mh;
2526		m->m_len = sizeof(*ip6);
2527		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
2528	}
2529	exthdrs->ip6e_ip6 = m;
2530	return 0;
2531}
2532
2533/*
2534 * Compute IPv6 extension header length.
2535 */
2536int
2537ip6_optlen(in6p)
2538	struct in6pcb *in6p;
2539{
2540	int len;
2541
2542	if (!in6p->in6p_outputopts)
2543		return 0;
2544
2545	len = 0;
2546#define elen(x) \
2547    (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
2548
2549	len += elen(in6p->in6p_outputopts->ip6po_hbh);
2550	if (in6p->in6p_outputopts->ip6po_rthdr)
2551		/* dest1 is valid with rthdr only */
2552		len += elen(in6p->in6p_outputopts->ip6po_dest1);
2553	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
2554	len += elen(in6p->in6p_outputopts->ip6po_dest2);
2555	return len;
2556#undef elen
2557}
2558