ip6_input.c revision 225044
1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	$KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $
30 */
31
32/*-
33 * Copyright (c) 1982, 1986, 1988, 1993
34 *	The Regents of the University of California.  All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 4. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
61 */
62
63#include <sys/cdefs.h>
64__FBSDID("$FreeBSD: head/sys/netinet6/ip6_input.c 225044 2011-08-20 17:05:11Z bz $");
65
66#include "opt_inet.h"
67#include "opt_inet6.h"
68#include "opt_ipfw.h"
69#include "opt_ipsec.h"
70#include "opt_route.h"
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/malloc.h>
75#include <sys/mbuf.h>
76#include <sys/proc.h>
77#include <sys/domain.h>
78#include <sys/protosw.h>
79#include <sys/socket.h>
80#include <sys/socketvar.h>
81#include <sys/errno.h>
82#include <sys/time.h>
83#include <sys/kernel.h>
84#include <sys/syslog.h>
85
86#include <net/if.h>
87#include <net/if_types.h>
88#include <net/if_dl.h>
89#include <net/route.h>
90#include <net/netisr.h>
91#include <net/pfil.h>
92#include <net/vnet.h>
93
94#include <netinet/in.h>
95#include <netinet/ip_var.h>
96#include <netinet/in_systm.h>
97#include <net/if_llatbl.h>
98#ifdef INET
99#include <netinet/ip.h>
100#include <netinet/ip_icmp.h>
101#endif /* INET */
102#include <netinet/ip6.h>
103#include <netinet6/in6_var.h>
104#include <netinet6/ip6_var.h>
105#include <netinet/in_pcb.h>
106#include <netinet/icmp6.h>
107#include <netinet6/scope6_var.h>
108#include <netinet6/in6_ifattach.h>
109#include <netinet6/nd6.h>
110
111#ifdef IPSEC
112#include <netipsec/ipsec.h>
113#include <netinet6/ip6_ipsec.h>
114#include <netipsec/ipsec6.h>
115#endif /* IPSEC */
116
117#include <netinet6/ip6protosw.h>
118
119#ifdef FLOWTABLE
120#include <net/flowtable.h>
121VNET_DECLARE(int, ip6_output_flowtable_size);
122#define	V_ip6_output_flowtable_size	VNET(ip6_output_flowtable_size)
123#endif
124
/*
 * File-scope state and forward declarations used by the IPv6 input path.
 */

/* Domain descriptor; ip6_init() and ip6proto_register() walk its protosw. */
125extern struct domain inet6domain;
126
/*
 * Dispatch table indexed by protocol number.  Each entry is an index into
 * inet6sw[]; slots no protocol has claimed hold the index of the
 * IPPROTO_RAW entry (see ip6_init()).
 */
127u_char ip6_protox[IPPROTO_MAX];
/* Per-vnet list head, initialised with TAILQ_INIT() in ip6_init(). */
128VNET_DEFINE(struct in6_ifaddrhead, in6_ifaddrhead);
129
/*
 * netisr registration record: packets queued for NETISR_IPV6 are handed to
 * ip6_input().  Registered from ip6_init() via netisr_register().
 */
130static struct netisr_handler ip6_nh = {
131	.nh_name = "ip6",
132	.nh_handler = ip6_input,
133	.nh_proto = NETISR_IPV6,
134	.nh_policy = NETISR_POLICY_FLOW,
135};
136
/*
 * Callout armed in ip6_init2_vnet() and drained in ip6_destroy().  Only
 * declared here -- presumably defined in another file (TODO confirm).
 */
137VNET_DECLARE(struct callout, in6_tmpaddrtimer_ch);
138#define	V_in6_tmpaddrtimer_ch		VNET(in6_tmpaddrtimer_ch)
139
/* Packet filter hook head; registered in ip6_init(), run in ip6_input(). */
140VNET_DEFINE(struct pfil_head, inet6_pfil_hook);
141
/* Per-vnet IPv6 statistics counters, bumped throughout ip6_input(). */
142VNET_DEFINE(struct ip6stat, ip6stat);
143
/* Read/write lock set up at boot by RW_SYSINIT. */
144struct rwlock in6_ifaddr_lock;
145RW_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock");
146
/* Forward declarations of the file-local helpers. */
147static void ip6_init2(void *);
148static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *);
149static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
150#ifdef PULLDOWN_TEST
151static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int);
152#endif
153
154/*
155 * IP6 initialization: fill in IP6 protocol switch table.
156 * All protocols not implemented in kernel go to raw IP6 protocol handler.
157 */
158void
159ip6_init(void)
160{
161	struct ip6protosw *pr;
162	int i;
163
164	TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal",
165	    &V_ip6_auto_linklocal);
166
167	TAILQ_INIT(&V_in6_ifaddrhead);
168
169	/* Initialize packet filter hooks. */
170	V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF;
171	V_inet6_pfil_hook.ph_af = AF_INET6;
172	if ((i = pfil_head_register(&V_inet6_pfil_hook)) != 0)
173		printf("%s: WARNING: unable to register pfil hook, "
174			"error %d\n", __func__, i);
175
176	scope6_init();
177	addrsel_policy_init();
178	nd6_init();
179	frag6_init();
180
181#ifdef FLOWTABLE
182	if (TUNABLE_INT_FETCH("net.inet6.ip6.output_flowtable_size",
183		&V_ip6_output_flowtable_size)) {
184		if (V_ip6_output_flowtable_size < 256)
185			V_ip6_output_flowtable_size = 256;
186		if (!powerof2(V_ip6_output_flowtable_size)) {
187			printf("flowtable must be power of 2 size\n");
188			V_ip6_output_flowtable_size = 2048;
189		}
190	} else {
191		/*
192		 * round up to the next power of 2
193		 */
194		V_ip6_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
195	}
196	V_ip6_ft = flowtable_alloc("ipv6", V_ip6_output_flowtable_size, FL_IPV6|FL_PCPU);
197#endif
198
199	V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
200
201	/* Skip global initialization stuff for non-default instances. */
202	if (!IS_DEFAULT_VNET(curvnet))
203		return;
204
205#ifdef DIAGNOSTIC
206	if (sizeof(struct protosw) != sizeof(struct ip6protosw))
207		panic("sizeof(protosw) != sizeof(ip6protosw)");
208#endif
209	pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
210	if (pr == NULL)
211		panic("ip6_init");
212
213	/* Initialize the entire ip6_protox[] array to IPPROTO_RAW. */
214	for (i = 0; i < IPPROTO_MAX; i++)
215		ip6_protox[i] = pr - inet6sw;
216	/*
217	 * Cycle through IP protocols and put them into the appropriate place
218	 * in ip6_protox[].
219	 */
220	for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
221	    pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++)
222		if (pr->pr_domain->dom_family == PF_INET6 &&
223		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
224			/* Be careful to only index valid IP protocols. */
225			if (pr->pr_protocol < IPPROTO_MAX)
226				ip6_protox[pr->pr_protocol] = pr - inet6sw;
227		}
228
229	netisr_register(&ip6_nh);
230}
231
232/*
233 * The protocol to be inserted into ip6_protox[] must be already registered
234 * in inet6sw[], either statically or through pf_proto_register().
235 */
236int
237ip6proto_register(short ip6proto)
238{
239	struct ip6protosw *pr;
240
241	/* Sanity checks. */
242	if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX)
243		return (EPROTONOSUPPORT);
244
245	/*
246	 * The protocol slot must not be occupied by another protocol
247	 * already.  An index pointing to IPPROTO_RAW is unused.
248	 */
249	pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
250	if (pr == NULL)
251		return (EPFNOSUPPORT);
252	if (ip6_protox[ip6proto] != pr - inet6sw)	/* IPPROTO_RAW */
253		return (EEXIST);
254
255	/*
256	 * Find the protocol position in inet6sw[] and set the index.
257	 */
258	for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
259	    pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++) {
260		if (pr->pr_domain->dom_family == PF_INET6 &&
261		    pr->pr_protocol && pr->pr_protocol == ip6proto) {
262			ip6_protox[pr->pr_protocol] = pr - inet6sw;
263			return (0);
264		}
265	}
266	return (EPROTONOSUPPORT);
267}
268
269int
270ip6proto_unregister(short ip6proto)
271{
272	struct ip6protosw *pr;
273
274	/* Sanity checks. */
275	if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX)
276		return (EPROTONOSUPPORT);
277
278	/* Check if the protocol was indeed registered. */
279	pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
280	if (pr == NULL)
281		return (EPFNOSUPPORT);
282	if (ip6_protox[ip6proto] == pr - inet6sw)	/* IPPROTO_RAW */
283		return (ENOENT);
284
285	/* Reset the protocol slot to IPPROTO_RAW. */
286	ip6_protox[ip6proto] = pr - inet6sw;
287	return (0);
288}
289
#ifdef VIMAGE
/*
 * Tear down IPv6 state when VIMAGE is configured: calls nd6_destroy() and
 * then drains the temporary-address timer callout that ip6_init2_vnet()
 * armed.
 */
void
ip6_destroy(void)
{

	nd6_destroy();
	callout_drain(&V_in6_tmpaddrtimer_ch);
}
#endif
299
300static int
301ip6_init2_vnet(const void *unused __unused)
302{
303
304	/* nd6_timer_init */
305	callout_init(&V_nd6_timer_ch, 0);
306	callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet);
307
308	/* timer for regeneranation of temporary addresses randomize ID */
309	callout_init(&V_in6_tmpaddrtimer_ch, 0);
310	callout_reset(&V_in6_tmpaddrtimer_ch,
311		      (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
312		       V_ip6_temp_regen_advance) * hz,
313		      in6_tmpaddrtimer, curvnet);
314
315	return (0);
316}
317
318static void
319ip6_init2(void *dummy)
320{
321
322	ip6_init2_vnet(NULL);
323}
324
325/* cheat */
326/* This must be after route_init(), which is now SI_ORDER_THIRD */
327SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);
328
329void
330ip6_input(struct mbuf *m)
331{
332	struct ip6_hdr *ip6;
333	int off = sizeof(struct ip6_hdr), nest;
334	u_int32_t plen;
335	u_int32_t rtalert = ~0;
336	int nxt, ours = 0;
337	struct ifnet *deliverifp = NULL, *ifp = NULL;
338	struct in6_addr odst;
339	struct route_in6 rin6;
340	int srcrt = 0;
341	struct llentry *lle = NULL;
342	struct sockaddr_in6 dst6, *dst;
343
344	bzero(&rin6, sizeof(struct route_in6));
345#ifdef IPSEC
346	/*
347	 * should the inner packet be considered authentic?
348	 * see comment in ah4_input().
349	 * NB: m cannot be NULL when passed to the input routine
350	 */
351
352	m->m_flags &= ~M_AUTHIPHDR;
353	m->m_flags &= ~M_AUTHIPDGM;
354
355#endif /* IPSEC */
356
357	/*
358	 * make sure we don't have onion peering information into m_tag.
359	 */
360	ip6_delaux(m);
361
362	if (m->m_flags & M_FASTFWD_OURS) {
363		/*
364		 * Firewall changed destination to local.
365		 */
366		m->m_flags &= ~M_FASTFWD_OURS;
367		ours = 1;
368		deliverifp = m->m_pkthdr.rcvif;
369		ip6 = mtod(m, struct ip6_hdr *);
370		goto hbhcheck;
371	}
372
373	/*
374	 * mbuf statistics
375	 */
376	if (m->m_flags & M_EXT) {
377		if (m->m_next)
378			V_ip6stat.ip6s_mext2m++;
379		else
380			V_ip6stat.ip6s_mext1++;
381	} else {
382#define M2MMAX	(sizeof(V_ip6stat.ip6s_m2m)/sizeof(V_ip6stat.ip6s_m2m[0]))
383		if (m->m_next) {
384			if (m->m_flags & M_LOOP) {
385				V_ip6stat.ip6s_m2m[V_loif->if_index]++;
386			} else if (m->m_pkthdr.rcvif->if_index < M2MMAX)
387				V_ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++;
388			else
389				V_ip6stat.ip6s_m2m[0]++;
390		} else
391			V_ip6stat.ip6s_m1++;
392#undef M2MMAX
393	}
394
395	/* drop the packet if IPv6 operation is disabled on the IF */
396	if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) {
397		m_freem(m);
398		return;
399	}
400
401	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive);
402	V_ip6stat.ip6s_total++;
403
404#ifndef PULLDOWN_TEST
405	/*
406	 * L2 bridge code and some other code can return mbuf chain
407	 * that does not conform to KAME requirement.  too bad.
408	 * XXX: fails to join if interface MTU > MCLBYTES.  jumbogram?
409	 */
410	if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) {
411		struct mbuf *n;
412
413		MGETHDR(n, M_DONTWAIT, MT_HEADER);
414		if (n)
415			M_MOVE_PKTHDR(n, m);
416		if (n && n->m_pkthdr.len > MHLEN) {
417			MCLGET(n, M_DONTWAIT);
418			if ((n->m_flags & M_EXT) == 0) {
419				m_freem(n);
420				n = NULL;
421			}
422		}
423		if (n == NULL) {
424			m_freem(m);
425			return;	/* ENOBUFS */
426		}
427
428		m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t));
429		n->m_len = n->m_pkthdr.len;
430		m_freem(m);
431		m = n;
432	}
433	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /* nothing */);
434#endif
435
436	if (m->m_len < sizeof(struct ip6_hdr)) {
437		struct ifnet *inifp;
438		inifp = m->m_pkthdr.rcvif;
439		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
440			V_ip6stat.ip6s_toosmall++;
441			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
442			return;
443		}
444	}
445
446	ip6 = mtod(m, struct ip6_hdr *);
447
448	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
449		V_ip6stat.ip6s_badvers++;
450		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
451		goto bad;
452	}
453
454	V_ip6stat.ip6s_nxthist[ip6->ip6_nxt]++;
455
456	/*
457	 * Check against address spoofing/corruption.
458	 */
459	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) ||
460	    IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) {
461		/*
462		 * XXX: "badscope" is not very suitable for a multicast source.
463		 */
464		V_ip6stat.ip6s_badscope++;
465		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
466		goto bad;
467	}
468	if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) &&
469	    !(m->m_flags & M_LOOP)) {
470		/*
471		 * In this case, the packet should come from the loopback
472		 * interface.  However, we cannot just check the if_flags,
473		 * because ip6_mloopback() passes the "actual" interface
474		 * as the outgoing/incoming interface.
475		 */
476		V_ip6stat.ip6s_badscope++;
477		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
478		goto bad;
479	}
480
481#ifdef ALTQ
482	if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) {
483		/* packet is dropped by traffic conditioner */
484		return;
485	}
486#endif
487	/*
488	 * The following check is not documented in specs.  A malicious
489	 * party may be able to use IPv4 mapped addr to confuse tcp/udp stack
490	 * and bypass security checks (act as if it was from 127.0.0.1 by using
491	 * IPv6 src ::ffff:127.0.0.1).  Be cautious.
492	 *
493	 * This check chokes if we are in an SIIT cloud.  As none of BSDs
494	 * support IPv4-less kernel compilation, we cannot support SIIT
495	 * environment at all.  So, it makes more sense for us to reject any
496	 * malicious packets for non-SIIT environment, than try to do a
497	 * partial support for SIIT environment.
498	 */
499	if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
500	    IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
501		V_ip6stat.ip6s_badscope++;
502		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
503		goto bad;
504	}
505#if 0
506	/*
507	 * Reject packets with IPv4 compatible addresses (auto tunnel).
508	 *
509	 * The code forbids auto tunnel relay case in RFC1933 (the check is
510	 * stronger than RFC1933).  We may want to re-enable it if mech-xx
511	 * is revised to forbid relaying case.
512	 */
513	if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) ||
514	    IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) {
515		V_ip6stat.ip6s_badscope++;
516		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
517		goto bad;
518	}
519#endif
520#ifdef IPSEC
521	/*
522	 * Bypass packet filtering for packets previously handled by IPsec.
523	 */
524	if (ip6_ipsec_filtertunnel(m))
525		goto passin;
526#endif /* IPSEC */
527
528	/*
529	 * Run through list of hooks for input packets.
530	 *
531	 * NB: Beware of the destination address changing
532	 *     (e.g. by NAT rewriting).  When this happens,
533	 *     tell ip6_forward to do the right thing.
534	 */
535	odst = ip6->ip6_dst;
536
537	/* Jump over all PFIL processing if hooks are not active. */
538	if (!PFIL_HOOKED(&V_inet6_pfil_hook))
539		goto passin;
540
541	if (pfil_run_hooks(&V_inet6_pfil_hook, &m,
542	    m->m_pkthdr.rcvif, PFIL_IN, NULL))
543		return;
544	if (m == NULL)			/* consumed by filter */
545		return;
546	ip6 = mtod(m, struct ip6_hdr *);
547	srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);
548
549#ifdef IPFIREWALL_FORWARD
550	if (m->m_flags & M_FASTFWD_OURS) {
551		m->m_flags &= ~M_FASTFWD_OURS;
552		ours = 1;
553		deliverifp = m->m_pkthdr.rcvif;
554		goto hbhcheck;
555	}
556	if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
557		/*
558		 * Directly ship the packet on.  This allows forwarding
559		 * packets originally destined to us to some other directly
560		 * connected host.
561		 */
562		ip6_forward(m, 1);
563		goto out;
564	}
565#endif /* IPFIREWALL_FORWARD */
566
567passin:
568	/*
569	 * Disambiguate address scope zones (if there is ambiguity).
570	 * We first make sure that the original source or destination address
571	 * is not in our internal form for scoped addresses.  Such addresses
572	 * are not necessarily invalid spec-wise, but we cannot accept them due
573	 * to the usage conflict.
574	 * in6_setscope() then also checks and rejects the cases where src or
575	 * dst are the loopback address and the receiving interface
576	 * is not loopback.
577	 */
578	if (in6_clearscope(&ip6->ip6_src) || in6_clearscope(&ip6->ip6_dst)) {
579		V_ip6stat.ip6s_badscope++; /* XXX */
580		goto bad;
581	}
582	if (in6_setscope(&ip6->ip6_src, m->m_pkthdr.rcvif, NULL) ||
583	    in6_setscope(&ip6->ip6_dst, m->m_pkthdr.rcvif, NULL)) {
584		V_ip6stat.ip6s_badscope++;
585		goto bad;
586	}
587
588	/*
589	 * Multicast check. Assume packet is for us to avoid
590	 * prematurely taking locks.
591	 */
592	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
593		ours = 1;
594		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast);
595		deliverifp = m->m_pkthdr.rcvif;
596		goto hbhcheck;
597	}
598
599	/*
600	 *  Unicast check
601	 */
602
603	bzero(&dst6, sizeof(dst6));
604	dst6.sin6_family = AF_INET6;
605	dst6.sin6_len = sizeof(struct sockaddr_in6);
606	dst6.sin6_addr = ip6->ip6_dst;
607	ifp = m->m_pkthdr.rcvif;
608	IF_AFDATA_LOCK(ifp);
609	lle = lla_lookup(LLTABLE6(ifp), 0,
610	     (struct sockaddr *)&dst6);
611	IF_AFDATA_UNLOCK(ifp);
612	if ((lle != NULL) && (lle->la_flags & LLE_IFADDR)) {
613		struct ifaddr *ifa;
614		struct in6_ifaddr *ia6;
615		int bad;
616
617		bad = 1;
618#define	sa_equal(a1, a2)						\
619	(bcmp((a1), (a2), ((a1))->sin6_len) == 0)
620		IF_ADDR_LOCK(ifp);
621		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
622			if (ifa->ifa_addr->sa_family != dst6.sin6_family)
623				continue;
624			if (sa_equal(&dst6, ifa->ifa_addr))
625				break;
626		}
627		KASSERT(ifa != NULL, ("%s: ifa not found for lle %p",
628		    __func__, lle));
629#undef sa_equal
630
631		ia6 = (struct in6_ifaddr *)ifa;
632		if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
633			/* Count the packet in the ip address stats */
634			ia6->ia_ifa.if_ipackets++;
635			ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
636
637			/*
638			 * record address information into m_tag.
639			 */
640			(void)ip6_setdstifaddr(m, ia6);
641
642			bad = 0;
643		} else {
644			char ip6bufs[INET6_ADDRSTRLEN];
645			char ip6bufd[INET6_ADDRSTRLEN];
646			/* address is not ready, so discard the packet. */
647			nd6log((LOG_INFO,
648			    "ip6_input: packet to an unready address %s->%s\n",
649			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
650			    ip6_sprintf(ip6bufd, &ip6->ip6_dst)));
651		}
652		IF_ADDR_UNLOCK(ifp);
653		LLE_RUNLOCK(lle);
654		if (bad)
655			goto bad;
656		else {
657			ours = 1;
658			deliverifp = ifp;
659			goto hbhcheck;
660		}
661	}
662	if (lle != NULL)
663		LLE_RUNLOCK(lle);
664
665	dst = &rin6.ro_dst;
666	dst->sin6_len = sizeof(struct sockaddr_in6);
667	dst->sin6_family = AF_INET6;
668	dst->sin6_addr = ip6->ip6_dst;
669	rin6.ro_rt = rtalloc1((struct sockaddr *)dst, 0, 0);
670	if (rin6.ro_rt)
671		RT_UNLOCK(rin6.ro_rt);
672
673#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key))
674
675	/*
676	 * Accept the packet if the forwarding interface to the destination
677	 * according to the routing table is the loopback interface,
678	 * unless the associated route has a gateway.
679	 * Note that this approach causes to accept a packet if there is a
680	 * route to the loopback interface for the destination of the packet.
681	 * But we think it's even useful in some situations, e.g. when using
682	 * a special daemon which wants to intercept the packet.
683	 *
684	 * XXX: some OSes automatically make a cloned route for the destination
685	 * of an outgoing packet.  If the outgoing interface of the packet
686	 * is a loopback one, the kernel would consider the packet to be
687	 * accepted, even if we have no such address assinged on the interface.
688	 * We check the cloned flag of the route entry to reject such cases,
689	 * assuming that route entries for our own addresses are not made by
690	 * cloning (it should be true because in6_addloop explicitly installs
691	 * the host route).  However, we might have to do an explicit check
692	 * while it would be less efficient.  Or, should we rather install a
693	 * reject route for such a case?
694	 */
695	if (rin6.ro_rt &&
696	    (rin6.ro_rt->rt_flags &
697	     (RTF_HOST|RTF_GATEWAY)) == RTF_HOST &&
698#ifdef RTF_WASCLONED
699	    !(rin6.ro_rt->rt_flags & RTF_WASCLONED) &&
700#endif
701#ifdef RTF_CLONED
702	    !(rin6.ro_rt->rt_flags & RTF_CLONED) &&
703#endif
704#if 0
705	    /*
706	     * The check below is redundant since the comparison of
707	     * the destination and the key of the rtentry has
708	     * already done through looking up the routing table.
709	     */
710	    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
711	    &rt6_key(rin6.ro_rt)->sin6_addr)
712#endif
713	    rin6.ro_rt->rt_ifp->if_type == IFT_LOOP) {
714		int free_ia6 = 0;
715		struct in6_ifaddr *ia6;
716
717		/*
718		 * found the loopback route to the interface address
719		 */
720		if (rin6.ro_rt->rt_gateway->sa_family == AF_LINK) {
721			struct sockaddr_in6 dest6;
722
723			bzero(&dest6, sizeof(dest6));
724			dest6.sin6_family = AF_INET6;
725			dest6.sin6_len = sizeof(dest6);
726			dest6.sin6_addr = ip6->ip6_dst;
727			ia6 = (struct in6_ifaddr *)
728			    ifa_ifwithaddr((struct sockaddr *)&dest6);
729			if (ia6 == NULL)
730				goto bad;
731			free_ia6 = 1;
732		}
733		else
734			ia6 = (struct in6_ifaddr *)rin6.ro_rt->rt_ifa;
735
736		/*
737		 * record address information into m_tag.
738		 */
739		(void)ip6_setdstifaddr(m, ia6);
740
741		/*
742		 * packets to a tentative, duplicated, or somehow invalid
743		 * address must not be accepted.
744		 */
745		if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
746			/* this address is ready */
747			ours = 1;
748			deliverifp = ia6->ia_ifp;	/* correct? */
749			/* Count the packet in the ip address stats */
750			ia6->ia_ifa.if_ipackets++;
751			ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
752			if (ia6 != NULL && free_ia6 != 0)
753				ifa_free(&ia6->ia_ifa);
754			goto hbhcheck;
755		} else {
756			char ip6bufs[INET6_ADDRSTRLEN];
757			char ip6bufd[INET6_ADDRSTRLEN];
758			/* address is not ready, so discard the packet. */
759			nd6log((LOG_INFO,
760			    "ip6_input: packet to an unready address %s->%s\n",
761			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
762			    ip6_sprintf(ip6bufd, &ip6->ip6_dst)));
763
764			if (ia6 != NULL && free_ia6 != 0)
765				ifa_free(&ia6->ia_ifa);
766			goto bad;
767		}
768	}
769
770	/*
771	 * FAITH (Firewall Aided Internet Translator)
772	 */
773	if (V_ip6_keepfaith) {
774		if (rin6.ro_rt && rin6.ro_rt->rt_ifp &&
775		    rin6.ro_rt->rt_ifp->if_type == IFT_FAITH) {
776			/* XXX do we need more sanity checks? */
777			ours = 1;
778			deliverifp = rin6.ro_rt->rt_ifp; /* faith */
779			goto hbhcheck;
780		}
781	}
782
783	/*
784	 * Now there is no reason to process the packet if it's not our own
785	 * and we're not a router.
786	 */
787	if (!V_ip6_forwarding) {
788		V_ip6stat.ip6s_cantforward++;
789		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
790		goto bad;
791	}
792
793  hbhcheck:
794	/*
795	 * record address information into m_tag, if we don't have one yet.
796	 * note that we are unable to record it, if the address is not listed
797	 * as our interface address (e.g. multicast addresses, addresses
798	 * within FAITH prefixes and such).
799	 */
800	if (deliverifp && !ip6_getdstifaddr(m)) {
801		struct in6_ifaddr *ia6;
802
803		ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst);
804		if (ia6) {
805			if (!ip6_setdstifaddr(m, ia6)) {
806				/*
807				 * XXX maybe we should drop the packet here,
808				 * as we could not provide enough information
809				 * to the upper layers.
810				 */
811			}
812			ifa_free(&ia6->ia_ifa);
813		}
814	}
815
816	/*
817	 * Process Hop-by-Hop options header if it's contained.
818	 * m may be modified in ip6_hopopts_input().
819	 * If a JumboPayload option is included, plen will also be modified.
820	 */
821	plen = (u_int32_t)ntohs(ip6->ip6_plen);
822	if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
823		struct ip6_hbh *hbh;
824
825		if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) {
826#if 0	/*touches NULL pointer*/
827			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
828#endif
829			goto out;	/* m have already been freed */
830		}
831
832		/* adjust pointer */
833		ip6 = mtod(m, struct ip6_hdr *);
834
835		/*
836		 * if the payload length field is 0 and the next header field
837		 * indicates Hop-by-Hop Options header, then a Jumbo Payload
838		 * option MUST be included.
839		 */
840		if (ip6->ip6_plen == 0 && plen == 0) {
841			/*
842			 * Note that if a valid jumbo payload option is
843			 * contained, ip6_hopopts_input() must set a valid
844			 * (non-zero) payload length to the variable plen.
845			 */
846			V_ip6stat.ip6s_badoptions++;
847			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
848			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
849			icmp6_error(m, ICMP6_PARAM_PROB,
850				    ICMP6_PARAMPROB_HEADER,
851				    (caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
852			goto out;
853		}
854#ifndef PULLDOWN_TEST
855		/* ip6_hopopts_input() ensures that mbuf is contiguous */
856		hbh = (struct ip6_hbh *)(ip6 + 1);
857#else
858		IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
859			sizeof(struct ip6_hbh));
860		if (hbh == NULL) {
861			V_ip6stat.ip6s_tooshort++;
862			goto out;
863		}
864#endif
865		nxt = hbh->ip6h_nxt;
866
867		/*
868		 * If we are acting as a router and the packet contains a
869		 * router alert option, see if we know the option value.
870		 * Currently, we only support the option value for MLD, in which
871		 * case we should pass the packet to the multicast routing
872		 * daemon.
873		 */
874		if (rtalert != ~0) {
875			switch (rtalert) {
876			case IP6OPT_RTALERT_MLD:
877				if (V_ip6_forwarding)
878					ours = 1;
879				break;
880			default:
881				/*
882				 * RFC2711 requires unrecognized values must be
883				 * silently ignored.
884				 */
885				break;
886			}
887		}
888	} else
889		nxt = ip6->ip6_nxt;
890
891	/*
892	 * Check that the amount of data in the buffers
893	 * is as at least much as the IPv6 header would have us expect.
894	 * Trim mbufs if longer than we expect.
895	 * Drop packet if shorter than we expect.
896	 */
897	if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) {
898		V_ip6stat.ip6s_tooshort++;
899		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
900		goto bad;
901	}
902	if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) {
903		if (m->m_len == m->m_pkthdr.len) {
904			m->m_len = sizeof(struct ip6_hdr) + plen;
905			m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
906		} else
907			m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len);
908	}
909
910	/*
911	 * Forward if desirable.
912	 */
913	if (V_ip6_mrouter &&
914	    IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
915		/*
916		 * If we are acting as a multicast router, all
917		 * incoming multicast packets are passed to the
918		 * kernel-level multicast forwarding function.
919		 * The packet is returned (relatively) intact; if
920		 * ip6_mforward() returns a non-zero value, the packet
921		 * must be discarded, else it may be accepted below.
922		 *
923		 * XXX TODO: Check hlim and multicast scope here to avoid
924		 * unnecessarily calling into ip6_mforward().
925		 */
926		if (ip6_mforward &&
927		    ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
928			IP6STAT_INC(ip6s_cantforward);
929			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
930			goto bad;
931		}
932	} else if (!ours) {
933		ip6_forward(m, srcrt);
934		goto out;
935	}
936
937	ip6 = mtod(m, struct ip6_hdr *);
938
939	/*
940	 * Malicious party may be able to use IPv4 mapped addr to confuse
941	 * tcp/udp stack and bypass security checks (act as if it was from
942	 * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1).  Be cautious.
943	 *
944	 * For SIIT end node behavior, you may want to disable the check.
945	 * However, you will  become vulnerable to attacks using IPv4 mapped
946	 * source.
947	 */
948	if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
949	    IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
950		V_ip6stat.ip6s_badscope++;
951		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
952		goto bad;
953	}
954
955	/*
956	 * Tell launch routine the next header
957	 */
958	V_ip6stat.ip6s_delivered++;
959	in6_ifstat_inc(deliverifp, ifs6_in_deliver);
960	nest = 0;
961
962	while (nxt != IPPROTO_DONE) {
963		if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) {
964			V_ip6stat.ip6s_toomanyhdr++;
965			goto bad;
966		}
967
968		/*
969		 * protection against faulty packet - there should be
970		 * more sanity checks in header chain processing.
971		 */
972		if (m->m_pkthdr.len < off) {
973			V_ip6stat.ip6s_tooshort++;
974			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
975			goto bad;
976		}
977
978#ifdef IPSEC
979		/*
980		 * enforce IPsec policy checking if we are seeing last header.
981		 * note that we do not visit this with protocols with pcb layer
982		 * code - like udp/tcp/raw ip.
983		 */
984		if (ip6_ipsec_input(m, nxt))
985			goto bad;
986#endif /* IPSEC */
987
988		/*
989		 * Use mbuf flags to propagate Router Alert option to
990		 * ICMPv6 layer, as hop-by-hop options have been stripped.
991		 */
992		if (nxt == IPPROTO_ICMPV6 && rtalert != ~0)
993			m->m_flags |= M_RTALERT_MLD;
994
995		nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
996	}
997	goto out;
998bad:
999	m_freem(m);
1000out:
1001	if (rin6.ro_rt)
1002		RTFREE(rin6.ro_rt);
1003}
1004
1005/*
1006 * set/grab in6_ifaddr correspond to IPv6 destination address.
1007 * XXX backward compatibility wrapper
1008 *
1009 * XXXRW: We should bump the refcount on ia6 before sticking it in the m_tag,
1010 * and then bump it when the tag is copied, and release it when the tag is
1011 * freed.  Unfortunately, m_tags don't support deep copies (yet), so instead
1012 * we just bump the ia refcount when we receive it.  This should be fixed.
1013 */
1014static struct ip6aux *
1015ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6)
1016{
1017	struct ip6aux *ip6a;
1018
1019	ip6a = ip6_addaux(m);
1020	if (ip6a)
1021		ip6a->ip6a_dstia6 = ia6;
1022	return ip6a;	/* NULL if failed to set */
1023}
1024
1025struct in6_ifaddr *
1026ip6_getdstifaddr(struct mbuf *m)
1027{
1028	struct ip6aux *ip6a;
1029	struct in6_ifaddr *ia;
1030
1031	ip6a = ip6_findaux(m);
1032	if (ip6a) {
1033		ia = ip6a->ip6a_dstia6;
1034		ifa_ref(&ia->ia_ifa);
1035		return ia;
1036	} else
1037		return NULL;
1038}
1039
1040/*
1041 * Hop-by-Hop options header processing. If a valid jumbo payload option is
1042 * included, the real payload length will be stored in plenp.
1043 *
1044 * rtalertp - XXX: should be stored more smart way
1045 */
1046static int
1047ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp,
1048    struct mbuf **mp, int *offp)
1049{
1050	struct mbuf *m = *mp;
1051	int off = *offp, hbhlen;
1052	struct ip6_hbh *hbh;
1053	u_int8_t *opt;
1054
1055	/* validation of the length of the header */
1056#ifndef PULLDOWN_TEST
1057	IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1);
1058	hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
1059	hbhlen = (hbh->ip6h_len + 1) << 3;
1060
1061	IP6_EXTHDR_CHECK(m, off, hbhlen, -1);
1062	hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
1063#else
1064	IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m,
1065		sizeof(struct ip6_hdr), sizeof(struct ip6_hbh));
1066	if (hbh == NULL) {
1067		V_ip6stat.ip6s_tooshort++;
1068		return -1;
1069	}
1070	hbhlen = (hbh->ip6h_len + 1) << 3;
1071	IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
1072		hbhlen);
1073	if (hbh == NULL) {
1074		V_ip6stat.ip6s_tooshort++;
1075		return -1;
1076	}
1077#endif
1078	off += hbhlen;
1079	hbhlen -= sizeof(struct ip6_hbh);
1080	opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh);
1081
1082	if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh),
1083				hbhlen, rtalertp, plenp) < 0)
1084		return (-1);
1085
1086	*offp = off;
1087	*mp = m;
1088	return (0);
1089}
1090
1091/*
1092 * Search header for all Hop-by-hop options and process each option.
1093 * This function is separate from ip6_hopopts_input() in order to
1094 * handle a case where the sending node itself process its hop-by-hop
1095 * options header. In such a case, the function is called from ip6_output().
1096 *
1097 * The function assumes that hbh header is located right after the IPv6 header
1098 * (RFC2460 p7), opthead is pointer into data content in m, and opthead to
1099 * opthead + hbhlen is located in contiguous memory region.
1100 */
1101int
1102ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
1103    u_int32_t *rtalertp, u_int32_t *plenp)
1104{
1105	struct ip6_hdr *ip6;
1106	int optlen = 0;
1107	u_int8_t *opt = opthead;
1108	u_int16_t rtalert_val;
1109	u_int32_t jumboplen;
1110	const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh);
1111
1112	for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) {
1113		switch (*opt) {
1114		case IP6OPT_PAD1:
1115			optlen = 1;
1116			break;
1117		case IP6OPT_PADN:
1118			if (hbhlen < IP6OPT_MINLEN) {
1119				V_ip6stat.ip6s_toosmall++;
1120				goto bad;
1121			}
1122			optlen = *(opt + 1) + 2;
1123			break;
1124		case IP6OPT_ROUTER_ALERT:
1125			/* XXX may need check for alignment */
1126			if (hbhlen < IP6OPT_RTALERT_LEN) {
1127				V_ip6stat.ip6s_toosmall++;
1128				goto bad;
1129			}
1130			if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) {
1131				/* XXX stat */
1132				icmp6_error(m, ICMP6_PARAM_PROB,
1133				    ICMP6_PARAMPROB_HEADER,
1134				    erroff + opt + 1 - opthead);
1135				return (-1);
1136			}
1137			optlen = IP6OPT_RTALERT_LEN;
1138			bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2);
1139			*rtalertp = ntohs(rtalert_val);
1140			break;
1141		case IP6OPT_JUMBO:
1142			/* XXX may need check for alignment */
1143			if (hbhlen < IP6OPT_JUMBO_LEN) {
1144				V_ip6stat.ip6s_toosmall++;
1145				goto bad;
1146			}
1147			if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) {
1148				/* XXX stat */
1149				icmp6_error(m, ICMP6_PARAM_PROB,
1150				    ICMP6_PARAMPROB_HEADER,
1151				    erroff + opt + 1 - opthead);
1152				return (-1);
1153			}
1154			optlen = IP6OPT_JUMBO_LEN;
1155
1156			/*
1157			 * IPv6 packets that have non 0 payload length
1158			 * must not contain a jumbo payload option.
1159			 */
1160			ip6 = mtod(m, struct ip6_hdr *);
1161			if (ip6->ip6_plen) {
1162				V_ip6stat.ip6s_badoptions++;
1163				icmp6_error(m, ICMP6_PARAM_PROB,
1164				    ICMP6_PARAMPROB_HEADER,
1165				    erroff + opt - opthead);
1166				return (-1);
1167			}
1168
1169			/*
1170			 * We may see jumbolen in unaligned location, so
1171			 * we'd need to perform bcopy().
1172			 */
1173			bcopy(opt + 2, &jumboplen, sizeof(jumboplen));
1174			jumboplen = (u_int32_t)htonl(jumboplen);
1175
1176#if 1
1177			/*
1178			 * if there are multiple jumbo payload options,
1179			 * *plenp will be non-zero and the packet will be
1180			 * rejected.
1181			 * the behavior may need some debate in ipngwg -
1182			 * multiple options does not make sense, however,
1183			 * there's no explicit mention in specification.
1184			 */
1185			if (*plenp != 0) {
1186				V_ip6stat.ip6s_badoptions++;
1187				icmp6_error(m, ICMP6_PARAM_PROB,
1188				    ICMP6_PARAMPROB_HEADER,
1189				    erroff + opt + 2 - opthead);
1190				return (-1);
1191			}
1192#endif
1193
1194			/*
1195			 * jumbo payload length must be larger than 65535.
1196			 */
1197			if (jumboplen <= IPV6_MAXPACKET) {
1198				V_ip6stat.ip6s_badoptions++;
1199				icmp6_error(m, ICMP6_PARAM_PROB,
1200				    ICMP6_PARAMPROB_HEADER,
1201				    erroff + opt + 2 - opthead);
1202				return (-1);
1203			}
1204			*plenp = jumboplen;
1205
1206			break;
1207		default:		/* unknown option */
1208			if (hbhlen < IP6OPT_MINLEN) {
1209				V_ip6stat.ip6s_toosmall++;
1210				goto bad;
1211			}
1212			optlen = ip6_unknown_opt(opt, m,
1213			    erroff + opt - opthead);
1214			if (optlen == -1)
1215				return (-1);
1216			optlen += 2;
1217			break;
1218		}
1219	}
1220
1221	return (0);
1222
1223  bad:
1224	m_freem(m);
1225	return (-1);
1226}
1227
1228/*
1229 * Unknown option processing.
1230 * The third argument `off' is the offset from the IPv6 header to the option,
1231 * which is necessary if the IPv6 header the and option header and IPv6 header
1232 * is not contiguous in order to return an ICMPv6 error.
1233 */
1234int
1235ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off)
1236{
1237	struct ip6_hdr *ip6;
1238
1239	switch (IP6OPT_TYPE(*optp)) {
1240	case IP6OPT_TYPE_SKIP: /* ignore the option */
1241		return ((int)*(optp + 1));
1242	case IP6OPT_TYPE_DISCARD:	/* silently discard */
1243		m_freem(m);
1244		return (-1);
1245	case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */
1246		V_ip6stat.ip6s_badoptions++;
1247		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off);
1248		return (-1);
1249	case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */
1250		V_ip6stat.ip6s_badoptions++;
1251		ip6 = mtod(m, struct ip6_hdr *);
1252		if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
1253		    (m->m_flags & (M_BCAST|M_MCAST)))
1254			m_freem(m);
1255		else
1256			icmp6_error(m, ICMP6_PARAM_PROB,
1257				    ICMP6_PARAMPROB_OPTION, off);
1258		return (-1);
1259	}
1260
1261	m_freem(m);		/* XXX: NOTREACHED */
1262	return (-1);
1263}
1264
1265/*
1266 * Create the "control" list for this pcb.
1267 * These functions will not modify mbuf chain at all.
1268 *
1269 * With KAME mbuf chain restriction:
1270 * The routine will be called from upper layer handlers like tcp6_input().
1271 * Thus the routine assumes that the caller (tcp6_input) have already
1272 * called IP6_EXTHDR_CHECK() and all the extension headers are located in the
1273 * very first mbuf on the mbuf chain.
1274 *
1275 * ip6_savecontrol_v4 will handle those options that are possible to be
1276 * set on a v4-mapped socket.
1277 * ip6_savecontrol will directly call ip6_savecontrol_v4 to handle those
1278 * options and handle the v6-only ones itself.
1279 */
1280struct mbuf **
1281ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp,
1282    int *v4only)
1283{
1284	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1285
1286#ifdef SO_TIMESTAMP
1287	if ((inp->inp_socket->so_options & SO_TIMESTAMP) != 0) {
1288		struct timeval tv;
1289
1290		microtime(&tv);
1291		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
1292		    SCM_TIMESTAMP, SOL_SOCKET);
1293		if (*mp)
1294			mp = &(*mp)->m_next;
1295	}
1296#endif
1297
1298	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
1299		if (v4only != NULL)
1300			*v4only = 1;
1301		return (mp);
1302	}
1303
1304#define IS2292(inp, x, y)	(((inp)->inp_flags & IN6P_RFC2292) ? (x) : (y))
1305	/* RFC 2292 sec. 5 */
1306	if ((inp->inp_flags & IN6P_PKTINFO) != 0) {
1307		struct in6_pktinfo pi6;
1308
1309		bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr));
1310		in6_clearscope(&pi6.ipi6_addr);	/* XXX */
1311		pi6.ipi6_ifindex =
1312		    (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0;
1313
1314		*mp = sbcreatecontrol((caddr_t) &pi6,
1315		    sizeof(struct in6_pktinfo),
1316		    IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6);
1317		if (*mp)
1318			mp = &(*mp)->m_next;
1319	}
1320
1321	if ((inp->inp_flags & IN6P_HOPLIMIT) != 0) {
1322		int hlim = ip6->ip6_hlim & 0xff;
1323
1324		*mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int),
1325		    IS2292(inp, IPV6_2292HOPLIMIT, IPV6_HOPLIMIT),
1326		    IPPROTO_IPV6);
1327		if (*mp)
1328			mp = &(*mp)->m_next;
1329	}
1330
1331	if (v4only != NULL)
1332		*v4only = 0;
1333	return (mp);
1334}
1335
1336void
1337ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
1338{
1339	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1340	int v4only = 0;
1341
1342	mp = ip6_savecontrol_v4(in6p, m, mp, &v4only);
1343	if (v4only)
1344		return;
1345
1346	if ((in6p->inp_flags & IN6P_TCLASS) != 0) {
1347		u_int32_t flowinfo;
1348		int tclass;
1349
1350		flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK);
1351		flowinfo >>= 20;
1352
1353		tclass = flowinfo & 0xff;
1354		*mp = sbcreatecontrol((caddr_t) &tclass, sizeof(tclass),
1355		    IPV6_TCLASS, IPPROTO_IPV6);
1356		if (*mp)
1357			mp = &(*mp)->m_next;
1358	}
1359
1360	/*
1361	 * IPV6_HOPOPTS socket option.  Recall that we required super-user
1362	 * privilege for the option (see ip6_ctloutput), but it might be too
1363	 * strict, since there might be some hop-by-hop options which can be
1364	 * returned to normal user.
1365	 * See also RFC 2292 section 6 (or RFC 3542 section 8).
1366	 */
1367	if ((in6p->inp_flags & IN6P_HOPOPTS) != 0) {
1368		/*
1369		 * Check if a hop-by-hop options header is contatined in the
1370		 * received packet, and if so, store the options as ancillary
1371		 * data. Note that a hop-by-hop options header must be
1372		 * just after the IPv6 header, which is assured through the
1373		 * IPv6 input processing.
1374		 */
1375		if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
1376			struct ip6_hbh *hbh;
1377			int hbhlen = 0;
1378#ifdef PULLDOWN_TEST
1379			struct mbuf *ext;
1380#endif
1381
1382#ifndef PULLDOWN_TEST
1383			hbh = (struct ip6_hbh *)(ip6 + 1);
1384			hbhlen = (hbh->ip6h_len + 1) << 3;
1385#else
1386			ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr),
1387			    ip6->ip6_nxt);
1388			if (ext == NULL) {
1389				V_ip6stat.ip6s_tooshort++;
1390				return;
1391			}
1392			hbh = mtod(ext, struct ip6_hbh *);
1393			hbhlen = (hbh->ip6h_len + 1) << 3;
1394			if (hbhlen != ext->m_len) {
1395				m_freem(ext);
1396				V_ip6stat.ip6s_tooshort++;
1397				return;
1398			}
1399#endif
1400
1401			/*
1402			 * XXX: We copy the whole header even if a
1403			 * jumbo payload option is included, the option which
1404			 * is to be removed before returning according to
1405			 * RFC2292.
1406			 * Note: this constraint is removed in RFC3542
1407			 */
1408			*mp = sbcreatecontrol((caddr_t)hbh, hbhlen,
1409			    IS2292(in6p, IPV6_2292HOPOPTS, IPV6_HOPOPTS),
1410			    IPPROTO_IPV6);
1411			if (*mp)
1412				mp = &(*mp)->m_next;
1413#ifdef PULLDOWN_TEST
1414			m_freem(ext);
1415#endif
1416		}
1417	}
1418
1419	if ((in6p->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) {
1420		int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr);
1421
1422		/*
1423		 * Search for destination options headers or routing
1424		 * header(s) through the header chain, and stores each
1425		 * header as ancillary data.
1426		 * Note that the order of the headers remains in
1427		 * the chain of ancillary data.
1428		 */
1429		while (1) {	/* is explicit loop prevention necessary? */
1430			struct ip6_ext *ip6e = NULL;
1431			int elen;
1432#ifdef PULLDOWN_TEST
1433			struct mbuf *ext = NULL;
1434#endif
1435
1436			/*
1437			 * if it is not an extension header, don't try to
1438			 * pull it from the chain.
1439			 */
1440			switch (nxt) {
1441			case IPPROTO_DSTOPTS:
1442			case IPPROTO_ROUTING:
1443			case IPPROTO_HOPOPTS:
1444			case IPPROTO_AH: /* is it possible? */
1445				break;
1446			default:
1447				goto loopend;
1448			}
1449
1450#ifndef PULLDOWN_TEST
1451			if (off + sizeof(*ip6e) > m->m_len)
1452				goto loopend;
1453			ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off);
1454			if (nxt == IPPROTO_AH)
1455				elen = (ip6e->ip6e_len + 2) << 2;
1456			else
1457				elen = (ip6e->ip6e_len + 1) << 3;
1458			if (off + elen > m->m_len)
1459				goto loopend;
1460#else
1461			ext = ip6_pullexthdr(m, off, nxt);
1462			if (ext == NULL) {
1463				V_ip6stat.ip6s_tooshort++;
1464				return;
1465			}
1466			ip6e = mtod(ext, struct ip6_ext *);
1467			if (nxt == IPPROTO_AH)
1468				elen = (ip6e->ip6e_len + 2) << 2;
1469			else
1470				elen = (ip6e->ip6e_len + 1) << 3;
1471			if (elen != ext->m_len) {
1472				m_freem(ext);
1473				V_ip6stat.ip6s_tooshort++;
1474				return;
1475			}
1476#endif
1477
1478			switch (nxt) {
1479			case IPPROTO_DSTOPTS:
1480				if (!(in6p->inp_flags & IN6P_DSTOPTS))
1481					break;
1482
1483				*mp = sbcreatecontrol((caddr_t)ip6e, elen,
1484				    IS2292(in6p,
1485					IPV6_2292DSTOPTS, IPV6_DSTOPTS),
1486				    IPPROTO_IPV6);
1487				if (*mp)
1488					mp = &(*mp)->m_next;
1489				break;
1490			case IPPROTO_ROUTING:
1491				if (!(in6p->inp_flags & IN6P_RTHDR))
1492					break;
1493
1494				*mp = sbcreatecontrol((caddr_t)ip6e, elen,
1495				    IS2292(in6p, IPV6_2292RTHDR, IPV6_RTHDR),
1496				    IPPROTO_IPV6);
1497				if (*mp)
1498					mp = &(*mp)->m_next;
1499				break;
1500			case IPPROTO_HOPOPTS:
1501			case IPPROTO_AH: /* is it possible? */
1502				break;
1503
1504			default:
1505				/*
1506				 * other cases have been filtered in the above.
1507				 * none will visit this case.  here we supply
1508				 * the code just in case (nxt overwritten or
1509				 * other cases).
1510				 */
1511#ifdef PULLDOWN_TEST
1512				m_freem(ext);
1513#endif
1514				goto loopend;
1515
1516			}
1517
1518			/* proceed with the next header. */
1519			off += elen;
1520			nxt = ip6e->ip6e_nxt;
1521			ip6e = NULL;
1522#ifdef PULLDOWN_TEST
1523			m_freem(ext);
1524			ext = NULL;
1525#endif
1526		}
1527	  loopend:
1528		;
1529	}
1530}
1531#undef IS2292
1532
1533void
1534ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu)
1535{
1536	struct socket *so;
1537	struct mbuf *m_mtu;
1538	struct ip6_mtuinfo mtuctl;
1539
1540	so =  in6p->inp_socket;
1541
1542	if (mtu == NULL)
1543		return;
1544
1545#ifdef DIAGNOSTIC
1546	if (so == NULL)		/* I believe this is impossible */
1547		panic("ip6_notify_pmtu: socket is NULL");
1548#endif
1549
1550	bzero(&mtuctl, sizeof(mtuctl));	/* zero-clear for safety */
1551	mtuctl.ip6m_mtu = *mtu;
1552	mtuctl.ip6m_addr = *dst;
1553	if (sa6_recoverscope(&mtuctl.ip6m_addr))
1554		return;
1555
1556	if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl),
1557	    IPV6_PATHMTU, IPPROTO_IPV6)) == NULL)
1558		return;
1559
1560	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu)
1561	    == 0) {
1562		m_freem(m_mtu);
1563		/* XXX: should count statistics */
1564	} else
1565		sorwakeup(so);
1566
1567	return;
1568}
1569
#ifdef PULLDOWN_TEST
/*
 * Pull a single extension header out of an mbuf chain.
 *
 * m   - chain holding the packet
 * off - offset of the extension header within the chain
 * nxt - protocol number of that header; selects the length encoding
 *       (4-byte units plus two for AH, 8-byte units plus one otherwise)
 *
 * Returns a newly allocated mbuf whose m_len is the header length and whose
 * data is a copy of the header, or NULL on error: the packet is too short,
 * no mbuf/cluster is available, or the header does not fit in the new mbuf.
 * The caller frees the returned mbuf; m itself is not modified.
 */
static struct mbuf *
ip6_pullexthdr(struct mbuf *m, size_t off, int nxt)
{
	struct ip6_ext ip6e;
	size_t elen;
	struct mbuf *n;

#ifdef DIAGNOSTIC
	switch (nxt) {
	case IPPROTO_DSTOPTS:
	case IPPROTO_ROUTING:
	case IPPROTO_HOPOPTS:
	case IPPROTO_AH: /* is it possible? */
		break;
	default:
		printf("ip6_pullexthdr: invalid nxt=%d\n", nxt);
	}
#endif

	/*
	 * Do not let m_copydata() run off the end of the chain: when the
	 * packet length is known, make sure the fixed part of the header is
	 * really there before reading it.
	 */
	if ((m->m_flags & M_PKTHDR) != 0 &&
	    (size_t)m->m_pkthdr.len < off + sizeof(ip6e))
		return NULL;

	m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
	if (nxt == IPPROTO_AH)
		elen = (ip6e.ip6e_len + 2) << 2;
	else
		elen = (ip6e.ip6e_len + 1) << 3;

	/* Same check again, now for the full header length it declares. */
	if ((m->m_flags & M_PKTHDR) != 0 &&
	    (size_t)m->m_pkthdr.len < off + elen)
		return NULL;

	/* A plain mbuf if the header fits, otherwise attach a cluster. */
	MGET(n, M_DONTWAIT, MT_DATA);
	if (n && elen >= MLEN) {
		MCLGET(n, M_DONTWAIT);
		if ((n->m_flags & M_EXT) == 0) {
			m_free(n);
			n = NULL;
		}
	}
	if (!n)
		return NULL;

	n->m_len = 0;
	if (elen >= M_TRAILINGSPACE(n)) {
		m_free(n);
		return NULL;
	}

	m_copydata(m, off, elen, mtod(n, caddr_t));
	n->m_len = elen;
	return n;
}
#endif
1622
1623/*
1624 * Get pointer to the previous header followed by the header
1625 * currently processed.
1626 * XXX: This function supposes that
1627 *	M includes all headers,
1628 *	the next header field and the header length field of each header
1629 *	are valid, and
1630 *	the sum of each header length equals to OFF.
1631 * Because of these assumptions, this function must be called very
1632 * carefully. Moreover, it will not be used in the near future when
1633 * we develop `neater' mechanism to process extension headers.
1634 */
1635char *
1636ip6_get_prevhdr(struct mbuf *m, int off)
1637{
1638	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1639
1640	if (off == sizeof(struct ip6_hdr))
1641		return (&ip6->ip6_nxt);
1642	else {
1643		int len, nxt;
1644		struct ip6_ext *ip6e = NULL;
1645
1646		nxt = ip6->ip6_nxt;
1647		len = sizeof(struct ip6_hdr);
1648		while (len < off) {
1649			ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len);
1650
1651			switch (nxt) {
1652			case IPPROTO_FRAGMENT:
1653				len += sizeof(struct ip6_frag);
1654				break;
1655			case IPPROTO_AH:
1656				len += (ip6e->ip6e_len + 2) << 2;
1657				break;
1658			default:
1659				len += (ip6e->ip6e_len + 1) << 3;
1660				break;
1661			}
1662			nxt = ip6e->ip6e_nxt;
1663		}
1664		if (ip6e)
1665			return (&ip6e->ip6e_nxt);
1666		else
1667			return NULL;
1668	}
1669}
1670
1671/*
1672 * get next header offset.  m will be retained.
1673 */
1674int
1675ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp)
1676{
1677	struct ip6_hdr ip6;
1678	struct ip6_ext ip6e;
1679	struct ip6_frag fh;
1680
1681	/* just in case */
1682	if (m == NULL)
1683		panic("ip6_nexthdr: m == NULL");
1684	if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off)
1685		return -1;
1686
1687	switch (proto) {
1688	case IPPROTO_IPV6:
1689		if (m->m_pkthdr.len < off + sizeof(ip6))
1690			return -1;
1691		m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6);
1692		if (nxtp)
1693			*nxtp = ip6.ip6_nxt;
1694		off += sizeof(ip6);
1695		return off;
1696
1697	case IPPROTO_FRAGMENT:
1698		/*
1699		 * terminate parsing if it is not the first fragment,
1700		 * it does not make sense to parse through it.
1701		 */
1702		if (m->m_pkthdr.len < off + sizeof(fh))
1703			return -1;
1704		m_copydata(m, off, sizeof(fh), (caddr_t)&fh);
1705		/* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */
1706		if (fh.ip6f_offlg & IP6F_OFF_MASK)
1707			return -1;
1708		if (nxtp)
1709			*nxtp = fh.ip6f_nxt;
1710		off += sizeof(struct ip6_frag);
1711		return off;
1712
1713	case IPPROTO_AH:
1714		if (m->m_pkthdr.len < off + sizeof(ip6e))
1715			return -1;
1716		m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
1717		if (nxtp)
1718			*nxtp = ip6e.ip6e_nxt;
1719		off += (ip6e.ip6e_len + 2) << 2;
1720		return off;
1721
1722	case IPPROTO_HOPOPTS:
1723	case IPPROTO_ROUTING:
1724	case IPPROTO_DSTOPTS:
1725		if (m->m_pkthdr.len < off + sizeof(ip6e))
1726			return -1;
1727		m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
1728		if (nxtp)
1729			*nxtp = ip6e.ip6e_nxt;
1730		off += (ip6e.ip6e_len + 1) << 3;
1731		return off;
1732
1733	case IPPROTO_NONE:
1734	case IPPROTO_ESP:
1735	case IPPROTO_IPCOMP:
1736		/* give up */
1737		return -1;
1738
1739	default:
1740		return -1;
1741	}
1742
1743	return -1;
1744}
1745
1746/*
1747 * get offset for the last header in the chain.  m will be kept untainted.
1748 */
1749int
1750ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp)
1751{
1752	int newoff;
1753	int nxt;
1754
1755	if (!nxtp) {
1756		nxt = -1;
1757		nxtp = &nxt;
1758	}
1759	while (1) {
1760		newoff = ip6_nexthdr(m, off, proto, nxtp);
1761		if (newoff < 0)
1762			return off;
1763		else if (newoff < off)
1764			return -1;	/* invalid */
1765		else if (newoff == off)
1766			return newoff;
1767
1768		off = newoff;
1769		proto = *nxtp;
1770	}
1771}
1772
1773struct ip6aux *
1774ip6_addaux(struct mbuf *m)
1775{
1776	struct m_tag *mtag;
1777
1778	mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1779	if (!mtag) {
1780		mtag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux),
1781		    M_NOWAIT);
1782		if (mtag) {
1783			m_tag_prepend(m, mtag);
1784			bzero(mtag + 1, sizeof(struct ip6aux));
1785		}
1786	}
1787	return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
1788}
1789
1790struct ip6aux *
1791ip6_findaux(struct mbuf *m)
1792{
1793	struct m_tag *mtag;
1794
1795	mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1796	return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
1797}
1798
1799void
1800ip6_delaux(struct mbuf *m)
1801{
1802	struct m_tag *mtag;
1803
1804	mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1805	if (mtag)
1806		m_tag_delete(m, mtag);
1807}
1808
1809/*
1810 * System control for IP6
1811 */
1812
1813u_char	inet6ctlerrmap[PRC_NCMDS] = {
1814	0,		0,		0,		0,
1815	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
1816	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
1817	EMSGSIZE,	EHOSTUNREACH,	0,		0,
1818	0,		0,		0,		0,
1819	ENOPROTOOPT
1820};
1821