ip6_input.c revision 148385
112233Spsandoz/*	$FreeBSD: head/sys/netinet6/ip6_input.c 148385 2005-07-25 12:31:43Z ume $	*/
212233Spsandoz/*	$KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $	*/
312233Spsandoz
412233Spsandoz/*-
512233Spsandoz * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
612233Spsandoz * All rights reserved.
712233Spsandoz *
812233Spsandoz * Redistribution and use in source and binary forms, with or without
912233Spsandoz * modification, are permitted provided that the following conditions
1012233Spsandoz * are met:
1112233Spsandoz * 1. Redistributions of source code must retain the above copyright
1212233Spsandoz *    notice, this list of conditions and the following disclaimer.
1312233Spsandoz * 2. Redistributions in binary form must reproduce the above copyright
1412233Spsandoz *    notice, this list of conditions and the following disclaimer in the
1512233Spsandoz *    documentation and/or other materials provided with the distribution.
1612233Spsandoz * 3. Neither the name of the project nor the names of its contributors
1712233Spsandoz *    may be used to endorse or promote products derived from this software
1812233Spsandoz *    without specific prior written permission.
1912233Spsandoz *
2012233Spsandoz * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
2112233Spsandoz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2212233Spsandoz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2312233Spsandoz * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
2412233Spsandoz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2512233Spsandoz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2612233Spsandoz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2712233Spsandoz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2812233Spsandoz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2912233Spsandoz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3012233Spsandoz * SUCH DAMAGE.
3112233Spsandoz */
3212233Spsandoz
3312233Spsandoz/*-
3412233Spsandoz * Copyright (c) 1982, 1986, 1988, 1993
3512233Spsandoz *	The Regents of the University of California.  All rights reserved.
3612233Spsandoz *
3712233Spsandoz * Redistribution and use in source and binary forms, with or without
3812233Spsandoz * modification, are permitted provided that the following conditions
3912233Spsandoz * are met:
4012233Spsandoz * 1. Redistributions of source code must retain the above copyright
4112233Spsandoz *    notice, this list of conditions and the following disclaimer.
4212233Spsandoz * 2. Redistributions in binary form must reproduce the above copyright
4312233Spsandoz *    notice, this list of conditions and the following disclaimer in the
4412233Spsandoz *    documentation and/or other materials provided with the distribution.
4512233Spsandoz * 4. Neither the name of the University nor the names of its contributors
4612233Spsandoz *    may be used to endorse or promote products derived from this software
4712233Spsandoz *    without specific prior written permission.
4812233Spsandoz *
4912233Spsandoz * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
5012233Spsandoz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
5112233Spsandoz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
5212233Spsandoz * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
5312233Spsandoz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
5412233Spsandoz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
5512233Spsandoz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
5612233Spsandoz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
5712233Spsandoz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
5812233Spsandoz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
5912233Spsandoz * SUCH DAMAGE.
6012233Spsandoz *
6112233Spsandoz *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
6212233Spsandoz */
6312233Spsandoz
6412233Spsandoz#include "opt_ip6fw.h"
6512233Spsandoz#include "opt_inet.h"
6612233Spsandoz#include "opt_inet6.h"
6712233Spsandoz#include "opt_ipsec.h"
6812233Spsandoz
6912233Spsandoz#include <sys/param.h>
7012233Spsandoz#include <sys/systm.h>
7112233Spsandoz#include <sys/malloc.h>
7212233Spsandoz#include <sys/mbuf.h>
7312233Spsandoz#include <sys/proc.h>
7412233Spsandoz#include <sys/domain.h>
7512233Spsandoz#include <sys/protosw.h>
7612233Spsandoz#include <sys/socket.h>
7712233Spsandoz#include <sys/socketvar.h>
7812233Spsandoz#include <sys/errno.h>
7912233Spsandoz#include <sys/time.h>
8012233Spsandoz#include <sys/kernel.h>
8112233Spsandoz#include <sys/syslog.h>
8212233Spsandoz
8312233Spsandoz#include <net/if.h>
8412233Spsandoz#include <net/if_types.h>
8512233Spsandoz#include <net/if_dl.h>
8612233Spsandoz#include <net/route.h>
8712233Spsandoz#include <net/netisr.h>
8812233Spsandoz#include <net/pfil.h>
8912233Spsandoz
9012233Spsandoz#include <netinet/in.h>
9112233Spsandoz#include <netinet/in_systm.h>
9212233Spsandoz#ifdef INET
9312233Spsandoz#include <netinet/ip.h>
9412233Spsandoz#include <netinet/ip_icmp.h>
9512233Spsandoz#endif /* INET */
9612233Spsandoz#include <netinet/ip6.h>
9712233Spsandoz#include <netinet6/in6_var.h>
9812233Spsandoz#include <netinet6/ip6_var.h>
9912233Spsandoz#include <netinet/in_pcb.h>
10012233Spsandoz#include <netinet/icmp6.h>
10112233Spsandoz#include <netinet6/scope6_var.h>
10212233Spsandoz#include <netinet6/in6_ifattach.h>
10312233Spsandoz#include <netinet6/nd6.h>
10412233Spsandoz
10512233Spsandoz#ifdef IPSEC
10612233Spsandoz#include <netinet6/ipsec.h>
10712233Spsandoz#ifdef INET6
10812233Spsandoz#include <netinet6/ipsec6.h>
10912233Spsandoz#endif
11012233Spsandoz#endif
11112233Spsandoz
11212233Spsandoz#ifdef FAST_IPSEC
11312233Spsandoz#include <netipsec/ipsec.h>
11412233Spsandoz#include <netipsec/ipsec6.h>
11512233Spsandoz#define	IPSEC
11612233Spsandoz#endif /* FAST_IPSEC */
11712233Spsandoz
11812233Spsandoz#include <netinet6/ip6_fw.h>
11912233Spsandoz
12012233Spsandoz#include <netinet6/ip6protosw.h>
12112233Spsandoz
12212233Spsandoz#include <net/net_osdep.h>
12312233Spsandoz
12412233Spsandozextern struct domain inet6domain;
12512233Spsandoz
12612233Spsandozu_char ip6_protox[IPPROTO_MAX];
12712233Spsandozstatic struct ifqueue ip6intrq;
12812233Spsandozstatic int ip6qmaxlen = IFQ_MAXLEN;
12912233Spsandozstruct in6_ifaddr *in6_ifaddr;
13012233Spsandoz
13112233Spsandozextern struct callout in6_tmpaddrtimer_ch;
13212233Spsandoz
13312233Spsandozint ip6_forward_srcrt;			/* XXX */
13412233Spsandozint ip6_sourcecheck;			/* XXX */
13512233Spsandozint ip6_sourcecheck_interval;		/* XXX */
13612233Spsandoz
13712233Spsandozint ip6_ours_check_algorithm;
13812233Spsandoz
13912233Spsandozstruct pfil_head inet6_pfil_hook;
14012233Spsandoz
14112233Spsandoz/* firewall hooks */
14212233Spsandozip6_fw_chk_t *ip6_fw_chk_ptr;
14312233Spsandozip6_fw_ctl_t *ip6_fw_ctl_ptr;
14412233Spsandozint ip6_fw_enable = 1;
14512233Spsandoz
14612233Spsandozstruct ip6stat ip6stat;
14712233Spsandoz
14812233Spsandozstatic void ip6_init2 __P((void *));
14912233Spsandozstatic struct ip6aux *ip6_setdstifaddr __P((struct mbuf *, struct in6_ifaddr *));
15012233Spsandozstatic int ip6_hopopts_input __P((u_int32_t *, u_int32_t *, struct mbuf **, int *));
15112233Spsandoz#ifdef PULLDOWN_TEST
15212233Spsandozstatic struct mbuf *ip6_pullexthdr __P((struct mbuf *, size_t, int));
15312233Spsandoz#endif
15412233Spsandoz
15512233Spsandoz/*
15612233Spsandoz * IP6 initialization: fill in IP6 protocol switch table.
15712233Spsandoz * All protocols not implemented in kernel go to raw IP6 protocol handler.
15812233Spsandoz */
15912233Spsandozvoid
16012233Spsandozip6_init()
16112233Spsandoz{
16212233Spsandoz	struct ip6protosw *pr;
16312233Spsandoz	int i;
16412233Spsandoz
16512233Spsandoz#ifdef DIAGNOSTIC
16612233Spsandoz	if (sizeof(struct protosw) != sizeof(struct ip6protosw))
16712233Spsandoz		panic("sizeof(protosw) != sizeof(ip6protosw)");
16812233Spsandoz#endif
16912233Spsandoz	pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
17012233Spsandoz	if (pr == 0)
17112233Spsandoz		panic("ip6_init");
17212233Spsandoz
17312233Spsandoz	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
17412233Spsandoz	for (i = 0; i < IPPROTO_MAX; i++)
17512233Spsandoz		ip6_protox[i] = pr - inet6sw;
17612233Spsandoz	/*
17712233Spsandoz	 * Cycle through IP protocols and put them into the appropriate place
17812233Spsandoz	 * in ip6_protox[].
17912233Spsandoz	 */
18012233Spsandoz	for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
18112233Spsandoz	    pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++)
18212233Spsandoz		if (pr->pr_domain->dom_family == PF_INET6 &&
18312233Spsandoz		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
18412233Spsandoz			/* Be careful to only index valid IP protocols. */
18512233Spsandoz			if (pr->pr_protocol < IPPROTO_MAX)
18612233Spsandoz				ip6_protox[pr->pr_protocol] = pr - inet6sw;
18712233Spsandoz		}
18812233Spsandoz
18912233Spsandoz	/* Initialize packet filter hooks. */
19012233Spsandoz	inet6_pfil_hook.ph_type = PFIL_TYPE_AF;
19112233Spsandoz	inet6_pfil_hook.ph_af = AF_INET6;
19212233Spsandoz	if ((i = pfil_head_register(&inet6_pfil_hook)) != 0)
19312233Spsandoz		printf("%s: WARNING: unable to register pfil hook, "
19412233Spsandoz			"error %d\n", __func__, i);
19512233Spsandoz
19612233Spsandoz	ip6intrq.ifq_maxlen = ip6qmaxlen;
19712233Spsandoz	mtx_init(&ip6intrq.ifq_mtx, "ip6_inq", NULL, MTX_DEF);
19812233Spsandoz	netisr_register(NETISR_IPV6, ip6_input, &ip6intrq, 0);
19912233Spsandoz	scope6_init();
20012233Spsandoz	addrsel_policy_init();
20112233Spsandoz	nd6_init();
20212233Spsandoz	frag6_init();
20312233Spsandoz	ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
20412233Spsandoz}
20512233Spsandoz
20612233Spsandozstatic void
20712233Spsandozip6_init2(dummy)
20812233Spsandoz	void *dummy;
20912233Spsandoz{
21012233Spsandoz
21112233Spsandoz	/* nd6_timer_init */
21212233Spsandoz	callout_init(&nd6_timer_ch, 0);
21312233Spsandoz	callout_reset(&nd6_timer_ch, hz, nd6_timer, NULL);
21412233Spsandoz
21512233Spsandoz	/* timer for regeneranation of temporary addresses randomize ID */
21612233Spsandoz	callout_init(&in6_tmpaddrtimer_ch, 0);
21712233Spsandoz	callout_reset(&in6_tmpaddrtimer_ch,
21812233Spsandoz		      (ip6_temp_preferred_lifetime - ip6_desync_factor -
21912233Spsandoz		       ip6_temp_regen_advance) * hz,
22012233Spsandoz		      in6_tmpaddrtimer, NULL);
22112233Spsandoz}
22212233Spsandoz
22312233Spsandoz/* cheat */
22412233Spsandoz/* This must be after route_init(), which is now SI_ORDER_THIRD */
22512233SpsandozSYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);
22612233Spsandoz
22712233Spsandozextern struct	route_in6 ip6_forward_rt;
22812233Spsandoz
22912233Spsandozvoid
23012233Spsandozip6_input(m)
23112233Spsandoz	struct mbuf *m;
23212233Spsandoz{
23312233Spsandoz	struct ip6_hdr *ip6;
23412233Spsandoz	int off = sizeof(struct ip6_hdr), nest;
23512233Spsandoz	u_int32_t plen;
23612233Spsandoz	u_int32_t rtalert = ~0;
23712233Spsandoz	int nxt, ours = 0;
23812233Spsandoz	struct ifnet *deliverifp = NULL;
23912233Spsandoz	struct in6_addr odst;
24012233Spsandoz	int srcrt = 0;
24112233Spsandoz
24212233Spsandoz	GIANT_REQUIRED;			/* XXX for now */
24312233Spsandoz#ifdef IPSEC
24412233Spsandoz	/*
24512233Spsandoz	 * should the inner packet be considered authentic?
24612233Spsandoz	 * see comment in ah4_input().
24712233Spsandoz	 */
24812233Spsandoz	if (m) {
24912233Spsandoz		m->m_flags &= ~M_AUTHIPHDR;
25012233Spsandoz		m->m_flags &= ~M_AUTHIPDGM;
25112233Spsandoz	}
25212233Spsandoz#endif
25312233Spsandoz
25412233Spsandoz	/*
25512233Spsandoz	 * make sure we don't have onion peering information into m_tag.
25612233Spsandoz	 */
25712233Spsandoz	ip6_delaux(m);
25812233Spsandoz
25912233Spsandoz	/*
26012233Spsandoz	 * mbuf statistics
26112233Spsandoz	 */
26212233Spsandoz	if (m->m_flags & M_EXT) {
26312233Spsandoz		if (m->m_next)
26412233Spsandoz			ip6stat.ip6s_mext2m++;
26512233Spsandoz		else
26612233Spsandoz			ip6stat.ip6s_mext1++;
26712233Spsandoz	} else {
268#define M2MMAX	(sizeof(ip6stat.ip6s_m2m)/sizeof(ip6stat.ip6s_m2m[0]))
269		if (m->m_next) {
270			if (m->m_flags & M_LOOP) {
271				ip6stat.ip6s_m2m[loif[0].if_index]++; /* XXX */
272			} else if (m->m_pkthdr.rcvif->if_index < M2MMAX)
273				ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++;
274			else
275				ip6stat.ip6s_m2m[0]++;
276		} else
277			ip6stat.ip6s_m1++;
278#undef M2MMAX
279	}
280
281	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive);
282	ip6stat.ip6s_total++;
283
284#ifndef PULLDOWN_TEST
285	/*
286	 * L2 bridge code and some other code can return mbuf chain
287	 * that does not conform to KAME requirement.  too bad.
288	 * XXX: fails to join if interface MTU > MCLBYTES.  jumbogram?
289	 */
290	if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) {
291		struct mbuf *n;
292
293		MGETHDR(n, M_DONTWAIT, MT_HEADER);
294		if (n)
295			M_MOVE_PKTHDR(n, m);
296		if (n && n->m_pkthdr.len > MHLEN) {
297			MCLGET(n, M_DONTWAIT);
298			if ((n->m_flags & M_EXT) == 0) {
299				m_freem(n);
300				n = NULL;
301			}
302		}
303		if (n == NULL) {
304			m_freem(m);
305			return;	/* ENOBUFS */
306		}
307
308		m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t));
309		n->m_len = n->m_pkthdr.len;
310		m_freem(m);
311		m = n;
312	}
313	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /* nothing */);
314#endif
315
316	if (m->m_len < sizeof(struct ip6_hdr)) {
317		struct ifnet *inifp;
318		inifp = m->m_pkthdr.rcvif;
319		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
320			ip6stat.ip6s_toosmall++;
321			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
322			return;
323		}
324	}
325
326	ip6 = mtod(m, struct ip6_hdr *);
327
328	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
329		ip6stat.ip6s_badvers++;
330		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
331		goto bad;
332	}
333
334	ip6stat.ip6s_nxthist[ip6->ip6_nxt]++;
335
336	/*
337	 * Check against address spoofing/corruption.
338	 */
339	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) ||
340	    IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) {
341		/*
342		 * XXX: "badscope" is not very suitable for a multicast source.
343		 */
344		ip6stat.ip6s_badscope++;
345		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
346		goto bad;
347	}
348	if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) &&
349	    !(m->m_flags & M_LOOP)) {
350		/*
351		 * In this case, the packet should come from the loopback
352		 * interface.  However, we cannot just check the if_flags,
353		 * because ip6_mloopback() passes the "actual" interface
354		 * as the outgoing/incoming interface.
355		 */
356		ip6stat.ip6s_badscope++;
357		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
358		goto bad;
359	}
360
361#ifdef ALTQ
362	if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) {
363		/* packet is dropped by traffic conditioner */
364		return;
365	}
366#endif
367	/*
368	 * The following check is not documented in specs.  A malicious
369	 * party may be able to use IPv4 mapped addr to confuse tcp/udp stack
370	 * and bypass security checks (act as if it was from 127.0.0.1 by using
371	 * IPv6 src ::ffff:127.0.0.1).  Be cautious.
372	 *
373	 * This check chokes if we are in an SIIT cloud.  As none of BSDs
374	 * support IPv4-less kernel compilation, we cannot support SIIT
375	 * environment at all.  So, it makes more sense for us to reject any
376	 * malicious packets for non-SIIT environment, than try to do a
377	 * partial support for SIIT environment.
378	 */
379	if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
380	    IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
381		ip6stat.ip6s_badscope++;
382		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
383		goto bad;
384	}
385#if 0
386	/*
387	 * Reject packets with IPv4 compatible addresses (auto tunnel).
388	 *
389	 * The code forbids auto tunnel relay case in RFC1933 (the check is
390	 * stronger than RFC1933).  We may want to re-enable it if mech-xx
391	 * is revised to forbid relaying case.
392	 */
393	if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) ||
394	    IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) {
395		ip6stat.ip6s_badscope++;
396		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
397		goto bad;
398	}
399#endif
400
401	/*
402	 * Disambiguate address scope zones (if there is ambiguity).
403	 * We first make sure that the original source or destination address
404	 * is not in our internal form for scoped addresses.  Such addresses
405	 * are not necessarily invalid spec-wise, but we cannot accept them due
406	 * to the usage conflict.
407	 * in6_setscope() then also checks and rejects the cases where src or
408	 * dst are the loopback address and the receiving interface
409	 * is not loopback.
410	 */
411	if (in6_clearscope(&ip6->ip6_src) || in6_clearscope(&ip6->ip6_dst)) {
412		ip6stat.ip6s_badscope++; /* XXX */
413		goto bad;
414	}
415	if (in6_setscope(&ip6->ip6_src, m->m_pkthdr.rcvif, NULL) ||
416	    in6_setscope(&ip6->ip6_dst, m->m_pkthdr.rcvif, NULL)) {
417		ip6stat.ip6s_badscope++;
418		goto bad;
419	}
420
421	/*
422	 * Run through list of hooks for input packets.
423	 *
424	 * NB: Beware of the destination address changing
425	 *     (e.g. by NAT rewriting).  When this happens,
426	 *     tell ip6_forward to do the right thing.
427	 */
428	odst = ip6->ip6_dst;
429
430	/* Jump over all PFIL processing if hooks are not active. */
431	if (inet6_pfil_hook.ph_busy_count == -1)
432		goto passin;
433
434	if (pfil_run_hooks(&inet6_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL))
435		return;
436	if (m == NULL)			/* consumed by filter */
437		return;
438	ip6 = mtod(m, struct ip6_hdr *);
439	srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);
440
441passin:
442	/*
443	 * Check with the firewall...
444	 */
445	if (ip6_fw_enable && ip6_fw_chk_ptr) {
446		u_short port = 0;
447		/* If ipfw says divert, we have to just drop packet */
448		/* use port as a dummy argument */
449		if ((*ip6_fw_chk_ptr)(&ip6, NULL, &port, &m)) {
450			m_freem(m);
451			m = NULL;
452		}
453		if (!m)
454			return;
455	}
456
457	/*
458	 * Multicast check
459	 */
460	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
461	  	struct in6_multi *in6m = 0;
462
463		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast);
464		/*
465		 * See if we belong to the destination multicast group on the
466		 * arrival interface.
467		 */
468		IN6_LOOKUP_MULTI(ip6->ip6_dst, m->m_pkthdr.rcvif, in6m);
469		if (in6m)
470			ours = 1;
471		else if (!ip6_mrouter) {
472			ip6stat.ip6s_notmember++;
473			ip6stat.ip6s_cantforward++;
474			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
475			goto bad;
476		}
477		deliverifp = m->m_pkthdr.rcvif;
478		goto hbhcheck;
479	}
480
481	/*
482	 *  Unicast check
483	 */
484	if (ip6_forward_rt.ro_rt != NULL &&
485	    (ip6_forward_rt.ro_rt->rt_flags & RTF_UP) != 0 &&
486	    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
487	    &((struct sockaddr_in6 *)(&ip6_forward_rt.ro_dst))->sin6_addr))
488		ip6stat.ip6s_forward_cachehit++;
489	else {
490		struct sockaddr_in6 *dst6;
491
492		if (ip6_forward_rt.ro_rt) {
493			/* route is down or destination is different */
494			ip6stat.ip6s_forward_cachemiss++;
495			RTFREE(ip6_forward_rt.ro_rt);
496			ip6_forward_rt.ro_rt = 0;
497		}
498
499		bzero(&ip6_forward_rt.ro_dst, sizeof(struct sockaddr_in6));
500		dst6 = (struct sockaddr_in6 *)&ip6_forward_rt.ro_dst;
501		dst6->sin6_len = sizeof(struct sockaddr_in6);
502		dst6->sin6_family = AF_INET6;
503		dst6->sin6_addr = ip6->ip6_dst;
504
505		rtalloc((struct route *)&ip6_forward_rt);
506	}
507
508#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key))
509
510	/*
511	 * Accept the packet if the forwarding interface to the destination
512	 * according to the routing table is the loopback interface,
513	 * unless the associated route has a gateway.
514	 * Note that this approach causes to accept a packet if there is a
515	 * route to the loopback interface for the destination of the packet.
516	 * But we think it's even useful in some situations, e.g. when using
517	 * a special daemon which wants to intercept the packet.
518	 *
519	 * XXX: some OSes automatically make a cloned route for the destination
520	 * of an outgoing packet.  If the outgoing interface of the packet
521	 * is a loopback one, the kernel would consider the packet to be
522	 * accepted, even if we have no such address assinged on the interface.
523	 * We check the cloned flag of the route entry to reject such cases,
524	 * assuming that route entries for our own addresses are not made by
525	 * cloning (it should be true because in6_addloop explicitly installs
526	 * the host route).  However, we might have to do an explicit check
527	 * while it would be less efficient.  Or, should we rather install a
528	 * reject route for such a case?
529	 */
530	if (ip6_forward_rt.ro_rt &&
531	    (ip6_forward_rt.ro_rt->rt_flags &
532	     (RTF_HOST|RTF_GATEWAY)) == RTF_HOST &&
533#ifdef RTF_WASCLONED
534	    !(ip6_forward_rt.ro_rt->rt_flags & RTF_WASCLONED) &&
535#endif
536#ifdef RTF_CLONED
537	    !(ip6_forward_rt.ro_rt->rt_flags & RTF_CLONED) &&
538#endif
539#if 0
540	    /*
541	     * The check below is redundant since the comparison of
542	     * the destination and the key of the rtentry has
543	     * already done through looking up the routing table.
544	     */
545	    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
546	    &rt6_key(ip6_forward_rt.ro_rt)->sin6_addr)
547#endif
548	    ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_LOOP) {
549		struct in6_ifaddr *ia6 =
550			(struct in6_ifaddr *)ip6_forward_rt.ro_rt->rt_ifa;
551
552		/*
553		 * record address information into m_tag.
554		 */
555		(void)ip6_setdstifaddr(m, ia6);
556
557		/*
558		 * packets to a tentative, duplicated, or somehow invalid
559		 * address must not be accepted.
560		 */
561		if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
562			/* this address is ready */
563			ours = 1;
564			deliverifp = ia6->ia_ifp;	/* correct? */
565			/* Count the packet in the ip address stats */
566			ia6->ia_ifa.if_ipackets++;
567			ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
568			goto hbhcheck;
569		} else {
570			/* address is not ready, so discard the packet. */
571			nd6log((LOG_INFO,
572			    "ip6_input: packet to an unready address %s->%s\n",
573			    ip6_sprintf(&ip6->ip6_src),
574			    ip6_sprintf(&ip6->ip6_dst)));
575
576			goto bad;
577		}
578	}
579
580	/*
581	 * FAITH (Firewall Aided Internet Translator)
582	 */
583	if (ip6_keepfaith) {
584		if (ip6_forward_rt.ro_rt && ip6_forward_rt.ro_rt->rt_ifp
585		 && ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_FAITH) {
586			/* XXX do we need more sanity checks? */
587			ours = 1;
588			deliverifp = ip6_forward_rt.ro_rt->rt_ifp; /* faith */
589			goto hbhcheck;
590		}
591	}
592
593	/*
594	 * Now there is no reason to process the packet if it's not our own
595	 * and we're not a router.
596	 */
597	if (!ip6_forwarding) {
598		ip6stat.ip6s_cantforward++;
599		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
600		goto bad;
601	}
602
603  hbhcheck:
604	/*
605	 * record address information into m_tag, if we don't have one yet.
606	 * note that we are unable to record it, if the address is not listed
607	 * as our interface address (e.g. multicast addresses, addresses
608	 * within FAITH prefixes and such).
609	 */
610	if (deliverifp && !ip6_getdstifaddr(m)) {
611		struct in6_ifaddr *ia6;
612
613		ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst);
614		if (ia6) {
615			if (!ip6_setdstifaddr(m, ia6)) {
616				/*
617				 * XXX maybe we should drop the packet here,
618				 * as we could not provide enough information
619				 * to the upper layers.
620				 */
621			}
622		}
623	}
624
625	/*
626	 * Process Hop-by-Hop options header if it's contained.
627	 * m may be modified in ip6_hopopts_input().
628	 * If a JumboPayload option is included, plen will also be modified.
629	 */
630	plen = (u_int32_t)ntohs(ip6->ip6_plen);
631	if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
632		struct ip6_hbh *hbh;
633
634		if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) {
635#if 0	/*touches NULL pointer*/
636			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
637#endif
638			return;	/* m have already been freed */
639		}
640
641		/* adjust pointer */
642		ip6 = mtod(m, struct ip6_hdr *);
643
644		/*
645		 * if the payload length field is 0 and the next header field
646		 * indicates Hop-by-Hop Options header, then a Jumbo Payload
647		 * option MUST be included.
648		 */
649		if (ip6->ip6_plen == 0 && plen == 0) {
650			/*
651			 * Note that if a valid jumbo payload option is
652			 * contained, ip6_hopopts_input() must set a valid
653			 * (non-zero) payload length to the variable plen.
654			 */
655			ip6stat.ip6s_badoptions++;
656			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
657			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
658			icmp6_error(m, ICMP6_PARAM_PROB,
659				    ICMP6_PARAMPROB_HEADER,
660				    (caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
661			return;
662		}
663#ifndef PULLDOWN_TEST
664		/* ip6_hopopts_input() ensures that mbuf is contiguous */
665		hbh = (struct ip6_hbh *)(ip6 + 1);
666#else
667		IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
668			sizeof(struct ip6_hbh));
669		if (hbh == NULL) {
670			ip6stat.ip6s_tooshort++;
671			return;
672		}
673#endif
674		nxt = hbh->ip6h_nxt;
675
676		/*
677		 * accept the packet if a router alert option is included
678		 * and we act as an IPv6 router.
679		 */
680		if (rtalert != ~0 && ip6_forwarding)
681			ours = 1;
682	} else
683		nxt = ip6->ip6_nxt;
684
685	/*
686	 * Check that the amount of data in the buffers
687	 * is as at least much as the IPv6 header would have us expect.
688	 * Trim mbufs if longer than we expect.
689	 * Drop packet if shorter than we expect.
690	 */
691	if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) {
692		ip6stat.ip6s_tooshort++;
693		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
694		goto bad;
695	}
696	if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) {
697		if (m->m_len == m->m_pkthdr.len) {
698			m->m_len = sizeof(struct ip6_hdr) + plen;
699			m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
700		} else
701			m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len);
702	}
703
704	/*
705	 * Forward if desirable.
706	 */
707	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
708		/*
709		 * If we are acting as a multicast router, all
710		 * incoming multicast packets are passed to the
711		 * kernel-level multicast forwarding function.
712		 * The packet is returned (relatively) intact; if
713		 * ip6_mforward() returns a non-zero value, the packet
714		 * must be discarded, else it may be accepted below.
715		 */
716		if (ip6_mrouter && ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
717			ip6stat.ip6s_cantforward++;
718			m_freem(m);
719			return;
720		}
721		if (!ours) {
722			m_freem(m);
723			return;
724		}
725	} else if (!ours) {
726		ip6_forward(m, srcrt);
727		return;
728	}
729
730	ip6 = mtod(m, struct ip6_hdr *);
731
732	/*
733	 * Malicious party may be able to use IPv4 mapped addr to confuse
734	 * tcp/udp stack and bypass security checks (act as if it was from
735	 * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1).  Be cautious.
736	 *
737	 * For SIIT end node behavior, you may want to disable the check.
738	 * However, you will  become vulnerable to attacks using IPv4 mapped
739	 * source.
740	 */
741	if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
742	    IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
743		ip6stat.ip6s_badscope++;
744		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
745		goto bad;
746	}
747
748	/*
749	 * Tell launch routine the next header
750	 */
751	ip6stat.ip6s_delivered++;
752	in6_ifstat_inc(deliverifp, ifs6_in_deliver);
753	nest = 0;
754
755	while (nxt != IPPROTO_DONE) {
756		if (ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) {
757			ip6stat.ip6s_toomanyhdr++;
758			goto bad;
759		}
760
761		/*
762		 * protection against faulty packet - there should be
763		 * more sanity checks in header chain processing.
764		 */
765		if (m->m_pkthdr.len < off) {
766			ip6stat.ip6s_tooshort++;
767			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
768			goto bad;
769		}
770
771#ifdef IPSEC
772		/*
773		 * enforce IPsec policy checking if we are seeing last header.
774		 * note that we do not visit this with protocols with pcb layer
775		 * code - like udp/tcp/raw ip.
776		 */
777		if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0 &&
778		    ipsec6_in_reject(m, NULL)) {
779			ipsec6stat.in_polvio++;
780			goto bad;
781		}
782#endif
783		nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
784	}
785	return;
786 bad:
787	m_freem(m);
788}
789
790/*
791 * set/grab in6_ifaddr correspond to IPv6 destination address.
792 * XXX backward compatibility wrapper
793 */
794static struct ip6aux *
795ip6_setdstifaddr(m, ia6)
796	struct mbuf *m;
797	struct in6_ifaddr *ia6;
798{
799	struct ip6aux *ip6a;
800
801	ip6a = ip6_addaux(m);
802	if (ip6a)
803		ip6a->ip6a_dstia6 = ia6;
804	return ip6a;	/* NULL if failed to set */
805}
806
807struct in6_ifaddr *
808ip6_getdstifaddr(m)
809	struct mbuf *m;
810{
811	struct ip6aux *ip6a;
812
813	ip6a = ip6_findaux(m);
814	if (ip6a)
815		return ip6a->ip6a_dstia6;
816	else
817		return NULL;
818}
819
820/*
821 * Hop-by-Hop options header processing. If a valid jumbo payload option is
822 * included, the real payload length will be stored in plenp.
823 */
824static int
825ip6_hopopts_input(plenp, rtalertp, mp, offp)
826	u_int32_t *plenp;
827	u_int32_t *rtalertp;	/* XXX: should be stored more smart way */
828	struct mbuf **mp;
829	int *offp;
830{
831	struct mbuf *m = *mp;
832	int off = *offp, hbhlen;
833	struct ip6_hbh *hbh;
834	u_int8_t *opt;
835
836	/* validation of the length of the header */
837#ifndef PULLDOWN_TEST
838	IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1);
839	hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
840	hbhlen = (hbh->ip6h_len + 1) << 3;
841
842	IP6_EXTHDR_CHECK(m, off, hbhlen, -1);
843	hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
844#else
845	IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m,
846		sizeof(struct ip6_hdr), sizeof(struct ip6_hbh));
847	if (hbh == NULL) {
848		ip6stat.ip6s_tooshort++;
849		return -1;
850	}
851	hbhlen = (hbh->ip6h_len + 1) << 3;
852	IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
853		hbhlen);
854	if (hbh == NULL) {
855		ip6stat.ip6s_tooshort++;
856		return -1;
857	}
858#endif
859	off += hbhlen;
860	hbhlen -= sizeof(struct ip6_hbh);
861	opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh);
862
863	if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh),
864				hbhlen, rtalertp, plenp) < 0)
865		return (-1);
866
867	*offp = off;
868	*mp = m;
869	return (0);
870}
871
872/*
873 * Search header for all Hop-by-hop options and process each option.
874 * This function is separate from ip6_hopopts_input() in order to
875 * handle a case where the sending node itself process its hop-by-hop
876 * options header. In such a case, the function is called from ip6_output().
877 *
878 * The function assumes that hbh header is located right after the IPv6 header
879 * (RFC2460 p7), opthead is pointer into data content in m, and opthead to
880 * opthead + hbhlen is located in continuous memory region.
881 */
882int
883ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp)
884	struct mbuf *m;
885	u_int8_t *opthead;
886	int hbhlen;
887	u_int32_t *rtalertp;
888	u_int32_t *plenp;
889{
890	struct ip6_hdr *ip6;
891	int optlen = 0;
892	u_int8_t *opt = opthead;
893	u_int16_t rtalert_val;
894	u_int32_t jumboplen;
895	const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh);
896
897	for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) {
898		switch (*opt) {
899		case IP6OPT_PAD1:
900			optlen = 1;
901			break;
902		case IP6OPT_PADN:
903			if (hbhlen < IP6OPT_MINLEN) {
904				ip6stat.ip6s_toosmall++;
905				goto bad;
906			}
907			optlen = *(opt + 1) + 2;
908			break;
909		case IP6OPT_ROUTER_ALERT:
910			/* XXX may need check for alignment */
911			if (hbhlen < IP6OPT_RTALERT_LEN) {
912				ip6stat.ip6s_toosmall++;
913				goto bad;
914			}
915			if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) {
916				/* XXX stat */
917				icmp6_error(m, ICMP6_PARAM_PROB,
918				    ICMP6_PARAMPROB_HEADER,
919				    erroff + opt + 1 - opthead);
920				return (-1);
921			}
922			optlen = IP6OPT_RTALERT_LEN;
923			bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2);
924			*rtalertp = ntohs(rtalert_val);
925			break;
926		case IP6OPT_JUMBO:
927			/* XXX may need check for alignment */
928			if (hbhlen < IP6OPT_JUMBO_LEN) {
929				ip6stat.ip6s_toosmall++;
930				goto bad;
931			}
932			if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) {
933				/* XXX stat */
934				icmp6_error(m, ICMP6_PARAM_PROB,
935				    ICMP6_PARAMPROB_HEADER,
936				    erroff + opt + 1 - opthead);
937				return (-1);
938			}
939			optlen = IP6OPT_JUMBO_LEN;
940
941			/*
942			 * IPv6 packets that have non 0 payload length
943			 * must not contain a jumbo payload option.
944			 */
945			ip6 = mtod(m, struct ip6_hdr *);
946			if (ip6->ip6_plen) {
947				ip6stat.ip6s_badoptions++;
948				icmp6_error(m, ICMP6_PARAM_PROB,
949				    ICMP6_PARAMPROB_HEADER,
950				    erroff + opt - opthead);
951				return (-1);
952			}
953
954			/*
955			 * We may see jumbolen in unaligned location, so
956			 * we'd need to perform bcopy().
957			 */
958			bcopy(opt + 2, &jumboplen, sizeof(jumboplen));
959			jumboplen = (u_int32_t)htonl(jumboplen);
960
961#if 1
962			/*
963			 * if there are multiple jumbo payload options,
964			 * *plenp will be non-zero and the packet will be
965			 * rejected.
966			 * the behavior may need some debate in ipngwg -
967			 * multiple options does not make sense, however,
968			 * there's no explicit mention in specification.
969			 */
970			if (*plenp != 0) {
971				ip6stat.ip6s_badoptions++;
972				icmp6_error(m, ICMP6_PARAM_PROB,
973				    ICMP6_PARAMPROB_HEADER,
974				    erroff + opt + 2 - opthead);
975				return (-1);
976			}
977#endif
978
979			/*
980			 * jumbo payload length must be larger than 65535.
981			 */
982			if (jumboplen <= IPV6_MAXPACKET) {
983				ip6stat.ip6s_badoptions++;
984				icmp6_error(m, ICMP6_PARAM_PROB,
985				    ICMP6_PARAMPROB_HEADER,
986				    erroff + opt + 2 - opthead);
987				return (-1);
988			}
989			*plenp = jumboplen;
990
991			break;
992		default:		/* unknown option */
993			if (hbhlen < IP6OPT_MINLEN) {
994				ip6stat.ip6s_toosmall++;
995				goto bad;
996			}
997			optlen = ip6_unknown_opt(opt, m,
998			    erroff + opt - opthead);
999			if (optlen == -1)
1000				return (-1);
1001			optlen += 2;
1002			break;
1003		}
1004	}
1005
1006	return (0);
1007
1008  bad:
1009	m_freem(m);
1010	return (-1);
1011}
1012
1013/*
1014 * Unknown option processing.
1015 * The third argument `off' is the offset from the IPv6 header to the option,
1016 * which is necessary if the IPv6 header the and option header and IPv6 header
1017 * is not continuous in order to return an ICMPv6 error.
1018 */
1019int
1020ip6_unknown_opt(optp, m, off)
1021	u_int8_t *optp;
1022	struct mbuf *m;
1023	int off;
1024{
1025	struct ip6_hdr *ip6;
1026
1027	switch (IP6OPT_TYPE(*optp)) {
1028	case IP6OPT_TYPE_SKIP: /* ignore the option */
1029		return ((int)*(optp + 1));
1030	case IP6OPT_TYPE_DISCARD:	/* silently discard */
1031		m_freem(m);
1032		return (-1);
1033	case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */
1034		ip6stat.ip6s_badoptions++;
1035		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off);
1036		return (-1);
1037	case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */
1038		ip6stat.ip6s_badoptions++;
1039		ip6 = mtod(m, struct ip6_hdr *);
1040		if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
1041		    (m->m_flags & (M_BCAST|M_MCAST)))
1042			m_freem(m);
1043		else
1044			icmp6_error(m, ICMP6_PARAM_PROB,
1045				    ICMP6_PARAMPROB_OPTION, off);
1046		return (-1);
1047	}
1048
1049	m_freem(m);		/* XXX: NOTREACHED */
1050	return (-1);
1051}
1052
1053/*
1054 * Create the "control" list for this pcb.
1055 * The function will not modify mbuf chain at all.
1056 *
1057 * with KAME mbuf chain restriction:
1058 * The routine will be called from upper layer handlers like tcp6_input().
1059 * Thus the routine assumes that the caller (tcp6_input) have already
1060 * called IP6_EXTHDR_CHECK() and all the extension headers are located in the
1061 * very first mbuf on the mbuf chain.
1062 */
1063void
1064ip6_savecontrol(in6p, m, mp)
1065	struct inpcb *in6p;
1066	struct mbuf *m, **mp;
1067{
1068#define IS2292(x, y)	((in6p->in6p_flags & IN6P_RFC2292) ? (x) : (y))
1069	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1070
1071#ifdef SO_TIMESTAMP
1072	if ((in6p->in6p_socket->so_options & SO_TIMESTAMP) != 0) {
1073		struct timeval tv;
1074
1075		microtime(&tv);
1076		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
1077		    SCM_TIMESTAMP, SOL_SOCKET);
1078		if (*mp)
1079			mp = &(*mp)->m_next;
1080	}
1081#endif
1082
1083	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
1084		return;
1085
1086	/* RFC 2292 sec. 5 */
1087	if ((in6p->in6p_flags & IN6P_PKTINFO) != 0) {
1088		struct in6_pktinfo pi6;
1089
1090		bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr));
1091		in6_clearscope(&pi6.ipi6_addr);	/* XXX */
1092		pi6.ipi6_ifindex =
1093		    (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0;
1094
1095		*mp = sbcreatecontrol((caddr_t) &pi6,
1096		    sizeof(struct in6_pktinfo),
1097		    IS2292(IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6);
1098		if (*mp)
1099			mp = &(*mp)->m_next;
1100	}
1101
1102	if ((in6p->in6p_flags & IN6P_HOPLIMIT) != 0) {
1103		int hlim = ip6->ip6_hlim & 0xff;
1104
1105		*mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int),
1106		    IS2292(IPV6_2292HOPLIMIT, IPV6_HOPLIMIT), IPPROTO_IPV6);
1107		if (*mp)
1108			mp = &(*mp)->m_next;
1109	}
1110
1111	if ((in6p->in6p_flags & IN6P_TCLASS) != 0) {
1112		u_int32_t flowinfo;
1113		int tclass;
1114
1115		flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK);
1116		flowinfo >>= 20;
1117
1118		tclass = flowinfo & 0xff;
1119		*mp = sbcreatecontrol((caddr_t) &tclass, sizeof(tclass),
1120		    IPV6_TCLASS, IPPROTO_IPV6);
1121		if (*mp)
1122			mp = &(*mp)->m_next;
1123	}
1124
1125	/*
1126	 * IPV6_HOPOPTS socket option.  Recall that we required super-user
1127	 * privilege for the option (see ip6_ctloutput), but it might be too
1128	 * strict, since there might be some hop-by-hop options which can be
1129	 * returned to normal user.
1130	 * See also RFC 2292 section 6 (or RFC 3542 section 8).
1131	 */
1132	if ((in6p->in6p_flags & IN6P_HOPOPTS) != 0) {
1133		/*
1134		 * Check if a hop-by-hop options header is contatined in the
1135		 * received packet, and if so, store the options as ancillary
1136		 * data. Note that a hop-by-hop options header must be
1137		 * just after the IPv6 header, which is assured through the
1138		 * IPv6 input processing.
1139		 */
1140		if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
1141			struct ip6_hbh *hbh;
1142			int hbhlen = 0;
1143#ifdef PULLDOWN_TEST
1144			struct mbuf *ext;
1145#endif
1146
1147#ifndef PULLDOWN_TEST
1148			hbh = (struct ip6_hbh *)(ip6 + 1);
1149			hbhlen = (hbh->ip6h_len + 1) << 3;
1150#else
1151			ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr),
1152			    ip6->ip6_nxt);
1153			if (ext == NULL) {
1154				ip6stat.ip6s_tooshort++;
1155				return;
1156			}
1157			hbh = mtod(ext, struct ip6_hbh *);
1158			hbhlen = (hbh->ip6h_len + 1) << 3;
1159			if (hbhlen != ext->m_len) {
1160				m_freem(ext);
1161				ip6stat.ip6s_tooshort++;
1162				return;
1163			}
1164#endif
1165
1166			/*
1167			 * XXX: We copy the whole header even if a
1168			 * jumbo payload option is included, the option which
1169			 * is to be removed before returning according to
1170			 * RFC2292.
1171			 * Note: this constraint is removed in RFC3542
1172			 */
1173			*mp = sbcreatecontrol((caddr_t)hbh, hbhlen,
1174			    IS2292(IPV6_2292HOPOPTS, IPV6_HOPOPTS),
1175			    IPPROTO_IPV6);
1176			if (*mp)
1177				mp = &(*mp)->m_next;
1178#ifdef PULLDOWN_TEST
1179			m_freem(ext);
1180#endif
1181		}
1182	}
1183
1184	if ((in6p->in6p_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) {
1185		int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr);
1186
1187		/*
1188		 * Search for destination options headers or routing
1189		 * header(s) through the header chain, and stores each
1190		 * header as ancillary data.
1191		 * Note that the order of the headers remains in
1192		 * the chain of ancillary data.
1193		 */
1194		while (1) {	/* is explicit loop prevention necessary? */
1195			struct ip6_ext *ip6e = NULL;
1196			int elen;
1197#ifdef PULLDOWN_TEST
1198			struct mbuf *ext = NULL;
1199#endif
1200
1201			/*
1202			 * if it is not an extension header, don't try to
1203			 * pull it from the chain.
1204			 */
1205			switch (nxt) {
1206			case IPPROTO_DSTOPTS:
1207			case IPPROTO_ROUTING:
1208			case IPPROTO_HOPOPTS:
1209			case IPPROTO_AH: /* is it possible? */
1210				break;
1211			default:
1212				goto loopend;
1213			}
1214
1215#ifndef PULLDOWN_TEST
1216			if (off + sizeof(*ip6e) > m->m_len)
1217				goto loopend;
1218			ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off);
1219			if (nxt == IPPROTO_AH)
1220				elen = (ip6e->ip6e_len + 2) << 2;
1221			else
1222				elen = (ip6e->ip6e_len + 1) << 3;
1223			if (off + elen > m->m_len)
1224				goto loopend;
1225#else
1226			ext = ip6_pullexthdr(m, off, nxt);
1227			if (ext == NULL) {
1228				ip6stat.ip6s_tooshort++;
1229				return;
1230			}
1231			ip6e = mtod(ext, struct ip6_ext *);
1232			if (nxt == IPPROTO_AH)
1233				elen = (ip6e->ip6e_len + 2) << 2;
1234			else
1235				elen = (ip6e->ip6e_len + 1) << 3;
1236			if (elen != ext->m_len) {
1237				m_freem(ext);
1238				ip6stat.ip6s_tooshort++;
1239				return;
1240			}
1241#endif
1242
1243			switch (nxt) {
1244			case IPPROTO_DSTOPTS:
1245				if (!(in6p->in6p_flags & IN6P_DSTOPTS))
1246					break;
1247
1248				*mp = sbcreatecontrol((caddr_t)ip6e, elen,
1249				    IS2292(IPV6_2292DSTOPTS, IPV6_DSTOPTS),
1250				    IPPROTO_IPV6);
1251				if (*mp)
1252					mp = &(*mp)->m_next;
1253				break;
1254			case IPPROTO_ROUTING:
1255				if (!in6p->in6p_flags & IN6P_RTHDR)
1256					break;
1257
1258				*mp = sbcreatecontrol((caddr_t)ip6e, elen,
1259				    IS2292(IPV6_2292RTHDR, IPV6_RTHDR),
1260				    IPPROTO_IPV6);
1261				if (*mp)
1262					mp = &(*mp)->m_next;
1263				break;
1264			case IPPROTO_HOPOPTS:
1265			case IPPROTO_AH: /* is it possible? */
1266				break;
1267
1268			default:
1269				/*
1270			 	 * other cases have been filtered in the above.
1271				 * none will visit this case.  here we supply
1272				 * the code just in case (nxt overwritten or
1273				 * other cases).
1274				 */
1275#ifdef PULLDOWN_TEST
1276				m_freem(ext);
1277#endif
1278				goto loopend;
1279
1280			}
1281
1282			/* proceed with the next header. */
1283			off += elen;
1284			nxt = ip6e->ip6e_nxt;
1285			ip6e = NULL;
1286#ifdef PULLDOWN_TEST
1287			m_freem(ext);
1288			ext = NULL;
1289#endif
1290		}
1291	  loopend:
1292		;
1293	}
1294
1295#undef IS2292
1296}
1297
1298void
1299ip6_notify_pmtu(in6p, dst, mtu)
1300	struct inpcb *in6p;
1301	struct sockaddr_in6 *dst;
1302	u_int32_t *mtu;
1303{
1304	struct socket *so;
1305	struct mbuf *m_mtu;
1306	struct ip6_mtuinfo mtuctl;
1307
1308	so =  in6p->inp_socket;
1309
1310	if (mtu == NULL)
1311		return;
1312
1313#ifdef DIAGNOSTIC
1314	if (so == NULL)		/* I believe this is impossible */
1315		panic("ip6_notify_pmtu: socket is NULL");
1316#endif
1317
1318	bzero(&mtuctl, sizeof(mtuctl));	/* zero-clear for safety */
1319	mtuctl.ip6m_mtu = *mtu;
1320	mtuctl.ip6m_addr = *dst;
1321	if (sa6_recoverscope(&mtuctl.ip6m_addr))
1322		return;
1323
1324	if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl),
1325	    IPV6_PATHMTU, IPPROTO_IPV6)) == NULL)
1326		return;
1327
1328	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu)
1329	    == 0) {
1330		m_freem(m_mtu);
1331		/* XXX: should count statistics */
1332	} else
1333		sorwakeup(so);
1334
1335	return;
1336}
1337
1338#ifdef PULLDOWN_TEST
1339/*
1340 * pull single extension header from mbuf chain.  returns single mbuf that
1341 * contains the result, or NULL on error.
1342 */
1343static struct mbuf *
1344ip6_pullexthdr(m, off, nxt)
1345	struct mbuf *m;
1346	size_t off;
1347	int nxt;
1348{
1349	struct ip6_ext ip6e;
1350	size_t elen;
1351	struct mbuf *n;
1352
1353#ifdef DIAGNOSTIC
1354	switch (nxt) {
1355	case IPPROTO_DSTOPTS:
1356	case IPPROTO_ROUTING:
1357	case IPPROTO_HOPOPTS:
1358	case IPPROTO_AH: /* is it possible? */
1359		break;
1360	default:
1361		printf("ip6_pullexthdr: invalid nxt=%d\n", nxt);
1362	}
1363#endif
1364
1365	m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
1366	if (nxt == IPPROTO_AH)
1367		elen = (ip6e.ip6e_len + 2) << 2;
1368	else
1369		elen = (ip6e.ip6e_len + 1) << 3;
1370
1371	MGET(n, M_DONTWAIT, MT_DATA);
1372	if (n && elen >= MLEN) {
1373		MCLGET(n, M_DONTWAIT);
1374		if ((n->m_flags & M_EXT) == 0) {
1375			m_free(n);
1376			n = NULL;
1377		}
1378	}
1379	if (!n)
1380		return NULL;
1381
1382	n->m_len = 0;
1383	if (elen >= M_TRAILINGSPACE(n)) {
1384		m_free(n);
1385		return NULL;
1386	}
1387
1388	m_copydata(m, off, elen, mtod(n, caddr_t));
1389	n->m_len = elen;
1390	return n;
1391}
1392#endif
1393
1394/*
1395 * Get pointer to the previous header followed by the header
1396 * currently processed.
1397 * XXX: This function supposes that
1398 *	M includes all headers,
1399 *	the next header field and the header length field of each header
1400 *	are valid, and
1401 *	the sum of each header length equals to OFF.
1402 * Because of these assumptions, this function must be called very
1403 * carefully. Moreover, it will not be used in the near future when
1404 * we develop `neater' mechanism to process extension headers.
1405 */
1406char *
1407ip6_get_prevhdr(m, off)
1408	struct mbuf *m;
1409	int off;
1410{
1411	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1412
1413	if (off == sizeof(struct ip6_hdr))
1414		return (&ip6->ip6_nxt);
1415	else {
1416		int len, nxt;
1417		struct ip6_ext *ip6e = NULL;
1418
1419		nxt = ip6->ip6_nxt;
1420		len = sizeof(struct ip6_hdr);
1421		while (len < off) {
1422			ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len);
1423
1424			switch (nxt) {
1425			case IPPROTO_FRAGMENT:
1426				len += sizeof(struct ip6_frag);
1427				break;
1428			case IPPROTO_AH:
1429				len += (ip6e->ip6e_len + 2) << 2;
1430				break;
1431			default:
1432				len += (ip6e->ip6e_len + 1) << 3;
1433				break;
1434			}
1435			nxt = ip6e->ip6e_nxt;
1436		}
1437		if (ip6e)
1438			return (&ip6e->ip6e_nxt);
1439		else
1440			return NULL;
1441	}
1442}
1443
1444/*
1445 * get next header offset.  m will be retained.
1446 */
1447int
1448ip6_nexthdr(m, off, proto, nxtp)
1449	struct mbuf *m;
1450	int off;
1451	int proto;
1452	int *nxtp;
1453{
1454	struct ip6_hdr ip6;
1455	struct ip6_ext ip6e;
1456	struct ip6_frag fh;
1457
1458	/* just in case */
1459	if (m == NULL)
1460		panic("ip6_nexthdr: m == NULL");
1461	if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off)
1462		return -1;
1463
1464	switch (proto) {
1465	case IPPROTO_IPV6:
1466		if (m->m_pkthdr.len < off + sizeof(ip6))
1467			return -1;
1468		m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6);
1469		if (nxtp)
1470			*nxtp = ip6.ip6_nxt;
1471		off += sizeof(ip6);
1472		return off;
1473
1474	case IPPROTO_FRAGMENT:
1475		/*
1476		 * terminate parsing if it is not the first fragment,
1477		 * it does not make sense to parse through it.
1478		 */
1479		if (m->m_pkthdr.len < off + sizeof(fh))
1480			return -1;
1481		m_copydata(m, off, sizeof(fh), (caddr_t)&fh);
1482		/* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */
1483		if (fh.ip6f_offlg & IP6F_OFF_MASK)
1484			return -1;
1485		if (nxtp)
1486			*nxtp = fh.ip6f_nxt;
1487		off += sizeof(struct ip6_frag);
1488		return off;
1489
1490	case IPPROTO_AH:
1491		if (m->m_pkthdr.len < off + sizeof(ip6e))
1492			return -1;
1493		m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
1494		if (nxtp)
1495			*nxtp = ip6e.ip6e_nxt;
1496		off += (ip6e.ip6e_len + 2) << 2;
1497		return off;
1498
1499	case IPPROTO_HOPOPTS:
1500	case IPPROTO_ROUTING:
1501	case IPPROTO_DSTOPTS:
1502		if (m->m_pkthdr.len < off + sizeof(ip6e))
1503			return -1;
1504		m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
1505		if (nxtp)
1506			*nxtp = ip6e.ip6e_nxt;
1507		off += (ip6e.ip6e_len + 1) << 3;
1508		return off;
1509
1510	case IPPROTO_NONE:
1511	case IPPROTO_ESP:
1512	case IPPROTO_IPCOMP:
1513		/* give up */
1514		return -1;
1515
1516	default:
1517		return -1;
1518	}
1519
1520	return -1;
1521}
1522
1523/*
1524 * get offset for the last header in the chain.  m will be kept untainted.
1525 */
1526int
1527ip6_lasthdr(m, off, proto, nxtp)
1528	struct mbuf *m;
1529	int off;
1530	int proto;
1531	int *nxtp;
1532{
1533	int newoff;
1534	int nxt;
1535
1536	if (!nxtp) {
1537		nxt = -1;
1538		nxtp = &nxt;
1539	}
1540	while (1) {
1541		newoff = ip6_nexthdr(m, off, proto, nxtp);
1542		if (newoff < 0)
1543			return off;
1544		else if (newoff < off)
1545			return -1;	/* invalid */
1546		else if (newoff == off)
1547			return newoff;
1548
1549		off = newoff;
1550		proto = *nxtp;
1551	}
1552}
1553
1554struct ip6aux *
1555ip6_addaux(m)
1556	struct mbuf *m;
1557{
1558	struct m_tag *mtag;
1559
1560	mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1561	if (!mtag) {
1562		mtag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux),
1563		    M_NOWAIT);
1564		if (mtag) {
1565			m_tag_prepend(m, mtag);
1566			bzero(mtag + 1, sizeof(struct ip6aux));
1567		}
1568	}
1569	return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
1570}
1571
1572struct ip6aux *
1573ip6_findaux(m)
1574	struct mbuf *m;
1575{
1576	struct m_tag *mtag;
1577
1578	mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1579	return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
1580}
1581
1582void
1583ip6_delaux(m)
1584	struct mbuf *m;
1585{
1586	struct m_tag *mtag;
1587
1588	mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1589	if (mtag)
1590		m_tag_delete(m, mtag);
1591}
1592
1593/*
1594 * System control for IP6
1595 */
1596
1597u_char	inet6ctlerrmap[PRC_NCMDS] = {
1598	0,		0,		0,		0,
1599	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
1600	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
1601	EMSGSIZE,	EHOSTUNREACH,	0,		0,
1602	0,		0,		0,		0,
1603	ENOPROTOOPT
1604};
1605