ip6_input.c revision 130416
1/*	$FreeBSD: head/sys/netinet6/ip6_input.c 130416 2004-06-13 17:29:10Z mlaier $	*/
2/*	$KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1988, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 4. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
62 */
63
64#include "opt_ip6fw.h"
65#include "opt_inet.h"
66#include "opt_inet6.h"
67#include "opt_ipsec.h"
68#include "opt_pfil_hooks.h"
69#include "opt_random_ip_id.h"
70
71#include <sys/param.h>
72#include <sys/systm.h>
73#include <sys/malloc.h>
74#include <sys/mbuf.h>
75#include <sys/proc.h>
76#include <sys/domain.h>
77#include <sys/protosw.h>
78#include <sys/socket.h>
79#include <sys/socketvar.h>
80#include <sys/errno.h>
81#include <sys/time.h>
82#include <sys/kernel.h>
83#include <sys/syslog.h>
84
85#include <net/if.h>
86#include <net/if_types.h>
87#include <net/if_dl.h>
88#include <net/route.h>
89#include <net/netisr.h>
90#ifdef PFIL_HOOKS
91#include <net/pfil.h>
92#endif
93
94#include <netinet/in.h>
95#include <netinet/in_systm.h>
96#ifdef INET
97#include <netinet/ip.h>
98#include <netinet/ip_icmp.h>
99#endif /* INET */
100#include <netinet/ip6.h>
101#include <netinet6/in6_var.h>
102#include <netinet6/ip6_var.h>
103#include <netinet/in_pcb.h>
104#include <netinet/icmp6.h>
105#include <netinet6/scope6_var.h>
106#include <netinet6/in6_ifattach.h>
107#include <netinet6/nd6.h>
108#include <netinet6/in6_prefix.h>
109
110#ifdef IPSEC
111#include <netinet6/ipsec.h>
112#ifdef INET6
113#include <netinet6/ipsec6.h>
114#endif
115#endif
116
117#ifdef FAST_IPSEC
118#include <netipsec/ipsec.h>
119#include <netipsec/ipsec6.h>
120#define	IPSEC
121#endif /* FAST_IPSEC */
122
123#include <netinet6/ip6_fw.h>
124
125#include <netinet6/ip6protosw.h>
126
127#include <net/net_osdep.h>
128
/* Protocol domain for PF_INET6; ip6_init() walks its protosw table. */
129extern struct domain inet6domain;
130
/*
 * Maps an IPv6 next-header value to an index into inet6sw[].
 * Filled in by ip6_init(); unlisted protocols map to the raw handler.
 */
131u_char ip6_protox[IPPROTO_MAX];
/* Input queue handed to netisr_register() for NETISR_IPV6 in ip6_init(). */
132static struct ifqueue ip6intrq;
/* Value copied into ip6intrq.ifq_maxlen by ip6_init(). */
133static int ip6qmaxlen = IFQ_MAXLEN;
/* NOTE(review): presumably the head of the IPv6 interface address list. */
134struct in6_ifaddr *in6_ifaddr;
135
/* Callout for temporary address regeneration; armed in ip6_init2(). */
136extern struct callout in6_tmpaddrtimer_ch;
137
138int ip6_forward_srcrt;			/* XXX */
139int ip6_sourcecheck;			/* XXX */
140int ip6_sourcecheck_interval;		/* XXX */
141
142int ip6_ours_check_algorithm;
143
144#ifdef PFIL_HOOKS
/* Packet filter hook head for AF_INET6; registered in ip6_init(). */
145struct pfil_head inet6_pfil_hook;
146#endif
147
148/* firewall hooks */
/* ip6_input() calls ip6_fw_chk_ptr only if it is set and ip6_fw_enable != 0. */
149ip6_fw_chk_t *ip6_fw_chk_ptr;
150ip6_fw_ctl_t *ip6_fw_ctl_ptr;
151int ip6_fw_enable = 1;
152
/* IPv6 statistics counters, updated throughout this file. */
153struct ip6stat ip6stat;
154
155static void ip6_init2 __P((void *));
156static struct ip6aux *ip6_setdstifaddr __P((struct mbuf *, struct in6_ifaddr *));
157static int ip6_hopopts_input __P((u_int32_t *, u_int32_t *, struct mbuf **, int *));
158#ifdef PULLDOWN_TEST
159static struct mbuf *ip6_pullexthdr __P((struct mbuf *, size_t, int));
160#endif
161
162/*
163 * IP6 initialization: fill in IP6 protocol switch table.
164 * All protocols not implemented in kernel go to raw IP6 protocol handler.
165 */
166void
167ip6_init()
168{
169	struct ip6protosw *pr;
170	int i;
171
172#ifdef DIAGNOSTIC
173	if (sizeof(struct protosw) != sizeof(struct ip6protosw))
174		panic("sizeof(protosw) != sizeof(ip6protosw)");
175#endif
176	pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
177	if (pr == 0)
178		panic("ip6_init");
179	for (i = 0; i < IPPROTO_MAX; i++)
180		ip6_protox[i] = pr - inet6sw;
181	for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
182	    pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++)
183		if (pr->pr_domain->dom_family == PF_INET6 &&
184		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
185			ip6_protox[pr->pr_protocol] = pr - inet6sw;
186#ifdef PFIL_HOOKS
187	inet6_pfil_hook.ph_type = PFIL_TYPE_AF;
188	inet6_pfil_hook.ph_af = AF_INET6;
189	if ((i = pfil_head_register(&inet6_pfil_hook)) != 0)
190		printf("%s: WARNING: unable to register pfil hook, "
191			"error %d\n", __func__, i);
192#endif /* PFIL_HOOKS */
193	ip6intrq.ifq_maxlen = ip6qmaxlen;
194	mtx_init(&ip6intrq.ifq_mtx, "ip6_inq", NULL, MTX_DEF);
195	netisr_register(NETISR_IPV6, ip6_input, &ip6intrq, 0);
196	scope6_init();
197	addrsel_policy_init();
198	nd6_init();
199	frag6_init();
200#ifndef RANDOM_IP_ID
201	ip6_flow_seq = arc4random();
202#endif
203	ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
204}
205
206static void
207ip6_init2(dummy)
208	void *dummy;
209{
210
211	/* nd6_timer_init */
212	callout_init(&nd6_timer_ch, 0);
213	callout_reset(&nd6_timer_ch, hz, nd6_timer, NULL);
214
215	/* router renumbering prefix list maintenance */
216	callout_init(&in6_rr_timer_ch, 0);
217	callout_reset(&in6_rr_timer_ch, hz, in6_rr_timer, NULL);
218
219	/* timer for regeneranation of temporary addresses randomize ID */
220	callout_init(&in6_tmpaddrtimer_ch, 0);
221	callout_reset(&in6_tmpaddrtimer_ch,
222		      (ip6_temp_preferred_lifetime - ip6_desync_factor -
223		       ip6_temp_regen_advance) * hz,
224		      in6_tmpaddrtimer, NULL);
225}
226
227/* cheat */
228/* This must be after route_init(), which is now SI_ORDER_THIRD */
/* Schedules ip6_init2() (callout setup) to run during system startup. */
229SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);
230
/*
 * Cached route consulted and refreshed by ip6_input() when it looks up
 * the unicast destination of an incoming packet.
 */
231extern struct	route_in6 ip6_forward_rt;
232
/*
 * IPv6 input routine; registered with netisr for NETISR_IPV6 in ip6_init().
 *
 * Sanity-checks the IPv6 header of the packet in m, runs the pfil hooks
 * and the ip6fw check when configured, and decides whether the packet is
 * for this node (multicast group membership on the receiving interface,
 * a host route through a loopback interface, or a FAITH interface).  It
 * then processes a leading Hop-by-Hop options header, trims the mbuf chain
 * to the advertised payload length, and either hands the packet to
 * ip6_forward()/ip6_mforward() or walks the extension header chain through
 * the inet6sw pr_input handlers.
 *
 * The mbuf is consumed on every path: it is freed at the "bad" label,
 * freed or taken over by a callee, or passed on to the protocol handlers.
 */
233void
234ip6_input(m)
235	struct mbuf *m;
236{
237	struct ip6_hdr *ip6;
238	int off = sizeof(struct ip6_hdr), nest;
239	u_int32_t plen;
	/* ~0 means "no router alert option seen"; see the check after hbh. */
240	u_int32_t rtalert = ~0;
241	int nxt, ours = 0;
242	struct ifnet *deliverifp = NULL;
243	struct sockaddr_in6 sa6;
244	u_int32_t srczone, dstzone;
245#ifdef PFIL_HOOKS
246	struct in6_addr odst;
247#endif
248	int srcrt = 0;
249
250	GIANT_REQUIRED;			/* XXX for now */
251#ifdef IPSEC
252	/*
253	 * should the inner packet be considered authentic?
254	 * see comment in ah4_input().
255	 */
256	if (m) {
257		m->m_flags &= ~M_AUTHIPHDR;
258		m->m_flags &= ~M_AUTHIPDGM;
259	}
260#endif
261
262	/*
263	 * make sure we don't have onion peering information into m_tag.
264	 */
265	ip6_delaux(m);
266
267	/*
268	 * mbuf statistics
269	 */
270	if (m->m_flags & M_EXT) {
271		if (m->m_next)
272			ip6stat.ip6s_mext2m++;
273		else
274			ip6stat.ip6s_mext1++;
275	} else {
276#define M2MMAX	(sizeof(ip6stat.ip6s_m2m)/sizeof(ip6stat.ip6s_m2m[0]))
277		if (m->m_next) {
			/*
			 * NOTE(review): loif[0].if_index is used as a
			 * subscript without the M2MMAX bound check that
			 * the rcvif index gets below -- confirm it is
			 * always in range.
			 */
278			if (m->m_flags & M_LOOP) {
279				ip6stat.ip6s_m2m[loif[0].if_index]++; /* XXX */
280			} else if (m->m_pkthdr.rcvif->if_index < M2MMAX)
281				ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++;
282			else
283				ip6stat.ip6s_m2m[0]++;
284		} else
285			ip6stat.ip6s_m1++;
286#undef M2MMAX
287	}
288
289	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive);
290	ip6stat.ip6s_total++;
291
292#ifndef PULLDOWN_TEST
293	/*
294	 * L2 bridge code and some other code can return mbuf chain
295	 * that does not conform to KAME requirement.  too bad.
296	 * XXX: fails to join if interface MTU > MCLBYTES.  jumbogram?
297	 */
298	if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) {
299		struct mbuf *n;
300
		/* Copy the whole chain into a single mbuf (or cluster). */
301		MGETHDR(n, M_DONTWAIT, MT_HEADER);
302		if (n)
303			M_MOVE_PKTHDR(n, m);
304		if (n && n->m_pkthdr.len > MHLEN) {
305			MCLGET(n, M_DONTWAIT);
306			if ((n->m_flags & M_EXT) == 0) {
307				m_freem(n);
308				n = NULL;
309			}
310		}
311		if (n == NULL) {
312			m_freem(m);
313			return;	/* ENOBUFS */
314		}
315
316		m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t));
317		n->m_len = n->m_pkthdr.len;
318		m_freem(m);
319		m = n;
320	}
321	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /* nothing */);
322#endif
323
324	if (m->m_len < sizeof(struct ip6_hdr)) {
325		struct ifnet *inifp;
		/* Save rcvif first: m is gone if m_pullup() fails. */
326		inifp = m->m_pkthdr.rcvif;
327		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
328			ip6stat.ip6s_toosmall++;
329			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
330			return;
331		}
332	}
333
334	ip6 = mtod(m, struct ip6_hdr *);
335
336	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
337		ip6stat.ip6s_badvers++;
338		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
339		goto bad;
340	}
341
342	ip6stat.ip6s_nxthist[ip6->ip6_nxt]++;
343
344	/*
345	 * Check against address spoofing/corruption.
346	 */
347	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) ||
348	    IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) {
349		/*
350		 * XXX: "badscope" is not very suitable for a multicast source.
351		 */
352		ip6stat.ip6s_badscope++;
353		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
354		goto bad;
355	}
356	if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) &&
357	    !(m->m_flags & M_LOOP)) {
358		/*
359		 * In this case, the packet should come from the loopback
360		 * interface.  However, we cannot just check the if_flags,
361		 * because ip6_mloopback() passes the "actual" interface
362		 * as the outgoing/incoming interface.
363		 */
364		ip6stat.ip6s_badscope++;
365		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
366		goto bad;
367	}
368
369#ifdef ALTQ
370	if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) {
371		/* packet is dropped by traffic conditioner */
372		return;
373	}
374#endif
375	/*
376	 * The following check is not documented in specs.  A malicious
377	 * party may be able to use IPv4 mapped addr to confuse tcp/udp stack
378	 * and bypass security checks (act as if it was from 127.0.0.1 by using
379	 * IPv6 src ::ffff:127.0.0.1).  Be cautious.
380	 *
381	 * This check chokes if we are in an SIIT cloud.  As none of BSDs
382	 * support IPv4-less kernel compilation, we cannot support SIIT
383	 * environment at all.  So, it makes more sense for us to reject any
384	 * malicious packets for non-SIIT environment, than try to do a
385	 * partial support for SIIT environment.
386	 */
387	if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
388	    IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
389		ip6stat.ip6s_badscope++;
390		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
391		goto bad;
392	}
393#if 0
394	/*
395	 * Reject packets with IPv4 compatible addresses (auto tunnel).
396	 *
397	 * The code forbids auto tunnel relay case in RFC1933 (the check is
398	 * stronger than RFC1933).  We may want to re-enable it if mech-xx
399	 * is revised to forbid relaying case.
400	 */
401	if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) ||
402	    IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) {
403		ip6stat.ip6s_badscope++;
404		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
405		goto bad;
406	}
407#endif
408
409	/*
410	 * Drop packets if the link ID portion is already filled.
411	 * XXX: this is technically not a good behavior.  But, we internally
412	 * use the field to disambiguate link-local addresses, so we cannot
413	 * be generous against those a bit strange addresses.
414	 */
415	if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
416		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src) &&
417		    ip6->ip6_src.s6_addr16[1]) {
418			ip6stat.ip6s_badscope++;
419			goto bad;
420		}
421		if ((IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) ||
422		     IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) &&
423		    ip6->ip6_dst.s6_addr16[1]) {
424			ip6stat.ip6s_badscope++;
425			goto bad;
426		}
427	}
428
429#ifdef PFIL_HOOKS
430	/*
431	 * Run through list of hooks for input packets.
432	 *
433	 * NB: Beware of the destination address changing
434	 *     (e.g. by NAT rewriting).  When this happens,
435	 *     tell ip6_forward to do the right thing.
436	 */
437	odst = ip6->ip6_dst;
438	if (pfil_run_hooks(&inet6_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN))
439		return;
440	if (m == NULL)			/* consumed by filter */
441		return;
	/* The hooks may have replaced m, so reload the header pointer. */
442	ip6 = mtod(m, struct ip6_hdr *);
	/* srcrt is non-zero iff a hook rewrote the destination address. */
443	srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);
444#endif /* PFIL_HOOKS */
445
446	/*
447	 * Check with the firewall...
448	 */
449	if (ip6_fw_enable && ip6_fw_chk_ptr) {
450		u_short port = 0;
451		/* If ipfw says divert, we have to just drop packet */
452		/* use port as a dummy argument */
453		if ((*ip6_fw_chk_ptr)(&ip6, NULL, &port, &m)) {
454			m_freem(m);
455			m = NULL;
456		}
457		if (!m)
458			return;
459	}
460
461	/*
462	 * construct source and destination address structures with
463	 * disambiguating their scope zones (if there is ambiguity).
464	 * XXX: sin6_family and sin6_len will NOT be referred to, but we fill
465	 * in these fields just in case.
466	 */
467	if (in6_addr2zoneid(m->m_pkthdr.rcvif, &ip6->ip6_src, &srczone) ||
468	    in6_addr2zoneid(m->m_pkthdr.rcvif, &ip6->ip6_dst, &dstzone)) {
469		/*
470		 * Note that these generic checks cover cases that src or
471		 * dst are the loopback address and the receiving interface
472		 * is not loopback.
473		 */
474		ip6stat.ip6s_badscope++;
475		goto bad;
476	}
477
478	bzero(&sa6, sizeof(sa6));
479	sa6.sin6_family = AF_INET6;
480	sa6.sin6_len = sizeof(struct sockaddr_in6);
481
	/* Embed the zone IDs into the header addresses, in place. */
482	sa6.sin6_addr = ip6->ip6_src;
483	sa6.sin6_scope_id = srczone;
484	if (in6_embedscope(&ip6->ip6_src, &sa6, NULL, NULL)) {
485		/* XXX: should not happen */
486		ip6stat.ip6s_badscope++;
487		goto bad;
488	}
489
490	sa6.sin6_addr = ip6->ip6_dst;
491	sa6.sin6_scope_id = dstzone;
492	if (in6_embedscope(&ip6->ip6_dst, &sa6, NULL, NULL)) {
493		/* XXX: should not happen */
494		ip6stat.ip6s_badscope++;
495		goto bad;
496	}
497
498	/* XXX: ff01::%ifN awareness is not merged, yet. */
499	if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_src))
500		ip6->ip6_src.s6_addr16[1] = 0;
501	if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst))
502		ip6->ip6_dst.s6_addr16[1] = 0;
503
504	/*
505	 * Multicast check
506	 */
507	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
508	  	struct in6_multi *in6m = 0;
509
510		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast);
511		/*
512		 * See if we belong to the destination multicast group on the
513		 * arrival interface.
514		 */
515		IN6_LOOKUP_MULTI(ip6->ip6_dst, m->m_pkthdr.rcvif, in6m);
516		if (in6m)
517			ours = 1;
518		else if (!ip6_mrouter) {
519			ip6stat.ip6s_notmember++;
520			ip6stat.ip6s_cantforward++;
521			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
522			goto bad;
523		}
524		deliverifp = m->m_pkthdr.rcvif;
525		goto hbhcheck;
526	}
527
528	/*
529	 *  Unicast check
530	 */
	/* Reuse the cached route if it is up and matches the destination. */
531	if (ip6_forward_rt.ro_rt != NULL &&
532	    (ip6_forward_rt.ro_rt->rt_flags & RTF_UP) != 0 &&
533	    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
534	    &((struct sockaddr_in6 *)(&ip6_forward_rt.ro_dst))->sin6_addr))
535		ip6stat.ip6s_forward_cachehit++;
536	else {
537		struct sockaddr_in6 *dst6;
538
539		if (ip6_forward_rt.ro_rt) {
540			/* route is down or destination is different */
541			ip6stat.ip6s_forward_cachemiss++;
542			RTFREE(ip6_forward_rt.ro_rt);
543			ip6_forward_rt.ro_rt = 0;
544		}
545
546		bzero(&ip6_forward_rt.ro_dst, sizeof(struct sockaddr_in6));
547		dst6 = (struct sockaddr_in6 *)&ip6_forward_rt.ro_dst;
548		dst6->sin6_len = sizeof(struct sockaddr_in6);
549		dst6->sin6_family = AF_INET6;
550		dst6->sin6_addr = ip6->ip6_dst;
551
552		rtalloc((struct route *)&ip6_forward_rt);
553	}
554
555#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key))
556
557	/*
558	 * Accept the packet if the forwarding interface to the destination
559	 * according to the routing table is the loopback interface,
560	 * unless the associated route has a gateway.
561	 * Note that this approach causes to accept a packet if there is a
562	 * route to the loopback interface for the destination of the packet.
563	 * But we think it's even useful in some situations, e.g. when using
564	 * a special daemon which wants to intercept the packet.
565	 *
566	 * XXX: some OSes automatically make a cloned route for the destination
567	 * of an outgoing packet.  If the outgoing interface of the packet
568	 * is a loopback one, the kernel would consider the packet to be
569	 * accepted, even if we have no such address assigned on the interface.
570	 * We check the cloned flag of the route entry to reject such cases,
571	 * assuming that route entries for our own addresses are not made by
572	 * cloning (it should be true because in6_addloop explicitly installs
573	 * the host route).  However, we might have to do an explicit check
574	 * while it would be less efficient.  Or, should we rather install a
575	 * reject route for such a case?
576	 */
577	if (ip6_forward_rt.ro_rt &&
578	    (ip6_forward_rt.ro_rt->rt_flags &
579	     (RTF_HOST|RTF_GATEWAY)) == RTF_HOST &&
580#ifdef RTF_WASCLONED
581	    !(ip6_forward_rt.ro_rt->rt_flags & RTF_WASCLONED) &&
582#endif
583#ifdef RTF_CLONED
584	    !(ip6_forward_rt.ro_rt->rt_flags & RTF_CLONED) &&
585#endif
586#if 0
587	    /*
588	     * The check below is redundant since the comparison of
589	     * the destination and the key of the rtentry has
590	     * already done through looking up the routing table.
591	     */
592	    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
593	    &rt6_key(ip6_forward_rt.ro_rt)->sin6_addr)
594#endif
595	    ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_LOOP) {
596		struct in6_ifaddr *ia6 =
597			(struct in6_ifaddr *)ip6_forward_rt.ro_rt->rt_ifa;
598
599		/*
600		 * record address information into m_tag.
601		 */
602		(void)ip6_setdstifaddr(m, ia6);
603
604		/*
605		 * packets to a tentative, duplicated, or somehow invalid
606		 * address must not be accepted.
607		 */
608		if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
609			/* this address is ready */
610			ours = 1;
611			deliverifp = ia6->ia_ifp;	/* correct? */
612			/* Count the packet in the ip address stats */
613			ia6->ia_ifa.if_ipackets++;
614			ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
615			goto hbhcheck;
616		} else {
617			/* address is not ready, so discard the packet. */
618			nd6log((LOG_INFO,
619			    "ip6_input: packet to an unready address %s->%s\n",
620			    ip6_sprintf(&ip6->ip6_src),
621			    ip6_sprintf(&ip6->ip6_dst)));
622
623			goto bad;
624		}
625	}
626
627	/*
628	 * FAITH (Firewall Aided Internet Translator)
629	 */
630	if (ip6_keepfaith) {
631		if (ip6_forward_rt.ro_rt && ip6_forward_rt.ro_rt->rt_ifp
632		 && ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_FAITH) {
633			/* XXX do we need more sanity checks? */
634			ours = 1;
635			deliverifp = ip6_forward_rt.ro_rt->rt_ifp; /* faith */
636			goto hbhcheck;
637		}
638	}
639
640	/*
641	 * Now there is no reason to process the packet if it's not our own
642	 * and we're not a router.
643	 */
644	if (!ip6_forwarding) {
645		ip6stat.ip6s_cantforward++;
646		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
647		goto bad;
648	}
649
650  hbhcheck:
651	/*
652	 * record address information into m_tag, if we don't have one yet.
653	 * note that we are unable to record it, if the address is not listed
654	 * as our interface address (e.g. multicast addresses, addresses
655	 * within FAITH prefixes and such).
656	 */
657	if (deliverifp && !ip6_getdstifaddr(m)) {
658		struct in6_ifaddr *ia6;
659
660		ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst);
661		if (ia6) {
662			if (!ip6_setdstifaddr(m, ia6)) {
663				/*
664				 * XXX maybe we should drop the packet here,
665				 * as we could not provide enough information
666				 * to the upper layers.
667				 */
668			}
669		}
670	}
671
672	/*
673	 * Process Hop-by-Hop options header if it's contained.
674	 * m may be modified in ip6_hopopts_input().
675	 * If a JumboPayload option is included, plen will also be modified.
676	 */
677	plen = (u_int32_t)ntohs(ip6->ip6_plen);
678	if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
679		struct ip6_hbh *hbh;
680
681		if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) {
682#if 0	/*touches NULL pointer*/
683			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
684#endif
685			return;	/* m have already been freed */
686		}
687
688		/* adjust pointer */
689		ip6 = mtod(m, struct ip6_hdr *);
690
691		/*
692		 * if the payload length field is 0 and the next header field
693		 * indicates Hop-by-Hop Options header, then a Jumbo Payload
694		 * option MUST be included.
695		 */
696		if (ip6->ip6_plen == 0 && plen == 0) {
697			/*
698			 * Note that if a valid jumbo payload option is
699			 * contained, ip6_hopopts_input() must set a valid
700			 * (non-zero) payload length to the variable plen.
701			 */
702			ip6stat.ip6s_badoptions++;
703			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
704			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
705			icmp6_error(m, ICMP6_PARAM_PROB,
706				    ICMP6_PARAMPROB_HEADER,
707				    (caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
708			return;
709		}
710#ifndef PULLDOWN_TEST
711		/* ip6_hopopts_input() ensures that mbuf is contiguous */
712		hbh = (struct ip6_hbh *)(ip6 + 1);
713#else
714		IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
715			sizeof(struct ip6_hbh));
716		if (hbh == NULL) {
717			ip6stat.ip6s_tooshort++;
718			return;
719		}
720#endif
721		nxt = hbh->ip6h_nxt;
722
723		/*
724		 * accept the packet if a router alert option is included
725		 * and we act as an IPv6 router.
726		 */
727		if (rtalert != ~0 && ip6_forwarding)
728			ours = 1;
729	} else
730		nxt = ip6->ip6_nxt;
731
732	/*
733	 * Check that the amount of data in the buffers
734	 * is as at least much as the IPv6 header would have us expect.
735	 * Trim mbufs if longer than we expect.
736	 * Drop packet if shorter than we expect.
737	 */
738	if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) {
739		ip6stat.ip6s_tooshort++;
740		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
741		goto bad;
742	}
743	if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) {
744		if (m->m_len == m->m_pkthdr.len) {
745			m->m_len = sizeof(struct ip6_hdr) + plen;
746			m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
747		} else
			/* negative count; presumably trims the tail -- see m_adj */
748			m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len);
749	}
750
751	/*
752	 * Forward if desirable.
753	 */
754	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
755		/*
756		 * If we are acting as a multicast router, all
757		 * incoming multicast packets are passed to the
758		 * kernel-level multicast forwarding function.
759		 * The packet is returned (relatively) intact; if
760		 * ip6_mforward() returns a non-zero value, the packet
761		 * must be discarded, else it may be accepted below.
762		 */
763		if (ip6_mrouter && ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
764			ip6stat.ip6s_cantforward++;
765			m_freem(m);
766			return;
767		}
768		if (!ours) {
769			m_freem(m);
770			return;
771		}
772	} else if (!ours) {
773		ip6_forward(m, srcrt);
774		return;
775	}
776
777	ip6 = mtod(m, struct ip6_hdr *);
778
779	/*
780	 * Malicious party may be able to use IPv4 mapped addr to confuse
781	 * tcp/udp stack and bypass security checks (act as if it was from
782	 * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1).  Be cautious.
783	 *
784	 * For SIIT end node behavior, you may want to disable the check.
785	 * However, you will  become vulnerable to attacks using IPv4 mapped
786	 * source.
787	 */
788	if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
789	    IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
790		ip6stat.ip6s_badscope++;
791		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
792		goto bad;
793	}
794
795	/*
796	 * Tell launch routine the next header
797	 */
798	ip6stat.ip6s_delivered++;
799	in6_ifstat_inc(deliverifp, ifs6_in_deliver);
800	nest = 0;
801
	/*
	 * Each pr_input handler returns the next header value to process,
	 * or IPPROTO_DONE once the packet has been fully handled.
	 */
802	while (nxt != IPPROTO_DONE) {
803		if (ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) {
804			ip6stat.ip6s_toomanyhdr++;
805			goto bad;
806		}
807
808		/*
809		 * protection against faulty packet - there should be
810		 * more sanity checks in header chain processing.
811		 */
812		if (m->m_pkthdr.len < off) {
813			ip6stat.ip6s_tooshort++;
814			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
815			goto bad;
816		}
817
818#ifdef IPSEC
819		/*
820		 * enforce IPsec policy checking if we are seeing last header.
821		 * note that we do not visit this with protocols with pcb layer
822		 * code - like udp/tcp/raw ip.
823		 */
824		if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0 &&
825		    ipsec6_in_reject(m, NULL)) {
826			ipsec6stat.in_polvio++;
827			goto bad;
828		}
829#endif
830		nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
831	}
832	return;
	/* Common drop path: release the packet. */
833 bad:
834	m_freem(m);
835}
836
837/*
838 * set/grab in6_ifaddr correspond to IPv6 destination address.
839 * XXX backward compatibility wrapper
840 */
841static struct ip6aux *
842ip6_setdstifaddr(m, ia6)
843	struct mbuf *m;
844	struct in6_ifaddr *ia6;
845{
846	struct ip6aux *ip6a;
847
848	ip6a = ip6_addaux(m);
849	if (ip6a)
850		ip6a->ip6a_dstia6 = ia6;
851	return ip6a;	/* NULL if failed to set */
852}
853
854struct in6_ifaddr *
855ip6_getdstifaddr(m)
856	struct mbuf *m;
857{
858	struct ip6aux *ip6a;
859
860	ip6a = ip6_findaux(m);
861	if (ip6a)
862		return ip6a->ip6a_dstia6;
863	else
864		return NULL;
865}
866
867/*
868 * Hop-by-Hop options header processing. If a valid jumbo payload option is
869 * included, the real payload length will be stored in plenp.
870 */
871static int
872ip6_hopopts_input(plenp, rtalertp, mp, offp)
873	u_int32_t *plenp;
874	u_int32_t *rtalertp;	/* XXX: should be stored more smart way */
875	struct mbuf **mp;
876	int *offp;
877{
878	struct mbuf *m = *mp;
879	int off = *offp, hbhlen;
880	struct ip6_hbh *hbh;
881	u_int8_t *opt;
882
883	/* validation of the length of the header */
884#ifndef PULLDOWN_TEST
885	IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1);
886	hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
887	hbhlen = (hbh->ip6h_len + 1) << 3;
888
889	IP6_EXTHDR_CHECK(m, off, hbhlen, -1);
890	hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
891#else
892	IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m,
893		sizeof(struct ip6_hdr), sizeof(struct ip6_hbh));
894	if (hbh == NULL) {
895		ip6stat.ip6s_tooshort++;
896		return -1;
897	}
898	hbhlen = (hbh->ip6h_len + 1) << 3;
899	IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
900		hbhlen);
901	if (hbh == NULL) {
902		ip6stat.ip6s_tooshort++;
903		return -1;
904	}
905#endif
906	off += hbhlen;
907	hbhlen -= sizeof(struct ip6_hbh);
908	opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh);
909
910	if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh),
911				hbhlen, rtalertp, plenp) < 0)
912		return (-1);
913
914	*offp = off;
915	*mp = m;
916	return (0);
917}
918
919/*
920 * Search header for all Hop-by-hop options and process each option.
921 * This function is separate from ip6_hopopts_input() in order to
922 * handle a case where the sending node itself process its hop-by-hop
923 * options header. In such a case, the function is called from ip6_output().
924 *
925 * The function assumes that hbh header is located right after the IPv6 header
926 * (RFC2460 p7), opthead is pointer into data content in m, and opthead to
927 * opthead + hbhlen is located in continuous memory region.
928 */
929int
930ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp)
931	struct mbuf *m;
932	u_int8_t *opthead;
933	int hbhlen;
934	u_int32_t *rtalertp;
935	u_int32_t *plenp;
936{
937	struct ip6_hdr *ip6;
938	int optlen = 0;
939	u_int8_t *opt = opthead;
940	u_int16_t rtalert_val;
941	u_int32_t jumboplen;
942	const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh);
943
944	for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) {
945		switch (*opt) {
946		case IP6OPT_PAD1:
947			optlen = 1;
948			break;
949		case IP6OPT_PADN:
950			if (hbhlen < IP6OPT_MINLEN) {
951				ip6stat.ip6s_toosmall++;
952				goto bad;
953			}
954			optlen = *(opt + 1) + 2;
955			break;
956		case IP6OPT_ROUTER_ALERT:
957			/* XXX may need check for alignment */
958			if (hbhlen < IP6OPT_RTALERT_LEN) {
959				ip6stat.ip6s_toosmall++;
960				goto bad;
961			}
962			if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) {
963				/* XXX stat */
964				icmp6_error(m, ICMP6_PARAM_PROB,
965				    ICMP6_PARAMPROB_HEADER,
966				    erroff + opt + 1 - opthead);
967				return (-1);
968			}
969			optlen = IP6OPT_RTALERT_LEN;
970			bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2);
971			*rtalertp = ntohs(rtalert_val);
972			break;
973		case IP6OPT_JUMBO:
974			/* XXX may need check for alignment */
975			if (hbhlen < IP6OPT_JUMBO_LEN) {
976				ip6stat.ip6s_toosmall++;
977				goto bad;
978			}
979			if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) {
980				/* XXX stat */
981				icmp6_error(m, ICMP6_PARAM_PROB,
982				    ICMP6_PARAMPROB_HEADER,
983				    erroff + opt + 1 - opthead);
984				return (-1);
985			}
986			optlen = IP6OPT_JUMBO_LEN;
987
988			/*
989			 * IPv6 packets that have non 0 payload length
990			 * must not contain a jumbo payload option.
991			 */
992			ip6 = mtod(m, struct ip6_hdr *);
993			if (ip6->ip6_plen) {
994				ip6stat.ip6s_badoptions++;
995				icmp6_error(m, ICMP6_PARAM_PROB,
996				    ICMP6_PARAMPROB_HEADER,
997				    erroff + opt - opthead);
998				return (-1);
999			}
1000
1001			/*
1002			 * We may see jumbolen in unaligned location, so
1003			 * we'd need to perform bcopy().
1004			 */
1005			bcopy(opt + 2, &jumboplen, sizeof(jumboplen));
1006			jumboplen = (u_int32_t)htonl(jumboplen);
1007
1008#if 1
1009			/*
1010			 * if there are multiple jumbo payload options,
1011			 * *plenp will be non-zero and the packet will be
1012			 * rejected.
1013			 * the behavior may need some debate in ipngwg -
1014			 * multiple options does not make sense, however,
1015			 * there's no explicit mention in specification.
1016			 */
1017			if (*plenp != 0) {
1018				ip6stat.ip6s_badoptions++;
1019				icmp6_error(m, ICMP6_PARAM_PROB,
1020				    ICMP6_PARAMPROB_HEADER,
1021				    erroff + opt + 2 - opthead);
1022				return (-1);
1023			}
1024#endif
1025
1026			/*
1027			 * jumbo payload length must be larger than 65535.
1028			 */
1029			if (jumboplen <= IPV6_MAXPACKET) {
1030				ip6stat.ip6s_badoptions++;
1031				icmp6_error(m, ICMP6_PARAM_PROB,
1032				    ICMP6_PARAMPROB_HEADER,
1033				    erroff + opt + 2 - opthead);
1034				return (-1);
1035			}
1036			*plenp = jumboplen;
1037
1038			break;
1039		default:		/* unknown option */
1040			if (hbhlen < IP6OPT_MINLEN) {
1041				ip6stat.ip6s_toosmall++;
1042				goto bad;
1043			}
1044			optlen = ip6_unknown_opt(opt, m,
1045			    erroff + opt - opthead);
1046			if (optlen == -1)
1047				return (-1);
1048			optlen += 2;
1049			break;
1050		}
1051	}
1052
1053	return (0);
1054
1055  bad:
1056	m_freem(m);
1057	return (-1);
1058}
1059
1060/*
1061 * Unknown option processing.
1062 * The third argument `off' is the offset from the IPv6 header to the option,
1063 * which is necessary if the IPv6 header the and option header and IPv6 header
1064 * is not continuous in order to return an ICMPv6 error.
1065 */
1066int
1067ip6_unknown_opt(optp, m, off)
1068	u_int8_t *optp;
1069	struct mbuf *m;
1070	int off;
1071{
1072	struct ip6_hdr *ip6;
1073
1074	switch (IP6OPT_TYPE(*optp)) {
1075	case IP6OPT_TYPE_SKIP: /* ignore the option */
1076		return ((int)*(optp + 1));
1077	case IP6OPT_TYPE_DISCARD:	/* silently discard */
1078		m_freem(m);
1079		return (-1);
1080	case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */
1081		ip6stat.ip6s_badoptions++;
1082		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off);
1083		return (-1);
1084	case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */
1085		ip6stat.ip6s_badoptions++;
1086		ip6 = mtod(m, struct ip6_hdr *);
1087		if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
1088		    (m->m_flags & (M_BCAST|M_MCAST)))
1089			m_freem(m);
1090		else
1091			icmp6_error(m, ICMP6_PARAM_PROB,
1092				    ICMP6_PARAMPROB_OPTION, off);
1093		return (-1);
1094	}
1095
1096	m_freem(m);		/* XXX: NOTREACHED */
1097	return (-1);
1098}
1099
1100/*
1101 * Create the "control" list for this pcb.
1102 * The function will not modify mbuf chain at all.
1103 *
1104 * with KAME mbuf chain restriction:
1105 * The routine will be called from upper layer handlers like tcp6_input().
1106 * Thus the routine assumes that the caller (tcp6_input) have already
1107 * called IP6_EXTHDR_CHECK() and all the extension headers are located in the
1108 * very first mbuf on the mbuf chain.
1109 */
1110void
1111ip6_savecontrol(in6p, m, mp)
1112	struct inpcb *in6p;
1113	struct mbuf *m, **mp;
1114{
1115#define IS2292(x, y)	((in6p->in6p_flags & IN6P_RFC2292) ? (x) : (y))
1116	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1117
1118#ifdef SO_TIMESTAMP
1119	if ((in6p->in6p_socket->so_options & SO_TIMESTAMP) != 0) {
1120		struct timeval tv;
1121
1122		microtime(&tv);
1123		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
1124		    SCM_TIMESTAMP, SOL_SOCKET);
1125		if (*mp)
1126			mp = &(*mp)->m_next;
1127	}
1128#endif
1129
1130	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
1131		return;
1132
1133	/* RFC 2292 sec. 5 */
1134	if ((in6p->in6p_flags & IN6P_PKTINFO) != 0) {
1135		struct in6_pktinfo pi6;
1136
1137		bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr));
1138		in6_clearscope(&pi6.ipi6_addr);	/* XXX */
1139		pi6.ipi6_ifindex =
1140		    (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0;
1141
1142		*mp = sbcreatecontrol((caddr_t) &pi6,
1143		    sizeof(struct in6_pktinfo),
1144		    IS2292(IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6);
1145		if (*mp)
1146			mp = &(*mp)->m_next;
1147	}
1148
1149	if ((in6p->in6p_flags & IN6P_HOPLIMIT) != 0) {
1150		int hlim = ip6->ip6_hlim & 0xff;
1151
1152		*mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int),
1153		    IS2292(IPV6_2292HOPLIMIT, IPV6_HOPLIMIT), IPPROTO_IPV6);
1154		if (*mp)
1155			mp = &(*mp)->m_next;
1156	}
1157
1158	if ((in6p->in6p_flags & IN6P_TCLASS) != 0) {
1159		u_int32_t flowinfo;
1160		int tclass;
1161
1162		flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK);
1163		flowinfo >>= 20;
1164
1165		tclass = flowinfo & 0xff;
1166		*mp = sbcreatecontrol((caddr_t) &tclass, sizeof(tclass),
1167		    IPV6_TCLASS, IPPROTO_IPV6);
1168		if (*mp)
1169			mp = &(*mp)->m_next;
1170	}
1171
1172	/*
1173	 * IPV6_HOPOPTS socket option.  Recall that we required super-user
1174	 * privilege for the option (see ip6_ctloutput), but it might be too
1175	 * strict, since there might be some hop-by-hop options which can be
1176	 * returned to normal user.
1177	 * See also RFC 2292 section 6 (or RFC 3542 section 8).
1178	 */
1179	if ((in6p->in6p_flags & IN6P_HOPOPTS) != 0) {
1180		/*
1181		 * Check if a hop-by-hop options header is contatined in the
1182		 * received packet, and if so, store the options as ancillary
1183		 * data. Note that a hop-by-hop options header must be
1184		 * just after the IPv6 header, which is assured through the
1185		 * IPv6 input processing.
1186		 */
1187		if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
1188			struct ip6_hbh *hbh;
1189			int hbhlen = 0;
1190#ifdef PULLDOWN_TEST
1191			struct mbuf *ext;
1192#endif
1193
1194#ifndef PULLDOWN_TEST
1195			hbh = (struct ip6_hbh *)(ip6 + 1);
1196			hbhlen = (hbh->ip6h_len + 1) << 3;
1197#else
1198			ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr),
1199			    ip6->ip6_nxt);
1200			if (ext == NULL) {
1201				ip6stat.ip6s_tooshort++;
1202				return;
1203			}
1204			hbh = mtod(ext, struct ip6_hbh *);
1205			hbhlen = (hbh->ip6h_len + 1) << 3;
1206			if (hbhlen != ext->m_len) {
1207				m_freem(ext);
1208				ip6stat.ip6s_tooshort++;
1209				return;
1210			}
1211#endif
1212
1213			/*
1214			 * XXX: We copy the whole header even if a
1215			 * jumbo payload option is included, the option which
1216			 * is to be removed before returning according to
1217			 * RFC2292.
1218			 * Note: this constraint is removed in 2292bis.
1219			 */
1220			*mp = sbcreatecontrol((caddr_t)hbh, hbhlen,
1221			    IS2292(IPV6_2292HOPOPTS, IPV6_HOPOPTS),
1222			    IPPROTO_IPV6);
1223			if (*mp)
1224				mp = &(*mp)->m_next;
1225#ifdef PULLDOWN_TEST
1226			m_freem(ext);
1227#endif
1228		}
1229	}
1230
1231	if ((in6p->in6p_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) {
1232		int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr);
1233
1234		/*
1235		 * Search for destination options headers or routing
1236		 * header(s) through the header chain, and stores each
1237		 * header as ancillary data.
1238		 * Note that the order of the headers remains in
1239		 * the chain of ancillary data.
1240		 */
1241		while (1) {	/* is explicit loop prevention necessary? */
1242			struct ip6_ext *ip6e = NULL;
1243			int elen;
1244#ifdef PULLDOWN_TEST
1245			struct mbuf *ext = NULL;
1246#endif
1247
1248			/*
1249			 * if it is not an extension header, don't try to
1250			 * pull it from the chain.
1251			 */
1252			switch (nxt) {
1253			case IPPROTO_DSTOPTS:
1254			case IPPROTO_ROUTING:
1255			case IPPROTO_HOPOPTS:
1256			case IPPROTO_AH: /* is it possible? */
1257				break;
1258			default:
1259				goto loopend;
1260			}
1261
1262#ifndef PULLDOWN_TEST
1263			if (off + sizeof(*ip6e) > m->m_len)
1264				goto loopend;
1265			ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off);
1266			if (nxt == IPPROTO_AH)
1267				elen = (ip6e->ip6e_len + 2) << 2;
1268			else
1269				elen = (ip6e->ip6e_len + 1) << 3;
1270			if (off + elen > m->m_len)
1271				goto loopend;
1272#else
1273			ext = ip6_pullexthdr(m, off, nxt);
1274			if (ext == NULL) {
1275				ip6stat.ip6s_tooshort++;
1276				return;
1277			}
1278			ip6e = mtod(ext, struct ip6_ext *);
1279			if (nxt == IPPROTO_AH)
1280				elen = (ip6e->ip6e_len + 2) << 2;
1281			else
1282				elen = (ip6e->ip6e_len + 1) << 3;
1283			if (elen != ext->m_len) {
1284				m_freem(ext);
1285				ip6stat.ip6s_tooshort++;
1286				return;
1287			}
1288#endif
1289
1290			switch (nxt) {
1291			case IPPROTO_DSTOPTS:
1292				if (!(in6p->in6p_flags & IN6P_DSTOPTS))
1293					break;
1294
1295				*mp = sbcreatecontrol((caddr_t)ip6e, elen,
1296				    IS2292(IPV6_2292DSTOPTS, IPV6_DSTOPTS),
1297				    IPPROTO_IPV6);
1298				if (*mp)
1299					mp = &(*mp)->m_next;
1300				break;
1301			case IPPROTO_ROUTING:
1302				if (!in6p->in6p_flags & IN6P_RTHDR)
1303					break;
1304
1305				*mp = sbcreatecontrol((caddr_t)ip6e, elen,
1306				    IS2292(IPV6_2292RTHDR, IPV6_RTHDR),
1307				    IPPROTO_IPV6);
1308				if (*mp)
1309					mp = &(*mp)->m_next;
1310				break;
1311			case IPPROTO_HOPOPTS:
1312			case IPPROTO_AH: /* is it possible? */
1313				break;
1314
1315			default:
1316				/*
1317			 	 * other cases have been filtered in the above.
1318				 * none will visit this case.  here we supply
1319				 * the code just in case (nxt overwritten or
1320				 * other cases).
1321				 */
1322#ifdef PULLDOWN_TEST
1323				m_freem(ext);
1324#endif
1325				goto loopend;
1326
1327			}
1328
1329			/* proceed with the next header. */
1330			off += elen;
1331			nxt = ip6e->ip6e_nxt;
1332			ip6e = NULL;
1333#ifdef PULLDOWN_TEST
1334			m_freem(ext);
1335			ext = NULL;
1336#endif
1337		}
1338	  loopend:
1339		;
1340	}
1341
1342#undef IS2292
1343}
1344
1345void
1346ip6_notify_pmtu(in6p, dst, mtu)
1347	struct inpcb *in6p;
1348	struct sockaddr_in6 *dst;
1349	u_int32_t *mtu;
1350{
1351	struct socket *so;
1352	struct mbuf *m_mtu;
1353	struct ip6_mtuinfo mtuctl;
1354
1355	so =  in6p->inp_socket;
1356
1357	if (mtu == NULL)
1358		return;
1359
1360#ifdef DIAGNOSTIC
1361	if (so == NULL)		/* I believe this is impossible */
1362		panic("ip6_notify_pmtu: socket is NULL");
1363#endif
1364
1365	bzero(&mtuctl, sizeof(mtuctl));	/* zero-clear for safety */
1366	mtuctl.ip6m_mtu = *mtu;
1367	mtuctl.ip6m_addr = *dst;
1368	in6_recoverscope(&mtuctl.ip6m_addr, &mtuctl.ip6m_addr.sin6_addr, NULL);
1369
1370	if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl),
1371	    IPV6_PATHMTU, IPPROTO_IPV6)) == NULL)
1372		return;
1373
1374	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu)
1375	    == 0) {
1376		m_freem(m_mtu);
1377		/* XXX: should count statistics */
1378	} else
1379		sorwakeup(so);
1380
1381	return;
1382}
1383
1384#ifdef PULLDOWN_TEST
1385/*
1386 * pull single extension header from mbuf chain.  returns single mbuf that
1387 * contains the result, or NULL on error.
1388 */
1389static struct mbuf *
1390ip6_pullexthdr(m, off, nxt)
1391	struct mbuf *m;
1392	size_t off;
1393	int nxt;
1394{
1395	struct ip6_ext ip6e;
1396	size_t elen;
1397	struct mbuf *n;
1398
1399#ifdef DIAGNOSTIC
1400	switch (nxt) {
1401	case IPPROTO_DSTOPTS:
1402	case IPPROTO_ROUTING:
1403	case IPPROTO_HOPOPTS:
1404	case IPPROTO_AH: /* is it possible? */
1405		break;
1406	default:
1407		printf("ip6_pullexthdr: invalid nxt=%d\n", nxt);
1408	}
1409#endif
1410
1411	m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
1412	if (nxt == IPPROTO_AH)
1413		elen = (ip6e.ip6e_len + 2) << 2;
1414	else
1415		elen = (ip6e.ip6e_len + 1) << 3;
1416
1417	MGET(n, M_DONTWAIT, MT_DATA);
1418	if (n && elen >= MLEN) {
1419		MCLGET(n, M_DONTWAIT);
1420		if ((n->m_flags & M_EXT) == 0) {
1421			m_free(n);
1422			n = NULL;
1423		}
1424	}
1425	if (!n)
1426		return NULL;
1427
1428	n->m_len = 0;
1429	if (elen >= M_TRAILINGSPACE(n)) {
1430		m_free(n);
1431		return NULL;
1432	}
1433
1434	m_copydata(m, off, elen, mtod(n, caddr_t));
1435	n->m_len = elen;
1436	return n;
1437}
1438#endif
1439
1440/*
1441 * Get pointer to the previous header followed by the header
1442 * currently processed.
1443 * XXX: This function supposes that
1444 *	M includes all headers,
1445 *	the next header field and the header length field of each header
1446 *	are valid, and
1447 *	the sum of each header length equals to OFF.
1448 * Because of these assumptions, this function must be called very
1449 * carefully. Moreover, it will not be used in the near future when
1450 * we develop `neater' mechanism to process extension headers.
1451 */
1452char *
1453ip6_get_prevhdr(m, off)
1454	struct mbuf *m;
1455	int off;
1456{
1457	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1458
1459	if (off == sizeof(struct ip6_hdr))
1460		return (&ip6->ip6_nxt);
1461	else {
1462		int len, nxt;
1463		struct ip6_ext *ip6e = NULL;
1464
1465		nxt = ip6->ip6_nxt;
1466		len = sizeof(struct ip6_hdr);
1467		while (len < off) {
1468			ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len);
1469
1470			switch (nxt) {
1471			case IPPROTO_FRAGMENT:
1472				len += sizeof(struct ip6_frag);
1473				break;
1474			case IPPROTO_AH:
1475				len += (ip6e->ip6e_len + 2) << 2;
1476				break;
1477			default:
1478				len += (ip6e->ip6e_len + 1) << 3;
1479				break;
1480			}
1481			nxt = ip6e->ip6e_nxt;
1482		}
1483		if (ip6e)
1484			return (&ip6e->ip6e_nxt);
1485		else
1486			return NULL;
1487	}
1488}
1489
1490/*
1491 * get next header offset.  m will be retained.
1492 */
1493int
1494ip6_nexthdr(m, off, proto, nxtp)
1495	struct mbuf *m;
1496	int off;
1497	int proto;
1498	int *nxtp;
1499{
1500	struct ip6_hdr ip6;
1501	struct ip6_ext ip6e;
1502	struct ip6_frag fh;
1503
1504	/* just in case */
1505	if (m == NULL)
1506		panic("ip6_nexthdr: m == NULL");
1507	if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off)
1508		return -1;
1509
1510	switch (proto) {
1511	case IPPROTO_IPV6:
1512		if (m->m_pkthdr.len < off + sizeof(ip6))
1513			return -1;
1514		m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6);
1515		if (nxtp)
1516			*nxtp = ip6.ip6_nxt;
1517		off += sizeof(ip6);
1518		return off;
1519
1520	case IPPROTO_FRAGMENT:
1521		/*
1522		 * terminate parsing if it is not the first fragment,
1523		 * it does not make sense to parse through it.
1524		 */
1525		if (m->m_pkthdr.len < off + sizeof(fh))
1526			return -1;
1527		m_copydata(m, off, sizeof(fh), (caddr_t)&fh);
1528		/* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */
1529		if (fh.ip6f_offlg & IP6F_OFF_MASK)
1530			return -1;
1531		if (nxtp)
1532			*nxtp = fh.ip6f_nxt;
1533		off += sizeof(struct ip6_frag);
1534		return off;
1535
1536	case IPPROTO_AH:
1537		if (m->m_pkthdr.len < off + sizeof(ip6e))
1538			return -1;
1539		m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
1540		if (nxtp)
1541			*nxtp = ip6e.ip6e_nxt;
1542		off += (ip6e.ip6e_len + 2) << 2;
1543		return off;
1544
1545	case IPPROTO_HOPOPTS:
1546	case IPPROTO_ROUTING:
1547	case IPPROTO_DSTOPTS:
1548		if (m->m_pkthdr.len < off + sizeof(ip6e))
1549			return -1;
1550		m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
1551		if (nxtp)
1552			*nxtp = ip6e.ip6e_nxt;
1553		off += (ip6e.ip6e_len + 1) << 3;
1554		return off;
1555
1556	case IPPROTO_NONE:
1557	case IPPROTO_ESP:
1558	case IPPROTO_IPCOMP:
1559		/* give up */
1560		return -1;
1561
1562	default:
1563		return -1;
1564	}
1565
1566	return -1;
1567}
1568
1569/*
1570 * get offset for the last header in the chain.  m will be kept untainted.
1571 */
1572int
1573ip6_lasthdr(m, off, proto, nxtp)
1574	struct mbuf *m;
1575	int off;
1576	int proto;
1577	int *nxtp;
1578{
1579	int newoff;
1580	int nxt;
1581
1582	if (!nxtp) {
1583		nxt = -1;
1584		nxtp = &nxt;
1585	}
1586	while (1) {
1587		newoff = ip6_nexthdr(m, off, proto, nxtp);
1588		if (newoff < 0)
1589			return off;
1590		else if (newoff < off)
1591			return -1;	/* invalid */
1592		else if (newoff == off)
1593			return newoff;
1594
1595		off = newoff;
1596		proto = *nxtp;
1597	}
1598}
1599
1600struct ip6aux *
1601ip6_addaux(m)
1602	struct mbuf *m;
1603{
1604	struct m_tag *mtag;
1605
1606	mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1607	if (!mtag) {
1608		mtag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux),
1609		    M_NOWAIT);
1610		if (mtag) {
1611			m_tag_prepend(m, mtag);
1612			bzero(mtag + 1, sizeof(struct ip6aux));
1613		}
1614	}
1615	return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
1616}
1617
1618struct ip6aux *
1619ip6_findaux(m)
1620	struct mbuf *m;
1621{
1622	struct m_tag *mtag;
1623
1624	mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1625	return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
1626}
1627
1628void
1629ip6_delaux(m)
1630	struct mbuf *m;
1631{
1632	struct m_tag *mtag;
1633
1634	mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1635	if (mtag)
1636		m_tag_delete(m, mtag);
1637}
1638
/*
 * System control for IP6
 *
 * Table of errno values, PRC_NCMDS entries long; entries not listed
 * explicitly are zero.
 * NOTE(review): presumably indexed by PRC_* control command code, with 0
 * meaning "no error to report" -- confirm against the users of this table.
 */

u_char	inet6ctlerrmap[PRC_NCMDS] = {
	/*  0 */	0,
	/*  1 */	0,
	/*  2 */	0,
	/*  3 */	0,
	/*  4 */	0,
	/*  5 */	EMSGSIZE,
	/*  6 */	EHOSTDOWN,
	/*  7 */	EHOSTUNREACH,
	/*  8 */	EHOSTUNREACH,
	/*  9 */	EHOSTUNREACH,
	/* 10 */	ECONNREFUSED,
	/* 11 */	ECONNREFUSED,
	/* 12 */	EMSGSIZE,
	/* 13 */	EHOSTUNREACH,
	/* 14 */	0,
	/* 15 */	0,
	/* 16 */	0,
	/* 17 */	0,
	/* 18 */	0,
	/* 19 */	0,
	/* 20 */	ENOPROTOOPT
};
1651