ip6_input.c revision 210350
1193326Sed/*- 2193326Sed * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 3193326Sed * All rights reserved. 4193326Sed * 5193326Sed * Redistribution and use in source and binary forms, with or without 6193326Sed * modification, are permitted provided that the following conditions 7193326Sed * are met: 8193326Sed * 1. Redistributions of source code must retain the above copyright 9193326Sed * notice, this list of conditions and the following disclaimer. 10193326Sed * 2. Redistributions in binary form must reproduce the above copyright 11193326Sed * notice, this list of conditions and the following disclaimer in the 12193326Sed * documentation and/or other materials provided with the distribution. 13193326Sed * 3. Neither the name of the project nor the names of its contributors 14193326Sed * may be used to endorse or promote products derived from this software 15193326Sed * without specific prior written permission. 16198954Srdivacky * 17193326Sed * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 18193326Sed * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19193326Sed * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20193326Sed * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 21199482Srdivacky * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22193326Sed * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23193326Sed * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24193326Sed * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25193326Sed * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26193326Sed * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27193326Sed * SUCH DAMAGE. 28198092Srdivacky * 29193326Sed * $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $ 30193326Sed */ 31193326Sed 32193326Sed/*- 33193326Sed * Copyright (c) 1982, 1986, 1988, 1993 34193326Sed * The Regents of the University of California. All rights reserved. 35193326Sed * 36193326Sed * Redistribution and use in source and binary forms, with or without 37193326Sed * modification, are permitted provided that the following conditions 38193326Sed * are met: 39193326Sed * 1. Redistributions of source code must retain the above copyright 40193326Sed * notice, this list of conditions and the following disclaimer. 41193326Sed * 2. Redistributions in binary form must reproduce the above copyright 42193326Sed * notice, this list of conditions and the following disclaimer in the 43193326Sed * documentation and/or other materials provided with the distribution. 44193326Sed * 4. Neither the name of the University nor the names of its contributors 45198092Srdivacky * may be used to endorse or promote products derived from this software 46193326Sed * without specific prior written permission. 47193326Sed * 48193326Sed * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49193326Sed * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50193326Sed * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51193326Sed * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52193326Sed * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53199990Srdivacky * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54199990Srdivacky * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55199990Srdivacky * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56199990Srdivacky * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57199990Srdivacky * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58199990Srdivacky * SUCH DAMAGE. 59199990Srdivacky * 60199990Srdivacky * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 61199990Srdivacky */ 62199990Srdivacky 63199990Srdivacky#include <sys/cdefs.h> 64193326Sed__FBSDID("$FreeBSD: head/sys/netinet6/ip6_input.c 210350 2010-07-21 13:01:21Z bz $"); 65198954Srdivacky 66198954Srdivacky#include "opt_inet.h" 67198954Srdivacky#include "opt_inet6.h" 68198954Srdivacky#include "opt_ipsec.h" 69198954Srdivacky#include "opt_route.h" 70198954Srdivacky 71198954Srdivacky#include <sys/param.h> 72193326Sed#include <sys/systm.h> 73198954Srdivacky#include <sys/malloc.h> 74198954Srdivacky#include <sys/mbuf.h> 75198954Srdivacky#include <sys/proc.h> 76198954Srdivacky#include <sys/domain.h> 77198954Srdivacky#include <sys/protosw.h> 78198954Srdivacky#include <sys/socket.h> 79198954Srdivacky#include <sys/socketvar.h> 80198954Srdivacky#include <sys/errno.h> 81198954Srdivacky#include <sys/time.h> 82198954Srdivacky#include <sys/kernel.h> 83198954Srdivacky#include <sys/syslog.h> 84198954Srdivacky 85198954Srdivacky#include <net/if.h> 86198954Srdivacky#include <net/if_types.h> 87198954Srdivacky#include <net/if_dl.h> 88198954Srdivacky#include <net/route.h> 89198954Srdivacky#include <net/netisr.h> 90198954Srdivacky#include <net/pfil.h> 91198954Srdivacky#include <net/vnet.h> 92198954Srdivacky 93198954Srdivacky#include <netinet/in.h> 94198954Srdivacky#include <netinet/in_systm.h> 95198954Srdivacky#include <net/if_llatbl.h> 96198954Srdivacky#ifdef INET 97198954Srdivacky#include <netinet/ip.h> 98198954Srdivacky#include <netinet/ip_icmp.h> 99198954Srdivacky#endif /* INET */ 100198954Srdivacky#include <netinet/ip6.h> 101198954Srdivacky#include <netinet6/in6_var.h> 102198954Srdivacky#include <netinet6/ip6_var.h> 103199990Srdivacky#include <netinet/in_pcb.h> 104199990Srdivacky#include <netinet/icmp6.h> 105199990Srdivacky#include <netinet6/scope6_var.h> 106199990Srdivacky#include <netinet6/in6_ifattach.h> 107198954Srdivacky#include <netinet6/nd6.h> 108198954Srdivacky 109198954Srdivacky#ifdef IPSEC 110198954Srdivacky#include <netipsec/ipsec.h> 111198954Srdivacky#include <netinet6/ip6_ipsec.h> 112198954Srdivacky#include <netipsec/ipsec6.h> 113193326Sed#endif /* IPSEC */ 114193326Sed 115193326Sed#include <netinet6/ip6protosw.h> 116193326Sed 117193326Sed#ifdef FLOWTABLE 118193326Sed#include <net/flowtable.h> 119198398SrdivackyVNET_DECLARE(int, ip6_output_flowtable_size); 120193326Sed#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size) 121193326Sed#endif 122193326Sed 123193326Sedextern struct domain inet6domain; 124193326Sed 125193326Sedu_char ip6_protox[IPPROTO_MAX]; 126193326SedVNET_DEFINE(struct in6_ifaddrhead, in6_ifaddrhead); 127193326Sed 128193326Sedstatic struct netisr_handler ip6_nh = { 129193326Sed .nh_name = "ip6", 130193326Sed .nh_handler = ip6_input, 131193326Sed .nh_proto = NETISR_IPV6, 132193326Sed .nh_policy = NETISR_POLICY_FLOW, 133193326Sed}; 134193326Sed 135193326SedVNET_DECLARE(struct callout, in6_tmpaddrtimer_ch); 136193326Sed#define V_in6_tmpaddrtimer_ch VNET(in6_tmpaddrtimer_ch) 137193326Sed 138193326SedVNET_DEFINE(struct pfil_head, inet6_pfil_hook); 139193326Sed 140193326SedVNET_DEFINE(struct ip6stat, ip6stat); 141193326Sed 142193326Sedstruct rwlock in6_ifaddr_lock; 143193326SedRW_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock"); 144193326Sed 145193326Sedstatic void ip6_init2(void *); 146193326Sedstatic struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *); 147193326Sedstatic int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); 148193326Sed#ifdef PULLDOWN_TEST 149193326Sedstatic struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); 150193326Sed#endif 151193326Sed 152198092Srdivacky/* 153193326Sed * IP6 initialization: fill in IP6 protocol switch table. 154193326Sed * All protocols not implemented in kernel go to raw IP6 protocol handler. 155198092Srdivacky */ 156193326Sedvoid 157193326Sedip6_init(void) 158198092Srdivacky{ 159193326Sed struct ip6protosw *pr; 160193326Sed int i; 161199990Srdivacky 162199990Srdivacky TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal", 163199990Srdivacky &V_ip6_auto_linklocal); 164193326Sed 165193326Sed TAILQ_INIT(&V_in6_ifaddrhead); 166193326Sed 167193326Sed /* Initialize packet filter hooks. */ 168193326Sed V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF; 169198092Srdivacky V_inet6_pfil_hook.ph_af = AF_INET6; 170193326Sed if ((i = pfil_head_register(&V_inet6_pfil_hook)) != 0) 171193326Sed printf("%s: WARNING: unable to register pfil hook, " 172193326Sed "error %d\n", __func__, i); 173193326Sed 174193326Sed scope6_init(); 175193326Sed addrsel_policy_init(); 176193326Sed nd6_init(); 177193326Sed frag6_init(); 178193326Sed 179193326Sed#ifdef FLOWTABLE 180193326Sed if (TUNABLE_INT_FETCH("net.inet6.ip6.output_flowtable_size", 181193326Sed &V_ip6_output_flowtable_size)) { 182193326Sed if (V_ip6_output_flowtable_size < 256) 183193326Sed V_ip6_output_flowtable_size = 256; 184193326Sed if (!powerof2(V_ip6_output_flowtable_size)) { 185193326Sed printf("flowtable must be power of 2 size\n"); 186193326Sed V_ip6_output_flowtable_size = 2048; 187193326Sed } 188193326Sed } else { 189193326Sed /* 190193326Sed * round up to the next power of 2 191193326Sed */ 192193326Sed V_ip6_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1); 193193326Sed } 194193326Sed V_ip6_ft = flowtable_alloc("ipv6", V_ip6_output_flowtable_size, FL_IPV6|FL_PCPU); 195193326Sed#endif 196193326Sed 197198092Srdivacky V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR; 198193326Sed 199193326Sed /* Skip global initialization stuff for non-default instances. */ 200193326Sed if (!IS_DEFAULT_VNET(curvnet)) 201193326Sed return; 202193326Sed 203193326Sed#ifdef DIAGNOSTIC 204193326Sed if (sizeof(struct protosw) != sizeof(struct ip6protosw)) 205198092Srdivacky panic("sizeof(protosw) != sizeof(ip6protosw)"); 206193326Sed#endif 207193326Sed pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); 208193326Sed if (pr == NULL) 209193326Sed panic("ip6_init"); 210193326Sed 211193326Sed /* Initialize the entire ip6_protox[] array to IPPROTO_RAW. */ 212193326Sed for (i = 0; i < IPPROTO_MAX; i++) 213193326Sed ip6_protox[i] = pr - inet6sw; 214193326Sed /* 215193326Sed * Cycle through IP protocols and put them into the appropriate place 216193326Sed * in ip6_protox[]. 217193326Sed */ 218193326Sed for (pr = (struct ip6protosw *)inet6domain.dom_protosw; 219193326Sed pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++) 220193326Sed if (pr->pr_domain->dom_family == PF_INET6 && 221198092Srdivacky pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 222193326Sed /* Be careful to only index valid IP protocols. */ 223193326Sed if (pr->pr_protocol < IPPROTO_MAX) 224193326Sed ip6_protox[pr->pr_protocol] = pr - inet6sw; 225193326Sed } 226193326Sed 227193326Sed netisr_register(&ip6_nh); 228193326Sed} 229199990Srdivacky 230199990Srdivacky#ifdef VIMAGE 231199990Srdivackyvoid 232199990Srdivackyip6_destroy() 233193326Sed{ 234193326Sed 235193326Sed nd6_destroy(); 236198092Srdivacky callout_drain(&V_in6_tmpaddrtimer_ch); 237193326Sed} 238193326Sed#endif 239193326Sed 240193326Sedstatic int 241193326Sedip6_init2_vnet(const void *unused __unused) 242193326Sed{ 243193326Sed 244193326Sed /* nd6_timer_init */ 245193326Sed callout_init(&V_nd6_timer_ch, 0); 246193326Sed callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet); 247193326Sed 248193326Sed /* timer for regeneranation of temporary addresses randomize ID */ 249193326Sed callout_init(&V_in6_tmpaddrtimer_ch, 0); 250193326Sed callout_reset(&V_in6_tmpaddrtimer_ch, 251193326Sed (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor - 252193326Sed V_ip6_temp_regen_advance) * hz, 253193326Sed in6_tmpaddrtimer, curvnet); 254193326Sed 255193326Sed return (0); 256193326Sed} 257193326Sed 258193326Sedstatic void 259198092Srdivackyip6_init2(void *dummy) 260193326Sed{ 261193326Sed 262193326Sed ip6_init2_vnet(NULL); 263193326Sed} 264193326Sed 265193326Sed/* cheat */ 266193326Sed/* This must be after route_init(), which is now SI_ORDER_THIRD */ 267199990SrdivackySYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL); 268199990Srdivacky 269199990Srdivackyvoid 270199990Srdivackyip6_input(struct mbuf *m) 271199990Srdivacky{ 272199990Srdivacky struct ip6_hdr *ip6; 273199990Srdivacky int off = sizeof(struct ip6_hdr), nest; 274193326Sed u_int32_t plen; 275193326Sed u_int32_t rtalert = ~0; 276193326Sed int nxt, ours = 0; 277193326Sed struct ifnet *deliverifp = NULL, *ifp = NULL; 278193326Sed struct in6_addr odst; 279193326Sed struct route_in6 rin6; 280193326Sed int srcrt = 0; 281193326Sed struct llentry *lle = NULL; 282193326Sed struct sockaddr_in6 dst6, *dst; 283193326Sed 284193326Sed bzero(&rin6, sizeof(struct route_in6)); 285193326Sed#ifdef IPSEC 286193326Sed /* 287193326Sed * should the inner packet be considered authentic? 288193326Sed * see comment in ah4_input(). 289193326Sed * NB: m cannot be NULL when passed to the input routine 290193326Sed */ 291193326Sed 292193326Sed m->m_flags &= ~M_AUTHIPHDR; 293193326Sed m->m_flags &= ~M_AUTHIPDGM; 294193326Sed 295193326Sed#endif /* IPSEC */ 296193326Sed 297193326Sed /* 298193326Sed * make sure we don't have onion peering information into m_tag. 299193326Sed */ 300193326Sed ip6_delaux(m); 301193326Sed 302193326Sed /* 303193326Sed * mbuf statistics 304193326Sed */ 305199990Srdivacky if (m->m_flags & M_EXT) { 306199990Srdivacky if (m->m_next) 307199990Srdivacky V_ip6stat.ip6s_mext2m++; 308193326Sed else 309193326Sed V_ip6stat.ip6s_mext1++; 310193326Sed } else { 311193326Sed#define M2MMAX (sizeof(V_ip6stat.ip6s_m2m)/sizeof(V_ip6stat.ip6s_m2m[0])) 312193326Sed if (m->m_next) { 313193326Sed if (m->m_flags & M_LOOP) { 314193326Sed V_ip6stat.ip6s_m2m[V_loif->if_index]++; 315193326Sed } else if (m->m_pkthdr.rcvif->if_index < M2MMAX) 316193326Sed V_ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++; 317193326Sed else 318193326Sed V_ip6stat.ip6s_m2m[0]++; 319193326Sed } else 320193326Sed V_ip6stat.ip6s_m1++; 321193326Sed#undef M2MMAX 322193326Sed } 323193326Sed 324193326Sed /* drop the packet if IPv6 operation is disabled on the IF */ 325193326Sed if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) { 326193326Sed m_freem(m); 327193326Sed return; 328193326Sed } 329193326Sed 330199990Srdivacky in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive); 331199990Srdivacky V_ip6stat.ip6s_total++; 332199990Srdivacky 333199990Srdivacky#ifndef PULLDOWN_TEST 334193326Sed /* 335193326Sed * L2 bridge code and some other code can return mbuf chain 336193326Sed * that does not conform to KAME requirement. too bad. 337193326Sed * XXX: fails to join if interface MTU > MCLBYTES. jumbogram? 338193326Sed */ 339193326Sed if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) { 340193326Sed struct mbuf *n; 341200583Srdivacky 342193326Sed MGETHDR(n, M_DONTWAIT, MT_HEADER); 343193326Sed if (n) 344193326Sed M_MOVE_PKTHDR(n, m); 345193326Sed if (n && n->m_pkthdr.len > MHLEN) { 346193326Sed MCLGET(n, M_DONTWAIT); 347193326Sed if ((n->m_flags & M_EXT) == 0) { 348193326Sed m_freem(n); 349193326Sed n = NULL; 350199482Srdivacky } 351199482Srdivacky } 352199482Srdivacky if (n == NULL) { 353199482Srdivacky m_freem(m); 354193326Sed return; /* ENOBUFS */ 355193326Sed } 356193326Sed 357193326Sed m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t)); 358193326Sed n->m_len = n->m_pkthdr.len; 359193326Sed m_freem(m); 360198092Srdivacky m = n; 361193326Sed } 362193326Sed IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /* nothing */); 363193326Sed#endif 364193326Sed 365193326Sed if (m->m_len < sizeof(struct ip6_hdr)) { 366193326Sed struct ifnet *inifp; 367193326Sed inifp = m->m_pkthdr.rcvif; 368193326Sed if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { 369193326Sed V_ip6stat.ip6s_toosmall++; 370193326Sed in6_ifstat_inc(inifp, ifs6_in_hdrerr); 371193326Sed return; 372193326Sed } 373193326Sed } 374193326Sed 375193326Sed ip6 = mtod(m, struct ip6_hdr *); 376193326Sed 377193326Sed if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { 378193326Sed V_ip6stat.ip6s_badvers++; 379193326Sed in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); 380198092Srdivacky goto bad; 381198092Srdivacky } 382198092Srdivacky 383193326Sed V_ip6stat.ip6s_nxthist[ip6->ip6_nxt]++; 384193326Sed 385193326Sed /* 386198092Srdivacky * Check against address spoofing/corruption. 387193326Sed */ 388193326Sed if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) || 389193326Sed IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) { 390193326Sed /* 391198092Srdivacky * XXX: "badscope" is not very suitable for a multicast source. 392193326Sed */ 393198092Srdivacky V_ip6stat.ip6s_badscope++; 394193326Sed in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); 395193326Sed goto bad; 396193326Sed } 397198092Srdivacky if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) && 398193326Sed !(m->m_flags & M_LOOP)) { 399193326Sed /* 400193326Sed * In this case, the packet should come from the loopback 401193326Sed * interface. However, we cannot just check the if_flags, 402193326Sed * because ip6_mloopback() passes the "actual" interface 403193326Sed * as the outgoing/incoming interface. 404193326Sed */ 405193326Sed V_ip6stat.ip6s_badscope++; 406193326Sed in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); 407193326Sed goto bad; 408193326Sed } 409193326Sed 410193326Sed#ifdef ALTQ 411193326Sed if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) { 412193326Sed /* packet is dropped by traffic conditioner */ 413193326Sed return; 414193326Sed } 415193326Sed#endif 416193326Sed /* 417193326Sed * The following check is not documented in specs. A malicious 418193326Sed * party may be able to use IPv4 mapped addr to confuse tcp/udp stack 419193326Sed * and bypass security checks (act as if it was from 127.0.0.1 by using 420193326Sed * IPv6 src ::ffff:127.0.0.1). Be cautious. 421193326Sed * 422193326Sed * This check chokes if we are in an SIIT cloud. As none of BSDs 423193326Sed * support IPv4-less kernel compilation, we cannot support SIIT 424198092Srdivacky * environment at all. So, it makes more sense for us to reject any 425193326Sed * malicious packets for non-SIIT environment, than try to do a 426193326Sed * partial support for SIIT environment. 427193326Sed */ 428193326Sed if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || 429199990Srdivacky IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { 430199990Srdivacky V_ip6stat.ip6s_badscope++; 431199990Srdivacky in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); 432199990Srdivacky goto bad; 433199990Srdivacky } 434199990Srdivacky#if 0 435199990Srdivacky /* 436199990Srdivacky * Reject packets with IPv4 compatible addresses (auto tunnel). 437198092Srdivacky * 438193326Sed * The code forbids auto tunnel relay case in RFC1933 (the check is 439193326Sed * stronger than RFC1933). We may want to re-enable it if mech-xx 440193326Sed * is revised to forbid relaying case. 441193326Sed */ 442193326Sed if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) || 443 IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) { 444 V_ip6stat.ip6s_badscope++; 445 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); 446 goto bad; 447 } 448#endif 449 450 /* 451 * Run through list of hooks for input packets. 452 * 453 * NB: Beware of the destination address changing 454 * (e.g. by NAT rewriting). When this happens, 455 * tell ip6_forward to do the right thing. 456 */ 457 odst = ip6->ip6_dst; 458 459 /* Jump over all PFIL processing if hooks are not active. */ 460 if (!PFIL_HOOKED(&V_inet6_pfil_hook)) 461 goto passin; 462 463 if (pfil_run_hooks(&V_inet6_pfil_hook, &m, 464 m->m_pkthdr.rcvif, PFIL_IN, NULL)) 465 return; 466 if (m == NULL) /* consumed by filter */ 467 return; 468 ip6 = mtod(m, struct ip6_hdr *); 469 srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst); 470 471passin: 472 /* 473 * Disambiguate address scope zones (if there is ambiguity). 474 * We first make sure that the original source or destination address 475 * is not in our internal form for scoped addresses. Such addresses 476 * are not necessarily invalid spec-wise, but we cannot accept them due 477 * to the usage conflict. 478 * in6_setscope() then also checks and rejects the cases where src or 479 * dst are the loopback address and the receiving interface 480 * is not loopback. 481 */ 482 if (in6_clearscope(&ip6->ip6_src) || in6_clearscope(&ip6->ip6_dst)) { 483 V_ip6stat.ip6s_badscope++; /* XXX */ 484 goto bad; 485 } 486 if (in6_setscope(&ip6->ip6_src, m->m_pkthdr.rcvif, NULL) || 487 in6_setscope(&ip6->ip6_dst, m->m_pkthdr.rcvif, NULL)) { 488 V_ip6stat.ip6s_badscope++; 489 goto bad; 490 } 491 492 /* 493 * Multicast check. Assume packet is for us to avoid 494 * prematurely taking locks. 495 */ 496 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 497 ours = 1; 498 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast); 499 deliverifp = m->m_pkthdr.rcvif; 500 goto hbhcheck; 501 } 502 503 /* 504 * Unicast check 505 */ 506 507 bzero(&dst6, sizeof(dst6)); 508 dst6.sin6_family = AF_INET6; 509 dst6.sin6_len = sizeof(struct sockaddr_in6); 510 dst6.sin6_addr = ip6->ip6_dst; 511 ifp = m->m_pkthdr.rcvif; 512 IF_AFDATA_LOCK(ifp); 513 lle = lla_lookup(LLTABLE6(ifp), 0, 514 (struct sockaddr *)&dst6); 515 IF_AFDATA_UNLOCK(ifp); 516 if ((lle != NULL) && (lle->la_flags & LLE_IFADDR)) { 517 struct ifaddr *ifa; 518 struct in6_ifaddr *ia6; 519 int bad; 520 521 bad = 1; 522#define sa_equal(a1, a2) \ 523 (bcmp((a1), (a2), ((a1))->sin6_len) == 0) 524 IF_ADDR_LOCK(ifp); 525 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 526 if (ifa->ifa_addr->sa_family != dst6.sin6_family) 527 continue; 528 if (sa_equal(&dst6, ifa->ifa_addr)) 529 break; 530 } 531 KASSERT(ifa != NULL, ("%s: ifa not found for lle %p", 532 __func__, lle)); 533#undef sa_equal 534 535 ia6 = (struct in6_ifaddr *)ifa; 536 if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { 537 /* Count the packet in the ip address stats */ 538 ia6->ia_ifa.if_ipackets++; 539 ia6->ia_ifa.if_ibytes += m->m_pkthdr.len; 540 541 /* 542 * record address information into m_tag. 543 */ 544 (void)ip6_setdstifaddr(m, ia6); 545 546 bad = 0; 547 } else { 548 char ip6bufs[INET6_ADDRSTRLEN]; 549 char ip6bufd[INET6_ADDRSTRLEN]; 550 /* address is not ready, so discard the packet. */ 551 nd6log((LOG_INFO, 552 "ip6_input: packet to an unready address %s->%s\n", 553 ip6_sprintf(ip6bufs, &ip6->ip6_src), 554 ip6_sprintf(ip6bufd, &ip6->ip6_dst))); 555 } 556 IF_ADDR_UNLOCK(ifp); 557 LLE_RUNLOCK(lle); 558 if (bad) 559 goto bad; 560 else { 561 ours = 1; 562 deliverifp = ifp; 563 goto hbhcheck; 564 } 565 } 566 if (lle != NULL) 567 LLE_RUNLOCK(lle); 568 569 dst = &rin6.ro_dst; 570 dst->sin6_len = sizeof(struct sockaddr_in6); 571 dst->sin6_family = AF_INET6; 572 dst->sin6_addr = ip6->ip6_dst; 573 rin6.ro_rt = rtalloc1((struct sockaddr *)dst, 0, 0); 574 if (rin6.ro_rt) 575 RT_UNLOCK(rin6.ro_rt); 576 577#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key)) 578 579 /* 580 * Accept the packet if the forwarding interface to the destination 581 * according to the routing table is the loopback interface, 582 * unless the associated route has a gateway. 583 * Note that this approach causes to accept a packet if there is a 584 * route to the loopback interface for the destination of the packet. 585 * But we think it's even useful in some situations, e.g. when using 586 * a special daemon which wants to intercept the packet. 587 * 588 * XXX: some OSes automatically make a cloned route for the destination 589 * of an outgoing packet. If the outgoing interface of the packet 590 * is a loopback one, the kernel would consider the packet to be 591 * accepted, even if we have no such address assinged on the interface. 592 * We check the cloned flag of the route entry to reject such cases, 593 * assuming that route entries for our own addresses are not made by 594 * cloning (it should be true because in6_addloop explicitly installs 595 * the host route). However, we might have to do an explicit check 596 * while it would be less efficient. Or, should we rather install a 597 * reject route for such a case? 598 */ 599 if (rin6.ro_rt && 600 (rin6.ro_rt->rt_flags & 601 (RTF_HOST|RTF_GATEWAY)) == RTF_HOST && 602#ifdef RTF_WASCLONED 603 !(rin6.ro_rt->rt_flags & RTF_WASCLONED) && 604#endif 605#ifdef RTF_CLONED 606 !(rin6.ro_rt->rt_flags & RTF_CLONED) && 607#endif 608#if 0 609 /* 610 * The check below is redundant since the comparison of 611 * the destination and the key of the rtentry has 612 * already done through looking up the routing table. 613 */ 614 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, 615 &rt6_key(rin6.ro_rt)->sin6_addr) 616#endif 617 rin6.ro_rt->rt_ifp->if_type == IFT_LOOP) { 618 int free_ia6 = 0; 619 struct in6_ifaddr *ia6; 620 621 /* 622 * found the loopback route to the interface address 623 */ 624 if (rin6.ro_rt->rt_gateway->sa_family == AF_LINK) { 625 struct sockaddr_in6 dest6; 626 627 bzero(&dest6, sizeof(dest6)); 628 dest6.sin6_family = AF_INET6; 629 dest6.sin6_len = sizeof(dest6); 630 dest6.sin6_addr = ip6->ip6_dst; 631 ia6 = (struct in6_ifaddr *) 632 ifa_ifwithaddr((struct sockaddr *)&dest6); 633 if (ia6 == NULL) 634 goto bad; 635 free_ia6 = 1; 636 } 637 else 638 ia6 = (struct in6_ifaddr *)rin6.ro_rt->rt_ifa; 639 640 /* 641 * record address information into m_tag. 642 */ 643 (void)ip6_setdstifaddr(m, ia6); 644 645 /* 646 * packets to a tentative, duplicated, or somehow invalid 647 * address must not be accepted. 648 */ 649 if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { 650 /* this address is ready */ 651 ours = 1; 652 deliverifp = ia6->ia_ifp; /* correct? */ 653 /* Count the packet in the ip address stats */ 654 ia6->ia_ifa.if_ipackets++; 655 ia6->ia_ifa.if_ibytes += m->m_pkthdr.len; 656 if (ia6 != NULL && free_ia6 != 0) 657 ifa_free(&ia6->ia_ifa); 658 goto hbhcheck; 659 } else { 660 char ip6bufs[INET6_ADDRSTRLEN]; 661 char ip6bufd[INET6_ADDRSTRLEN]; 662 /* address is not ready, so discard the packet. */ 663 nd6log((LOG_INFO, 664 "ip6_input: packet to an unready address %s->%s\n", 665 ip6_sprintf(ip6bufs, &ip6->ip6_src), 666 ip6_sprintf(ip6bufd, &ip6->ip6_dst))); 667 668 if (ia6 != NULL && free_ia6 != 0) 669 ifa_free(&ia6->ia_ifa); 670 goto bad; 671 } 672 } 673 674 /* 675 * FAITH (Firewall Aided Internet Translator) 676 */ 677 if (V_ip6_keepfaith) { 678 if (rin6.ro_rt && rin6.ro_rt->rt_ifp && 679 rin6.ro_rt->rt_ifp->if_type == IFT_FAITH) { 680 /* XXX do we need more sanity checks? */ 681 ours = 1; 682 deliverifp = rin6.ro_rt->rt_ifp; /* faith */ 683 goto hbhcheck; 684 } 685 } 686 687 /* 688 * Now there is no reason to process the packet if it's not our own 689 * and we're not a router. 690 */ 691 if (!V_ip6_forwarding) { 692 V_ip6stat.ip6s_cantforward++; 693 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); 694 goto bad; 695 } 696 697 hbhcheck: 698 /* 699 * record address information into m_tag, if we don't have one yet. 700 * note that we are unable to record it, if the address is not listed 701 * as our interface address (e.g. multicast addresses, addresses 702 * within FAITH prefixes and such). 703 */ 704 if (deliverifp && !ip6_getdstifaddr(m)) { 705 struct in6_ifaddr *ia6; 706 707 ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst); 708 if (ia6) { 709 if (!ip6_setdstifaddr(m, ia6)) { 710 /* 711 * XXX maybe we should drop the packet here, 712 * as we could not provide enough information 713 * to the upper layers. 714 */ 715 } 716 ifa_free(&ia6->ia_ifa); 717 } 718 } 719 720 /* 721 * Process Hop-by-Hop options header if it's contained. 722 * m may be modified in ip6_hopopts_input(). 723 * If a JumboPayload option is included, plen will also be modified. 724 */ 725 plen = (u_int32_t)ntohs(ip6->ip6_plen); 726 if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { 727 struct ip6_hbh *hbh; 728 729 if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) { 730#if 0 /*touches NULL pointer*/ 731 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); 732#endif 733 goto out; /* m have already been freed */ 734 } 735 736 /* adjust pointer */ 737 ip6 = mtod(m, struct ip6_hdr *); 738 739 /* 740 * if the payload length field is 0 and the next header field 741 * indicates Hop-by-Hop Options header, then a Jumbo Payload 742 * option MUST be included. 743 */ 744 if (ip6->ip6_plen == 0 && plen == 0) { 745 /* 746 * Note that if a valid jumbo payload option is 747 * contained, ip6_hopopts_input() must set a valid 748 * (non-zero) payload length to the variable plen. 749 */ 750 V_ip6stat.ip6s_badoptions++; 751 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); 752 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); 753 icmp6_error(m, ICMP6_PARAM_PROB, 754 ICMP6_PARAMPROB_HEADER, 755 (caddr_t)&ip6->ip6_plen - (caddr_t)ip6); 756 goto out; 757 } 758#ifndef PULLDOWN_TEST 759 /* ip6_hopopts_input() ensures that mbuf is contiguous */ 760 hbh = (struct ip6_hbh *)(ip6 + 1); 761#else 762 IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), 763 sizeof(struct ip6_hbh)); 764 if (hbh == NULL) { 765 V_ip6stat.ip6s_tooshort++; 766 goto out; 767 } 768#endif 769 nxt = hbh->ip6h_nxt; 770 771 /* 772 * If we are acting as a router and the packet contains a 773 * router alert option, see if we know the option value. 774 * Currently, we only support the option value for MLD, in which 775 * case we should pass the packet to the multicast routing 776 * daemon. 777 */ 778 if (rtalert != ~0) { 779 switch (rtalert) { 780 case IP6OPT_RTALERT_MLD: 781 if (V_ip6_forwarding) 782 ours = 1; 783 break; 784 default: 785 /* 786 * RFC2711 requires unrecognized values must be 787 * silently ignored. 788 */ 789 break; 790 } 791 } 792 } else 793 nxt = ip6->ip6_nxt; 794 795 /* 796 * Check that the amount of data in the buffers 797 * is as at least much as the IPv6 header would have us expect. 798 * Trim mbufs if longer than we expect. 799 * Drop packet if shorter than we expect. 800 */ 801 if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) { 802 V_ip6stat.ip6s_tooshort++; 803 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); 804 goto bad; 805 } 806 if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) { 807 if (m->m_len == m->m_pkthdr.len) { 808 m->m_len = sizeof(struct ip6_hdr) + plen; 809 m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; 810 } else 811 m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len); 812 } 813 814 /* 815 * Forward if desirable. 816 */ 817 if (V_ip6_mrouter && 818 IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 819 /* 820 * If we are acting as a multicast router, all 821 * incoming multicast packets are passed to the 822 * kernel-level multicast forwarding function. 823 * The packet is returned (relatively) intact; if 824 * ip6_mforward() returns a non-zero value, the packet 825 * must be discarded, else it may be accepted below. 826 * 827 * XXX TODO: Check hlim and multicast scope here to avoid 828 * unnecessarily calling into ip6_mforward(). 829 */ 830 if (ip6_mforward && 831 ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) { 832 IP6STAT_INC(ip6s_cantforward); 833 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); 834 goto bad; 835 } 836 } else if (!ours) { 837 ip6_forward(m, srcrt); 838 goto out; 839 } 840 841 ip6 = mtod(m, struct ip6_hdr *); 842 843 /* 844 * Malicious party may be able to use IPv4 mapped addr to confuse 845 * tcp/udp stack and bypass security checks (act as if it was from 846 * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1). Be cautious. 847 * 848 * For SIIT end node behavior, you may want to disable the check. 849 * However, you will become vulnerable to attacks using IPv4 mapped 850 * source. 851 */ 852 if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || 853 IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { 854 V_ip6stat.ip6s_badscope++; 855 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); 856 goto bad; 857 } 858 859 /* 860 * Tell launch routine the next header 861 */ 862 V_ip6stat.ip6s_delivered++; 863 in6_ifstat_inc(deliverifp, ifs6_in_deliver); 864 nest = 0; 865 866 while (nxt != IPPROTO_DONE) { 867 if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) { 868 V_ip6stat.ip6s_toomanyhdr++; 869 goto bad; 870 } 871 872 /* 873 * protection against faulty packet - there should be 874 * more sanity checks in header chain processing. 875 */ 876 if (m->m_pkthdr.len < off) { 877 V_ip6stat.ip6s_tooshort++; 878 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); 879 goto bad; 880 } 881 882#ifdef IPSEC 883 /* 884 * enforce IPsec policy checking if we are seeing last header. 885 * note that we do not visit this with protocols with pcb layer 886 * code - like udp/tcp/raw ip. 887 */ 888 if (ip6_ipsec_input(m, nxt)) 889 goto bad; 890#endif /* IPSEC */ 891 892 /* 893 * Use mbuf flags to propagate Router Alert option to 894 * ICMPv6 layer, as hop-by-hop options have been stripped. 895 */ 896 if (nxt == IPPROTO_ICMPV6 && rtalert != ~0) 897 m->m_flags |= M_RTALERT_MLD; 898 899 nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); 900 } 901 goto out; 902bad: 903 m_freem(m); 904out: 905 if (rin6.ro_rt) 906 RTFREE(rin6.ro_rt); 907} 908 909/* 910 * set/grab in6_ifaddr correspond to IPv6 destination address. 911 * XXX backward compatibility wrapper 912 * 913 * XXXRW: We should bump the refcount on ia6 before sticking it in the m_tag, 914 * and then bump it when the tag is copied, and release it when the tag is 915 * freed. Unfortunately, m_tags don't support deep copies (yet), so instead 916 * we just bump the ia refcount when we receive it. This should be fixed. 917 */ 918static struct ip6aux * 919ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6) 920{ 921 struct ip6aux *ip6a; 922 923 ip6a = ip6_addaux(m); 924 if (ip6a) 925 ip6a->ip6a_dstia6 = ia6; 926 return ip6a; /* NULL if failed to set */ 927} 928 929struct in6_ifaddr * 930ip6_getdstifaddr(struct mbuf *m) 931{ 932 struct ip6aux *ip6a; 933 struct in6_ifaddr *ia; 934 935 ip6a = ip6_findaux(m); 936 if (ip6a) { 937 ia = ip6a->ip6a_dstia6; 938 ifa_ref(&ia->ia_ifa); 939 return ia; 940 } else 941 return NULL; 942} 943 944/* 945 * Hop-by-Hop options header processing. If a valid jumbo payload option is 946 * included, the real payload length will be stored in plenp. 947 * 948 * rtalertp - XXX: should be stored more smart way 949 */ 950static int 951ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp, 952 struct mbuf **mp, int *offp) 953{ 954 struct mbuf *m = *mp; 955 int off = *offp, hbhlen; 956 struct ip6_hbh *hbh; 957 u_int8_t *opt; 958 959 /* validation of the length of the header */ 960#ifndef PULLDOWN_TEST 961 IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1); 962 hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); 963 hbhlen = (hbh->ip6h_len + 1) << 3; 964 965 IP6_EXTHDR_CHECK(m, off, hbhlen, -1); 966 hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); 967#else 968 IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, 969 sizeof(struct ip6_hdr), sizeof(struct ip6_hbh)); 970 if (hbh == NULL) { 971 V_ip6stat.ip6s_tooshort++; 972 return -1; 973 } 974 hbhlen = (hbh->ip6h_len + 1) << 3; 975 IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), 976 hbhlen); 977 if (hbh == NULL) { 978 V_ip6stat.ip6s_tooshort++; 979 return -1; 980 } 981#endif 982 off += hbhlen; 983 hbhlen -= sizeof(struct ip6_hbh); 984 opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh); 985 986 if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh), 987 hbhlen, rtalertp, plenp) < 0) 988 return (-1); 989 990 *offp = off; 991 *mp = m; 992 return (0); 993} 994 995/* 996 * Search header for all Hop-by-hop options and process each option. 997 * This function is separate from ip6_hopopts_input() in order to 998 * handle a case where the sending node itself process its hop-by-hop 999 * options header. In such a case, the function is called from ip6_output(). 1000 * 1001 * The function assumes that hbh header is located right after the IPv6 header 1002 * (RFC2460 p7), opthead is pointer into data content in m, and opthead to 1003 * opthead + hbhlen is located in continuous memory region. 1004 */ 1005int 1006ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen, 1007 u_int32_t *rtalertp, u_int32_t *plenp) 1008{ 1009 struct ip6_hdr *ip6; 1010 int optlen = 0; 1011 u_int8_t *opt = opthead; 1012 u_int16_t rtalert_val; 1013 u_int32_t jumboplen; 1014 const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh); 1015 1016 for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) { 1017 switch (*opt) { 1018 case IP6OPT_PAD1: 1019 optlen = 1; 1020 break; 1021 case IP6OPT_PADN: 1022 if (hbhlen < IP6OPT_MINLEN) { 1023 V_ip6stat.ip6s_toosmall++; 1024 goto bad; 1025 } 1026 optlen = *(opt + 1) + 2; 1027 break; 1028 case IP6OPT_ROUTER_ALERT: 1029 /* XXX may need check for alignment */ 1030 if (hbhlen < IP6OPT_RTALERT_LEN) { 1031 V_ip6stat.ip6s_toosmall++; 1032 goto bad; 1033 } 1034 if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) { 1035 /* XXX stat */ 1036 icmp6_error(m, ICMP6_PARAM_PROB, 1037 ICMP6_PARAMPROB_HEADER, 1038 erroff + opt + 1 - opthead); 1039 return (-1); 1040 } 1041 optlen = IP6OPT_RTALERT_LEN; 1042 bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2); 1043 *rtalertp = ntohs(rtalert_val); 1044 break; 1045 case IP6OPT_JUMBO: 1046 /* XXX may need check for alignment */ 1047 if (hbhlen < IP6OPT_JUMBO_LEN) { 1048 V_ip6stat.ip6s_toosmall++; 1049 goto bad; 1050 } 1051 if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) { 1052 /* XXX stat */ 1053 icmp6_error(m, ICMP6_PARAM_PROB, 1054 ICMP6_PARAMPROB_HEADER, 1055 erroff + opt + 1 - opthead); 1056 return (-1); 1057 } 1058 optlen = IP6OPT_JUMBO_LEN; 1059 1060 /* 1061 * IPv6 packets that have non 0 payload length 1062 * must not contain a jumbo payload option. 1063 */ 1064 ip6 = mtod(m, struct ip6_hdr *); 1065 if (ip6->ip6_plen) { 1066 V_ip6stat.ip6s_badoptions++; 1067 icmp6_error(m, ICMP6_PARAM_PROB, 1068 ICMP6_PARAMPROB_HEADER, 1069 erroff + opt - opthead); 1070 return (-1); 1071 } 1072 1073 /* 1074 * We may see jumbolen in unaligned location, so 1075 * we'd need to perform bcopy(). 1076 */ 1077 bcopy(opt + 2, &jumboplen, sizeof(jumboplen)); 1078 jumboplen = (u_int32_t)htonl(jumboplen); 1079 1080#if 1 1081 /* 1082 * if there are multiple jumbo payload options, 1083 * *plenp will be non-zero and the packet will be 1084 * rejected. 1085 * the behavior may need some debate in ipngwg - 1086 * multiple options does not make sense, however, 1087 * there's no explicit mention in specification. 1088 */ 1089 if (*plenp != 0) { 1090 V_ip6stat.ip6s_badoptions++; 1091 icmp6_error(m, ICMP6_PARAM_PROB, 1092 ICMP6_PARAMPROB_HEADER, 1093 erroff + opt + 2 - opthead); 1094 return (-1); 1095 } 1096#endif 1097 1098 /* 1099 * jumbo payload length must be larger than 65535. 1100 */ 1101 if (jumboplen <= IPV6_MAXPACKET) { 1102 V_ip6stat.ip6s_badoptions++; 1103 icmp6_error(m, ICMP6_PARAM_PROB, 1104 ICMP6_PARAMPROB_HEADER, 1105 erroff + opt + 2 - opthead); 1106 return (-1); 1107 } 1108 *plenp = jumboplen; 1109 1110 break; 1111 default: /* unknown option */ 1112 if (hbhlen < IP6OPT_MINLEN) { 1113 V_ip6stat.ip6s_toosmall++; 1114 goto bad; 1115 } 1116 optlen = ip6_unknown_opt(opt, m, 1117 erroff + opt - opthead); 1118 if (optlen == -1) 1119 return (-1); 1120 optlen += 2; 1121 break; 1122 } 1123 } 1124 1125 return (0); 1126 1127 bad: 1128 m_freem(m); 1129 return (-1); 1130} 1131 1132/* 1133 * Unknown option processing. 1134 * The third argument `off' is the offset from the IPv6 header to the option, 1135 * which is necessary if the IPv6 header the and option header and IPv6 header 1136 * is not continuous in order to return an ICMPv6 error. 1137 */ 1138int 1139ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off) 1140{ 1141 struct ip6_hdr *ip6; 1142 1143 switch (IP6OPT_TYPE(*optp)) { 1144 case IP6OPT_TYPE_SKIP: /* ignore the option */ 1145 return ((int)*(optp + 1)); 1146 case IP6OPT_TYPE_DISCARD: /* silently discard */ 1147 m_freem(m); 1148 return (-1); 1149 case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */ 1150 V_ip6stat.ip6s_badoptions++; 1151 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); 1152 return (-1); 1153 case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */ 1154 V_ip6stat.ip6s_badoptions++; 1155 ip6 = mtod(m, struct ip6_hdr *); 1156 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || 1157 (m->m_flags & (M_BCAST|M_MCAST))) 1158 m_freem(m); 1159 else 1160 icmp6_error(m, ICMP6_PARAM_PROB, 1161 ICMP6_PARAMPROB_OPTION, off); 1162 return (-1); 1163 } 1164 1165 m_freem(m); /* XXX: NOTREACHED */ 1166 return (-1); 1167} 1168 1169/* 1170 * Create the "control" list for this pcb. 1171 * These functions will not modify mbuf chain at all. 1172 * 1173 * With KAME mbuf chain restriction: 1174 * The routine will be called from upper layer handlers like tcp6_input(). 1175 * Thus the routine assumes that the caller (tcp6_input) have already 1176 * called IP6_EXTHDR_CHECK() and all the extension headers are located in the 1177 * very first mbuf on the mbuf chain. 1178 * 1179 * ip6_savecontrol_v4 will handle those options that are possible to be 1180 * set on a v4-mapped socket. 1181 * ip6_savecontrol will directly call ip6_savecontrol_v4 to handle those 1182 * options and handle the v6-only ones itself. 1183 */ 1184struct mbuf ** 1185ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp, 1186 int *v4only) 1187{ 1188 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 1189 1190#ifdef SO_TIMESTAMP 1191 if ((inp->inp_socket->so_options & SO_TIMESTAMP) != 0) { 1192 struct timeval tv; 1193 1194 microtime(&tv); 1195 *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv), 1196 SCM_TIMESTAMP, SOL_SOCKET); 1197 if (*mp) 1198 mp = &(*mp)->m_next; 1199 } 1200#endif 1201 1202 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { 1203 if (v4only != NULL) 1204 *v4only = 1; 1205 return (mp); 1206 } 1207 1208#define IS2292(inp, x, y) (((inp)->inp_flags & IN6P_RFC2292) ? (x) : (y)) 1209 /* RFC 2292 sec. 5 */ 1210 if ((inp->inp_flags & IN6P_PKTINFO) != 0) { 1211 struct in6_pktinfo pi6; 1212 1213 bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr)); 1214 in6_clearscope(&pi6.ipi6_addr); /* XXX */ 1215 pi6.ipi6_ifindex = 1216 (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0; 1217 1218 *mp = sbcreatecontrol((caddr_t) &pi6, 1219 sizeof(struct in6_pktinfo), 1220 IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6); 1221 if (*mp) 1222 mp = &(*mp)->m_next; 1223 } 1224 1225 if ((inp->inp_flags & IN6P_HOPLIMIT) != 0) { 1226 int hlim = ip6->ip6_hlim & 0xff; 1227 1228 *mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int), 1229 IS2292(inp, IPV6_2292HOPLIMIT, IPV6_HOPLIMIT), 1230 IPPROTO_IPV6); 1231 if (*mp) 1232 mp = &(*mp)->m_next; 1233 } 1234 1235 if (v4only != NULL) 1236 *v4only = 0; 1237 return (mp); 1238} 1239 1240void 1241ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) 1242{ 1243 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 1244 int v4only = 0; 1245 1246 mp = ip6_savecontrol_v4(in6p, m, mp, &v4only); 1247 if (v4only) 1248 return; 1249 1250 if ((in6p->inp_flags & IN6P_TCLASS) != 0) { 1251 u_int32_t flowinfo; 1252 int tclass; 1253 1254 flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK); 1255 flowinfo >>= 20; 1256 1257 tclass = flowinfo & 0xff; 1258 *mp = sbcreatecontrol((caddr_t) &tclass, sizeof(tclass), 1259 IPV6_TCLASS, IPPROTO_IPV6); 1260 if (*mp) 1261 mp = &(*mp)->m_next; 1262 } 1263 1264 /* 1265 * IPV6_HOPOPTS socket option. Recall that we required super-user 1266 * privilege for the option (see ip6_ctloutput), but it might be too 1267 * strict, since there might be some hop-by-hop options which can be 1268 * returned to normal user. 1269 * See also RFC 2292 section 6 (or RFC 3542 section 8). 1270 */ 1271 if ((in6p->inp_flags & IN6P_HOPOPTS) != 0) { 1272 /* 1273 * Check if a hop-by-hop options header is contatined in the 1274 * received packet, and if so, store the options as ancillary 1275 * data. Note that a hop-by-hop options header must be 1276 * just after the IPv6 header, which is assured through the 1277 * IPv6 input processing. 1278 */ 1279 if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { 1280 struct ip6_hbh *hbh; 1281 int hbhlen = 0; 1282#ifdef PULLDOWN_TEST 1283 struct mbuf *ext; 1284#endif 1285 1286#ifndef PULLDOWN_TEST 1287 hbh = (struct ip6_hbh *)(ip6 + 1); 1288 hbhlen = (hbh->ip6h_len + 1) << 3; 1289#else 1290 ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr), 1291 ip6->ip6_nxt); 1292 if (ext == NULL) { 1293 V_ip6stat.ip6s_tooshort++; 1294 return; 1295 } 1296 hbh = mtod(ext, struct ip6_hbh *); 1297 hbhlen = (hbh->ip6h_len + 1) << 3; 1298 if (hbhlen != ext->m_len) { 1299 m_freem(ext); 1300 V_ip6stat.ip6s_tooshort++; 1301 return; 1302 } 1303#endif 1304 1305 /* 1306 * XXX: We copy the whole header even if a 1307 * jumbo payload option is included, the option which 1308 * is to be removed before returning according to 1309 * RFC2292. 1310 * Note: this constraint is removed in RFC3542 1311 */ 1312 *mp = sbcreatecontrol((caddr_t)hbh, hbhlen, 1313 IS2292(in6p, IPV6_2292HOPOPTS, IPV6_HOPOPTS), 1314 IPPROTO_IPV6); 1315 if (*mp) 1316 mp = &(*mp)->m_next; 1317#ifdef PULLDOWN_TEST 1318 m_freem(ext); 1319#endif 1320 } 1321 } 1322 1323 if ((in6p->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) { 1324 int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr); 1325 1326 /* 1327 * Search for destination options headers or routing 1328 * header(s) through the header chain, and stores each 1329 * header as ancillary data. 1330 * Note that the order of the headers remains in 1331 * the chain of ancillary data. 1332 */ 1333 while (1) { /* is explicit loop prevention necessary? */ 1334 struct ip6_ext *ip6e = NULL; 1335 int elen; 1336#ifdef PULLDOWN_TEST 1337 struct mbuf *ext = NULL; 1338#endif 1339 1340 /* 1341 * if it is not an extension header, don't try to 1342 * pull it from the chain. 1343 */ 1344 switch (nxt) { 1345 case IPPROTO_DSTOPTS: 1346 case IPPROTO_ROUTING: 1347 case IPPROTO_HOPOPTS: 1348 case IPPROTO_AH: /* is it possible? */ 1349 break; 1350 default: 1351 goto loopend; 1352 } 1353 1354#ifndef PULLDOWN_TEST 1355 if (off + sizeof(*ip6e) > m->m_len) 1356 goto loopend; 1357 ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off); 1358 if (nxt == IPPROTO_AH) 1359 elen = (ip6e->ip6e_len + 2) << 2; 1360 else 1361 elen = (ip6e->ip6e_len + 1) << 3; 1362 if (off + elen > m->m_len) 1363 goto loopend; 1364#else 1365 ext = ip6_pullexthdr(m, off, nxt); 1366 if (ext == NULL) { 1367 V_ip6stat.ip6s_tooshort++; 1368 return; 1369 } 1370 ip6e = mtod(ext, struct ip6_ext *); 1371 if (nxt == IPPROTO_AH) 1372 elen = (ip6e->ip6e_len + 2) << 2; 1373 else 1374 elen = (ip6e->ip6e_len + 1) << 3; 1375 if (elen != ext->m_len) { 1376 m_freem(ext); 1377 V_ip6stat.ip6s_tooshort++; 1378 return; 1379 } 1380#endif 1381 1382 switch (nxt) { 1383 case IPPROTO_DSTOPTS: 1384 if (!(in6p->inp_flags & IN6P_DSTOPTS)) 1385 break; 1386 1387 *mp = sbcreatecontrol((caddr_t)ip6e, elen, 1388 IS2292(in6p, 1389 IPV6_2292DSTOPTS, IPV6_DSTOPTS), 1390 IPPROTO_IPV6); 1391 if (*mp) 1392 mp = &(*mp)->m_next; 1393 break; 1394 case IPPROTO_ROUTING: 1395 if (!(in6p->inp_flags & IN6P_RTHDR)) 1396 break; 1397 1398 *mp = sbcreatecontrol((caddr_t)ip6e, elen, 1399 IS2292(in6p, IPV6_2292RTHDR, IPV6_RTHDR), 1400 IPPROTO_IPV6); 1401 if (*mp) 1402 mp = &(*mp)->m_next; 1403 break; 1404 case IPPROTO_HOPOPTS: 1405 case IPPROTO_AH: /* is it possible? */ 1406 break; 1407 1408 default: 1409 /* 1410 * other cases have been filtered in the above. 1411 * none will visit this case. here we supply 1412 * the code just in case (nxt overwritten or 1413 * other cases). 1414 */ 1415#ifdef PULLDOWN_TEST 1416 m_freem(ext); 1417#endif 1418 goto loopend; 1419 1420 } 1421 1422 /* proceed with the next header. */ 1423 off += elen; 1424 nxt = ip6e->ip6e_nxt; 1425 ip6e = NULL; 1426#ifdef PULLDOWN_TEST 1427 m_freem(ext); 1428 ext = NULL; 1429#endif 1430 } 1431 loopend: 1432 ; 1433 } 1434} 1435#undef IS2292 1436 1437void 1438ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu) 1439{ 1440 struct socket *so; 1441 struct mbuf *m_mtu; 1442 struct ip6_mtuinfo mtuctl; 1443 1444 so = in6p->inp_socket; 1445 1446 if (mtu == NULL) 1447 return; 1448 1449#ifdef DIAGNOSTIC 1450 if (so == NULL) /* I believe this is impossible */ 1451 panic("ip6_notify_pmtu: socket is NULL"); 1452#endif 1453 1454 bzero(&mtuctl, sizeof(mtuctl)); /* zero-clear for safety */ 1455 mtuctl.ip6m_mtu = *mtu; 1456 mtuctl.ip6m_addr = *dst; 1457 if (sa6_recoverscope(&mtuctl.ip6m_addr)) 1458 return; 1459 1460 if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl), 1461 IPV6_PATHMTU, IPPROTO_IPV6)) == NULL) 1462 return; 1463 1464 if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu) 1465 == 0) { 1466 m_freem(m_mtu); 1467 /* XXX: should count statistics */ 1468 } else 1469 sorwakeup(so); 1470 1471 return; 1472} 1473 1474#ifdef PULLDOWN_TEST 1475/* 1476 * pull single extension header from mbuf chain. returns single mbuf that 1477 * contains the result, or NULL on error. 1478 */ 1479static struct mbuf * 1480ip6_pullexthdr(struct mbuf *m, size_t off, int nxt) 1481{ 1482 struct ip6_ext ip6e; 1483 size_t elen; 1484 struct mbuf *n; 1485 1486#ifdef DIAGNOSTIC 1487 switch (nxt) { 1488 case IPPROTO_DSTOPTS: 1489 case IPPROTO_ROUTING: 1490 case IPPROTO_HOPOPTS: 1491 case IPPROTO_AH: /* is it possible? */ 1492 break; 1493 default: 1494 printf("ip6_pullexthdr: invalid nxt=%d\n", nxt); 1495 } 1496#endif 1497 1498 m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); 1499 if (nxt == IPPROTO_AH) 1500 elen = (ip6e.ip6e_len + 2) << 2; 1501 else 1502 elen = (ip6e.ip6e_len + 1) << 3; 1503 1504 MGET(n, M_DONTWAIT, MT_DATA); 1505 if (n && elen >= MLEN) { 1506 MCLGET(n, M_DONTWAIT); 1507 if ((n->m_flags & M_EXT) == 0) { 1508 m_free(n); 1509 n = NULL; 1510 } 1511 } 1512 if (!n) 1513 return NULL; 1514 1515 n->m_len = 0; 1516 if (elen >= M_TRAILINGSPACE(n)) { 1517 m_free(n); 1518 return NULL; 1519 } 1520 1521 m_copydata(m, off, elen, mtod(n, caddr_t)); 1522 n->m_len = elen; 1523 return n; 1524} 1525#endif 1526 1527/* 1528 * Get pointer to the previous header followed by the header 1529 * currently processed. 1530 * XXX: This function supposes that 1531 * M includes all headers, 1532 * the next header field and the header length field of each header 1533 * are valid, and 1534 * the sum of each header length equals to OFF. 1535 * Because of these assumptions, this function must be called very 1536 * carefully. Moreover, it will not be used in the near future when 1537 * we develop `neater' mechanism to process extension headers. 1538 */ 1539char * 1540ip6_get_prevhdr(struct mbuf *m, int off) 1541{ 1542 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 1543 1544 if (off == sizeof(struct ip6_hdr)) 1545 return (&ip6->ip6_nxt); 1546 else { 1547 int len, nxt; 1548 struct ip6_ext *ip6e = NULL; 1549 1550 nxt = ip6->ip6_nxt; 1551 len = sizeof(struct ip6_hdr); 1552 while (len < off) { 1553 ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len); 1554 1555 switch (nxt) { 1556 case IPPROTO_FRAGMENT: 1557 len += sizeof(struct ip6_frag); 1558 break; 1559 case IPPROTO_AH: 1560 len += (ip6e->ip6e_len + 2) << 2; 1561 break; 1562 default: 1563 len += (ip6e->ip6e_len + 1) << 3; 1564 break; 1565 } 1566 nxt = ip6e->ip6e_nxt; 1567 } 1568 if (ip6e) 1569 return (&ip6e->ip6e_nxt); 1570 else 1571 return NULL; 1572 } 1573} 1574 1575/* 1576 * get next header offset. m will be retained. 1577 */ 1578int 1579ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp) 1580{ 1581 struct ip6_hdr ip6; 1582 struct ip6_ext ip6e; 1583 struct ip6_frag fh; 1584 1585 /* just in case */ 1586 if (m == NULL) 1587 panic("ip6_nexthdr: m == NULL"); 1588 if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off) 1589 return -1; 1590 1591 switch (proto) { 1592 case IPPROTO_IPV6: 1593 if (m->m_pkthdr.len < off + sizeof(ip6)) 1594 return -1; 1595 m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6); 1596 if (nxtp) 1597 *nxtp = ip6.ip6_nxt; 1598 off += sizeof(ip6); 1599 return off; 1600 1601 case IPPROTO_FRAGMENT: 1602 /* 1603 * terminate parsing if it is not the first fragment, 1604 * it does not make sense to parse through it. 1605 */ 1606 if (m->m_pkthdr.len < off + sizeof(fh)) 1607 return -1; 1608 m_copydata(m, off, sizeof(fh), (caddr_t)&fh); 1609 /* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */ 1610 if (fh.ip6f_offlg & IP6F_OFF_MASK) 1611 return -1; 1612 if (nxtp) 1613 *nxtp = fh.ip6f_nxt; 1614 off += sizeof(struct ip6_frag); 1615 return off; 1616 1617 case IPPROTO_AH: 1618 if (m->m_pkthdr.len < off + sizeof(ip6e)) 1619 return -1; 1620 m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); 1621 if (nxtp) 1622 *nxtp = ip6e.ip6e_nxt; 1623 off += (ip6e.ip6e_len + 2) << 2; 1624 return off; 1625 1626 case IPPROTO_HOPOPTS: 1627 case IPPROTO_ROUTING: 1628 case IPPROTO_DSTOPTS: 1629 if (m->m_pkthdr.len < off + sizeof(ip6e)) 1630 return -1; 1631 m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); 1632 if (nxtp) 1633 *nxtp = ip6e.ip6e_nxt; 1634 off += (ip6e.ip6e_len + 1) << 3; 1635 return off; 1636 1637 case IPPROTO_NONE: 1638 case IPPROTO_ESP: 1639 case IPPROTO_IPCOMP: 1640 /* give up */ 1641 return -1; 1642 1643 default: 1644 return -1; 1645 } 1646 1647 return -1; 1648} 1649 1650/* 1651 * get offset for the last header in the chain. m will be kept untainted. 1652 */ 1653int 1654ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp) 1655{ 1656 int newoff; 1657 int nxt; 1658 1659 if (!nxtp) { 1660 nxt = -1; 1661 nxtp = &nxt; 1662 } 1663 while (1) { 1664 newoff = ip6_nexthdr(m, off, proto, nxtp); 1665 if (newoff < 0) 1666 return off; 1667 else if (newoff < off) 1668 return -1; /* invalid */ 1669 else if (newoff == off) 1670 return newoff; 1671 1672 off = newoff; 1673 proto = *nxtp; 1674 } 1675} 1676 1677struct ip6aux * 1678ip6_addaux(struct mbuf *m) 1679{ 1680 struct m_tag *mtag; 1681 1682 mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); 1683 if (!mtag) { 1684 mtag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux), 1685 M_NOWAIT); 1686 if (mtag) { 1687 m_tag_prepend(m, mtag); 1688 bzero(mtag + 1, sizeof(struct ip6aux)); 1689 } 1690 } 1691 return mtag ? (struct ip6aux *)(mtag + 1) : NULL; 1692} 1693 1694struct ip6aux * 1695ip6_findaux(struct mbuf *m) 1696{ 1697 struct m_tag *mtag; 1698 1699 mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); 1700 return mtag ? (struct ip6aux *)(mtag + 1) : NULL; 1701} 1702 1703void 1704ip6_delaux(struct mbuf *m) 1705{ 1706 struct m_tag *mtag; 1707 1708 mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); 1709 if (mtag) 1710 m_tag_delete(m, mtag); 1711} 1712 1713/* 1714 * System control for IP6 1715 */ 1716 1717u_char inet6ctlerrmap[PRC_NCMDS] = { 1718 0, 0, 0, 0, 1719 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, 1720 EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, 1721 EMSGSIZE, EHOSTUNREACH, 0, 0, 1722 0, 0, 0, 0, 1723 ENOPROTOOPT 1724}; 1725