ip6_input.c revision 207828
1/*- 2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the project nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $ 30 */ 31 32/*- 33 * Copyright (c) 1982, 1986, 1988, 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 61 */ 62 63#include <sys/cdefs.h> 64__FBSDID("$FreeBSD: head/sys/netinet6/ip6_input.c 207828 2010-05-09 20:32:00Z kmacy $"); 65 66#include "opt_inet.h" 67#include "opt_inet6.h" 68#include "opt_ipsec.h" 69#include "opt_route.h" 70 71#include <sys/param.h> 72#include <sys/systm.h> 73#include <sys/malloc.h> 74#include <sys/mbuf.h> 75#include <sys/proc.h> 76#include <sys/domain.h> 77#include <sys/protosw.h> 78#include <sys/socket.h> 79#include <sys/socketvar.h> 80#include <sys/errno.h> 81#include <sys/time.h> 82#include <sys/kernel.h> 83#include <sys/syslog.h> 84 85#include <net/if.h> 86#include <net/if_types.h> 87#include <net/if_dl.h> 88#include <net/route.h> 89#include <net/netisr.h> 90#include <net/pfil.h> 91#include <net/vnet.h> 92 93#include <netinet/in.h> 94#include <netinet/in_systm.h> 95#include <net/if_llatbl.h> 96#ifdef INET 97#include <netinet/ip.h> 98#include <netinet/ip_icmp.h> 99#endif /* INET */ 100#include <netinet/ip6.h> 101#include <netinet6/in6_var.h> 102#include <netinet6/ip6_var.h> 103#include <netinet/in_pcb.h> 104#include <netinet/icmp6.h> 105#include <netinet6/scope6_var.h> 106#include <netinet6/in6_ifattach.h> 107#include <netinet6/nd6.h> 108 109#ifdef IPSEC 110#include <netipsec/ipsec.h> 111#include <netinet6/ip6_ipsec.h> 112#include <netipsec/ipsec6.h> 113#endif /* IPSEC */ 114 115#include <netinet6/ip6protosw.h> 116 117#ifdef FLOWTABLE 118#include <net/flowtable.h> 119extern VNET_DEFINE(int, ip6_output_flowtable_size); 120#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size) 121#endif 122 123extern struct domain inet6domain; 124 125u_char ip6_protox[IPPROTO_MAX]; 126VNET_DEFINE(struct in6_ifaddrhead, in6_ifaddrhead); 127 128static struct netisr_handler ip6_nh = { 129 .nh_name = "ip6", 130 .nh_handler = ip6_input, 131 .nh_proto = NETISR_IPV6, 132 .nh_policy = NETISR_POLICY_FLOW, 133}; 134 135VNET_DECLARE(struct callout, in6_tmpaddrtimer_ch); 136#define V_in6_tmpaddrtimer_ch VNET(in6_tmpaddrtimer_ch) 137 138VNET_DEFINE(struct pfil_head, inet6_pfil_hook); 139 140VNET_DEFINE(struct ip6stat, ip6stat); 141 142struct rwlock in6_ifaddr_lock; 143RW_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock"); 144 145static void ip6_init2(void *); 146static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *); 147static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); 148#ifdef PULLDOWN_TEST 149static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); 150#endif 151 152/* 153 * IP6 initialization: fill in IP6 protocol switch table. 154 * All protocols not implemented in kernel go to raw IP6 protocol handler. 155 */ 156void 157ip6_init(void) 158{ 159 struct ip6protosw *pr; 160 int i; 161 162 TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal", 163 &V_ip6_auto_linklocal); 164 165 TAILQ_INIT(&V_in6_ifaddrhead); 166 167 /* Initialize packet filter hooks. */ 168 V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF; 169 V_inet6_pfil_hook.ph_af = AF_INET6; 170 if ((i = pfil_head_register(&V_inet6_pfil_hook)) != 0) 171 printf("%s: WARNING: unable to register pfil hook, " 172 "error %d\n", __func__, i); 173 174 scope6_init(); 175 addrsel_policy_init(); 176 nd6_init(); 177 frag6_init(); 178 179#ifdef FLOWTABLE 180 TUNABLE_INT_FETCH("net.inet6.ip6.output_flowtable_size", 181 &V_ip6_output_flowtable_size); 182 V_ip6_ft = flowtable_alloc("ipv6", V_ip6_output_flowtable_size, FL_PCPU); 183#endif 184 185 V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR; 186 187 /* Skip global initialization stuff for non-default instances. */ 188 if (!IS_DEFAULT_VNET(curvnet)) 189 return; 190 191#ifdef DIAGNOSTIC 192 if (sizeof(struct protosw) != sizeof(struct ip6protosw)) 193 panic("sizeof(protosw) != sizeof(ip6protosw)"); 194#endif 195 pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); 196 if (pr == NULL) 197 panic("ip6_init"); 198 199 /* Initialize the entire ip6_protox[] array to IPPROTO_RAW. */ 200 for (i = 0; i < IPPROTO_MAX; i++) 201 ip6_protox[i] = pr - inet6sw; 202 /* 203 * Cycle through IP protocols and put them into the appropriate place 204 * in ip6_protox[]. 205 */ 206 for (pr = (struct ip6protosw *)inet6domain.dom_protosw; 207 pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++) 208 if (pr->pr_domain->dom_family == PF_INET6 && 209 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 210 /* Be careful to only index valid IP protocols. */ 211 if (pr->pr_protocol < IPPROTO_MAX) 212 ip6_protox[pr->pr_protocol] = pr - inet6sw; 213 } 214 215 netisr_register(&ip6_nh); 216} 217 218#ifdef VIMAGE 219void 220ip6_destroy() 221{ 222 223 nd6_destroy(); 224 callout_drain(&V_in6_tmpaddrtimer_ch); 225} 226#endif 227 228static int 229ip6_init2_vnet(const void *unused __unused) 230{ 231 232 /* nd6_timer_init */ 233 callout_init(&V_nd6_timer_ch, 0); 234 callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet); 235 236 /* timer for regeneranation of temporary addresses randomize ID */ 237 callout_init(&V_in6_tmpaddrtimer_ch, 0); 238 callout_reset(&V_in6_tmpaddrtimer_ch, 239 (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor - 240 V_ip6_temp_regen_advance) * hz, 241 in6_tmpaddrtimer, curvnet); 242 243 return (0); 244} 245 246static void 247ip6_init2(void *dummy) 248{ 249 250 ip6_init2_vnet(NULL); 251} 252 253/* cheat */ 254/* This must be after route_init(), which is now SI_ORDER_THIRD */ 255SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL); 256 257void 258ip6_input(struct mbuf *m) 259{ 260 struct ip6_hdr *ip6; 261 int off = sizeof(struct ip6_hdr), nest; 262 u_int32_t plen; 263 u_int32_t rtalert = ~0; 264 int nxt, ours = 0; 265 struct ifnet *deliverifp = NULL, *ifp = NULL; 266 struct in6_addr odst; 267 struct route_in6 rin6; 268 int srcrt = 0; 269 struct llentry *lle = NULL; 270 struct sockaddr_in6 dst6, *dst; 271 272 bzero(&rin6, sizeof(struct route_in6)); 273#ifdef IPSEC 274 /* 275 * should the inner packet be considered authentic? 276 * see comment in ah4_input(). 277 * NB: m cannot be NULL when passed to the input routine 278 */ 279 280 m->m_flags &= ~M_AUTHIPHDR; 281 m->m_flags &= ~M_AUTHIPDGM; 282 283#endif /* IPSEC */ 284 285 /* 286 * make sure we don't have onion peering information into m_tag. 287 */ 288 ip6_delaux(m); 289 290 /* 291 * mbuf statistics 292 */ 293 if (m->m_flags & M_EXT) { 294 if (m->m_next) 295 V_ip6stat.ip6s_mext2m++; 296 else 297 V_ip6stat.ip6s_mext1++; 298 } else { 299#define M2MMAX (sizeof(V_ip6stat.ip6s_m2m)/sizeof(V_ip6stat.ip6s_m2m[0])) 300 if (m->m_next) { 301 if (m->m_flags & M_LOOP) { 302 V_ip6stat.ip6s_m2m[V_loif->if_index]++; 303 } else if (m->m_pkthdr.rcvif->if_index < M2MMAX) 304 V_ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++; 305 else 306 V_ip6stat.ip6s_m2m[0]++; 307 } else 308 V_ip6stat.ip6s_m1++; 309#undef M2MMAX 310 } 311 312 /* drop the packet if IPv6 operation is disabled on the IF */ 313 if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) { 314 m_freem(m); 315 return; 316 } 317 318 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive); 319 V_ip6stat.ip6s_total++; 320 321#ifndef PULLDOWN_TEST 322 /* 323 * L2 bridge code and some other code can return mbuf chain 324 * that does not conform to KAME requirement. too bad. 325 * XXX: fails to join if interface MTU > MCLBYTES. jumbogram? 326 */ 327 if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) { 328 struct mbuf *n; 329 330 MGETHDR(n, M_DONTWAIT, MT_HEADER); 331 if (n) 332 M_MOVE_PKTHDR(n, m); 333 if (n && n->m_pkthdr.len > MHLEN) { 334 MCLGET(n, M_DONTWAIT); 335 if ((n->m_flags & M_EXT) == 0) { 336 m_freem(n); 337 n = NULL; 338 } 339 } 340 if (n == NULL) { 341 m_freem(m); 342 return; /* ENOBUFS */ 343 } 344 345 m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t)); 346 n->m_len = n->m_pkthdr.len; 347 m_freem(m); 348 m = n; 349 } 350 IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /* nothing */); 351#endif 352 353 if (m->m_len < sizeof(struct ip6_hdr)) { 354 struct ifnet *inifp; 355 inifp = m->m_pkthdr.rcvif; 356 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { 357 V_ip6stat.ip6s_toosmall++; 358 in6_ifstat_inc(inifp, ifs6_in_hdrerr); 359 return; 360 } 361 } 362 363 ip6 = mtod(m, struct ip6_hdr *); 364 365 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { 366 V_ip6stat.ip6s_badvers++; 367 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); 368 goto bad; 369 } 370 371 V_ip6stat.ip6s_nxthist[ip6->ip6_nxt]++; 372 373 /* 374 * Check against address spoofing/corruption. 375 */ 376 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) || 377 IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) { 378 /* 379 * XXX: "badscope" is not very suitable for a multicast source. 380 */ 381 V_ip6stat.ip6s_badscope++; 382 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); 383 goto bad; 384 } 385 if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) && 386 !(m->m_flags & M_LOOP)) { 387 /* 388 * In this case, the packet should come from the loopback 389 * interface. However, we cannot just check the if_flags, 390 * because ip6_mloopback() passes the "actual" interface 391 * as the outgoing/incoming interface. 392 */ 393 V_ip6stat.ip6s_badscope++; 394 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); 395 goto bad; 396 } 397 398#ifdef ALTQ 399 if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) { 400 /* packet is dropped by traffic conditioner */ 401 return; 402 } 403#endif 404 /* 405 * The following check is not documented in specs. A malicious 406 * party may be able to use IPv4 mapped addr to confuse tcp/udp stack 407 * and bypass security checks (act as if it was from 127.0.0.1 by using 408 * IPv6 src ::ffff:127.0.0.1). Be cautious. 409 * 410 * This check chokes if we are in an SIIT cloud. As none of BSDs 411 * support IPv4-less kernel compilation, we cannot support SIIT 412 * environment at all. So, it makes more sense for us to reject any 413 * malicious packets for non-SIIT environment, than try to do a 414 * partial support for SIIT environment. 415 */ 416 if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || 417 IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { 418 V_ip6stat.ip6s_badscope++; 419 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); 420 goto bad; 421 } 422#if 0 423 /* 424 * Reject packets with IPv4 compatible addresses (auto tunnel). 425 * 426 * The code forbids auto tunnel relay case in RFC1933 (the check is 427 * stronger than RFC1933). We may want to re-enable it if mech-xx 428 * is revised to forbid relaying case. 429 */ 430 if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) || 431 IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) { 432 V_ip6stat.ip6s_badscope++; 433 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); 434 goto bad; 435 } 436#endif 437 438 /* 439 * Run through list of hooks for input packets. 440 * 441 * NB: Beware of the destination address changing 442 * (e.g. by NAT rewriting). When this happens, 443 * tell ip6_forward to do the right thing. 444 */ 445 odst = ip6->ip6_dst; 446 447 /* Jump over all PFIL processing if hooks are not active. */ 448 if (!PFIL_HOOKED(&V_inet6_pfil_hook)) 449 goto passin; 450 451 if (pfil_run_hooks(&V_inet6_pfil_hook, &m, 452 m->m_pkthdr.rcvif, PFIL_IN, NULL)) 453 return; 454 if (m == NULL) /* consumed by filter */ 455 return; 456 ip6 = mtod(m, struct ip6_hdr *); 457 srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst); 458 459passin: 460 /* 461 * Disambiguate address scope zones (if there is ambiguity). 462 * We first make sure that the original source or destination address 463 * is not in our internal form for scoped addresses. Such addresses 464 * are not necessarily invalid spec-wise, but we cannot accept them due 465 * to the usage conflict. 466 * in6_setscope() then also checks and rejects the cases where src or 467 * dst are the loopback address and the receiving interface 468 * is not loopback. 469 */ 470 if (in6_clearscope(&ip6->ip6_src) || in6_clearscope(&ip6->ip6_dst)) { 471 V_ip6stat.ip6s_badscope++; /* XXX */ 472 goto bad; 473 } 474 if (in6_setscope(&ip6->ip6_src, m->m_pkthdr.rcvif, NULL) || 475 in6_setscope(&ip6->ip6_dst, m->m_pkthdr.rcvif, NULL)) { 476 V_ip6stat.ip6s_badscope++; 477 goto bad; 478 } 479 480 /* 481 * Multicast check. Assume packet is for us to avoid 482 * prematurely taking locks. 483 */ 484 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 485 ours = 1; 486 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast); 487 deliverifp = m->m_pkthdr.rcvif; 488 goto hbhcheck; 489 } 490 491 /* 492 * Unicast check 493 */ 494 495 bzero(&dst6, sizeof(dst6)); 496 dst6.sin6_family = AF_INET6; 497 dst6.sin6_len = sizeof(struct sockaddr_in6); 498 dst6.sin6_addr = ip6->ip6_dst; 499 ifp = m->m_pkthdr.rcvif; 500 IF_AFDATA_LOCK(ifp); 501 lle = lla_lookup(LLTABLE6(ifp), 0, 502 (struct sockaddr *)&dst6); 503 IF_AFDATA_UNLOCK(ifp); 504 if ((lle != NULL) && (lle->la_flags & LLE_IFADDR)) { 505 ours = 1; 506 deliverifp = ifp; 507 LLE_RUNLOCK(lle); 508 goto hbhcheck; 509 } 510 if (lle != NULL) 511 LLE_RUNLOCK(lle); 512 513 dst = &rin6.ro_dst; 514 dst->sin6_len = sizeof(struct sockaddr_in6); 515 dst->sin6_family = AF_INET6; 516 dst->sin6_addr = ip6->ip6_dst; 517 rin6.ro_rt = rtalloc1((struct sockaddr *)dst, 0, 0); 518 if (rin6.ro_rt) 519 RT_UNLOCK(rin6.ro_rt); 520 521#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key)) 522 523 /* 524 * Accept the packet if the forwarding interface to the destination 525 * according to the routing table is the loopback interface, 526 * unless the associated route has a gateway. 527 * Note that this approach causes to accept a packet if there is a 528 * route to the loopback interface for the destination of the packet. 529 * But we think it's even useful in some situations, e.g. when using 530 * a special daemon which wants to intercept the packet. 531 * 532 * XXX: some OSes automatically make a cloned route for the destination 533 * of an outgoing packet. If the outgoing interface of the packet 534 * is a loopback one, the kernel would consider the packet to be 535 * accepted, even if we have no such address assinged on the interface. 536 * We check the cloned flag of the route entry to reject such cases, 537 * assuming that route entries for our own addresses are not made by 538 * cloning (it should be true because in6_addloop explicitly installs 539 * the host route). However, we might have to do an explicit check 540 * while it would be less efficient. Or, should we rather install a 541 * reject route for such a case? 542 */ 543 if (rin6.ro_rt && 544 (rin6.ro_rt->rt_flags & 545 (RTF_HOST|RTF_GATEWAY)) == RTF_HOST && 546#ifdef RTF_WASCLONED 547 !(rin6.ro_rt->rt_flags & RTF_WASCLONED) && 548#endif 549#ifdef RTF_CLONED 550 !(rin6.ro_rt->rt_flags & RTF_CLONED) && 551#endif 552#if 0 553 /* 554 * The check below is redundant since the comparison of 555 * the destination and the key of the rtentry has 556 * already done through looking up the routing table. 557 */ 558 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, 559 &rt6_key(rin6.ro_rt)->sin6_addr) 560#endif 561 rin6.ro_rt->rt_ifp->if_type == IFT_LOOP) { 562 int free_ia6 = 0; 563 struct in6_ifaddr *ia6; 564 565 /* 566 * found the loopback route to the interface address 567 */ 568 if (rin6.ro_rt->rt_gateway->sa_family == AF_LINK) { 569 struct sockaddr_in6 dest6; 570 571 bzero(&dest6, sizeof(dest6)); 572 dest6.sin6_family = AF_INET6; 573 dest6.sin6_len = sizeof(dest6); 574 dest6.sin6_addr = ip6->ip6_dst; 575 ia6 = (struct in6_ifaddr *) 576 ifa_ifwithaddr((struct sockaddr *)&dest6); 577 if (ia6 == NULL) 578 goto bad; 579 free_ia6 = 1; 580 } 581 else 582 ia6 = (struct in6_ifaddr *)rin6.ro_rt->rt_ifa; 583 584 /* 585 * record address information into m_tag. 586 */ 587 (void)ip6_setdstifaddr(m, ia6); 588 589 /* 590 * packets to a tentative, duplicated, or somehow invalid 591 * address must not be accepted. 592 */ 593 if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { 594 /* this address is ready */ 595 ours = 1; 596 deliverifp = ia6->ia_ifp; /* correct? */ 597 /* Count the packet in the ip address stats */ 598 ia6->ia_ifa.if_ipackets++; 599 ia6->ia_ifa.if_ibytes += m->m_pkthdr.len; 600 if (ia6 != NULL && free_ia6 != 0) 601 ifa_free(&ia6->ia_ifa); 602 goto hbhcheck; 603 } else { 604 char ip6bufs[INET6_ADDRSTRLEN]; 605 char ip6bufd[INET6_ADDRSTRLEN]; 606 /* address is not ready, so discard the packet. */ 607 nd6log((LOG_INFO, 608 "ip6_input: packet to an unready address %s->%s\n", 609 ip6_sprintf(ip6bufs, &ip6->ip6_src), 610 ip6_sprintf(ip6bufd, &ip6->ip6_dst))); 611 612 if (ia6 != NULL && free_ia6 != 0) 613 ifa_free(&ia6->ia_ifa); 614 goto bad; 615 } 616 } 617 618 /* 619 * FAITH (Firewall Aided Internet Translator) 620 */ 621 if (V_ip6_keepfaith) { 622 if (rin6.ro_rt && rin6.ro_rt->rt_ifp && 623 rin6.ro_rt->rt_ifp->if_type == IFT_FAITH) { 624 /* XXX do we need more sanity checks? */ 625 ours = 1; 626 deliverifp = rin6.ro_rt->rt_ifp; /* faith */ 627 goto hbhcheck; 628 } 629 } 630 631 /* 632 * Now there is no reason to process the packet if it's not our own 633 * and we're not a router. 634 */ 635 if (!V_ip6_forwarding) { 636 V_ip6stat.ip6s_cantforward++; 637 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); 638 goto bad; 639 } 640 641 hbhcheck: 642 /* 643 * record address information into m_tag, if we don't have one yet. 644 * note that we are unable to record it, if the address is not listed 645 * as our interface address (e.g. multicast addresses, addresses 646 * within FAITH prefixes and such). 647 */ 648 if (deliverifp && !ip6_getdstifaddr(m)) { 649 struct in6_ifaddr *ia6; 650 651 ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst); 652 if (ia6) { 653 if (!ip6_setdstifaddr(m, ia6)) { 654 /* 655 * XXX maybe we should drop the packet here, 656 * as we could not provide enough information 657 * to the upper layers. 658 */ 659 } 660 ifa_free(&ia6->ia_ifa); 661 } 662 } 663 664 /* 665 * Process Hop-by-Hop options header if it's contained. 666 * m may be modified in ip6_hopopts_input(). 667 * If a JumboPayload option is included, plen will also be modified. 668 */ 669 plen = (u_int32_t)ntohs(ip6->ip6_plen); 670 if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { 671 struct ip6_hbh *hbh; 672 673 if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) { 674#if 0 /*touches NULL pointer*/ 675 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); 676#endif 677 goto out; /* m have already been freed */ 678 } 679 680 /* adjust pointer */ 681 ip6 = mtod(m, struct ip6_hdr *); 682 683 /* 684 * if the payload length field is 0 and the next header field 685 * indicates Hop-by-Hop Options header, then a Jumbo Payload 686 * option MUST be included. 687 */ 688 if (ip6->ip6_plen == 0 && plen == 0) { 689 /* 690 * Note that if a valid jumbo payload option is 691 * contained, ip6_hopopts_input() must set a valid 692 * (non-zero) payload length to the variable plen. 693 */ 694 V_ip6stat.ip6s_badoptions++; 695 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); 696 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); 697 icmp6_error(m, ICMP6_PARAM_PROB, 698 ICMP6_PARAMPROB_HEADER, 699 (caddr_t)&ip6->ip6_plen - (caddr_t)ip6); 700 goto out; 701 } 702#ifndef PULLDOWN_TEST 703 /* ip6_hopopts_input() ensures that mbuf is contiguous */ 704 hbh = (struct ip6_hbh *)(ip6 + 1); 705#else 706 IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), 707 sizeof(struct ip6_hbh)); 708 if (hbh == NULL) { 709 V_ip6stat.ip6s_tooshort++; 710 goto out; 711 } 712#endif 713 nxt = hbh->ip6h_nxt; 714 715 /* 716 * If we are acting as a router and the packet contains a 717 * router alert option, see if we know the option value. 718 * Currently, we only support the option value for MLD, in which 719 * case we should pass the packet to the multicast routing 720 * daemon. 721 */ 722 if (rtalert != ~0) { 723 switch (rtalert) { 724 case IP6OPT_RTALERT_MLD: 725 if (V_ip6_forwarding) 726 ours = 1; 727 break; 728 default: 729 /* 730 * RFC2711 requires unrecognized values must be 731 * silently ignored. 732 */ 733 break; 734 } 735 } 736 } else 737 nxt = ip6->ip6_nxt; 738 739 /* 740 * Check that the amount of data in the buffers 741 * is as at least much as the IPv6 header would have us expect. 742 * Trim mbufs if longer than we expect. 743 * Drop packet if shorter than we expect. 744 */ 745 if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) { 746 V_ip6stat.ip6s_tooshort++; 747 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); 748 goto bad; 749 } 750 if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) { 751 if (m->m_len == m->m_pkthdr.len) { 752 m->m_len = sizeof(struct ip6_hdr) + plen; 753 m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; 754 } else 755 m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len); 756 } 757 758 /* 759 * Forward if desirable. 760 */ 761 if (V_ip6_mrouter && 762 IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 763 /* 764 * If we are acting as a multicast router, all 765 * incoming multicast packets are passed to the 766 * kernel-level multicast forwarding function. 767 * The packet is returned (relatively) intact; if 768 * ip6_mforward() returns a non-zero value, the packet 769 * must be discarded, else it may be accepted below. 770 * 771 * XXX TODO: Check hlim and multicast scope here to avoid 772 * unnecessarily calling into ip6_mforward(). 773 */ 774 if (ip6_mforward && 775 ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) { 776 IP6STAT_INC(ip6s_cantforward); 777 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); 778 goto bad; 779 } 780 } else if (!ours) { 781 ip6_forward(m, srcrt); 782 goto out; 783 } 784 785 ip6 = mtod(m, struct ip6_hdr *); 786 787 /* 788 * Malicious party may be able to use IPv4 mapped addr to confuse 789 * tcp/udp stack and bypass security checks (act as if it was from 790 * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1). Be cautious. 791 * 792 * For SIIT end node behavior, you may want to disable the check. 793 * However, you will become vulnerable to attacks using IPv4 mapped 794 * source. 795 */ 796 if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || 797 IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { 798 V_ip6stat.ip6s_badscope++; 799 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); 800 goto bad; 801 } 802 803 /* 804 * Tell launch routine the next header 805 */ 806 V_ip6stat.ip6s_delivered++; 807 in6_ifstat_inc(deliverifp, ifs6_in_deliver); 808 nest = 0; 809 810 while (nxt != IPPROTO_DONE) { 811 if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) { 812 V_ip6stat.ip6s_toomanyhdr++; 813 goto bad; 814 } 815 816 /* 817 * protection against faulty packet - there should be 818 * more sanity checks in header chain processing. 819 */ 820 if (m->m_pkthdr.len < off) { 821 V_ip6stat.ip6s_tooshort++; 822 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); 823 goto bad; 824 } 825 826#ifdef IPSEC 827 /* 828 * enforce IPsec policy checking if we are seeing last header. 829 * note that we do not visit this with protocols with pcb layer 830 * code - like udp/tcp/raw ip. 831 */ 832 if (ip6_ipsec_input(m, nxt)) 833 goto bad; 834#endif /* IPSEC */ 835 836 /* 837 * Use mbuf flags to propagate Router Alert option to 838 * ICMPv6 layer, as hop-by-hop options have been stripped. 839 */ 840 if (nxt == IPPROTO_ICMPV6 && rtalert != ~0) 841 m->m_flags |= M_RTALERT_MLD; 842 843 nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); 844 } 845 goto out; 846bad: 847 m_freem(m); 848out: 849 if (rin6.ro_rt) 850 RTFREE(rin6.ro_rt); 851} 852 853/* 854 * set/grab in6_ifaddr correspond to IPv6 destination address. 855 * XXX backward compatibility wrapper 856 * 857 * XXXRW: We should bump the refcount on ia6 before sticking it in the m_tag, 858 * and then bump it when the tag is copied, and release it when the tag is 859 * freed. Unfortunately, m_tags don't support deep copies (yet), so instead 860 * we just bump the ia refcount when we receive it. This should be fixed. 861 */ 862static struct ip6aux * 863ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6) 864{ 865 struct ip6aux *ip6a; 866 867 ip6a = ip6_addaux(m); 868 if (ip6a) 869 ip6a->ip6a_dstia6 = ia6; 870 return ip6a; /* NULL if failed to set */ 871} 872 873struct in6_ifaddr * 874ip6_getdstifaddr(struct mbuf *m) 875{ 876 struct ip6aux *ip6a; 877 struct in6_ifaddr *ia; 878 879 ip6a = ip6_findaux(m); 880 if (ip6a) { 881 ia = ip6a->ip6a_dstia6; 882 ifa_ref(&ia->ia_ifa); 883 return ia; 884 } else 885 return NULL; 886} 887 888/* 889 * Hop-by-Hop options header processing. If a valid jumbo payload option is 890 * included, the real payload length will be stored in plenp. 891 * 892 * rtalertp - XXX: should be stored more smart way 893 */ 894static int 895ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp, 896 struct mbuf **mp, int *offp) 897{ 898 struct mbuf *m = *mp; 899 int off = *offp, hbhlen; 900 struct ip6_hbh *hbh; 901 u_int8_t *opt; 902 903 /* validation of the length of the header */ 904#ifndef PULLDOWN_TEST 905 IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1); 906 hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); 907 hbhlen = (hbh->ip6h_len + 1) << 3; 908 909 IP6_EXTHDR_CHECK(m, off, hbhlen, -1); 910 hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); 911#else 912 IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, 913 sizeof(struct ip6_hdr), sizeof(struct ip6_hbh)); 914 if (hbh == NULL) { 915 V_ip6stat.ip6s_tooshort++; 916 return -1; 917 } 918 hbhlen = (hbh->ip6h_len + 1) << 3; 919 IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), 920 hbhlen); 921 if (hbh == NULL) { 922 V_ip6stat.ip6s_tooshort++; 923 return -1; 924 } 925#endif 926 off += hbhlen; 927 hbhlen -= sizeof(struct ip6_hbh); 928 opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh); 929 930 if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh), 931 hbhlen, rtalertp, plenp) < 0) 932 return (-1); 933 934 *offp = off; 935 *mp = m; 936 return (0); 937} 938 939/* 940 * Search header for all Hop-by-hop options and process each option. 941 * This function is separate from ip6_hopopts_input() in order to 942 * handle a case where the sending node itself process its hop-by-hop 943 * options header. In such a case, the function is called from ip6_output(). 944 * 945 * The function assumes that hbh header is located right after the IPv6 header 946 * (RFC2460 p7), opthead is pointer into data content in m, and opthead to 947 * opthead + hbhlen is located in continuous memory region. 948 */ 949int 950ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen, 951 u_int32_t *rtalertp, u_int32_t *plenp) 952{ 953 struct ip6_hdr *ip6; 954 int optlen = 0; 955 u_int8_t *opt = opthead; 956 u_int16_t rtalert_val; 957 u_int32_t jumboplen; 958 const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh); 959 960 for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) { 961 switch (*opt) { 962 case IP6OPT_PAD1: 963 optlen = 1; 964 break; 965 case IP6OPT_PADN: 966 if (hbhlen < IP6OPT_MINLEN) { 967 V_ip6stat.ip6s_toosmall++; 968 goto bad; 969 } 970 optlen = *(opt + 1) + 2; 971 break; 972 case IP6OPT_ROUTER_ALERT: 973 /* XXX may need check for alignment */ 974 if (hbhlen < IP6OPT_RTALERT_LEN) { 975 V_ip6stat.ip6s_toosmall++; 976 goto bad; 977 } 978 if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) { 979 /* XXX stat */ 980 icmp6_error(m, ICMP6_PARAM_PROB, 981 ICMP6_PARAMPROB_HEADER, 982 erroff + opt + 1 - opthead); 983 return (-1); 984 } 985 optlen = IP6OPT_RTALERT_LEN; 986 bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2); 987 *rtalertp = ntohs(rtalert_val); 988 break; 989 case IP6OPT_JUMBO: 990 /* XXX may need check for alignment */ 991 if (hbhlen < IP6OPT_JUMBO_LEN) { 992 V_ip6stat.ip6s_toosmall++; 993 goto bad; 994 } 995 if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) { 996 /* XXX stat */ 997 icmp6_error(m, ICMP6_PARAM_PROB, 998 ICMP6_PARAMPROB_HEADER, 999 erroff + opt + 1 - opthead); 1000 return (-1); 1001 } 1002 optlen = IP6OPT_JUMBO_LEN; 1003 1004 /* 1005 * IPv6 packets that have non 0 payload length 1006 * must not contain a jumbo payload option. 1007 */ 1008 ip6 = mtod(m, struct ip6_hdr *); 1009 if (ip6->ip6_plen) { 1010 V_ip6stat.ip6s_badoptions++; 1011 icmp6_error(m, ICMP6_PARAM_PROB, 1012 ICMP6_PARAMPROB_HEADER, 1013 erroff + opt - opthead); 1014 return (-1); 1015 } 1016 1017 /* 1018 * We may see jumbolen in unaligned location, so 1019 * we'd need to perform bcopy(). 1020 */ 1021 bcopy(opt + 2, &jumboplen, sizeof(jumboplen)); 1022 jumboplen = (u_int32_t)htonl(jumboplen); 1023 1024#if 1 1025 /* 1026 * if there are multiple jumbo payload options, 1027 * *plenp will be non-zero and the packet will be 1028 * rejected. 1029 * the behavior may need some debate in ipngwg - 1030 * multiple options does not make sense, however, 1031 * there's no explicit mention in specification. 1032 */ 1033 if (*plenp != 0) { 1034 V_ip6stat.ip6s_badoptions++; 1035 icmp6_error(m, ICMP6_PARAM_PROB, 1036 ICMP6_PARAMPROB_HEADER, 1037 erroff + opt + 2 - opthead); 1038 return (-1); 1039 } 1040#endif 1041 1042 /* 1043 * jumbo payload length must be larger than 65535. 1044 */ 1045 if (jumboplen <= IPV6_MAXPACKET) { 1046 V_ip6stat.ip6s_badoptions++; 1047 icmp6_error(m, ICMP6_PARAM_PROB, 1048 ICMP6_PARAMPROB_HEADER, 1049 erroff + opt + 2 - opthead); 1050 return (-1); 1051 } 1052 *plenp = jumboplen; 1053 1054 break; 1055 default: /* unknown option */ 1056 if (hbhlen < IP6OPT_MINLEN) { 1057 V_ip6stat.ip6s_toosmall++; 1058 goto bad; 1059 } 1060 optlen = ip6_unknown_opt(opt, m, 1061 erroff + opt - opthead); 1062 if (optlen == -1) 1063 return (-1); 1064 optlen += 2; 1065 break; 1066 } 1067 } 1068 1069 return (0); 1070 1071 bad: 1072 m_freem(m); 1073 return (-1); 1074} 1075 1076/* 1077 * Unknown option processing. 1078 * The third argument `off' is the offset from the IPv6 header to the option, 1079 * which is necessary if the IPv6 header the and option header and IPv6 header 1080 * is not continuous in order to return an ICMPv6 error. 1081 */ 1082int 1083ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off) 1084{ 1085 struct ip6_hdr *ip6; 1086 1087 switch (IP6OPT_TYPE(*optp)) { 1088 case IP6OPT_TYPE_SKIP: /* ignore the option */ 1089 return ((int)*(optp + 1)); 1090 case IP6OPT_TYPE_DISCARD: /* silently discard */ 1091 m_freem(m); 1092 return (-1); 1093 case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */ 1094 V_ip6stat.ip6s_badoptions++; 1095 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); 1096 return (-1); 1097 case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */ 1098 V_ip6stat.ip6s_badoptions++; 1099 ip6 = mtod(m, struct ip6_hdr *); 1100 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || 1101 (m->m_flags & (M_BCAST|M_MCAST))) 1102 m_freem(m); 1103 else 1104 icmp6_error(m, ICMP6_PARAM_PROB, 1105 ICMP6_PARAMPROB_OPTION, off); 1106 return (-1); 1107 } 1108 1109 m_freem(m); /* XXX: NOTREACHED */ 1110 return (-1); 1111} 1112 1113/* 1114 * Create the "control" list for this pcb. 1115 * These functions will not modify mbuf chain at all. 1116 * 1117 * With KAME mbuf chain restriction: 1118 * The routine will be called from upper layer handlers like tcp6_input(). 1119 * Thus the routine assumes that the caller (tcp6_input) have already 1120 * called IP6_EXTHDR_CHECK() and all the extension headers are located in the 1121 * very first mbuf on the mbuf chain. 1122 * 1123 * ip6_savecontrol_v4 will handle those options that are possible to be 1124 * set on a v4-mapped socket. 1125 * ip6_savecontrol will directly call ip6_savecontrol_v4 to handle those 1126 * options and handle the v6-only ones itself. 1127 */ 1128struct mbuf ** 1129ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp, 1130 int *v4only) 1131{ 1132 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 1133 1134#ifdef SO_TIMESTAMP 1135 if ((inp->inp_socket->so_options & SO_TIMESTAMP) != 0) { 1136 struct timeval tv; 1137 1138 microtime(&tv); 1139 *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv), 1140 SCM_TIMESTAMP, SOL_SOCKET); 1141 if (*mp) 1142 mp = &(*mp)->m_next; 1143 } 1144#endif 1145 1146 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { 1147 if (v4only != NULL) 1148 *v4only = 1; 1149 return (mp); 1150 } 1151 1152#define IS2292(inp, x, y) (((inp)->inp_flags & IN6P_RFC2292) ? (x) : (y)) 1153 /* RFC 2292 sec. 5 */ 1154 if ((inp->inp_flags & IN6P_PKTINFO) != 0) { 1155 struct in6_pktinfo pi6; 1156 1157 bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr)); 1158 in6_clearscope(&pi6.ipi6_addr); /* XXX */ 1159 pi6.ipi6_ifindex = 1160 (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0; 1161 1162 *mp = sbcreatecontrol((caddr_t) &pi6, 1163 sizeof(struct in6_pktinfo), 1164 IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6); 1165 if (*mp) 1166 mp = &(*mp)->m_next; 1167 } 1168 1169 if ((inp->inp_flags & IN6P_HOPLIMIT) != 0) { 1170 int hlim = ip6->ip6_hlim & 0xff; 1171 1172 *mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int), 1173 IS2292(inp, IPV6_2292HOPLIMIT, IPV6_HOPLIMIT), 1174 IPPROTO_IPV6); 1175 if (*mp) 1176 mp = &(*mp)->m_next; 1177 } 1178 1179 if (v4only != NULL) 1180 *v4only = 0; 1181 return (mp); 1182} 1183 1184void 1185ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) 1186{ 1187 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 1188 int v4only = 0; 1189 1190 mp = ip6_savecontrol_v4(in6p, m, mp, &v4only); 1191 if (v4only) 1192 return; 1193 1194 if ((in6p->inp_flags & IN6P_TCLASS) != 0) { 1195 u_int32_t flowinfo; 1196 int tclass; 1197 1198 flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK); 1199 flowinfo >>= 20; 1200 1201 tclass = flowinfo & 0xff; 1202 *mp = sbcreatecontrol((caddr_t) &tclass, sizeof(tclass), 1203 IPV6_TCLASS, IPPROTO_IPV6); 1204 if (*mp) 1205 mp = &(*mp)->m_next; 1206 } 1207 1208 /* 1209 * IPV6_HOPOPTS socket option. Recall that we required super-user 1210 * privilege for the option (see ip6_ctloutput), but it might be too 1211 * strict, since there might be some hop-by-hop options which can be 1212 * returned to normal user. 1213 * See also RFC 2292 section 6 (or RFC 3542 section 8). 1214 */ 1215 if ((in6p->inp_flags & IN6P_HOPOPTS) != 0) { 1216 /* 1217 * Check if a hop-by-hop options header is contatined in the 1218 * received packet, and if so, store the options as ancillary 1219 * data. Note that a hop-by-hop options header must be 1220 * just after the IPv6 header, which is assured through the 1221 * IPv6 input processing. 1222 */ 1223 if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { 1224 struct ip6_hbh *hbh; 1225 int hbhlen = 0; 1226#ifdef PULLDOWN_TEST 1227 struct mbuf *ext; 1228#endif 1229 1230#ifndef PULLDOWN_TEST 1231 hbh = (struct ip6_hbh *)(ip6 + 1); 1232 hbhlen = (hbh->ip6h_len + 1) << 3; 1233#else 1234 ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr), 1235 ip6->ip6_nxt); 1236 if (ext == NULL) { 1237 V_ip6stat.ip6s_tooshort++; 1238 return; 1239 } 1240 hbh = mtod(ext, struct ip6_hbh *); 1241 hbhlen = (hbh->ip6h_len + 1) << 3; 1242 if (hbhlen != ext->m_len) { 1243 m_freem(ext); 1244 V_ip6stat.ip6s_tooshort++; 1245 return; 1246 } 1247#endif 1248 1249 /* 1250 * XXX: We copy the whole header even if a 1251 * jumbo payload option is included, the option which 1252 * is to be removed before returning according to 1253 * RFC2292. 1254 * Note: this constraint is removed in RFC3542 1255 */ 1256 *mp = sbcreatecontrol((caddr_t)hbh, hbhlen, 1257 IS2292(in6p, IPV6_2292HOPOPTS, IPV6_HOPOPTS), 1258 IPPROTO_IPV6); 1259 if (*mp) 1260 mp = &(*mp)->m_next; 1261#ifdef PULLDOWN_TEST 1262 m_freem(ext); 1263#endif 1264 } 1265 } 1266 1267 if ((in6p->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) { 1268 int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr); 1269 1270 /* 1271 * Search for destination options headers or routing 1272 * header(s) through the header chain, and stores each 1273 * header as ancillary data. 1274 * Note that the order of the headers remains in 1275 * the chain of ancillary data. 1276 */ 1277 while (1) { /* is explicit loop prevention necessary? */ 1278 struct ip6_ext *ip6e = NULL; 1279 int elen; 1280#ifdef PULLDOWN_TEST 1281 struct mbuf *ext = NULL; 1282#endif 1283 1284 /* 1285 * if it is not an extension header, don't try to 1286 * pull it from the chain. 1287 */ 1288 switch (nxt) { 1289 case IPPROTO_DSTOPTS: 1290 case IPPROTO_ROUTING: 1291 case IPPROTO_HOPOPTS: 1292 case IPPROTO_AH: /* is it possible? */ 1293 break; 1294 default: 1295 goto loopend; 1296 } 1297 1298#ifndef PULLDOWN_TEST 1299 if (off + sizeof(*ip6e) > m->m_len) 1300 goto loopend; 1301 ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off); 1302 if (nxt == IPPROTO_AH) 1303 elen = (ip6e->ip6e_len + 2) << 2; 1304 else 1305 elen = (ip6e->ip6e_len + 1) << 3; 1306 if (off + elen > m->m_len) 1307 goto loopend; 1308#else 1309 ext = ip6_pullexthdr(m, off, nxt); 1310 if (ext == NULL) { 1311 V_ip6stat.ip6s_tooshort++; 1312 return; 1313 } 1314 ip6e = mtod(ext, struct ip6_ext *); 1315 if (nxt == IPPROTO_AH) 1316 elen = (ip6e->ip6e_len + 2) << 2; 1317 else 1318 elen = (ip6e->ip6e_len + 1) << 3; 1319 if (elen != ext->m_len) { 1320 m_freem(ext); 1321 V_ip6stat.ip6s_tooshort++; 1322 return; 1323 } 1324#endif 1325 1326 switch (nxt) { 1327 case IPPROTO_DSTOPTS: 1328 if (!(in6p->inp_flags & IN6P_DSTOPTS)) 1329 break; 1330 1331 *mp = sbcreatecontrol((caddr_t)ip6e, elen, 1332 IS2292(in6p, 1333 IPV6_2292DSTOPTS, IPV6_DSTOPTS), 1334 IPPROTO_IPV6); 1335 if (*mp) 1336 mp = &(*mp)->m_next; 1337 break; 1338 case IPPROTO_ROUTING: 1339 if (!(in6p->inp_flags & IN6P_RTHDR)) 1340 break; 1341 1342 *mp = sbcreatecontrol((caddr_t)ip6e, elen, 1343 IS2292(in6p, IPV6_2292RTHDR, IPV6_RTHDR), 1344 IPPROTO_IPV6); 1345 if (*mp) 1346 mp = &(*mp)->m_next; 1347 break; 1348 case IPPROTO_HOPOPTS: 1349 case IPPROTO_AH: /* is it possible? */ 1350 break; 1351 1352 default: 1353 /* 1354 * other cases have been filtered in the above. 1355 * none will visit this case. here we supply 1356 * the code just in case (nxt overwritten or 1357 * other cases). 1358 */ 1359#ifdef PULLDOWN_TEST 1360 m_freem(ext); 1361#endif 1362 goto loopend; 1363 1364 } 1365 1366 /* proceed with the next header. */ 1367 off += elen; 1368 nxt = ip6e->ip6e_nxt; 1369 ip6e = NULL; 1370#ifdef PULLDOWN_TEST 1371 m_freem(ext); 1372 ext = NULL; 1373#endif 1374 } 1375 loopend: 1376 ; 1377 } 1378} 1379#undef IS2292 1380 1381void 1382ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu) 1383{ 1384 struct socket *so; 1385 struct mbuf *m_mtu; 1386 struct ip6_mtuinfo mtuctl; 1387 1388 so = in6p->inp_socket; 1389 1390 if (mtu == NULL) 1391 return; 1392 1393#ifdef DIAGNOSTIC 1394 if (so == NULL) /* I believe this is impossible */ 1395 panic("ip6_notify_pmtu: socket is NULL"); 1396#endif 1397 1398 bzero(&mtuctl, sizeof(mtuctl)); /* zero-clear for safety */ 1399 mtuctl.ip6m_mtu = *mtu; 1400 mtuctl.ip6m_addr = *dst; 1401 if (sa6_recoverscope(&mtuctl.ip6m_addr)) 1402 return; 1403 1404 if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl), 1405 IPV6_PATHMTU, IPPROTO_IPV6)) == NULL) 1406 return; 1407 1408 if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu) 1409 == 0) { 1410 m_freem(m_mtu); 1411 /* XXX: should count statistics */ 1412 } else 1413 sorwakeup(so); 1414 1415 return; 1416} 1417 1418#ifdef PULLDOWN_TEST 1419/* 1420 * pull single extension header from mbuf chain. returns single mbuf that 1421 * contains the result, or NULL on error. 1422 */ 1423static struct mbuf * 1424ip6_pullexthdr(struct mbuf *m, size_t off, int nxt) 1425{ 1426 struct ip6_ext ip6e; 1427 size_t elen; 1428 struct mbuf *n; 1429 1430#ifdef DIAGNOSTIC 1431 switch (nxt) { 1432 case IPPROTO_DSTOPTS: 1433 case IPPROTO_ROUTING: 1434 case IPPROTO_HOPOPTS: 1435 case IPPROTO_AH: /* is it possible? */ 1436 break; 1437 default: 1438 printf("ip6_pullexthdr: invalid nxt=%d\n", nxt); 1439 } 1440#endif 1441 1442 m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); 1443 if (nxt == IPPROTO_AH) 1444 elen = (ip6e.ip6e_len + 2) << 2; 1445 else 1446 elen = (ip6e.ip6e_len + 1) << 3; 1447 1448 MGET(n, M_DONTWAIT, MT_DATA); 1449 if (n && elen >= MLEN) { 1450 MCLGET(n, M_DONTWAIT); 1451 if ((n->m_flags & M_EXT) == 0) { 1452 m_free(n); 1453 n = NULL; 1454 } 1455 } 1456 if (!n) 1457 return NULL; 1458 1459 n->m_len = 0; 1460 if (elen >= M_TRAILINGSPACE(n)) { 1461 m_free(n); 1462 return NULL; 1463 } 1464 1465 m_copydata(m, off, elen, mtod(n, caddr_t)); 1466 n->m_len = elen; 1467 return n; 1468} 1469#endif 1470 1471/* 1472 * Get pointer to the previous header followed by the header 1473 * currently processed. 1474 * XXX: This function supposes that 1475 * M includes all headers, 1476 * the next header field and the header length field of each header 1477 * are valid, and 1478 * the sum of each header length equals to OFF. 1479 * Because of these assumptions, this function must be called very 1480 * carefully. Moreover, it will not be used in the near future when 1481 * we develop `neater' mechanism to process extension headers. 1482 */ 1483char * 1484ip6_get_prevhdr(struct mbuf *m, int off) 1485{ 1486 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 1487 1488 if (off == sizeof(struct ip6_hdr)) 1489 return (&ip6->ip6_nxt); 1490 else { 1491 int len, nxt; 1492 struct ip6_ext *ip6e = NULL; 1493 1494 nxt = ip6->ip6_nxt; 1495 len = sizeof(struct ip6_hdr); 1496 while (len < off) { 1497 ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len); 1498 1499 switch (nxt) { 1500 case IPPROTO_FRAGMENT: 1501 len += sizeof(struct ip6_frag); 1502 break; 1503 case IPPROTO_AH: 1504 len += (ip6e->ip6e_len + 2) << 2; 1505 break; 1506 default: 1507 len += (ip6e->ip6e_len + 1) << 3; 1508 break; 1509 } 1510 nxt = ip6e->ip6e_nxt; 1511 } 1512 if (ip6e) 1513 return (&ip6e->ip6e_nxt); 1514 else 1515 return NULL; 1516 } 1517} 1518 1519/* 1520 * get next header offset. m will be retained. 1521 */ 1522int 1523ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp) 1524{ 1525 struct ip6_hdr ip6; 1526 struct ip6_ext ip6e; 1527 struct ip6_frag fh; 1528 1529 /* just in case */ 1530 if (m == NULL) 1531 panic("ip6_nexthdr: m == NULL"); 1532 if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off) 1533 return -1; 1534 1535 switch (proto) { 1536 case IPPROTO_IPV6: 1537 if (m->m_pkthdr.len < off + sizeof(ip6)) 1538 return -1; 1539 m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6); 1540 if (nxtp) 1541 *nxtp = ip6.ip6_nxt; 1542 off += sizeof(ip6); 1543 return off; 1544 1545 case IPPROTO_FRAGMENT: 1546 /* 1547 * terminate parsing if it is not the first fragment, 1548 * it does not make sense to parse through it. 1549 */ 1550 if (m->m_pkthdr.len < off + sizeof(fh)) 1551 return -1; 1552 m_copydata(m, off, sizeof(fh), (caddr_t)&fh); 1553 /* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */ 1554 if (fh.ip6f_offlg & IP6F_OFF_MASK) 1555 return -1; 1556 if (nxtp) 1557 *nxtp = fh.ip6f_nxt; 1558 off += sizeof(struct ip6_frag); 1559 return off; 1560 1561 case IPPROTO_AH: 1562 if (m->m_pkthdr.len < off + sizeof(ip6e)) 1563 return -1; 1564 m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); 1565 if (nxtp) 1566 *nxtp = ip6e.ip6e_nxt; 1567 off += (ip6e.ip6e_len + 2) << 2; 1568 return off; 1569 1570 case IPPROTO_HOPOPTS: 1571 case IPPROTO_ROUTING: 1572 case IPPROTO_DSTOPTS: 1573 if (m->m_pkthdr.len < off + sizeof(ip6e)) 1574 return -1; 1575 m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); 1576 if (nxtp) 1577 *nxtp = ip6e.ip6e_nxt; 1578 off += (ip6e.ip6e_len + 1) << 3; 1579 return off; 1580 1581 case IPPROTO_NONE: 1582 case IPPROTO_ESP: 1583 case IPPROTO_IPCOMP: 1584 /* give up */ 1585 return -1; 1586 1587 default: 1588 return -1; 1589 } 1590 1591 return -1; 1592} 1593 1594/* 1595 * get offset for the last header in the chain. m will be kept untainted. 1596 */ 1597int 1598ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp) 1599{ 1600 int newoff; 1601 int nxt; 1602 1603 if (!nxtp) { 1604 nxt = -1; 1605 nxtp = &nxt; 1606 } 1607 while (1) { 1608 newoff = ip6_nexthdr(m, off, proto, nxtp); 1609 if (newoff < 0) 1610 return off; 1611 else if (newoff < off) 1612 return -1; /* invalid */ 1613 else if (newoff == off) 1614 return newoff; 1615 1616 off = newoff; 1617 proto = *nxtp; 1618 } 1619} 1620 1621struct ip6aux * 1622ip6_addaux(struct mbuf *m) 1623{ 1624 struct m_tag *mtag; 1625 1626 mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); 1627 if (!mtag) { 1628 mtag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux), 1629 M_NOWAIT); 1630 if (mtag) { 1631 m_tag_prepend(m, mtag); 1632 bzero(mtag + 1, sizeof(struct ip6aux)); 1633 } 1634 } 1635 return mtag ? (struct ip6aux *)(mtag + 1) : NULL; 1636} 1637 1638struct ip6aux * 1639ip6_findaux(struct mbuf *m) 1640{ 1641 struct m_tag *mtag; 1642 1643 mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); 1644 return mtag ? (struct ip6aux *)(mtag + 1) : NULL; 1645} 1646 1647void 1648ip6_delaux(struct mbuf *m) 1649{ 1650 struct m_tag *mtag; 1651 1652 mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); 1653 if (mtag) 1654 m_tag_delete(m, mtag); 1655} 1656 1657/* 1658 * System control for IP6 1659 */ 1660 1661u_char inet6ctlerrmap[PRC_NCMDS] = { 1662 0, 0, 0, 0, 1663 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, 1664 EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, 1665 EMSGSIZE, EHOSTUNREACH, 0, 0, 1666 0, 0, 0, 0, 1667 ENOPROTOOPT 1668}; 1669