ip_input.c revision 1.151
1/* $NetBSD: ip_input.c,v 1.151 2002/06/07 13:43:47 itojun Exp $ */ 2 3/* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32/*- 33 * Copyright (c) 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Public Access Networks Corporation ("Panix"). It was developed under 38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon. 
39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69/* 70 * Copyright (c) 1982, 1986, 1988, 1993 71 * The Regents of the University of California. All rights reserved. 
72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. All advertising materials mentioning features or use of this software 82 * must display the following acknowledgement: 83 * This product includes software developed by the University of 84 * California, Berkeley and its contributors. 85 * 4. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 
100 * 101 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 102 */ 103 104#include <sys/cdefs.h> 105__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.151 2002/06/07 13:43:47 itojun Exp $"); 106 107#include "opt_gateway.h" 108#include "opt_pfil_hooks.h" 109#include "opt_ipsec.h" 110#include "opt_mrouting.h" 111#include "opt_inet_csum.h" 112 113#include <sys/param.h> 114#include <sys/systm.h> 115#include <sys/malloc.h> 116#include <sys/mbuf.h> 117#include <sys/domain.h> 118#include <sys/protosw.h> 119#include <sys/socket.h> 120#include <sys/socketvar.h> 121#include <sys/errno.h> 122#include <sys/time.h> 123#include <sys/kernel.h> 124#include <sys/pool.h> 125#include <sys/sysctl.h> 126 127#include <net/if.h> 128#include <net/if_dl.h> 129#include <net/route.h> 130#include <net/pfil.h> 131 132#include <netinet/in.h> 133#include <netinet/in_systm.h> 134#include <netinet/ip.h> 135#include <netinet/in_pcb.h> 136#include <netinet/in_var.h> 137#include <netinet/ip_var.h> 138#include <netinet/ip_icmp.h> 139/* just for gif_ttl */ 140#include <netinet/in_gif.h> 141#include "gif.h" 142#include <net/if_gre.h> 143#include "gre.h" 144 145#ifdef MROUTING 146#include <netinet/ip_mroute.h> 147#endif 148 149#ifdef IPSEC 150#include <netinet6/ipsec.h> 151#include <netkey/key.h> 152#endif 153 154#ifndef IPFORWARDING 155#ifdef GATEWAY 156#define IPFORWARDING 1 /* forward IP packets not for us */ 157#else /* GATEWAY */ 158#define IPFORWARDING 0 /* don't forward IP packets not for us */ 159#endif /* GATEWAY */ 160#endif /* IPFORWARDING */ 161#ifndef IPSENDREDIRECTS 162#define IPSENDREDIRECTS 1 163#endif 164#ifndef IPFORWSRCRT 165#define IPFORWSRCRT 1 /* forward source-routed packets */ 166#endif 167#ifndef IPALLOWSRCRT 168#define IPALLOWSRCRT 1 /* allow source-routed packets */ 169#endif 170#ifndef IPMTUDISC 171#define IPMTUDISC 0 172#endif 173#ifndef IPMTUDISCTIMEOUT 174#define IPMTUDISCTIMEOUT (10 * 60) /* as per RFC 1191 */ 175#endif 176 177/* 178 * Note: DIRECTED_BROADCAST is handled this way so that 
previous 179 * configuration using this option will Just Work. 180 */ 181#ifndef IPDIRECTEDBCAST 182#ifdef DIRECTED_BROADCAST 183#define IPDIRECTEDBCAST 1 184#else 185#define IPDIRECTEDBCAST 0 186#endif /* DIRECTED_BROADCAST */ 187#endif /* IPDIRECTEDBCAST */ 188int ipforwarding = IPFORWARDING; 189int ipsendredirects = IPSENDREDIRECTS; 190int ip_defttl = IPDEFTTL; 191int ip_forwsrcrt = IPFORWSRCRT; 192int ip_directedbcast = IPDIRECTEDBCAST; 193int ip_allowsrcrt = IPALLOWSRCRT; 194int ip_mtudisc = IPMTUDISC; 195u_int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; 196#ifdef DIAGNOSTIC 197int ipprintfs = 0; 198#endif 199 200struct rttimer_queue *ip_mtudisc_timeout_q = NULL; 201 202extern struct domain inetdomain; 203int ipqmaxlen = IFQ_MAXLEN; 204u_long in_ifaddrhash; /* size of hash table - 1 */ 205int in_ifaddrentries; /* total number of addrs */ 206struct in_ifaddrhead in_ifaddr; 207struct in_ifaddrhashhead *in_ifaddrhashtbl; 208struct ifqueue ipintrq; 209struct ipstat ipstat; 210u_int16_t ip_id; 211 212#ifdef PFIL_HOOKS 213struct pfil_head inet_pfil_hook; 214#endif 215 216struct ipqhead ipq; 217int ipq_locked; 218int ip_nfragpackets = 0; 219int ip_maxfragpackets = 200; 220 221static __inline int ipq_lock_try __P((void)); 222static __inline void ipq_unlock __P((void)); 223 224static __inline int 225ipq_lock_try() 226{ 227 int s; 228 229 /* 230 * Use splvm() -- we're blocking things that would cause 231 * mbuf allocation. 
232 */ 233 s = splvm(); 234 if (ipq_locked) { 235 splx(s); 236 return (0); 237 } 238 ipq_locked = 1; 239 splx(s); 240 return (1); 241} 242 243static __inline void 244ipq_unlock() 245{ 246 int s; 247 248 s = splvm(); 249 ipq_locked = 0; 250 splx(s); 251} 252 253#ifdef DIAGNOSTIC 254#define IPQ_LOCK() \ 255do { \ 256 if (ipq_lock_try() == 0) { \ 257 printf("%s:%d: ipq already locked\n", __FILE__, __LINE__); \ 258 panic("ipq_lock"); \ 259 } \ 260} while (0) 261#define IPQ_LOCK_CHECK() \ 262do { \ 263 if (ipq_locked == 0) { \ 264 printf("%s:%d: ipq lock not held\n", __FILE__, __LINE__); \ 265 panic("ipq lock check"); \ 266 } \ 267} while (0) 268#else 269#define IPQ_LOCK() (void) ipq_lock_try() 270#define IPQ_LOCK_CHECK() /* nothing */ 271#endif 272 273#define IPQ_UNLOCK() ipq_unlock() 274 275struct pool ipqent_pool; 276 277#ifdef INET_CSUM_COUNTERS 278#include <sys/device.h> 279 280struct evcnt ip_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 281 NULL, "inet", "hwcsum bad"); 282struct evcnt ip_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 283 NULL, "inet", "hwcsum ok"); 284struct evcnt ip_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 285 NULL, "inet", "swcsum"); 286 287#define INET_CSUM_COUNTER_INCR(ev) (ev)->ev_count++ 288 289#else 290 291#define INET_CSUM_COUNTER_INCR(ev) /* nothing */ 292 293#endif /* INET_CSUM_COUNTERS */ 294 295/* 296 * We need to save the IP options in case a protocol wants to respond 297 * to an incoming packet over the same route if the packet got here 298 * using IP source routing. This allows connection establishment and 299 * maintenance when the remote end is on a network that is not known 300 * to us. 
301 */ 302int ip_nhops = 0; 303static struct ip_srcrt { 304 struct in_addr dst; /* final destination */ 305 char nop; /* one NOP to align */ 306 char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */ 307 struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)]; 308} ip_srcrt; 309 310static void save_rte __P((u_char *, struct in_addr)); 311 312/* 313 * IP initialization: fill in IP protocol switch table. 314 * All protocols not implemented in kernel go to raw IP protocol handler. 315 */ 316void 317ip_init() 318{ 319 struct protosw *pr; 320 int i; 321 322 pool_init(&ipqent_pool, sizeof(struct ipqent), 0, 0, 0, "ipqepl", 323 NULL); 324 325 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 326 if (pr == 0) 327 panic("ip_init"); 328 for (i = 0; i < IPPROTO_MAX; i++) 329 ip_protox[i] = pr - inetsw; 330 for (pr = inetdomain.dom_protosw; 331 pr < inetdomain.dom_protoswNPROTOSW; pr++) 332 if (pr->pr_domain->dom_family == PF_INET && 333 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) 334 ip_protox[pr->pr_protocol] = pr - inetsw; 335 LIST_INIT(&ipq); 336 ip_id = time.tv_sec & 0xffff; 337 ipintrq.ifq_maxlen = ipqmaxlen; 338 TAILQ_INIT(&in_ifaddr); 339 in_ifaddrhashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, M_IFADDR, 340 M_WAITOK, &in_ifaddrhash); 341 if (ip_mtudisc != 0) 342 ip_mtudisc_timeout_q = 343 rt_timer_queue_create(ip_mtudisc_timeout); 344#ifdef GATEWAY 345 ipflow_init(); 346#endif 347 348#ifdef PFIL_HOOKS 349 /* Register our Packet Filter hook. 
*/ 350 inet_pfil_hook.ph_type = PFIL_TYPE_AF; 351 inet_pfil_hook.ph_af = AF_INET; 352 i = pfil_head_register(&inet_pfil_hook); 353 if (i != 0) 354 printf("ip_init: WARNING: unable to register pfil hook, " 355 "error %d\n", i); 356#endif /* PFIL_HOOKS */ 357 358#ifdef INET_CSUM_COUNTERS 359 evcnt_attach_static(&ip_hwcsum_bad); 360 evcnt_attach_static(&ip_hwcsum_ok); 361 evcnt_attach_static(&ip_swcsum); 362#endif /* INET_CSUM_COUNTERS */ 363} 364 365struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET }; 366struct route ipforward_rt; 367 368/* 369 * IP software interrupt routine 370 */ 371void 372ipintr() 373{ 374 int s; 375 struct mbuf *m; 376 377 while (1) { 378 s = splnet(); 379 IF_DEQUEUE(&ipintrq, m); 380 splx(s); 381 if (m == 0) 382 return; 383 ip_input(m); 384 } 385} 386 387/* 388 * Ip input routine. Checksum and byte swap header. If fragmented 389 * try to reassemble. Process options. Pass to next level. 390 */ 391void 392ip_input(struct mbuf *m) 393{ 394 struct ip *ip = NULL; 395 struct ipq *fp; 396 struct in_ifaddr *ia; 397 struct ifaddr *ifa; 398 struct ipqent *ipqe; 399 int hlen = 0, mff, len; 400 int downmatch; 401 402#ifdef DIAGNOSTIC 403 if ((m->m_flags & M_PKTHDR) == 0) 404 panic("ipintr no HDR"); 405#endif 406#ifdef IPSEC 407 /* 408 * should the inner packet be considered authentic? 409 * see comment in ah4_input(). 410 */ 411 if (m) { 412 m->m_flags &= ~M_AUTHIPHDR; 413 m->m_flags &= ~M_AUTHIPDGM; 414 } 415#endif 416 /* 417 * If no IP addresses have been set yet but the interfaces 418 * are receiving, can't do anything with incoming packets yet. 
419 */ 420 if (TAILQ_FIRST(&in_ifaddr) == 0) 421 goto bad; 422 ipstat.ips_total++; 423 if (m->m_len < sizeof (struct ip) && 424 (m = m_pullup(m, sizeof (struct ip))) == 0) { 425 ipstat.ips_toosmall++; 426 return; 427 } 428 ip = mtod(m, struct ip *); 429 if (ip->ip_v != IPVERSION) { 430 ipstat.ips_badvers++; 431 goto bad; 432 } 433 hlen = ip->ip_hl << 2; 434 if (hlen < sizeof(struct ip)) { /* minimum header length */ 435 ipstat.ips_badhlen++; 436 goto bad; 437 } 438 if (hlen > m->m_len) { 439 if ((m = m_pullup(m, hlen)) == 0) { 440 ipstat.ips_badhlen++; 441 return; 442 } 443 ip = mtod(m, struct ip *); 444 } 445 446 /* 447 * RFC1122: packets with a multicast source address are 448 * not allowed. 449 */ 450 if (IN_MULTICAST(ip->ip_src.s_addr)) { 451 ipstat.ips_badaddr++; 452 goto bad; 453 } 454 455 /* 127/8 must not appear on wire - RFC1122 */ 456 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 457 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 458 if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { 459 ipstat.ips_badaddr++; 460 goto bad; 461 } 462 } 463 464 switch (m->m_pkthdr.csum_flags & 465 ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) | 466 M_CSUM_IPv4_BAD)) { 467 case M_CSUM_IPv4|M_CSUM_IPv4_BAD: 468 INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad); 469 goto badcsum; 470 471 case M_CSUM_IPv4: 472 /* Checksum was okay. */ 473 INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok); 474 break; 475 476 default: 477 /* Must compute it ourselves. */ 478 INET_CSUM_COUNTER_INCR(&ip_swcsum); 479 if (in_cksum(m, hlen) != 0) 480 goto bad; 481 break; 482 } 483 484 /* Retrieve the packet length. */ 485 len = ntohs(ip->ip_len); 486 487 /* 488 * Check for additional length bogosity 489 */ 490 if (len < hlen) { 491 ipstat.ips_badlen++; 492 goto bad; 493 } 494 495 /* 496 * Check that the amount of data in the buffers 497 * is as at least much as the IP header would have us expect. 498 * Trim mbufs if longer than we expect. 
499 * Drop packet if shorter than we expect. 500 */ 501 if (m->m_pkthdr.len < len) { 502 ipstat.ips_tooshort++; 503 goto bad; 504 } 505 if (m->m_pkthdr.len > len) { 506 if (m->m_len == m->m_pkthdr.len) { 507 m->m_len = len; 508 m->m_pkthdr.len = len; 509 } else 510 m_adj(m, len - m->m_pkthdr.len); 511 } 512 513#ifdef IPSEC 514 /* ipflow (IP fast forwarding) is not compatible with IPsec. */ 515 m->m_flags &= ~M_CANFASTFWD; 516#else 517 /* 518 * Assume that we can create a fast-forward IP flow entry 519 * based on this packet. 520 */ 521 m->m_flags |= M_CANFASTFWD; 522#endif 523 524#ifdef PFIL_HOOKS 525 /* 526 * Run through list of hooks for input packets. If there are any 527 * filters which require that additional packets in the flow are 528 * not fast-forwarded, they must clear the M_CANFASTFWD flag. 529 * Note that filters must _never_ set this flag, as another filter 530 * in the list may have previously cleared it. 531 */ 532 /* 533 * let ipfilter look at packet on the wire, 534 * not the decapsulated packet. 535 */ 536#ifdef IPSEC 537 if (!ipsec_getnhist(m)) 538#else 539 if (1) 540#endif 541 { 542 if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, 543 PFIL_IN) != 0) 544 return; 545 if (m == NULL) 546 return; 547 ip = mtod(m, struct ip *); 548 hlen = ip->ip_hl << 2; 549 } 550#endif /* PFIL_HOOKS */ 551 552#ifdef ALTQ 553 /* XXX Temporary until ALTQ is changed to use a pfil hook */ 554 if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) { 555 /* packet dropped by traffic conditioner */ 556 return; 557 } 558#endif 559 560 /* 561 * Convert fields to host representation. 562 */ 563 NTOHS(ip->ip_len); 564 NTOHS(ip->ip_off); 565 566 /* 567 * Process options and, if not destined for us, 568 * ship it on. ip_dooptions returns 1 when an 569 * error was detected (causing an icmp message 570 * to be sent and the original packet to be freed). 
571 */ 572 ip_nhops = 0; /* for source routed packets */ 573 if (hlen > sizeof (struct ip) && ip_dooptions(m)) 574 return; 575 576 /* 577 * Check our list of addresses, to see if the packet is for us. 578 * 579 * Traditional 4.4BSD did not consult IFF_UP at all. 580 * The behavior here is to treat addresses on !IFF_UP interface 581 * as not mine. 582 */ 583 downmatch = 0; 584 LIST_FOREACH(ia, &IN_IFADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 585 if (in_hosteq(ia->ia_addr.sin_addr, ip->ip_dst)) { 586 if ((ia->ia_ifp->if_flags & IFF_UP) != 0) 587 break; 588 else 589 downmatch++; 590 } 591 } 592 if (ia != NULL) 593 goto ours; 594 if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { 595 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) { 596 if (ifa->ifa_addr->sa_family != AF_INET) 597 continue; 598 ia = ifatoia(ifa); 599 if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) || 600 in_hosteq(ip->ip_dst, ia->ia_netbroadcast) || 601 /* 602 * Look for all-0's host part (old broadcast addr), 603 * either for subnet or net. 604 */ 605 ip->ip_dst.s_addr == ia->ia_subnet || 606 ip->ip_dst.s_addr == ia->ia_net) 607 goto ours; 608 /* 609 * An interface with IP address zero accepts 610 * all packets that arrive on that interface. 611 */ 612 if (in_nullhost(ia->ia_addr.sin_addr)) 613 goto ours; 614 } 615 } 616 if (IN_MULTICAST(ip->ip_dst.s_addr)) { 617 struct in_multi *inm; 618#ifdef MROUTING 619 extern struct socket *ip_mrouter; 620 621 if (M_READONLY(m)) { 622 if ((m = m_pullup(m, hlen)) == 0) { 623 ipstat.ips_toosmall++; 624 return; 625 } 626 ip = mtod(m, struct ip *); 627 } 628 629 if (ip_mrouter) { 630 /* 631 * If we are acting as a multicast router, all 632 * incoming multicast packets are passed to the 633 * kernel-level multicast forwarding function. 634 * The packet is returned (relatively) intact; if 635 * ip_mforward() returns a non-zero value, the packet 636 * must be discarded, else it may be accepted below. 
637 * 638 * (The IP ident field is put in the same byte order 639 * as expected when ip_mforward() is called from 640 * ip_output().) 641 */ 642 if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) { 643 ipstat.ips_cantforward++; 644 m_freem(m); 645 return; 646 } 647 648 /* 649 * The process-level routing demon needs to receive 650 * all multicast IGMP packets, whether or not this 651 * host belongs to their destination groups. 652 */ 653 if (ip->ip_p == IPPROTO_IGMP) 654 goto ours; 655 ipstat.ips_forward++; 656 } 657#endif 658 /* 659 * See if we belong to the destination multicast group on the 660 * arrival interface. 661 */ 662 IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm); 663 if (inm == NULL) { 664 ipstat.ips_cantforward++; 665 m_freem(m); 666 return; 667 } 668 goto ours; 669 } 670 if (ip->ip_dst.s_addr == INADDR_BROADCAST || 671 in_nullhost(ip->ip_dst)) 672 goto ours; 673 674 /* 675 * Not for us; forward if possible and desirable. 676 */ 677 if (ipforwarding == 0) { 678 ipstat.ips_cantforward++; 679 m_freem(m); 680 } else { 681 /* 682 * If ip_dst matched any of my address on !IFF_UP interface, 683 * and there's no IFF_UP interface that matches ip_dst, 684 * send icmp unreach. Forwarding it will result in in-kernel 685 * forwarding loop till TTL goes to 0. 686 */ 687 if (downmatch) { 688 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); 689 ipstat.ips_cantforward++; 690 return; 691 } 692#ifdef IPSEC 693 if (ipsec4_in_reject(m, NULL)) { 694 ipsecstat.in_polvio++; 695 goto bad; 696 } 697#endif 698 699 ip_forward(m, 0); 700 } 701 return; 702 703ours: 704 /* 705 * If offset or IP_MF are set, must reassemble. 706 * Otherwise, nothing need be done. 707 * (We could look in the reassembly queue to see 708 * if the packet was previously fragmented, 709 * but it's not worth the time; just let them time out.) 710 */ 711 if (ip->ip_off & ~(IP_DF|IP_RF)) { 712 /* 713 * Look for queue of fragments 714 * of this datagram. 
715 */ 716 IPQ_LOCK(); 717 LIST_FOREACH(fp, &ipq, ipq_q) 718 if (ip->ip_id == fp->ipq_id && 719 in_hosteq(ip->ip_src, fp->ipq_src) && 720 in_hosteq(ip->ip_dst, fp->ipq_dst) && 721 ip->ip_p == fp->ipq_p) 722 goto found; 723 fp = 0; 724found: 725 726 /* 727 * Adjust ip_len to not reflect header, 728 * set ipqe_mff if more fragments are expected, 729 * convert offset of this to bytes. 730 */ 731 ip->ip_len -= hlen; 732 mff = (ip->ip_off & IP_MF) != 0; 733 if (mff) { 734 /* 735 * Make sure that fragments have a data length 736 * that's a non-zero multiple of 8 bytes. 737 */ 738 if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) { 739 ipstat.ips_badfrags++; 740 IPQ_UNLOCK(); 741 goto bad; 742 } 743 } 744 ip->ip_off <<= 3; 745 746 /* 747 * If datagram marked as having more fragments 748 * or if this is not the first fragment, 749 * attempt reassembly; if it succeeds, proceed. 750 */ 751 if (mff || ip->ip_off) { 752 ipstat.ips_fragments++; 753 ipqe = pool_get(&ipqent_pool, PR_NOWAIT); 754 if (ipqe == NULL) { 755 ipstat.ips_rcvmemdrop++; 756 IPQ_UNLOCK(); 757 goto bad; 758 } 759 ipqe->ipqe_mff = mff; 760 ipqe->ipqe_m = m; 761 ipqe->ipqe_ip = ip; 762 m = ip_reass(ipqe, fp); 763 if (m == 0) { 764 IPQ_UNLOCK(); 765 return; 766 } 767 ipstat.ips_reassembled++; 768 ip = mtod(m, struct ip *); 769 hlen = ip->ip_hl << 2; 770 ip->ip_len += hlen; 771 } else 772 if (fp) 773 ip_freef(fp); 774 IPQ_UNLOCK(); 775 } 776 777#ifdef IPSEC 778 /* 779 * enforce IPsec policy checking if we are seeing last header. 780 * note that we do not visit this with protocols with pcb layer 781 * code - like udp/tcp/raw ip. 782 */ 783 if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 && 784 ipsec4_in_reject(m, NULL)) { 785 ipsecstat.in_polvio++; 786 goto bad; 787 } 788#endif 789 790 /* 791 * Switch out to protocol's input routine. 
792 */ 793#if IFA_STATS 794 if (ia && ip) 795 ia->ia_ifa.ifa_data.ifad_inbytes += ip->ip_len; 796#endif 797 ipstat.ips_delivered++; 798 { 799 int off = hlen, nh = ip->ip_p; 800 801 (*inetsw[ip_protox[nh]].pr_input)(m, off, nh); 802 return; 803 } 804bad: 805 m_freem(m); 806 return; 807 808badcsum: 809 ipstat.ips_badsum++; 810 m_freem(m); 811} 812 813/* 814 * Take incoming datagram fragment and try to 815 * reassemble it into whole datagram. If a chain for 816 * reassembly of this datagram already exists, then it 817 * is given as fp; otherwise have to make a chain. 818 */ 819struct mbuf * 820ip_reass(ipqe, fp) 821 struct ipqent *ipqe; 822 struct ipq *fp; 823{ 824 struct mbuf *m = ipqe->ipqe_m; 825 struct ipqent *nq, *p, *q; 826 struct ip *ip; 827 struct mbuf *t; 828 int hlen = ipqe->ipqe_ip->ip_hl << 2; 829 int i, next; 830 831 IPQ_LOCK_CHECK(); 832 833 /* 834 * Presence of header sizes in mbufs 835 * would confuse code below. 836 */ 837 m->m_data += hlen; 838 m->m_len -= hlen; 839 840 /* 841 * If first fragment to arrive, create a reassembly queue. 842 */ 843 if (fp == 0) { 844 /* 845 * Enforce upper bound on number of fragmented packets 846 * for which we attempt reassembly; 847 * If maxfrag is 0, never accept fragments. 848 * If maxfrag is -1, accept all fragments without limitation. 849 */ 850 if (ip_maxfragpackets < 0) 851 ; 852 else if (ip_nfragpackets >= ip_maxfragpackets) 853 goto dropfrag; 854 ip_nfragpackets++; 855 MALLOC(fp, struct ipq *, sizeof (struct ipq), 856 M_FTABLE, M_NOWAIT); 857 if (fp == NULL) 858 goto dropfrag; 859 LIST_INSERT_HEAD(&ipq, fp, ipq_q); 860 fp->ipq_ttl = IPFRAGTTL; 861 fp->ipq_p = ipqe->ipqe_ip->ip_p; 862 fp->ipq_id = ipqe->ipqe_ip->ip_id; 863 TAILQ_INIT(&fp->ipq_fragq); 864 fp->ipq_src = ipqe->ipqe_ip->ip_src; 865 fp->ipq_dst = ipqe->ipqe_ip->ip_dst; 866 p = NULL; 867 goto insert; 868 } 869 870 /* 871 * Find a segment which begins after this one does. 
872 */ 873 for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL; 874 p = q, q = TAILQ_NEXT(q, ipqe_q)) 875 if (q->ipqe_ip->ip_off > ipqe->ipqe_ip->ip_off) 876 break; 877 878 /* 879 * If there is a preceding segment, it may provide some of 880 * our data already. If so, drop the data from the incoming 881 * segment. If it provides all of our data, drop us. 882 */ 883 if (p != NULL) { 884 i = p->ipqe_ip->ip_off + p->ipqe_ip->ip_len - 885 ipqe->ipqe_ip->ip_off; 886 if (i > 0) { 887 if (i >= ipqe->ipqe_ip->ip_len) 888 goto dropfrag; 889 m_adj(ipqe->ipqe_m, i); 890 ipqe->ipqe_ip->ip_off += i; 891 ipqe->ipqe_ip->ip_len -= i; 892 } 893 } 894 895 /* 896 * While we overlap succeeding segments trim them or, 897 * if they are completely covered, dequeue them. 898 */ 899 for (; q != NULL && ipqe->ipqe_ip->ip_off + ipqe->ipqe_ip->ip_len > 900 q->ipqe_ip->ip_off; q = nq) { 901 i = (ipqe->ipqe_ip->ip_off + ipqe->ipqe_ip->ip_len) - 902 q->ipqe_ip->ip_off; 903 if (i < q->ipqe_ip->ip_len) { 904 q->ipqe_ip->ip_len -= i; 905 q->ipqe_ip->ip_off += i; 906 m_adj(q->ipqe_m, i); 907 break; 908 } 909 nq = TAILQ_NEXT(q, ipqe_q); 910 m_freem(q->ipqe_m); 911 TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q); 912 pool_put(&ipqent_pool, q); 913 } 914 915insert: 916 /* 917 * Stick new segment in its place; 918 * check for complete reassembly. 919 */ 920 if (p == NULL) { 921 TAILQ_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q); 922 } else { 923 TAILQ_INSERT_AFTER(&fp->ipq_fragq, p, ipqe, ipqe_q); 924 } 925 next = 0; 926 for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL; 927 p = q, q = TAILQ_NEXT(q, ipqe_q)) { 928 if (q->ipqe_ip->ip_off != next) 929 return (0); 930 next += q->ipqe_ip->ip_len; 931 } 932 if (p->ipqe_mff) 933 return (0); 934 935 /* 936 * Reassembly is complete. Check for a bogus message size and 937 * concatenate fragments. 
938 */ 939 q = TAILQ_FIRST(&fp->ipq_fragq); 940 ip = q->ipqe_ip; 941 if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) { 942 ipstat.ips_toolong++; 943 ip_freef(fp); 944 return (0); 945 } 946 m = q->ipqe_m; 947 t = m->m_next; 948 m->m_next = 0; 949 m_cat(m, t); 950 nq = TAILQ_NEXT(q, ipqe_q); 951 pool_put(&ipqent_pool, q); 952 for (q = nq; q != NULL; q = nq) { 953 t = q->ipqe_m; 954 nq = TAILQ_NEXT(q, ipqe_q); 955 pool_put(&ipqent_pool, q); 956 m_cat(m, t); 957 } 958 959 /* 960 * Create header for new ip packet by 961 * modifying header of first packet; 962 * dequeue and discard fragment reassembly header. 963 * Make header visible. 964 */ 965 ip->ip_len = next; 966 ip->ip_src = fp->ipq_src; 967 ip->ip_dst = fp->ipq_dst; 968 LIST_REMOVE(fp, ipq_q); 969 FREE(fp, M_FTABLE); 970 ip_nfragpackets--; 971 m->m_len += (ip->ip_hl << 2); 972 m->m_data -= (ip->ip_hl << 2); 973 /* some debugging cruft by sklower, below, will go away soon */ 974 if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */ 975 int plen = 0; 976 for (t = m; t; t = t->m_next) 977 plen += t->m_len; 978 m->m_pkthdr.len = plen; 979 } 980 return (m); 981 982dropfrag: 983 ipstat.ips_fragdropped++; 984 m_freem(m); 985 pool_put(&ipqent_pool, ipqe); 986 return (0); 987} 988 989/* 990 * Free a fragment reassembly header and all 991 * associated datagrams. 992 */ 993void 994ip_freef(fp) 995 struct ipq *fp; 996{ 997 struct ipqent *q, *p; 998 999 IPQ_LOCK_CHECK(); 1000 1001 for (q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL; q = p) { 1002 p = TAILQ_NEXT(q, ipqe_q); 1003 m_freem(q->ipqe_m); 1004 TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q); 1005 pool_put(&ipqent_pool, q); 1006 } 1007 LIST_REMOVE(fp, ipq_q); 1008 FREE(fp, M_FTABLE); 1009 ip_nfragpackets--; 1010} 1011 1012/* 1013 * IP timer processing; 1014 * if a timer expires on a reassembly 1015 * queue, discard it. 
1016 */ 1017void 1018ip_slowtimo() 1019{ 1020 struct ipq *fp, *nfp; 1021 int s = splsoftnet(); 1022 1023 IPQ_LOCK(); 1024 for (fp = LIST_FIRST(&ipq); fp != NULL; fp = nfp) { 1025 nfp = LIST_NEXT(fp, ipq_q); 1026 if (--fp->ipq_ttl == 0) { 1027 ipstat.ips_fragtimeout++; 1028 ip_freef(fp); 1029 } 1030 } 1031 /* 1032 * If we are over the maximum number of fragments 1033 * (due to the limit being lowered), drain off 1034 * enough to get down to the new limit. 1035 */ 1036 if (ip_maxfragpackets < 0) 1037 ; 1038 else { 1039 while (ip_nfragpackets > ip_maxfragpackets && LIST_FIRST(&ipq)) 1040 ip_freef(LIST_FIRST(&ipq)); 1041 } 1042 IPQ_UNLOCK(); 1043#ifdef GATEWAY 1044 ipflow_slowtimo(); 1045#endif 1046 splx(s); 1047} 1048 1049/* 1050 * Drain off all datagram fragments. 1051 */ 1052void 1053ip_drain() 1054{ 1055 1056 /* 1057 * We may be called from a device's interrupt context. If 1058 * the ipq is already busy, just bail out now. 1059 */ 1060 if (ipq_lock_try() == 0) 1061 return; 1062 1063 while (LIST_FIRST(&ipq) != NULL) { 1064 ipstat.ips_fragdropped++; 1065 ip_freef(LIST_FIRST(&ipq)); 1066 } 1067 1068 IPQ_UNLOCK(); 1069} 1070 1071/* 1072 * Do option processing on a datagram, 1073 * possibly discarding it if bad options are encountered, 1074 * or forwarding it if source-routed. 1075 * Returns 1 if packet has been forwarded/freed, 1076 * 0 if the packet should be processed further. 
1077 */ 1078int 1079ip_dooptions(m) 1080 struct mbuf *m; 1081{ 1082 struct ip *ip = mtod(m, struct ip *); 1083 u_char *cp, *cp0; 1084 struct ip_timestamp *ipt; 1085 struct in_ifaddr *ia; 1086 int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0; 1087 struct in_addr dst; 1088 n_time ntime; 1089 1090 dst = ip->ip_dst; 1091 cp = (u_char *)(ip + 1); 1092 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 1093 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1094 opt = cp[IPOPT_OPTVAL]; 1095 if (opt == IPOPT_EOL) 1096 break; 1097 if (opt == IPOPT_NOP) 1098 optlen = 1; 1099 else { 1100 if (cnt < IPOPT_OLEN + sizeof(*cp)) { 1101 code = &cp[IPOPT_OLEN] - (u_char *)ip; 1102 goto bad; 1103 } 1104 optlen = cp[IPOPT_OLEN]; 1105 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) { 1106 code = &cp[IPOPT_OLEN] - (u_char *)ip; 1107 goto bad; 1108 } 1109 } 1110 switch (opt) { 1111 1112 default: 1113 break; 1114 1115 /* 1116 * Source routing with record. 1117 * Find interface with current destination address. 1118 * If none on this machine then drop if strictly routed, 1119 * or do nothing if loosely routed. 1120 * Record interface address and bring up next address 1121 * component. If strictly routed make sure next 1122 * address is on directly accessible net. 1123 */ 1124 case IPOPT_LSRR: 1125 case IPOPT_SSRR: 1126 if (ip_allowsrcrt == 0) { 1127 type = ICMP_UNREACH; 1128 code = ICMP_UNREACH_NET_PROHIB; 1129 goto bad; 1130 } 1131 if (optlen < IPOPT_OFFSET + sizeof(*cp)) { 1132 code = &cp[IPOPT_OLEN] - (u_char *)ip; 1133 goto bad; 1134 } 1135 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) { 1136 code = &cp[IPOPT_OFFSET] - (u_char *)ip; 1137 goto bad; 1138 } 1139 ipaddr.sin_addr = ip->ip_dst; 1140 ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr))); 1141 if (ia == 0) { 1142 if (opt == IPOPT_SSRR) { 1143 type = ICMP_UNREACH; 1144 code = ICMP_UNREACH_SRCFAIL; 1145 goto bad; 1146 } 1147 /* 1148 * Loose routing, and not at next destination 1149 * yet; nothing to do except forward. 
1150 */ 1151 break; 1152 } 1153 off--; /* 0 origin */ 1154 if ((off + sizeof(struct in_addr)) > optlen) { 1155 /* 1156 * End of source route. Should be for us. 1157 */ 1158 save_rte(cp, ip->ip_src); 1159 break; 1160 } 1161 /* 1162 * locate outgoing interface 1163 */ 1164 bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr, 1165 sizeof(ipaddr.sin_addr)); 1166 if (opt == IPOPT_SSRR) 1167 ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr))); 1168 else 1169 ia = ip_rtaddr(ipaddr.sin_addr); 1170 if (ia == 0) { 1171 type = ICMP_UNREACH; 1172 code = ICMP_UNREACH_SRCFAIL; 1173 goto bad; 1174 } 1175 ip->ip_dst = ipaddr.sin_addr; 1176 bcopy((caddr_t)&ia->ia_addr.sin_addr, 1177 (caddr_t)(cp + off), sizeof(struct in_addr)); 1178 cp[IPOPT_OFFSET] += sizeof(struct in_addr); 1179 /* 1180 * Let ip_intr's mcast routing check handle mcast pkts 1181 */ 1182 forward = !IN_MULTICAST(ip->ip_dst.s_addr); 1183 break; 1184 1185 case IPOPT_RR: 1186 if (optlen < IPOPT_OFFSET + sizeof(*cp)) { 1187 code = &cp[IPOPT_OLEN] - (u_char *)ip; 1188 goto bad; 1189 } 1190 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) { 1191 code = &cp[IPOPT_OFFSET] - (u_char *)ip; 1192 goto bad; 1193 } 1194 /* 1195 * If no space remains, ignore. 1196 */ 1197 off--; /* 0 origin */ 1198 if ((off + sizeof(struct in_addr)) > optlen) 1199 break; 1200 bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr, 1201 sizeof(ipaddr.sin_addr)); 1202 /* 1203 * locate outgoing interface; if we're the destination, 1204 * use the incoming interface (should be same). 
1205 */ 1206 if ((ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)))) 1207 == NULL && 1208 (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) { 1209 type = ICMP_UNREACH; 1210 code = ICMP_UNREACH_HOST; 1211 goto bad; 1212 } 1213 bcopy((caddr_t)&ia->ia_addr.sin_addr, 1214 (caddr_t)(cp + off), sizeof(struct in_addr)); 1215 cp[IPOPT_OFFSET] += sizeof(struct in_addr); 1216 break; 1217 1218 case IPOPT_TS: 1219 code = cp - (u_char *)ip; 1220 ipt = (struct ip_timestamp *)cp; 1221 if (ipt->ipt_len < 4 || ipt->ipt_len > 40) { 1222 code = (u_char *)&ipt->ipt_len - (u_char *)ip; 1223 goto bad; 1224 } 1225 if (ipt->ipt_ptr < 5) { 1226 code = (u_char *)&ipt->ipt_ptr - (u_char *)ip; 1227 goto bad; 1228 } 1229 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (int32_t)) { 1230 if (++ipt->ipt_oflw == 0) { 1231 code = (u_char *)&ipt->ipt_ptr - 1232 (u_char *)ip; 1233 goto bad; 1234 } 1235 break; 1236 } 1237 cp0 = (cp + ipt->ipt_ptr - 1); 1238 switch (ipt->ipt_flg) { 1239 1240 case IPOPT_TS_TSONLY: 1241 break; 1242 1243 case IPOPT_TS_TSANDADDR: 1244 if (ipt->ipt_ptr - 1 + sizeof(n_time) + 1245 sizeof(struct in_addr) > ipt->ipt_len) { 1246 code = (u_char *)&ipt->ipt_ptr - 1247 (u_char *)ip; 1248 goto bad; 1249 } 1250 ipaddr.sin_addr = dst; 1251 ia = ifatoia(ifaof_ifpforaddr(sintosa(&ipaddr), 1252 m->m_pkthdr.rcvif)); 1253 if (ia == 0) 1254 continue; 1255 bcopy(&ia->ia_addr.sin_addr, 1256 cp0, sizeof(struct in_addr)); 1257 ipt->ipt_ptr += sizeof(struct in_addr); 1258 break; 1259 1260 case IPOPT_TS_PRESPEC: 1261 if (ipt->ipt_ptr - 1 + sizeof(n_time) + 1262 sizeof(struct in_addr) > ipt->ipt_len) { 1263 code = (u_char *)&ipt->ipt_ptr - 1264 (u_char *)ip; 1265 goto bad; 1266 } 1267 bcopy(cp0, &ipaddr.sin_addr, 1268 sizeof(struct in_addr)); 1269 if (ifatoia(ifa_ifwithaddr(sintosa(&ipaddr))) 1270 == NULL) 1271 continue; 1272 ipt->ipt_ptr += sizeof(struct in_addr); 1273 break; 1274 1275 default: 1276 /* XXX can't take &ipt->ipt_flg */ 1277 code = (u_char *)&ipt->ipt_ptr - 1278 (u_char *)ip + 1; 1279 goto bad; 
1280 } 1281 ntime = iptime(); 1282 cp0 = (u_char *) &ntime; /* XXX grumble, GCC... */ 1283 bcopy(cp0, (caddr_t)cp + ipt->ipt_ptr - 1, 1284 sizeof(n_time)); 1285 ipt->ipt_ptr += sizeof(n_time); 1286 } 1287 } 1288 if (forward) { 1289 if (ip_forwsrcrt == 0) { 1290 type = ICMP_UNREACH; 1291 code = ICMP_UNREACH_SRCFAIL; 1292 goto bad; 1293 } 1294 ip_forward(m, 1); 1295 return (1); 1296 } 1297 return (0); 1298bad: 1299 icmp_error(m, type, code, 0, 0); 1300 ipstat.ips_badoptions++; 1301 return (1); 1302} 1303 1304/* 1305 * Given address of next destination (final or next hop), 1306 * return internet address info of interface to be used to get there. 1307 */ 1308struct in_ifaddr * 1309ip_rtaddr(dst) 1310 struct in_addr dst; 1311{ 1312 struct sockaddr_in *sin; 1313 1314 sin = satosin(&ipforward_rt.ro_dst); 1315 1316 if (ipforward_rt.ro_rt == 0 || !in_hosteq(dst, sin->sin_addr)) { 1317 if (ipforward_rt.ro_rt) { 1318 RTFREE(ipforward_rt.ro_rt); 1319 ipforward_rt.ro_rt = 0; 1320 } 1321 sin->sin_family = AF_INET; 1322 sin->sin_len = sizeof(*sin); 1323 sin->sin_addr = dst; 1324 1325 rtalloc(&ipforward_rt); 1326 } 1327 if (ipforward_rt.ro_rt == 0) 1328 return ((struct in_ifaddr *)0); 1329 return (ifatoia(ipforward_rt.ro_rt->rt_ifa)); 1330} 1331 1332/* 1333 * Save incoming source route for use in replies, 1334 * to be picked up later by ip_srcroute if the receiver is interested. 1335 */ 1336void 1337save_rte(option, dst) 1338 u_char *option; 1339 struct in_addr dst; 1340{ 1341 unsigned olen; 1342 1343 olen = option[IPOPT_OLEN]; 1344#ifdef DIAGNOSTIC 1345 if (ipprintfs) 1346 printf("save_rte: olen %d\n", olen); 1347#endif /* 0 */ 1348 if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst))) 1349 return; 1350 bcopy((caddr_t)option, (caddr_t)ip_srcrt.srcopt, olen); 1351 ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr); 1352 ip_srcrt.dst = dst; 1353} 1354 1355/* 1356 * Retrieve incoming source route for use in replies, 1357 * in the same form used by setsockopt. 
1358 * The first hop is placed before the options, will be removed later. 1359 */ 1360struct mbuf * 1361ip_srcroute() 1362{ 1363 struct in_addr *p, *q; 1364 struct mbuf *m; 1365 1366 if (ip_nhops == 0) 1367 return ((struct mbuf *)0); 1368 m = m_get(M_DONTWAIT, MT_SOOPTS); 1369 if (m == 0) 1370 return ((struct mbuf *)0); 1371 1372#define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt)) 1373 1374 /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */ 1375 m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) + 1376 OPTSIZ; 1377#ifdef DIAGNOSTIC 1378 if (ipprintfs) 1379 printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len); 1380#endif 1381 1382 /* 1383 * First save first hop for return route 1384 */ 1385 p = &ip_srcrt.route[ip_nhops - 1]; 1386 *(mtod(m, struct in_addr *)) = *p--; 1387#ifdef DIAGNOSTIC 1388 if (ipprintfs) 1389 printf(" hops %x", ntohl(mtod(m, struct in_addr *)->s_addr)); 1390#endif 1391 1392 /* 1393 * Copy option fields and padding (nop) to mbuf. 1394 */ 1395 ip_srcrt.nop = IPOPT_NOP; 1396 ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF; 1397 bcopy((caddr_t)&ip_srcrt.nop, 1398 mtod(m, caddr_t) + sizeof(struct in_addr), OPTSIZ); 1399 q = (struct in_addr *)(mtod(m, caddr_t) + 1400 sizeof(struct in_addr) + OPTSIZ); 1401#undef OPTSIZ 1402 /* 1403 * Record return path as an IP source route, 1404 * reversing the path (pointers are now aligned). 1405 */ 1406 while (p >= ip_srcrt.route) { 1407#ifdef DIAGNOSTIC 1408 if (ipprintfs) 1409 printf(" %x", ntohl(q->s_addr)); 1410#endif 1411 *q++ = *p--; 1412 } 1413 /* 1414 * Last hop goes to final destination. 1415 */ 1416 *q = ip_srcrt.dst; 1417#ifdef DIAGNOSTIC 1418 if (ipprintfs) 1419 printf(" %x\n", ntohl(q->s_addr)); 1420#endif 1421 return (m); 1422} 1423 1424/* 1425 * Strip out IP options, at higher 1426 * level protocol in the kernel. 1427 * Second argument is buffer to which options 1428 * will be moved, and return value is their length. 
* XXX should be deleted; last arg currently ignored.
 */
void
ip_stripoptions(m, mopt)
	struct mbuf *m;
	struct mbuf *mopt;
{
	int i;
	struct ip *ip = mtod(m, struct ip *);
	caddr_t opts;
	int olen;

	/*
	 * Slide everything in this mbuf that follows the options down
	 * over them, then shrink the recorded lengths and reset the
	 * header length to that of an option-less header.
	 * mopt is not referenced.
	 */
	olen = (ip->ip_hl << 2) - sizeof (struct ip);
	opts = (caddr_t)(ip + 1);
	i = m->m_len - (sizeof (struct ip) + olen);
	bcopy(opts + olen, opts, (unsigned)i);
	m->m_len -= olen;
	if (m->m_flags & M_PKTHDR)
		m->m_pkthdr.len -= olen;
	ip->ip_len -= olen;
	ip->ip_hl = sizeof (struct ip) >> 2;
}

/*
 * Table of PRC_NCMDS errno values; a 0 entry means no error value
 * is listed for that slot.
 * NOTE(review): presumably indexed by PRC_* command code -- confirm
 * against the users of this table.
 */
const int inetctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		0,		0,
	ENOPROTOOPT
};

/*
 * Forward a packet.  If some error occurs return the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
 *
 * If not forwarding, just drop the packet.  This could be confusing
 * if ipforwarding was zero but some routing protocol was advancing
 * us as a gateway to somewhere.  However, we must let the routing
 * protocol deal with that.
 *
 * The srcrt parameter indicates whether the packet is being forwarded
 * via a source route.
 */
void
ip_forward(m, srcrt)
	struct mbuf *m;
	int srcrt;
{
	struct ip *ip = mtod(m, struct ip *);
	struct sockaddr_in *sin;
	struct rtentry *rt;
	int error, type = 0, code = 0;
	struct mbuf *mcopy;
	n_long dest;
	struct ifnet *destifp;
#ifdef IPSEC
	struct ifnet dummyifp;
#endif

	/*
	 * Clear any in-bound checksum flags for this packet.
	 */
	m->m_pkthdr.csum_flags = 0;

	dest = 0;
#ifdef DIAGNOSTIC
	if (ipprintfs)
		printf("forward: src %2.2x dst %2.2x ttl %x\n",
		    ntohl(ip->ip_src.s_addr),
		    ntohl(ip->ip_dst.s_addr), ip->ip_ttl);
#endif
	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
		ipstat.ips_cantforward++;
		m_freem(m);
		return;
	}
	if (ip->ip_ttl <= IPTTLDEC) {
		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
		return;
	}
	ip->ip_ttl -= IPTTLDEC;

	/*
	 * Reuse the route held in ipforward_rt when it is for the same
	 * destination; otherwise release it and look up a new one.
	 */
	sin = satosin(&ipforward_rt.ro_dst);
	if ((rt = ipforward_rt.ro_rt) == NULL ||
	    !in_hosteq(ip->ip_dst, sin->sin_addr)) {
		if (ipforward_rt.ro_rt != NULL) {
			RTFREE(ipforward_rt.ro_rt);
			ipforward_rt.ro_rt = NULL;
		}
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(struct sockaddr_in);
		sin->sin_addr = ip->ip_dst;

		rtalloc(&ipforward_rt);
		if (ipforward_rt.ro_rt == NULL) {
			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
			return;
		}
		rt = ipforward_rt.ro_rt;
	}

	/*
	 * Save at most 68 bytes of the packet in case
	 * we need to generate an ICMP message to the src.
	 * Pullup to avoid sharing mbuf cluster between m and mcopy.
	 */
	mcopy = m_copym(m, 0, imin((int)ip->ip_len, 68), M_DONTWAIT);
	if (mcopy)
		mcopy = m_pullup(mcopy, ip->ip_hl << 2);

	/*
	 * If forwarding packet using same interface that it came in on,
	 * perhaps should send a redirect to sender to shortcut a hop.
	 * Only send redirect if source is sending directly to us,
	 * and if packet was not source routed (or has any options).
	 * Also, don't send redirect if forwarding using a default route
	 * or a route modified by a redirect.
	 */
	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
	    !in_nullhost(satosin(rt_key(rt))->sin_addr) &&
	    ipsendredirects && !srcrt) {
		if (rt->rt_ifa &&
		    (ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_subnetmask) ==
		    ifatoia(rt->rt_ifa)->ia_subnet) {
			if (rt->rt_flags & RTF_GATEWAY)
				dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
			else
				dest = ip->ip_dst.s_addr;
			/*
			 * Router requirements says to only send host
			 * redirects.
			 */
			type = ICMP_REDIRECT;
			code = ICMP_REDIRECT_HOST;
#ifdef DIAGNOSTIC
			if (ipprintfs)
				printf("redirect (%d) to %x\n", code,
				    (u_int32_t)dest);
#endif
		}
	}

#ifdef IPSEC
	/* Don't lookup socket in forwarding case */
	(void)ipsec_setsocket(m, NULL);
#endif
	error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
	    (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)), 0);
	if (error)
		ipstat.ips_cantforward++;
	else {
		ipstat.ips_forward++;
		if (type)
			ipstat.ips_redirectsent++;
		else {
			/* Forwarded and no redirect owed: finished. */
			if (mcopy) {
#ifdef GATEWAY
				if (mcopy->m_flags & M_CANFASTFWD)
					ipflow_create(&ipforward_rt, mcopy);
#endif
				m_freem(mcopy);
			}
			return;
		}
	}
	/* Without the saved copy there is nothing to build an ICMP from. */
	if (mcopy == NULL)
		return;
	destifp = NULL;

	switch (error) {

	case 0:				/* forwarded, but need redirect */
		/* type, code set above */
		break;

	case ENETUNREACH:		/* shouldn't happen, checked above */
	case EHOSTUNREACH:
	case ENETDOWN:
	case EHOSTDOWN:
	default:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_HOST;
		break;

	case EMSGSIZE:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_NEEDFRAG;
#ifndef IPSEC
		if (ipforward_rt.ro_rt)
			destifp = ipforward_rt.ro_rt->rt_ifp;
#else
		/*
		 * If the packet is routed over IPsec tunnel, tell the
		 * originator the tunnel MTU.
		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
		 * XXX quickhack!!!
		 */
		if (ipforward_rt.ro_rt) {
			struct secpolicy *sp;
			int ipsecerror;
			size_t ipsechdr;
			struct route *ro;

			sp = ipsec4_getpolicybyaddr(mcopy,
			    IPSEC_DIR_OUTBOUND,
			    IP_FORWARDING,
			    &ipsecerror);

			if (sp == NULL)
				destifp = ipforward_rt.ro_rt->rt_ifp;
			else {
				/* count IPsec header size */
				ipsechdr = ipsec4_hdrsiz(mcopy,
				    IPSEC_DIR_OUTBOUND,
				    NULL);

				/*
				 * find the correct route for outer IPv4
				 * header, compute tunnel MTU.
				 *
				 * XXX BUG ALERT
				 * The "dummyifp" code relies upon the fact
				 * that icmp_error() touches only ifp->if_mtu.
				 */
				/*XXX*/
				destifp = NULL;
				if (sp->req != NULL
				 && sp->req->sav != NULL
				 && sp->req->sav->sah != NULL) {
					ro = &sp->req->sav->sah->sa_route;
					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
						dummyifp.if_mtu =
						    ro->ro_rt->rt_rmx.rmx_mtu ?
						    ro->ro_rt->rt_rmx.rmx_mtu :
						    ro->ro_rt->rt_ifp->if_mtu;
						dummyifp.if_mtu -= ipsechdr;
						destifp = &dummyifp;
					}
				}

				key_freesp(sp);
			}
		}
#endif /*IPSEC*/
		ipstat.ips_cantfrag++;
		break;

	case ENOBUFS:
#if 1
		/*
		 * a router should not generate ICMP_SOURCEQUENCH as
		 * required in RFC1812 Requirements for IP Version 4 Routers.
		 * source quench could be a big problem under DoS attacks,
		 * or if the underlying interface is rate-limited.
		 */
		/* mcopy is known non-NULL here (checked before the switch). */
		m_freem(mcopy);
		return;
#else
		type = ICMP_SOURCEQUENCH;
		code = 0;
		break;
#endif
	}
	icmp_error(mcopy, type, code, dest, destifp);
}

/*
 * Build the control-message chain selected by the socket options and
 * inpcb flags of inp.  Each message successfully created by
 * sbcreatecontrol() is stored through mp, and mp is then advanced to
 * that mbuf's m_next so the next message is appended after it.
 * ip supplies the destination address; m supplies the receive interface.
 */
void
ip_savecontrol(inp, mp, ip, m)
	struct inpcb *inp;
	struct mbuf **mp;
	struct ip *ip;
	struct mbuf *m;
{

	if (inp->inp_socket->so_options & SO_TIMESTAMP) {
		struct timeval tv;

		microtime(&tv);
		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
		    SCM_TIMESTAMP, SOL_SOCKET);
		if (*mp)
			mp = &(*mp)->m_next;
	}
	if (inp->inp_flags & INP_RECVDSTADDR) {
		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
#ifdef notyet
	/*
	 * XXX
	 * Moving these out of udp_input() made them even more broken
	 * than they already were.
	 *	- fenner@parc.xerox.com
	 */
	/* options were tossed already */
	if (inp->inp_flags & INP_RECVOPTS) {
		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
	/* ip_srcroute doesn't do what we want here, need to fix */
	if (inp->inp_flags & INP_RECVRETOPTS) {
		*mp = sbcreatecontrol((caddr_t) ip_srcroute(),
		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
#endif
	if (inp->inp_flags & INP_RECVIF) {
		struct sockaddr_dl sdl;

		/* Header-only sockaddr_dl: index set, no name/addr/selector. */
		sdl.sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]);
		sdl.sdl_family = AF_LINK;
		sdl.sdl_index = m->m_pkthdr.rcvif ?
		    m->m_pkthdr.rcvif->if_index : 0;
		sdl.sdl_nlen = sdl.sdl_alen = sdl.sdl_slen = 0;
		*mp = sbcreatecontrol((caddr_t) &sdl, sdl.sdl_len,
		    IP_RECVIF, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
}

/*
 * sysctl handler for the IPCTL_* variables.
 *
 * name/namelen select the variable; oldp/oldlenp and newp/newlen are
 * passed through to sysctl_int()/sysctl_rdint().  Returns ENOTDIR when
 * namelen is not 1, EOPNOTSUPP for an unhandled name, EINVAL when a
 * port-range value fails validation (the previous value is restored),
 * and otherwise whatever the sysctl helper returned.
 */
int
ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
{
	extern int subnetsarelocal, hostzeroisbroadcast;

	int error, old;

	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case IPCTL_FORWARDING:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &ipforwarding));
	case IPCTL_SENDREDIRECTS:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &ipsendredirects));
	case IPCTL_DEFTTL:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_defttl));
#ifdef notyet
	case IPCTL_DEFMTU:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_mtu));
#endif
	case IPCTL_FORWSRCRT:
		/* Don't allow this to change in a secure environment.  */
		if (securelevel > 0)
			return (sysctl_rdint(oldp, oldlenp, newp,
			    ip_forwsrcrt));
		else
			return (sysctl_int(oldp, oldlenp, newp, newlen,
			    &ip_forwsrcrt));
	case IPCTL_DIRECTEDBCAST:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &ip_directedbcast));
	case IPCTL_ALLOWSRCRT:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &ip_allowsrcrt));
	case IPCTL_SUBNETSARELOCAL:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &subnetsarelocal));
	case IPCTL_MTUDISC:
		error = sysctl_int(oldp, oldlenp, newp, newlen,
		    &ip_mtudisc);
		/* Keep the timer queue's existence in step with the flag. */
		if (ip_mtudisc != 0 && ip_mtudisc_timeout_q == NULL) {
			ip_mtudisc_timeout_q =
			    rt_timer_queue_create(ip_mtudisc_timeout);
		} else if (ip_mtudisc == 0 && ip_mtudisc_timeout_q != NULL) {
			rt_timer_queue_destroy(ip_mtudisc_timeout_q, TRUE);
			ip_mtudisc_timeout_q = NULL;
		}
		return error;
	case IPCTL_ANONPORTMIN:
		/* Validate after the write; roll back if out of range. */
		old = anonportmin;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &anonportmin);
		if (anonportmin >= anonportmax || anonportmin < 0
		    || anonportmin > 65535
#ifndef IPNOPRIVPORTS
		    || anonportmin < IPPORT_RESERVED
#endif
		    ) {
			anonportmin = old;
			return (EINVAL);
		}
		return (error);
	case IPCTL_ANONPORTMAX:
		old = anonportmax;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &anonportmax);
		if (anonportmin >= anonportmax || anonportmax < 0
		    || anonportmax > 65535
#ifndef IPNOPRIVPORTS
		    || anonportmax < IPPORT_RESERVED
#endif
		    ) {
			anonportmax = old;
			return (EINVAL);
		}
		return (error);
	case IPCTL_MTUDISCTIMEOUT:
		error = sysctl_int(oldp, oldlenp, newp, newlen,
		   &ip_mtudisc_timeout);
		if (ip_mtudisc_timeout_q != NULL)
			rt_timer_queue_change(ip_mtudisc_timeout_q,
					      ip_mtudisc_timeout);
		return (error);
#ifdef GATEWAY
	case IPCTL_MAXFLOWS:
	    {
		int s;

		error = sysctl_int(oldp, oldlenp, newp, newlen,
		   &ip_maxflows);
		s = splsoftnet();
		ipflow_reap(0);
		splx(s);
		return (error);
	    }
#endif
	case IPCTL_HOSTZEROBROADCAST:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &hostzeroisbroadcast));
#if NGIF > 0
	case IPCTL_GIF_TTL:
		return(sysctl_int(oldp, oldlenp, newp, newlen,
				  &ip_gif_ttl));
#endif

#if NGRE > 0
	case IPCTL_GRE_TTL:
		return(sysctl_int(oldp, oldlenp, newp, newlen,
				  &ip_gre_ttl));
#endif

#ifndef IPNOPRIVPORTS
	case IPCTL_LOWPORTMIN:
		old = lowportmin;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &lowportmin);
		if (lowportmin >= lowportmax
		    || lowportmin > IPPORT_RESERVEDMAX
		    || lowportmin < IPPORT_RESERVEDMIN
		    ) {
			lowportmin = old;
			return (EINVAL);
		}
		return (error);
	case IPCTL_LOWPORTMAX:
		old = lowportmax;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &lowportmax);
		if (lowportmin >= lowportmax
		    || lowportmax > IPPORT_RESERVEDMAX
		    || lowportmax < IPPORT_RESERVEDMIN
		    ) {
			lowportmax = old;
			return (EINVAL);
		}
		return (error);
#endif

	case IPCTL_MAXFRAGPACKETS:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &ip_maxfragpackets));

	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}