ip_input.c revision 221131
/*-
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
28104477Ssam * 29104477Ssam * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 30104477Ssam */ 31104477Ssam 32104477Ssam#include <sys/cdefs.h> 33104477Ssam__FBSDID("$FreeBSD: head/sys/netinet/ip_input.c 221131 2011-04-27 19:32:27Z bz $"); 34104477Ssam 35104477Ssam#include "opt_bootp.h" 36104477Ssam#include "opt_ipfw.h" 37104477Ssam#include "opt_ipstealth.h" 38104477Ssam#include "opt_ipsec.h" 39104477Ssam#include "opt_route.h" 40104477Ssam 41104477Ssam#include <sys/param.h> 42104477Ssam#include <sys/systm.h> 43119418Sobrien#include <sys/mbuf.h> 44119418Sobrien#include <sys/malloc.h> 45119418Sobrien#include <sys/domain.h> 46104477Ssam#include <sys/protosw.h> 47120915Ssam#include <sys/socket.h> 48104477Ssam#include <sys/time.h> 49112124Ssam#include <sys/kernel.h> 50104477Ssam#include <sys/lock.h> 51104477Ssam#include <sys/rwlock.h> 52104477Ssam#include <sys/syslog.h> 53104477Ssam#include <sys/sysctl.h> 54104477Ssam 55104477Ssam#include <net/pfil.h> 56104477Ssam#include <net/if.h> 57129879Sphk#include <net/if_types.h> 58104477Ssam#include <net/if_var.h> 59104477Ssam#include <net/if_dl.h> 60104477Ssam#include <net/route.h> 61104477Ssam#include <net/netisr.h> 62104477Ssam#include <net/vnet.h> 63104477Ssam#include <net/flowtable.h> 64104477Ssam 65104477Ssam#include <netinet/in.h> 66104477Ssam#include <netinet/in_systm.h> 67104477Ssam#include <netinet/in_var.h> 68104477Ssam#include <netinet/ip.h> 69104477Ssam#include <netinet/in_pcb.h> 70104477Ssam#include <netinet/ip_var.h> 71104477Ssam#include <netinet/ip_fw.h> 72104477Ssam#include <netinet/ip_icmp.h> 73104477Ssam#include <netinet/ip_options.h> 74104477Ssam#include <machine/in_cksum.h> 75119280Simp#include <netinet/ip_carp.h> 76119280Simp#ifdef IPSEC 77112124Ssam#include <netinet/ip_ipsec.h> 78112124Ssam#endif /* IPSEC */ 79112124Ssam 80112124Ssam#include <sys/socketvar.h> 81104477Ssam 82104477Ssam#include <security/mac/mac_framework.h> 83104477Ssam 84104477Ssam#ifdef CTASSERT 85104477SsamCTASSERT(sizeof(struct ip) == 20); 
86104477Ssam#endif 87104477Ssam 88104477Ssamstruct rwlock in_ifaddr_lock; 89104477SsamRW_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock"); 90104477Ssam 91104477SsamVNET_DEFINE(int, rsvp_on); 92104477Ssam 93104477SsamVNET_DEFINE(int, ipforwarding); 94104477SsamSYSCTL_VNET_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW, 95104477Ssam &VNET_NAME(ipforwarding), 0, 96104477Ssam "Enable IP forwarding between interfaces"); 97104477Ssam 98104477Ssamstatic VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */ 99104477Ssam#define V_ipsendredirects VNET(ipsendredirects) 100104477SsamSYSCTL_VNET_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW, 101104477Ssam &VNET_NAME(ipsendredirects), 0, 102104477Ssam "Enable sending IP redirects"); 103104477Ssam 104104477Ssamstatic VNET_DEFINE(int, ip_keepfaith); 105104477Ssam#define V_ip_keepfaith VNET(ip_keepfaith) 106104477SsamSYSCTL_VNET_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW, 107104477Ssam &VNET_NAME(ip_keepfaith), 0, 108104477Ssam "Enable packet capture for FAITH IPv4->IPv6 translater daemon"); 109104477Ssam 110104477Ssamstatic VNET_DEFINE(int, ip_sendsourcequench); 111104477Ssam#define V_ip_sendsourcequench VNET(ip_sendsourcequench) 112104477SsamSYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW, 113104477Ssam &VNET_NAME(ip_sendsourcequench), 0, 114104477Ssam "Enable the transmission of source quench packets"); 115104477Ssam 116104477SsamVNET_DEFINE(int, ip_do_randomid); 117105251SmarkmSYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW, 118112124Ssam &VNET_NAME(ip_do_randomid), 0, 119112124Ssam "Assign random ip_id values"); 120112124Ssam 121104477Ssam/* 122104477Ssam * XXX - Setting ip_checkinterface mostly implements the receive side of 123104477Ssam * the Strong ES model described in RFC 1122, but since the routing table 124104477Ssam * and transmit implementation do not implement the Strong ES model, 125104477Ssam * setting this to 1 results in an odd 
hybrid. 126104477Ssam * 127104477Ssam * XXX - ip_checkinterface currently must be disabled if you use ipnat 128104477Ssam * to translate the destination address to another local interface. 129104477Ssam * 130104477Ssam * XXX - ip_checkinterface must be disabled if you add IP aliases 131104477Ssam * to the loopback interface instead of the interface where the 132104477Ssam * packets for those addresses are received. 133104477Ssam */ 134104477Ssamstatic VNET_DEFINE(int, ip_checkinterface); 135104477Ssam#define V_ip_checkinterface VNET(ip_checkinterface) 136104477SsamSYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW, 137104477Ssam &VNET_NAME(ip_checkinterface), 0, 138104477Ssam "Verify packet arrives on correct interface"); 139104477Ssam 140104477SsamVNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */ 141104477Ssam 142104477Ssamstatic struct netisr_handler ip_nh = { 143104477Ssam .nh_name = "ip", 144104477Ssam .nh_handler = ip_input, 145104477Ssam .nh_proto = NETISR_IP, 146104477Ssam .nh_policy = NETISR_POLICY_FLOW, 147104477Ssam}; 148104477Ssam 149104477Ssamextern struct domain inetdomain; 150104477Ssamextern struct protosw inetsw[]; 151104477Ssamu_char ip_protox[IPPROTO_MAX]; 152104477SsamVNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ 153104477SsamVNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ 154131575SstefanfVNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ 155104477Ssam 156104477SsamVNET_DEFINE(struct ipstat, ipstat); 157104477SsamSYSCTL_VNET_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW, 158104477Ssam &VNET_NAME(ipstat), ipstat, 159104477Ssam "IP statistics (struct ipstat, netinet/ip_var.h)"); 160104477Ssam 161104477Ssamstatic VNET_DEFINE(uma_zone_t, ipq_zone); 162104477Ssamstatic VNET_DEFINE(TAILQ_HEAD(ipqhead, ipq), ipq[IPREASS_NHASH]); 163131575Sstefanfstatic struct mtx ipqlock; 164104477Ssam 165104477Ssam#define V_ipq_zone 
VNET(ipq_zone) 166104477Ssam#define V_ipq VNET(ipq) 167104477Ssam 168104477Ssam#define IPQ_LOCK() mtx_lock(&ipqlock) 169104477Ssam#define IPQ_UNLOCK() mtx_unlock(&ipqlock) 170104477Ssam#define IPQ_LOCK_INIT() mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF) 171104477Ssam#define IPQ_LOCK_ASSERT() mtx_assert(&ipqlock, MA_OWNED) 172109596Ssam 173109596Ssamstatic void maxnipq_update(void); 174104477Ssamstatic void ipq_zone_change(void *); 175104477Ssamstatic void ip_drain_locked(void); 176109596Ssam 177109596Ssamstatic VNET_DEFINE(int, maxnipq); /* Administrative limit on # reass queues. */ 178104477Ssamstatic VNET_DEFINE(int, nipq); /* Total # of reass queues */ 179104477Ssam#define V_maxnipq VNET(maxnipq) 180104477Ssam#define V_nipq VNET(nipq) 181109596SsamSYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD, 182109596Ssam &VNET_NAME(nipq), 0, 183112121Ssam "Current number of IPv4 fragment reassembly queue entries"); 184109596Ssam 185109596Ssamstatic VNET_DEFINE(int, maxfragsperpacket); 186104477Ssam#define V_maxfragsperpacket VNET(maxfragsperpacket) 187104477SsamSYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW, 188104477Ssam &VNET_NAME(maxfragsperpacket), 0, 189104477Ssam "Maximum number of IPv4 fragments allowed per packet"); 190104477Ssam 191104477Ssam#ifdef IPCTL_DEFMTU 192104477SsamSYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 193104477Ssam &ip_mtu, 0, "Default MTU"); 194104477Ssam#endif 195104477Ssam 196104477Ssam#ifdef IPSTEALTH 197104477SsamVNET_DEFINE(int, ipstealth); 198104477SsamSYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW, 199104477Ssam &VNET_NAME(ipstealth), 0, 200120915Ssam "IP stealth mode, no TTL decrementation on forwarding"); 201120915Ssam#endif 202104477Ssam 203104477Ssam#ifdef FLOWTABLE 204104477Ssamstatic VNET_DEFINE(int, ip_output_flowtable_size) = 2048; 205104477SsamVNET_DEFINE(struct flowtable *, ip_ft); 206104477Ssam#define V_ip_output_flowtable_size VNET(ip_output_flowtable_size) 
207104477Ssam 208104477SsamSYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN, 209104477Ssam &VNET_NAME(ip_output_flowtable_size), 2048, 210104477Ssam "number of entries in the per-cpu output flow caches"); 211104477Ssam#endif 212104477Ssam 213104477Ssamstatic void ip_freef(struct ipqhead *, struct ipq *); 214104477Ssam 215104477Ssam/* 216104477Ssam * Kernel module interface for updating ipstat. The argument is an index 217104477Ssam * into ipstat treated as an array of u_long. While this encodes the general 218104477Ssam * layout of ipstat into the caller, it doesn't encode its location, so that 219104477Ssam * future changes to add, for example, per-CPU stats support won't cause 220104477Ssam * binary compatibility problems for kernel modules. 221104477Ssam */ 222104477Ssamvoid 223104477Ssamkmod_ipstat_inc(int statnum) 224104477Ssam{ 225104477Ssam 226104477Ssam (*((u_long *)&V_ipstat + statnum))++; 227104477Ssam} 228120915Ssam 229120915Ssamvoid 230104477Ssamkmod_ipstat_dec(int statnum) 231104477Ssam{ 232104477Ssam 233104477Ssam (*((u_long *)&V_ipstat + statnum))--; 234104477Ssam} 235104477Ssam 236104477Ssamstatic int 237104477Ssamsysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) 238104477Ssam{ 239104477Ssam int error, qlimit; 240104477Ssam 241104477Ssam netisr_getqlimit(&ip_nh, &qlimit); 242104477Ssam error = sysctl_handle_int(oidp, &qlimit, 0, req); 243104477Ssam if (error || !req->newptr) 244104477Ssam return (error); 245104477Ssam if (qlimit < 1) 246112124Ssam return (EINVAL); 247112124Ssam return (netisr_setqlimit(&ip_nh, qlimit)); 248112124Ssam} 249112124SsamSYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, 250112124Ssam CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I", 251112124Ssam "Maximum size of the IP input queue"); 252104477Ssam 253104477Ssamstatic int 254104477Ssamsysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) 255104477Ssam{ 256104477Ssam u_int64_t qdrops_long; 257104477Ssam int 
error, qdrops; 258104477Ssam 259104477Ssam netisr_getqdrops(&ip_nh, &qdrops_long); 260104477Ssam qdrops = qdrops_long; 261104477Ssam error = sysctl_handle_int(oidp, &qdrops, 0, req); 262104477Ssam if (error || !req->newptr) 263104477Ssam return (error); 264104477Ssam if (qdrops != 0) 265104477Ssam return (EINVAL); 266104477Ssam netisr_clearqdrops(&ip_nh); 267104477Ssam return (0); 268104477Ssam} 269115748Ssam 270104477SsamSYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, 271104477Ssam CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I", 272104477Ssam "Number of packets dropped from the IP input queue"); 273104477Ssam 274120915Ssam/* 275104477Ssam * IP initialization: fill in IP protocol switch table. 276104477Ssam * All protocols not implemented in kernel go to raw IP protocol handler. 277104477Ssam */ 278120915Ssamvoid 279120915Ssamip_init(void) 280120915Ssam{ 281104477Ssam struct protosw *pr; 282104477Ssam int i; 283104477Ssam 284104477Ssam V_ip_id = time_second & 0xffff; 285104477Ssam 286104477Ssam TAILQ_INIT(&V_in_ifaddrhead); 287104477Ssam V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); 288104477Ssam 289104477Ssam /* Initialize IP reassembly queue. */ 290104477Ssam for (i = 0; i < IPREASS_NHASH; i++) 291120915Ssam TAILQ_INIT(&V_ipq[i]); 292120915Ssam V_maxnipq = nmbclusters / 32; 293120915Ssam V_maxfragsperpacket = 16; 294120915Ssam V_ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL, 295120915Ssam NULL, UMA_ALIGN_PTR, 0); 296120915Ssam maxnipq_update(); 297120915Ssam 298120915Ssam /* Initialize packet filter hooks. 
*/ 299104477Ssam V_inet_pfil_hook.ph_type = PFIL_TYPE_AF; 300104477Ssam V_inet_pfil_hook.ph_af = AF_INET; 301104477Ssam if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) 302104477Ssam printf("%s: WARNING: unable to register pfil hook, " 303104477Ssam "error %d\n", __func__, i); 304104477Ssam 305104477Ssam#ifdef FLOWTABLE 306104477Ssam if (TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size", 307104477Ssam &V_ip_output_flowtable_size)) { 308104477Ssam if (V_ip_output_flowtable_size < 256) 309104477Ssam V_ip_output_flowtable_size = 256; 310104477Ssam if (!powerof2(V_ip_output_flowtable_size)) { 311104477Ssam printf("flowtable must be power of 2 size\n"); 312104477Ssam V_ip_output_flowtable_size = 2048; 313104477Ssam } 314104477Ssam } else { 315104477Ssam /* 316104477Ssam * round up to the next power of 2 317104477Ssam */ 318104477Ssam V_ip_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1); 319104477Ssam } 320104477Ssam V_ip_ft = flowtable_alloc("ipv4", V_ip_output_flowtable_size, FL_PCPU); 321104477Ssam#endif 322104477Ssam 323104477Ssam /* Skip initialization of globals for non-default instances. */ 324127135Snjl if (!IS_DEFAULT_VNET(curvnet)) 325127135Snjl return; 326104477Ssam 327104477Ssam pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 328104477Ssam if (pr == NULL) 329104477Ssam panic("ip_init: PF_INET not found"); 330104477Ssam 331104477Ssam /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 332104477Ssam for (i = 0; i < IPPROTO_MAX; i++) 333104477Ssam ip_protox[i] = pr - inetsw; 334104477Ssam /* 335127135Snjl * Cycle through IP protocols and put them into the appropriate place 336127135Snjl * in ip_protox[]. 337104477Ssam */ 338104477Ssam for (pr = inetdomain.dom_protosw; 339104477Ssam pr < inetdomain.dom_protoswNPROTOSW; pr++) 340104477Ssam if (pr->pr_domain->dom_family == PF_INET && 341104477Ssam pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 342104477Ssam /* Be careful to only index valid IP protocols. 
*/ 343104477Ssam if (pr->pr_protocol < IPPROTO_MAX) 344104477Ssam ip_protox[pr->pr_protocol] = pr - inetsw; 345104477Ssam } 346104477Ssam 347104477Ssam EVENTHANDLER_REGISTER(nmbclusters_change, ipq_zone_change, 348104477Ssam NULL, EVENTHANDLER_PRI_ANY); 349104477Ssam 350104477Ssam /* Initialize various other remaining things. */ 351104477Ssam IPQ_LOCK_INIT(); 352104477Ssam netisr_register(&ip_nh); 353104477Ssam} 354104477Ssam 355104477Ssam#ifdef VIMAGE 356104477Ssamvoid 357104477Ssamip_destroy(void) 358104477Ssam{ 359104477Ssam 360117126Sscottl /* Cleanup in_ifaddr hash table; should be empty. */ 361117126Sscottl hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); 362104477Ssam 363104477Ssam IPQ_LOCK(); 364104477Ssam ip_drain_locked(); 365104477Ssam IPQ_UNLOCK(); 366104477Ssam 367104477Ssam uma_zdestroy(V_ipq_zone); 368104477Ssam} 369104477Ssam#endif 370104477Ssam 371104477Ssam/* 372104477Ssam * Ip input routine. Checksum and byte swap header. If fragmented 373104477Ssam * try to reassemble. Process options. Pass to next level. 374104477Ssam */ 375104477Ssamvoid 376104477Ssamip_input(struct mbuf *m) 377104477Ssam{ 378104477Ssam struct ip *ip = NULL; 379104477Ssam struct in_ifaddr *ia = NULL; 380104477Ssam struct ifaddr *ifa; 381104477Ssam struct ifnet *ifp; 382104477Ssam int checkif, hlen = 0; 383104477Ssam u_short sum; 384104477Ssam int dchg = 0; /* dest changed after fw */ 385104477Ssam struct in_addr odst; /* original dst address */ 386104477Ssam 387104477Ssam M_ASSERTPKTHDR(m); 388104477Ssam 389104477Ssam if (m->m_flags & M_FASTFWD_OURS) { 390123824Ssam /* 391123824Ssam * Firewall or NAT changed destination to local. 392123824Ssam * We expect ip_len and ip_off to be in host byte order. 393123824Ssam */ 394104477Ssam m->m_flags &= ~M_FASTFWD_OURS; 395104477Ssam /* Set up some basics that will be used later. 
*/ 396104477Ssam ip = mtod(m, struct ip *); 397104477Ssam hlen = ip->ip_hl << 2; 398104477Ssam goto ours; 399104477Ssam } 400104477Ssam 401104477Ssam IPSTAT_INC(ips_total); 402104477Ssam 403104477Ssam if (m->m_pkthdr.len < sizeof(struct ip)) 404104477Ssam goto tooshort; 405104477Ssam 406104477Ssam if (m->m_len < sizeof (struct ip) && 407104477Ssam (m = m_pullup(m, sizeof (struct ip))) == NULL) { 408104477Ssam IPSTAT_INC(ips_toosmall); 409104477Ssam return; 410104477Ssam } 411104477Ssam ip = mtod(m, struct ip *); 412104477Ssam 413104477Ssam if (ip->ip_v != IPVERSION) { 414120915Ssam IPSTAT_INC(ips_badvers); 415120915Ssam goto bad; 416120915Ssam } 417120915Ssam 418104477Ssam hlen = ip->ip_hl << 2; 419104477Ssam if (hlen < sizeof(struct ip)) { /* minimum header length */ 420104477Ssam IPSTAT_INC(ips_badhlen); 421104477Ssam goto bad; 422104477Ssam } 423104477Ssam if (hlen > m->m_len) { 424104477Ssam if ((m = m_pullup(m, hlen)) == NULL) { 425104477Ssam IPSTAT_INC(ips_badhlen); 426104477Ssam return; 427104477Ssam } 428104477Ssam ip = mtod(m, struct ip *); 429104477Ssam } 430104477Ssam 431104477Ssam /* 127/8 must not appear on wire - RFC1122 */ 432104477Ssam ifp = m->m_pkthdr.rcvif; 433104477Ssam if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 434104477Ssam (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 435104477Ssam if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 436104477Ssam IPSTAT_INC(ips_badaddr); 437104477Ssam goto bad; 438127135Snjl } 439127135Snjl } 440104477Ssam 441104477Ssam if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 442104477Ssam sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 443104477Ssam } else { 444104477Ssam if (hlen == sizeof(struct ip)) { 445104477Ssam sum = in_cksum_hdr(ip); 446104477Ssam } else { 447104477Ssam sum = in_cksum(m, hlen); 448115748Ssam } 449104477Ssam } 450104477Ssam if (sum) { 451104477Ssam IPSTAT_INC(ips_badsum); 452104477Ssam goto bad; 453104477Ssam } 454104477Ssam 455104477Ssam#ifdef ALTQ 
456104477Ssam if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 457104477Ssam /* packet is dropped by traffic conditioner */ 458104477Ssam return; 459104477Ssam#endif 460104477Ssam 461104477Ssam /* 462104477Ssam * Convert fields to host representation. 463104477Ssam */ 464104477Ssam ip->ip_len = ntohs(ip->ip_len); 465104477Ssam if (ip->ip_len < hlen) { 466104477Ssam IPSTAT_INC(ips_badlen); 467104477Ssam goto bad; 468104477Ssam } 469104477Ssam ip->ip_off = ntohs(ip->ip_off); 470104477Ssam 471104477Ssam /* 472104477Ssam * Check that the amount of data in the buffers 473104477Ssam * is as at least much as the IP header would have us expect. 474104477Ssam * Trim mbufs if longer than we expect. 475104477Ssam * Drop packet if shorter than we expect. 476104477Ssam */ 477104477Ssam if (m->m_pkthdr.len < ip->ip_len) { 478104477Ssamtooshort: 479104477Ssam IPSTAT_INC(ips_tooshort); 480104477Ssam goto bad; 481104477Ssam } 482104477Ssam if (m->m_pkthdr.len > ip->ip_len) { 483104477Ssam if (m->m_len == m->m_pkthdr.len) { 484104477Ssam m->m_len = ip->ip_len; 485104477Ssam m->m_pkthdr.len = ip->ip_len; 486104477Ssam } else 487104477Ssam m_adj(m, ip->ip_len - m->m_pkthdr.len); 488104477Ssam } 489120915Ssam#ifdef IPSEC 490120915Ssam /* 491120915Ssam * Bypass packet filtering for packets from a tunnel (gif). 492120915Ssam */ 493104477Ssam if (ip_ipsec_filtertunnel(m)) 494104477Ssam goto passin; 495104477Ssam#endif /* IPSEC */ 496104477Ssam 497104477Ssam /* 498104477Ssam * Run through list of hooks for input packets. 499104477Ssam * 500104477Ssam * NB: Beware of the destination address changing (e.g. 501104477Ssam * by NAT rewriting). When this happens, tell 502104477Ssam * ip_forward to do the right thing. 503104477Ssam */ 504104477Ssam 505104477Ssam /* Jump over all PFIL processing if hooks are not active. 
*/ 506104477Ssam if (!PFIL_HOOKED(&V_inet_pfil_hook)) 507104477Ssam goto passin; 508104477Ssam 509104477Ssam odst = ip->ip_dst; 510104477Ssam if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0) 511104477Ssam return; 512104477Ssam if (m == NULL) /* consumed by filter */ 513104477Ssam return; 514119137Ssam 515104477Ssam ip = mtod(m, struct ip *); 516104477Ssam dchg = (odst.s_addr != ip->ip_dst.s_addr); 517104477Ssam ifp = m->m_pkthdr.rcvif; 518104477Ssam 519104477Ssam#ifdef IPFIREWALL_FORWARD 520104477Ssam if (m->m_flags & M_FASTFWD_OURS) { 521104477Ssam m->m_flags &= ~M_FASTFWD_OURS; 522104477Ssam goto ours; 523104477Ssam } 524104477Ssam if ((dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL)) != 0) { 525104477Ssam /* 526104477Ssam * Directly ship the packet on. This allows forwarding 527104477Ssam * packets originally destined to us to some other directly 528104477Ssam * connected host. 529104477Ssam */ 530104477Ssam ip_forward(m, dchg); 531104477Ssam return; 532104477Ssam } 533104477Ssam#endif /* IPFIREWALL_FORWARD */ 534104477Ssam 535104477Ssampassin: 536104477Ssam /* 537104477Ssam * Process options and, if not destined for us, 538104477Ssam * ship it on. ip_dooptions returns 1 when an 539104477Ssam * error was detected (causing an icmp message 540104477Ssam * to be sent and the original packet to be freed). 541104477Ssam */ 542104477Ssam if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 543104477Ssam return; 544104477Ssam 545104477Ssam /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 546104477Ssam * matter if it is destined to another node, or whether it is 547104477Ssam * a multicast one, RSVP wants it! and prevents it from being forwarded 548104477Ssam * anywhere else. Also checks if the rsvp daemon is running before 549104477Ssam * grabbing the packet. 
550104477Ssam */ 551104477Ssam if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) 552115748Ssam goto ours; 553115748Ssam 554104477Ssam /* 555104477Ssam * Check our list of addresses, to see if the packet is for us. 556104477Ssam * If we don't have any addresses, assume any unicast packet 557104477Ssam * we receive might be for us (and let the upper layers deal 558115848Ssam * with it). 559115848Ssam */ 560115862Ssam if (TAILQ_EMPTY(&V_in_ifaddrhead) && 561115848Ssam (m->m_flags & (M_MCAST|M_BCAST)) == 0) 562104477Ssam goto ours; 563104477Ssam 564104477Ssam /* 565104477Ssam * Enable a consistency check between the destination address 566104477Ssam * and the arrival interface for a unicast packet (the RFC 1122 567104477Ssam * strong ES model) if IP forwarding is disabled and the packet 568104477Ssam * is not locally generated and the packet is not subject to 569104477Ssam * 'ipfw fwd'. 570104477Ssam * 571104477Ssam * XXX - Checking also should be disabled if the destination 572104477Ssam * address is ipnat'ed to a different interface. 573104477Ssam * 574104477Ssam * XXX - Checking is incompatible with IP aliases added 575104477Ssam * to the loopback interface instead of the interface where 576104477Ssam * the packets are received. 577104477Ssam * 578104477Ssam * XXX - This is the case for carp vhost IPs as well so we 579104477Ssam * insert a workaround. If the packet got here, we already 580104477Ssam * checked with carp_iamatch() and carp_forus(). 581104477Ssam */ 582104477Ssam checkif = V_ip_checkinterface && (V_ipforwarding == 0) && 583104477Ssam ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) && 584104477Ssam ifp->if_carp == NULL && (dchg == 0); 585104477Ssam 586104477Ssam /* 587104477Ssam * Check for exact addresses in the hash bucket. 
588104477Ssam */ 589104477Ssam /* IN_IFADDR_RLOCK(); */ 590104477Ssam LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 591104477Ssam /* 592104477Ssam * If the address matches, verify that the packet 593104477Ssam * arrived via the correct interface if checking is 594104477Ssam * enabled. 595104477Ssam */ 596104477Ssam if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 597104477Ssam (!checkif || ia->ia_ifp == ifp)) { 598104477Ssam ifa_ref(&ia->ia_ifa); 599104477Ssam /* IN_IFADDR_RUNLOCK(); */ 600104477Ssam goto ours; 601104477Ssam } 602104477Ssam } 603104477Ssam /* IN_IFADDR_RUNLOCK(); */ 604104477Ssam 605104477Ssam /* 606104477Ssam * Check for broadcast addresses. 607104477Ssam * 608104477Ssam * Only accept broadcast packets that arrive via the matching 609104477Ssam * interface. Reception of forwarded directed broadcasts would 610104477Ssam * be handled via ip_forward() and ether_output() with the loopback 611104477Ssam * into the stack for SIMPLEX interfaces handled by ether_output(). 
612104477Ssam */ 613104477Ssam if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { 614119690Sjhb IF_ADDR_LOCK(ifp); 615104477Ssam TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 616104477Ssam if (ifa->ifa_addr->sa_family != AF_INET) 617104477Ssam continue; 618104477Ssam ia = ifatoia(ifa); 619104477Ssam if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 620104477Ssam ip->ip_dst.s_addr) { 621104477Ssam ifa_ref(ifa); 622104477Ssam IF_ADDR_UNLOCK(ifp); 623104477Ssam goto ours; 624104477Ssam } 625104477Ssam if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr) { 626104477Ssam ifa_ref(ifa); 627104477Ssam IF_ADDR_UNLOCK(ifp); 628104477Ssam goto ours; 629104477Ssam } 630104477Ssam#ifdef BOOTP_COMPAT 631104477Ssam if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { 632104477Ssam ifa_ref(ifa); 633104477Ssam IF_ADDR_UNLOCK(ifp); 634104477Ssam goto ours; 635104477Ssam } 636104477Ssam#endif 637104477Ssam } 638104477Ssam IF_ADDR_UNLOCK(ifp); 639119690Sjhb ia = NULL; 640104477Ssam } 641104477Ssam /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */ 642104477Ssam if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { 643104477Ssam IPSTAT_INC(ips_cantforward); 644104477Ssam m_freem(m); 645104477Ssam return; 646104477Ssam } 647104477Ssam if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 648104477Ssam if (V_ip_mrouter) { 649104477Ssam /* 650104477Ssam * If we are acting as a multicast router, all 651104477Ssam * incoming multicast packets are passed to the 652104477Ssam * kernel-level multicast forwarding function. 653104477Ssam * The packet is returned (relatively) intact; if 654104477Ssam * ip_mforward() returns a non-zero value, the packet 655104477Ssam * must be discarded, else it may be accepted below. 
656104477Ssam */ 657104477Ssam if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { 658104477Ssam IPSTAT_INC(ips_cantforward); 659104477Ssam m_freem(m); 660104477Ssam return; 661104477Ssam } 662104477Ssam 663104477Ssam /* 664112124Ssam * The process-level routing daemon needs to receive 665112124Ssam * all multicast IGMP packets, whether or not this 666112124Ssam * host belongs to their destination groups. 667112124Ssam */ 668112124Ssam if (ip->ip_p == IPPROTO_IGMP) 669112124Ssam goto ours; 670112124Ssam IPSTAT_INC(ips_forward); 671112124Ssam } 672112124Ssam /* 673104477Ssam * Assume the packet is for us, to avoid prematurely taking 674104477Ssam * a lock on the in_multi hash. Protocols must perform 675104477Ssam * their own filtering and update statistics accordingly. 676104477Ssam */ 677104477Ssam goto ours; 678104477Ssam } 679104477Ssam if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 680104477Ssam goto ours; 681104477Ssam if (ip->ip_dst.s_addr == INADDR_ANY) 682104477Ssam goto ours; 683104477Ssam 684104477Ssam /* 685104477Ssam * FAITH(Firewall Aided Internet Translator) 686104477Ssam */ 687104477Ssam if (ifp && ifp->if_type == IFT_FAITH) { 688104477Ssam if (V_ip_keepfaith) { 689104477Ssam if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP) 690104477Ssam goto ours; 691104477Ssam } 692104477Ssam m_freem(m); 693104477Ssam return; 694104477Ssam } 695104477Ssam 696104477Ssam /* 697104477Ssam * Not for us; forward if possible and desirable. 
698104477Ssam */ 699104477Ssam if (V_ipforwarding == 0) { 700104477Ssam IPSTAT_INC(ips_cantforward); 701104477Ssam m_freem(m); 702104477Ssam } else { 703104477Ssam#ifdef IPSEC 704104477Ssam if (ip_ipsec_fwd(m)) 705104477Ssam goto bad; 706104477Ssam#endif /* IPSEC */ 707104477Ssam ip_forward(m, dchg); 708104477Ssam } 709104477Ssam return; 710104477Ssam 711104477Ssamours: 712104477Ssam#ifdef IPSTEALTH 713119137Ssam /* 714104477Ssam * IPSTEALTH: Process non-routing options only 715104477Ssam * if the packet is destined for us. 716104477Ssam */ 717104477Ssam if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) { 718104477Ssam if (ia != NULL) 719104477Ssam ifa_free(&ia->ia_ifa); 720104477Ssam return; 721104477Ssam } 722104477Ssam#endif /* IPSTEALTH */ 723104477Ssam 724104477Ssam /* Count the packet in the ip address stats */ 725104477Ssam if (ia != NULL) { 726104477Ssam ia->ia_ifa.if_ipackets++; 727104477Ssam ia->ia_ifa.if_ibytes += m->m_pkthdr.len; 728104477Ssam ifa_free(&ia->ia_ifa); 729104477Ssam } 730104477Ssam 731104477Ssam /* 732104477Ssam * Attempt reassembly; if it succeeds, proceed. 733104477Ssam * ip_reass() will return a different mbuf. 734104477Ssam */ 735104477Ssam if (ip->ip_off & (IP_MF | IP_OFFMASK)) { 736104477Ssam m = ip_reass(m); 737104477Ssam if (m == NULL) 738104477Ssam return; 739104477Ssam ip = mtod(m, struct ip *); 740104477Ssam /* Get the header length of the reassembled packet */ 741104477Ssam hlen = ip->ip_hl << 2; 742104477Ssam } 743104477Ssam 744104477Ssam /* 745104477Ssam * Further protocols expect the packet length to be w/o the 746104477Ssam * IP header. 747104477Ssam */ 748104477Ssam ip->ip_len -= hlen; 749104477Ssam 750104477Ssam#ifdef IPSEC 751104477Ssam /* 752104477Ssam * enforce IPsec policy checking if we are seeing last header. 753104477Ssam * note that we do not visit this with protocols with pcb layer 754104477Ssam * code - like udp/tcp/raw ip. 
755104477Ssam */ 756112124Ssam if (ip_ipsec_input(m)) 757112124Ssam goto bad; 758104477Ssam#endif /* IPSEC */ 759104477Ssam 760104477Ssam /* 761104477Ssam * Switch out to protocol's input routine. 762104477Ssam */ 763104477Ssam IPSTAT_INC(ips_delivered); 764104477Ssam 765104477Ssam (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen); 766112124Ssam return; 767112124Ssambad: 768104477Ssam m_freem(m); 769104477Ssam} 770104477Ssam 771104477Ssam/* 772104477Ssam * After maxnipq has been updated, propagate the change to UMA. The UMA zone 773104477Ssam * max has slightly different semantics than the sysctl, for historical 774104477Ssam * reasons. 775104477Ssam */ 776104477Ssamstatic void 777104477Ssammaxnipq_update(void) 778104477Ssam{ 779104477Ssam 780104477Ssam /* 781104477Ssam * -1 for unlimited allocation. 782104477Ssam */ 783104477Ssam if (V_maxnipq < 0) 784104477Ssam uma_zone_set_max(V_ipq_zone, 0); 785104477Ssam /* 786104477Ssam * Positive number for specific bound. 787104477Ssam */ 788104477Ssam if (V_maxnipq > 0) 789104477Ssam uma_zone_set_max(V_ipq_zone, V_maxnipq); 790104477Ssam /* 791104477Ssam * Zero specifies no further fragment queue allocation -- set the 792104477Ssam * bound very low, but rely on implementation elsewhere to actually 793104477Ssam * prevent allocation and reclaim current queues. 
794104477Ssam */ 795104477Ssam if (V_maxnipq == 0) 796104477Ssam uma_zone_set_max(V_ipq_zone, 1); 797104477Ssam} 798104477Ssam 799104477Ssamstatic void 800104477Ssamipq_zone_change(void *tag) 801104477Ssam{ 802104477Ssam 803104477Ssam if (V_maxnipq > 0 && V_maxnipq < (nmbclusters / 32)) { 804104477Ssam V_maxnipq = nmbclusters / 32; 805104477Ssam maxnipq_update(); 806104477Ssam } 807104477Ssam} 808104477Ssam 809104477Ssamstatic int 810104477Ssamsysctl_maxnipq(SYSCTL_HANDLER_ARGS) 811104477Ssam{ 812104477Ssam int error, i; 813104477Ssam 814104477Ssam i = V_maxnipq; 815104477Ssam error = sysctl_handle_int(oidp, &i, 0, req); 816104477Ssam if (error || !req->newptr) 817104477Ssam return (error); 818104477Ssam 819104477Ssam /* 820104477Ssam * XXXRW: Might be a good idea to sanity check the argument and place 821104477Ssam * an extreme upper bound. 822104477Ssam */ 823104477Ssam if (i < -1) 824104477Ssam return (EINVAL); 825104477Ssam V_maxnipq = i; 826104477Ssam maxnipq_update(); 827104477Ssam return (0); 828104477Ssam} 829104477Ssam 830104477SsamSYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLTYPE_INT|CTLFLAG_RW, 831104477Ssam NULL, 0, sysctl_maxnipq, "I", 832104477Ssam "Maximum number of IPv4 fragment reassembly queue entries"); 833104477Ssam 834104477Ssam/* 835104477Ssam * Take incoming datagram fragment and try to reassemble it into 836104477Ssam * whole datagram. If the argument is the first fragment or one 837104477Ssam * in between the function will return NULL and store the mbuf 838104477Ssam * in the fragment chain. If the argument is the last fragment 839104477Ssam * the packet will be reassembled and the pointer to the new 840104477Ssam * mbuf returned for further processing. Only m_tags attached 841104477Ssam * to the first packet/fragment are preserved. 842104477Ssam * The IP header is *NOT* adjusted out of iplen. 
843104477Ssam */ 844104477Ssamstruct mbuf * 845104477Ssamip_reass(struct mbuf *m) 846104477Ssam{ 847104477Ssam struct ip *ip; 848104477Ssam struct mbuf *p, *q, *nq, *t; 849104477Ssam struct ipq *fp = NULL; 850104477Ssam struct ipqhead *head; 851104477Ssam int i, hlen, next; 852104477Ssam u_int8_t ecn, ecn0; 853104477Ssam u_short hash; 854104477Ssam 855104477Ssam /* If maxnipq or maxfragsperpacket are 0, never accept fragments. */ 856104477Ssam if (V_maxnipq == 0 || V_maxfragsperpacket == 0) { 857104477Ssam IPSTAT_INC(ips_fragments); 858104477Ssam IPSTAT_INC(ips_fragdropped); 859104477Ssam m_freem(m); 860104477Ssam return (NULL); 861104477Ssam } 862104477Ssam 863104477Ssam ip = mtod(m, struct ip *); 864104477Ssam hlen = ip->ip_hl << 2; 865104477Ssam 866104477Ssam hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id); 867104477Ssam head = &V_ipq[hash]; 868104477Ssam IPQ_LOCK(); 869104477Ssam 870104477Ssam /* 871104477Ssam * Look for queue of fragments 872104477Ssam * of this datagram. 873104477Ssam */ 874104477Ssam TAILQ_FOREACH(fp, head, ipq_list) 875104477Ssam if (ip->ip_id == fp->ipq_id && 876104477Ssam ip->ip_src.s_addr == fp->ipq_src.s_addr && 877104477Ssam ip->ip_dst.s_addr == fp->ipq_dst.s_addr && 878104477Ssam#ifdef MAC 879104477Ssam mac_ipq_match(m, fp) && 880104477Ssam#endif 881104477Ssam ip->ip_p == fp->ipq_p) 882104477Ssam goto found; 883104477Ssam 884104477Ssam fp = NULL; 885104477Ssam 886104477Ssam /* 887104477Ssam * Attempt to trim the number of allocated fragment queues if it 888104477Ssam * exceeds the administrative limit. 
889104477Ssam */ 890104477Ssam if ((V_nipq > V_maxnipq) && (V_maxnipq > 0)) { 891104477Ssam /* 892104477Ssam * drop something from the tail of the current queue 893104477Ssam * before proceeding further 894104477Ssam */ 895104477Ssam struct ipq *q = TAILQ_LAST(head, ipqhead); 896104477Ssam if (q == NULL) { /* gak */ 897104477Ssam for (i = 0; i < IPREASS_NHASH; i++) { 898104477Ssam struct ipq *r = TAILQ_LAST(&V_ipq[i], ipqhead); 899104477Ssam if (r) { 900104477Ssam IPSTAT_ADD(ips_fragtimeout, 901120915Ssam r->ipq_nfrags); 902120915Ssam ip_freef(&V_ipq[i], r); 903120915Ssam break; 904120915Ssam } 905120915Ssam } 906120915Ssam } else { 907120915Ssam IPSTAT_ADD(ips_fragtimeout, q->ipq_nfrags); 908120915Ssam ip_freef(head, q); 909120915Ssam } 910120915Ssam } 911104477Ssam 912104477Ssamfound: 913104477Ssam /* 914104477Ssam * Adjust ip_len to not reflect header, 915104477Ssam * convert offset of this to bytes. 916104477Ssam */ 917104477Ssam ip->ip_len -= hlen; 918104477Ssam if (ip->ip_off & IP_MF) { 919104477Ssam /* 920104477Ssam * Make sure that fragments have a data length 921104477Ssam * that's a non-zero multiple of 8 bytes. 922104477Ssam */ 923104477Ssam if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) { 924104477Ssam IPSTAT_INC(ips_toosmall); /* XXX */ 925104477Ssam goto dropfrag; 926104477Ssam } 927104477Ssam m->m_flags |= M_FRAG; 928104477Ssam } else 929104477Ssam m->m_flags &= ~M_FRAG; 930104477Ssam ip->ip_off <<= 3; 931104477Ssam 932104477Ssam 933104477Ssam /* 934104477Ssam * Attempt reassembly; if it succeeds, proceed. 935104477Ssam * ip_reass() will return a different mbuf. 936104477Ssam */ 937104477Ssam IPSTAT_INC(ips_fragments); 938104477Ssam m->m_pkthdr.header = ip; 939104477Ssam 940104477Ssam /* Previous ip_reass() started here. */ 941104477Ssam /* 942104477Ssam * Presence of header sizes in mbufs 943104477Ssam * would confuse code below. 
944104477Ssam */ 945104477Ssam m->m_data += hlen; 946104477Ssam m->m_len -= hlen; 947104477Ssam 948104477Ssam /* 949104477Ssam * If first fragment to arrive, create a reassembly queue. 950104477Ssam */ 951104477Ssam if (fp == NULL) { 952104477Ssam fp = uma_zalloc(V_ipq_zone, M_NOWAIT); 953104477Ssam if (fp == NULL) 954104477Ssam goto dropfrag; 955104477Ssam#ifdef MAC 956104477Ssam if (mac_ipq_init(fp, M_NOWAIT) != 0) { 957104477Ssam uma_zfree(V_ipq_zone, fp); 958104477Ssam fp = NULL; 959104477Ssam goto dropfrag; 960104477Ssam } 961104477Ssam mac_ipq_create(m, fp); 962104477Ssam#endif 963104477Ssam TAILQ_INSERT_HEAD(head, fp, ipq_list); 964104477Ssam V_nipq++; 965104477Ssam fp->ipq_nfrags = 1; 966104477Ssam fp->ipq_ttl = IPFRAGTTL; 967104477Ssam fp->ipq_p = ip->ip_p; 968104477Ssam fp->ipq_id = ip->ip_id; 969104477Ssam fp->ipq_src = ip->ip_src; 970104477Ssam fp->ipq_dst = ip->ip_dst; 971104477Ssam fp->ipq_frags = m; 972104477Ssam m->m_nextpkt = NULL; 973104477Ssam goto done; 974104477Ssam } else { 975104477Ssam fp->ipq_nfrags++; 976104477Ssam#ifdef MAC 977104477Ssam mac_ipq_update(m, fp); 978104477Ssam#endif 979104477Ssam } 980104477Ssam 981104477Ssam#define GETIP(m) ((struct ip*)((m)->m_pkthdr.header)) 982104477Ssam 983104477Ssam /* 984104477Ssam * Handle ECN by comparing this segment with the first one; 985104477Ssam * if CE is set, do not lose CE. 986104477Ssam * drop if CE and not-ECT are mixed for the same packet. 
987104477Ssam */ 988104477Ssam ecn = ip->ip_tos & IPTOS_ECN_MASK; 989104477Ssam ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK; 990104477Ssam if (ecn == IPTOS_ECN_CE) { 991104477Ssam if (ecn0 == IPTOS_ECN_NOTECT) 992104477Ssam goto dropfrag; 993104477Ssam if (ecn0 != IPTOS_ECN_CE) 994104477Ssam GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE; 995104477Ssam } 996104477Ssam if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) 997104477Ssam goto dropfrag; 998104477Ssam 999104477Ssam /* 1000104477Ssam * Find a segment which begins after this one does. 1001104477Ssam */ 1002104477Ssam for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) 1003104477Ssam if (GETIP(q)->ip_off > ip->ip_off) 1004104477Ssam break; 1005104477Ssam 1006104477Ssam /* 1007104477Ssam * If there is a preceding segment, it may provide some of 1008104477Ssam * our data already. If so, drop the data from the incoming 1009104477Ssam * segment. If it provides all of our data, drop us, otherwise 1010104477Ssam * stick new segment in the proper place. 1011104477Ssam * 1012104477Ssam * If some of the data is dropped from the preceding 1013104477Ssam * segment, then it's checksum is invalidated. 1014104477Ssam */ 1015104477Ssam if (p) { 1016104477Ssam i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off; 1017104477Ssam if (i > 0) { 1018104477Ssam if (i >= ip->ip_len) 1019104477Ssam goto dropfrag; 1020104477Ssam m_adj(m, i); 1021104477Ssam m->m_pkthdr.csum_flags = 0; 1022104477Ssam ip->ip_off += i; 1023104477Ssam ip->ip_len -= i; 1024104477Ssam } 1025104477Ssam m->m_nextpkt = p->m_nextpkt; 1026104477Ssam p->m_nextpkt = m; 1027104477Ssam } else { 1028104477Ssam m->m_nextpkt = fp->ipq_frags; 1029104477Ssam fp->ipq_frags = m; 1030104477Ssam } 1031104477Ssam 1032104477Ssam /* 1033104477Ssam * While we overlap succeeding segments trim them or, 1034104477Ssam * if they are completely covered, dequeue them. 
1035104477Ssam */ 1036104477Ssam for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off; 1037104477Ssam q = nq) { 1038104477Ssam i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off; 1039104477Ssam if (i < GETIP(q)->ip_len) { 1040104477Ssam GETIP(q)->ip_len -= i; 1041104477Ssam GETIP(q)->ip_off += i; 1042104477Ssam m_adj(q, i); 1043104477Ssam q->m_pkthdr.csum_flags = 0; 1044104477Ssam break; 1045104477Ssam } 1046104477Ssam nq = q->m_nextpkt; 1047104477Ssam m->m_nextpkt = nq; 1048104477Ssam IPSTAT_INC(ips_fragdropped); 1049104477Ssam fp->ipq_nfrags--; 1050104477Ssam m_freem(q); 1051104477Ssam } 1052104477Ssam 1053104477Ssam /* 1054104477Ssam * Check for complete reassembly and perform frag per packet 1055104477Ssam * limiting. 1056104477Ssam * 1057104477Ssam * Frag limiting is performed here so that the nth frag has 1058104477Ssam * a chance to complete the packet before we drop the packet. 1059104477Ssam * As a result, n+1 frags are actually allowed per packet, but 1060104477Ssam * only n will ever be stored. (n = maxfragsperpacket.) 1061104477Ssam * 1062104477Ssam */ 1063104477Ssam next = 0; 1064104477Ssam for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) { 1065104477Ssam if (GETIP(q)->ip_off != next) { 1066104477Ssam if (fp->ipq_nfrags > V_maxfragsperpacket) { 1067104477Ssam IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags); 1068104477Ssam ip_freef(head, fp); 1069104477Ssam } 1070104477Ssam goto done; 1071104477Ssam } 1072104477Ssam next += GETIP(q)->ip_len; 1073104477Ssam } 1074104477Ssam /* Make sure the last packet didn't have the IP_MF flag */ 1075104477Ssam if (p->m_flags & M_FRAG) { 1076104477Ssam if (fp->ipq_nfrags > V_maxfragsperpacket) { 1077104477Ssam IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags); 1078104477Ssam ip_freef(head, fp); 1079104477Ssam } 1080104477Ssam goto done; 1081104477Ssam } 1082104477Ssam 1083104477Ssam /* 1084104477Ssam * Reassembly is complete. Make sure the packet is a sane size. 
1085104477Ssam */ 1086104477Ssam q = fp->ipq_frags; 1087104477Ssam ip = GETIP(q); 1088104477Ssam if (next + (ip->ip_hl << 2) > IP_MAXPACKET) { 1089104477Ssam IPSTAT_INC(ips_toolong); 1090104477Ssam IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags); 1091104477Ssam ip_freef(head, fp); 1092120915Ssam goto done; 1093120915Ssam } 1094120915Ssam 1095120915Ssam /* 1096120915Ssam * Concatenate fragments. 1097120915Ssam */ 1098120915Ssam m = q; 1099120915Ssam t = m->m_next; 1100120915Ssam m->m_next = NULL; 1101120915Ssam m_cat(m, t); 1102120915Ssam nq = q->m_nextpkt; 1103120915Ssam q->m_nextpkt = NULL; 1104104477Ssam for (q = nq; q != NULL; q = nq) { 1105104477Ssam nq = q->m_nextpkt; 1106104477Ssam q->m_nextpkt = NULL; 1107104477Ssam m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags; 1108104477Ssam m->m_pkthdr.csum_data += q->m_pkthdr.csum_data; 1109104477Ssam m_cat(m, q); 1110104477Ssam } 1111104477Ssam /* 1112104477Ssam * In order to do checksumming faster we do 'end-around carry' here 1113104477Ssam * (and not in for{} loop), though it implies we are not going to 1114104477Ssam * reassemble more than 64k fragments. 1115104477Ssam */ 1116104477Ssam m->m_pkthdr.csum_data = 1117104477Ssam (m->m_pkthdr.csum_data & 0xffff) + (m->m_pkthdr.csum_data >> 16); 1118104477Ssam#ifdef MAC 1119104477Ssam mac_ipq_reassemble(fp, m); 1120104477Ssam mac_ipq_destroy(fp); 1121104477Ssam#endif 1122104477Ssam 1123104477Ssam /* 1124104477Ssam * Create header for new ip packet by modifying header of first 1125104477Ssam * packet; dequeue and discard fragment reassembly header. 1126104477Ssam * Make header visible. 
1127104477Ssam */ 1128104477Ssam ip->ip_len = (ip->ip_hl << 2) + next; 1129104477Ssam ip->ip_src = fp->ipq_src; 1130104477Ssam ip->ip_dst = fp->ipq_dst; 1131120915Ssam TAILQ_REMOVE(head, fp, ipq_list); 1132120915Ssam V_nipq--; 1133120915Ssam uma_zfree(V_ipq_zone, fp); 1134120915Ssam m->m_len += (ip->ip_hl << 2); 1135120915Ssam m->m_data -= (ip->ip_hl << 2); 1136120915Ssam /* some debugging cruft by sklower, below, will go away soon */ 1137120915Ssam if (m->m_flags & M_PKTHDR) /* XXX this should be done elsewhere */ 1138120915Ssam m_fixhdr(m); 1139104477Ssam IPSTAT_INC(ips_reassembled); 1140104477Ssam IPQ_UNLOCK(); 1141104477Ssam return (m); 1142104477Ssam 1143104477Ssamdropfrag: 1144104477Ssam IPSTAT_INC(ips_fragdropped); 1145104477Ssam if (fp != NULL) 1146104477Ssam fp->ipq_nfrags--; 1147104477Ssam m_freem(m); 1148104477Ssamdone: 1149104477Ssam IPQ_UNLOCK(); 1150104477Ssam return (NULL); 1151104477Ssam 1152104477Ssam#undef GETIP 1153104477Ssam} 1154104477Ssam 1155104477Ssam/* 1156104477Ssam * Free a fragment reassembly header and all 1157104477Ssam * associated datagrams. 1158104477Ssam */ 1159104477Ssamstatic void 1160104477Ssamip_freef(struct ipqhead *fhp, struct ipq *fp) 1161104477Ssam{ 1162104477Ssam struct mbuf *q; 1163104477Ssam 1164104477Ssam IPQ_LOCK_ASSERT(); 1165104477Ssam 1166104477Ssam while (fp->ipq_frags) { 1167104477Ssam q = fp->ipq_frags; 1168104477Ssam fp->ipq_frags = q->m_nextpkt; 1169104477Ssam m_freem(q); 1170104477Ssam } 1171104477Ssam TAILQ_REMOVE(fhp, fp, ipq_list); 1172104477Ssam uma_zfree(V_ipq_zone, fp); 1173104477Ssam V_nipq--; 1174104477Ssam} 1175104477Ssam 1176104477Ssam/* 1177104477Ssam * IP timer processing; 1178104477Ssam * if a timer expires on a reassembly 1179104477Ssam * queue, discard it. 
1180104477Ssam */ 1181104477Ssamvoid 1182104477Ssamip_slowtimo(void) 1183104477Ssam{ 1184104477Ssam VNET_ITERATOR_DECL(vnet_iter); 1185104477Ssam struct ipq *fp; 1186104477Ssam int i; 1187104477Ssam 1188104477Ssam VNET_LIST_RLOCK_NOSLEEP(); 1189104477Ssam IPQ_LOCK(); 1190104477Ssam VNET_FOREACH(vnet_iter) { 1191104477Ssam CURVNET_SET(vnet_iter); 1192104477Ssam for (i = 0; i < IPREASS_NHASH; i++) { 1193104477Ssam for(fp = TAILQ_FIRST(&V_ipq[i]); fp;) { 1194104477Ssam struct ipq *fpp; 1195104477Ssam 1196104477Ssam fpp = fp; 1197104477Ssam fp = TAILQ_NEXT(fp, ipq_list); 1198104477Ssam if(--fpp->ipq_ttl == 0) { 1199104477Ssam IPSTAT_ADD(ips_fragtimeout, 1200104477Ssam fpp->ipq_nfrags); 1201104477Ssam ip_freef(&V_ipq[i], fpp); 1202104477Ssam } 1203104477Ssam } 1204104477Ssam } 1205104477Ssam /* 1206104477Ssam * If we are over the maximum number of fragments 1207104477Ssam * (due to the limit being lowered), drain off 1208104477Ssam * enough to get down to the new limit. 1209104477Ssam */ 1210104477Ssam if (V_maxnipq >= 0 && V_nipq > V_maxnipq) { 1211104477Ssam for (i = 0; i < IPREASS_NHASH; i++) { 1212104477Ssam while (V_nipq > V_maxnipq && 1213104477Ssam !TAILQ_EMPTY(&V_ipq[i])) { 1214104477Ssam IPSTAT_ADD(ips_fragdropped, 1215104477Ssam TAILQ_FIRST(&V_ipq[i])->ipq_nfrags); 1216104477Ssam ip_freef(&V_ipq[i], 1217104477Ssam TAILQ_FIRST(&V_ipq[i])); 1218104477Ssam } 1219104477Ssam } 1220104477Ssam } 1221104477Ssam CURVNET_RESTORE(); 1222104477Ssam } 1223104477Ssam IPQ_UNLOCK(); 1224104477Ssam VNET_LIST_RUNLOCK_NOSLEEP(); 1225120915Ssam} 1226120915Ssam 1227120915Ssam/* 1228120915Ssam * Drain off all datagram fragments. 
1229120915Ssam */ 1230120915Ssamstatic void 1231120915Ssamip_drain_locked(void) 1232120915Ssam{ 1233120915Ssam int i; 1234120915Ssam 1235104477Ssam IPQ_LOCK_ASSERT(); 1236104477Ssam 1237104477Ssam for (i = 0; i < IPREASS_NHASH; i++) { 1238104477Ssam while(!TAILQ_EMPTY(&V_ipq[i])) { 1239104477Ssam IPSTAT_ADD(ips_fragdropped, 1240104477Ssam TAILQ_FIRST(&V_ipq[i])->ipq_nfrags); 1241104477Ssam ip_freef(&V_ipq[i], TAILQ_FIRST(&V_ipq[i])); 1242104477Ssam } 1243104477Ssam } 1244104477Ssam} 1245104477Ssam 1246104477Ssamvoid 1247104477Ssamip_drain(void) 1248104477Ssam{ 1249104477Ssam VNET_ITERATOR_DECL(vnet_iter); 1250104477Ssam 1251104477Ssam VNET_LIST_RLOCK_NOSLEEP(); 1252104477Ssam IPQ_LOCK(); 1253104477Ssam VNET_FOREACH(vnet_iter) { 1254104477Ssam CURVNET_SET(vnet_iter); 1255104477Ssam ip_drain_locked(); 1256104477Ssam CURVNET_RESTORE(); 1257104477Ssam } 1258104477Ssam IPQ_UNLOCK(); 1259104477Ssam VNET_LIST_RUNLOCK_NOSLEEP(); 1260104477Ssam in_rtqdrain(); 1261104477Ssam} 1262104477Ssam 1263104477Ssam/* 1264104477Ssam * The protocol to be inserted into ip_protox[] must be already registered 1265104477Ssam * in inetsw[], either statically or through pf_proto_register(). 1266104477Ssam */ 1267104477Ssamint 1268104477Ssamipproto_register(short ipproto) 1269104477Ssam{ 1270104477Ssam struct protosw *pr; 1271104477Ssam 1272104477Ssam /* Sanity checks. */ 1273104477Ssam if (ipproto <= 0 || ipproto >= IPPROTO_MAX) 1274104477Ssam return (EPROTONOSUPPORT); 1275104477Ssam 1276104477Ssam /* 1277104477Ssam * The protocol slot must not be occupied by another protocol 1278104477Ssam * already. An index pointing to IPPROTO_RAW is unused. 1279104477Ssam */ 1280104477Ssam pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 1281104477Ssam if (pr == NULL) 1282104477Ssam return (EPFNOSUPPORT); 1283104477Ssam if (ip_protox[ipproto] != pr - inetsw) /* IPPROTO_RAW */ 1284104477Ssam return (EEXIST); 1285104477Ssam 1286104477Ssam /* Find the protocol position in inetsw[] and set the index. 
*/ 1287104477Ssam for (pr = inetdomain.dom_protosw; 1288104477Ssam pr < inetdomain.dom_protoswNPROTOSW; pr++) { 1289104477Ssam if (pr->pr_domain->dom_family == PF_INET && 1290104477Ssam pr->pr_protocol && pr->pr_protocol == ipproto) { 1291104477Ssam ip_protox[pr->pr_protocol] = pr - inetsw; 1292104477Ssam return (0); 1293104477Ssam } 1294104477Ssam } 1295104477Ssam return (EPROTONOSUPPORT); 1296104477Ssam} 1297104477Ssam 1298104477Ssamint 1299104477Ssamipproto_unregister(short ipproto) 1300104477Ssam{ 1301104477Ssam struct protosw *pr; 1302104477Ssam 1303104477Ssam /* Sanity checks. */ 1304104477Ssam if (ipproto <= 0 || ipproto >= IPPROTO_MAX) 1305104477Ssam return (EPROTONOSUPPORT); 1306104477Ssam 1307104477Ssam /* Check if the protocol was indeed registered. */ 1308104477Ssam pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 1309104477Ssam if (pr == NULL) 1310104477Ssam return (EPFNOSUPPORT); 1311104477Ssam if (ip_protox[ipproto] == pr - inetsw) /* IPPROTO_RAW */ 1312104477Ssam return (ENOENT); 1313104477Ssam 1314104477Ssam /* Reset the protocol slot to IPPROTO_RAW. */ 1315104477Ssam ip_protox[ipproto] = pr - inetsw; 1316104477Ssam return (0); 1317104477Ssam} 1318104477Ssam 1319104477Ssam/* 1320104477Ssam * Given address of next destination (final or next hop), return (referenced) 1321104477Ssam * internet address info of interface to be used to get there. 
1322104477Ssam */ 1323104477Ssamstruct in_ifaddr * 1324104477Ssamip_rtaddr(struct in_addr dst, u_int fibnum) 1325104477Ssam{ 1326104477Ssam struct route sro; 1327104477Ssam struct sockaddr_in *sin; 1328104477Ssam struct in_ifaddr *ia; 1329104477Ssam 1330104477Ssam bzero(&sro, sizeof(sro)); 1331104477Ssam sin = (struct sockaddr_in *)&sro.ro_dst; 1332104477Ssam sin->sin_family = AF_INET; 1333104477Ssam sin->sin_len = sizeof(*sin); 1334104477Ssam sin->sin_addr = dst; 1335104477Ssam in_rtalloc_ign(&sro, 0, fibnum); 1336104477Ssam 1337104477Ssam if (sro.ro_rt == NULL) 1338104477Ssam return (NULL); 1339104477Ssam 1340104477Ssam ia = ifatoia(sro.ro_rt->rt_ifa); 1341104477Ssam ifa_ref(&ia->ia_ifa); 1342104477Ssam RTFREE(sro.ro_rt); 1343104477Ssam return (ia); 1344104477Ssam} 1345104477Ssam 1346104477Ssamu_char inetctlerrmap[PRC_NCMDS] = { 1347104477Ssam 0, 0, 0, 0, 1348104477Ssam 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, 1349104477Ssam EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, 1350104477Ssam EMSGSIZE, EHOSTUNREACH, 0, 0, 1351104477Ssam 0, 0, EHOSTUNREACH, 0, 1352104477Ssam ENOPROTOOPT, ECONNREFUSED 1353104477Ssam}; 1354104477Ssam 1355104477Ssam/* 1356104477Ssam * Forward a packet. If some error occurs return the sender 1357104477Ssam * an icmp packet. Note we can't always generate a meaningful 1358104477Ssam * icmp message because icmp doesn't have a large enough repertoire 1359104477Ssam * of codes and types. 1360104477Ssam * 1361104477Ssam * If not forwarding, just drop the packet. This could be confusing 1362104477Ssam * if ipforwarding was zero but some routing protocol was advancing 1363104477Ssam * us as a gateway to somewhere. However, we must let the routing 1364104477Ssam * protocol deal with that. 1365104477Ssam * 1366104477Ssam * The srcrt parameter indicates whether the packet is being forwarded 1367104477Ssam * via a source route. 
1368104477Ssam */ 1369104477Ssamvoid 1370104477Ssamip_forward(struct mbuf *m, int srcrt) 1371104477Ssam{ 1372104477Ssam struct ip *ip = mtod(m, struct ip *); 1373104477Ssam struct in_ifaddr *ia; 1374104477Ssam struct mbuf *mcopy; 1375104477Ssam struct in_addr dest; 1376104477Ssam struct route ro; 1377104477Ssam int error, type = 0, code = 0, mtu = 0; 1378104477Ssam 1379104477Ssam if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { 1380104477Ssam IPSTAT_INC(ips_cantforward); 1381104477Ssam m_freem(m); 1382104477Ssam return; 1383104477Ssam } 1384104477Ssam#ifdef IPSTEALTH 1385104477Ssam if (!V_ipstealth) { 1386104477Ssam#endif 1387104477Ssam if (ip->ip_ttl <= IPTTLDEC) { 1388104477Ssam icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 1389104477Ssam 0, 0); 1390104477Ssam return; 1391104477Ssam } 1392104477Ssam#ifdef IPSTEALTH 1393104477Ssam } 1394104477Ssam#endif 1395104477Ssam 1396104477Ssam ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m)); 1397104477Ssam#ifndef IPSEC 1398104477Ssam /* 1399104477Ssam * 'ia' may be NULL if there is no route for this destination. 1400104477Ssam * In case of IPsec, Don't discard it just yet, but pass it to 1401104477Ssam * ip_output in case of outgoing IPsec policy. 1402104477Ssam */ 1403104477Ssam if (!srcrt && ia == NULL) { 1404104477Ssam icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); 1405104477Ssam return; 1406104477Ssam } 1407104477Ssam#endif 1408104477Ssam 1409104477Ssam /* 1410104477Ssam * Save the IP header and at most 8 bytes of the payload, 1411104477Ssam * in case we need to generate an ICMP message to the src. 1412104477Ssam * 1413104477Ssam * XXX this can be optimized a lot by saving the data in a local 1414104477Ssam * buffer on the stack (72 bytes at most), and only allocating the 1415104477Ssam * mbuf if really necessary. 
The vast majority of the packets 1416104477Ssam * are forwarded without having to send an ICMP back (either 1417104477Ssam * because unnecessary, or because rate limited), so we are 1418104477Ssam * really we are wasting a lot of work here. 1419104477Ssam * 1420104477Ssam * We don't use m_copy() because it might return a reference 1421104477Ssam * to a shared cluster. Both this function and ip_output() 1422104477Ssam * assume exclusive access to the IP header in `m', so any 1423104477Ssam * data in a cluster may change before we reach icmp_error(). 1424104477Ssam */ 1425104477Ssam MGETHDR(mcopy, M_DONTWAIT, m->m_type); 1426104477Ssam if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) { 1427104477Ssam /* 1428104477Ssam * It's probably ok if the pkthdr dup fails (because 1429104477Ssam * the deep copy of the tag chain failed), but for now 1430104477Ssam * be conservative and just discard the copy since 1431104477Ssam * code below may some day want the tags. 1432104477Ssam */ 1433104477Ssam m_free(mcopy); 1434104477Ssam mcopy = NULL; 1435104477Ssam } 1436104477Ssam if (mcopy != NULL) { 1437104477Ssam mcopy->m_len = min(ip->ip_len, M_TRAILINGSPACE(mcopy)); 1438104477Ssam mcopy->m_pkthdr.len = mcopy->m_len; 1439104477Ssam m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); 1440104477Ssam } 1441104477Ssam 1442104477Ssam#ifdef IPSTEALTH 1443104477Ssam if (!V_ipstealth) { 1444104477Ssam#endif 1445104477Ssam ip->ip_ttl -= IPTTLDEC; 1446104477Ssam#ifdef IPSTEALTH 1447104477Ssam } 1448104477Ssam#endif 1449104477Ssam 1450120915Ssam /* 1451104477Ssam * If forwarding packet using same interface that it came in on, 1452104477Ssam * perhaps should send a redirect to sender to shortcut a hop. 1453104477Ssam * Only send redirect if source is sending directly to us, 1454104477Ssam * and if packet was not source routed (or has any options). 1455104477Ssam * Also, don't send redirect if forwarding using a default route 1456104477Ssam * or a route modified by a redirect. 
1457104477Ssam */ 1458104477Ssam dest.s_addr = 0; 1459104477Ssam if (!srcrt && V_ipsendredirects && 1460104477Ssam ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) { 1461104477Ssam struct sockaddr_in *sin; 1462104477Ssam struct rtentry *rt; 1463104477Ssam 1464104477Ssam bzero(&ro, sizeof(ro)); 1465104477Ssam sin = (struct sockaddr_in *)&ro.ro_dst; 1466104477Ssam sin->sin_family = AF_INET; 1467104477Ssam sin->sin_len = sizeof(*sin); 1468104477Ssam sin->sin_addr = ip->ip_dst; 1469104477Ssam in_rtalloc_ign(&ro, 0, M_GETFIB(m)); 1470104477Ssam 1471104477Ssam rt = ro.ro_rt; 1472104477Ssam 1473104477Ssam if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 && 1474104477Ssam satosin(rt_key(rt))->sin_addr.s_addr != 0) { 1475104477Ssam#define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa)) 1476104477Ssam u_long src = ntohl(ip->ip_src.s_addr); 1477104477Ssam 1478104477Ssam if (RTA(rt) && 1479104477Ssam (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) { 1480104477Ssam if (rt->rt_flags & RTF_GATEWAY) 1481104477Ssam dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr; 1482104477Ssam else 1483104477Ssam dest.s_addr = ip->ip_dst.s_addr; 1484104477Ssam /* Router requirements says to only send host redirects */ 1485104477Ssam type = ICMP_REDIRECT; 1486104477Ssam code = ICMP_REDIRECT_HOST; 1487104477Ssam } 1488104477Ssam } 1489104477Ssam if (rt) 1490104477Ssam RTFREE(rt); 1491104477Ssam } 1492104477Ssam 1493104477Ssam /* 1494104477Ssam * Try to cache the route MTU from ip_output so we can consider it for 1495104477Ssam * the ICMP_UNREACH_NEEDFRAG "Next-Hop MTU" field described in RFC1191. 
1496104477Ssam */ 1497104477Ssam bzero(&ro, sizeof(ro)); 1498104477Ssam 1499104477Ssam error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); 1500104477Ssam 1501104477Ssam if (error == EMSGSIZE && ro.ro_rt) 1502104477Ssam mtu = ro.ro_rt->rt_rmx.rmx_mtu; 1503104477Ssam if (ro.ro_rt) 1504104477Ssam RTFREE(ro.ro_rt); 1505104477Ssam 1506104477Ssam if (error) 1507104477Ssam IPSTAT_INC(ips_cantforward); 1508104477Ssam else { 1509104477Ssam IPSTAT_INC(ips_forward); 1510120915Ssam if (type) 1511104477Ssam IPSTAT_INC(ips_redirectsent); 1512104477Ssam else { 1513104477Ssam if (mcopy) 1514104477Ssam m_freem(mcopy); 1515104477Ssam if (ia != NULL) 1516104477Ssam ifa_free(&ia->ia_ifa); 1517104477Ssam return; 1518104477Ssam } 1519104477Ssam } 1520104477Ssam if (mcopy == NULL) { 1521104477Ssam if (ia != NULL) 1522104477Ssam ifa_free(&ia->ia_ifa); 1523104477Ssam return; 1524104477Ssam } 1525120915Ssam 1526120915Ssam switch (error) { 1527120915Ssam 1528120915Ssam case 0: /* forwarded, but need redirect */ 1529120915Ssam /* type, code set above */ 1530120915Ssam break; 1531120915Ssam 1532120915Ssam case ENETUNREACH: 1533104477Ssam case EHOSTUNREACH: 1534104477Ssam case ENETDOWN: 1535104477Ssam case EHOSTDOWN: 1536104477Ssam default: 1537120915Ssam type = ICMP_UNREACH; 1538120915Ssam code = ICMP_UNREACH_HOST; 1539120915Ssam break; 1540120915Ssam 1541120915Ssam case EMSGSIZE: 1542120915Ssam type = ICMP_UNREACH; 1543120915Ssam code = ICMP_UNREACH_NEEDFRAG; 1544120915Ssam 1545120915Ssam#ifdef IPSEC 1546120915Ssam /* 1547104477Ssam * If IPsec is configured for this path, 1548104477Ssam * override any possibly mtu value set by ip_output. 1549104477Ssam */ 1550104477Ssam mtu = ip_ipsec_mtu(mcopy, mtu); 1551104477Ssam#endif /* IPSEC */ 1552104477Ssam /* 1553104477Ssam * If the MTU was set before make sure we are below the 1554104477Ssam * interface MTU. 
1555104477Ssam * If the MTU wasn't set before use the interface mtu or 1556104477Ssam * fall back to the next smaller mtu step compared to the 1557104477Ssam * current packet size. 1558104477Ssam */ 1559104477Ssam if (mtu != 0) { 1560104477Ssam if (ia != NULL) 1561104477Ssam mtu = min(mtu, ia->ia_ifp->if_mtu); 1562104477Ssam } else { 1563104477Ssam if (ia != NULL) 1564104477Ssam mtu = ia->ia_ifp->if_mtu; 1565104477Ssam else 1566104477Ssam mtu = ip_next_mtu(ip->ip_len, 0); 1567104477Ssam } 1568104477Ssam IPSTAT_INC(ips_cantfrag); 1569104477Ssam break; 1570104477Ssam 1571104477Ssam case ENOBUFS: 1572104477Ssam /* 1573104477Ssam * A router should not generate ICMP_SOURCEQUENCH as 1574104477Ssam * required in RFC1812 Requirements for IP Version 4 Routers. 1575104477Ssam * Source quench could be a big problem under DoS attacks, 1576104477Ssam * or if the underlying interface is rate-limited. 1577104477Ssam * Those who need source quench packets may re-enable them 1578104477Ssam * via the net.inet.ip.sendsourcequench sysctl. 
1579104477Ssam */ 1580104477Ssam if (V_ip_sendsourcequench == 0) { 1581104477Ssam m_freem(mcopy); 1582104477Ssam if (ia != NULL) 1583104477Ssam ifa_free(&ia->ia_ifa); 1584104477Ssam return; 1585104477Ssam } else { 1586104477Ssam type = ICMP_SOURCEQUENCH; 1587104477Ssam code = 0; 1588104477Ssam } 1589104477Ssam break; 1590104477Ssam 1591104477Ssam case EACCES: /* ipfw denied packet */ 1592104477Ssam m_freem(mcopy); 1593104477Ssam if (ia != NULL) 1594104477Ssam ifa_free(&ia->ia_ifa); 1595104477Ssam return; 1596104477Ssam } 1597104477Ssam if (ia != NULL) 1598104477Ssam ifa_free(&ia->ia_ifa); 1599104477Ssam icmp_error(mcopy, type, code, dest.s_addr, mtu); 1600104477Ssam} 1601104477Ssam 1602104477Ssamvoid 1603104477Ssamip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, 1604104477Ssam struct mbuf *m) 1605104477Ssam{ 1606104477Ssam 1607104477Ssam if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) { 1608104477Ssam struct bintime bt; 1609104477Ssam 1610104477Ssam bintime(&bt); 1611104477Ssam if (inp->inp_socket->so_options & SO_BINTIME) { 1612104477Ssam *mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt), 1613104477Ssam SCM_BINTIME, SOL_SOCKET); 1614104477Ssam if (*mp) 1615104477Ssam mp = &(*mp)->m_next; 1616104477Ssam } 1617104477Ssam if (inp->inp_socket->so_options & SO_TIMESTAMP) { 1618104477Ssam struct timeval tv; 1619104477Ssam 1620104477Ssam bintime2timeval(&bt, &tv); 1621104477Ssam *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv), 1622104477Ssam SCM_TIMESTAMP, SOL_SOCKET); 1623104477Ssam if (*mp) 1624104477Ssam mp = &(*mp)->m_next; 1625104477Ssam } 1626104477Ssam } 1627104477Ssam if (inp->inp_flags & INP_RECVDSTADDR) { 1628104477Ssam *mp = sbcreatecontrol((caddr_t) &ip->ip_dst, 1629104477Ssam sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); 1630104477Ssam if (*mp) 1631104477Ssam mp = &(*mp)->m_next; 1632104477Ssam } 1633104477Ssam if (inp->inp_flags & INP_RECVTTL) { 1634104477Ssam *mp = sbcreatecontrol((caddr_t) &ip->ip_ttl, 1635104477Ssam 
sizeof(u_char), IP_RECVTTL, IPPROTO_IP); 1636104477Ssam if (*mp) 1637104477Ssam mp = &(*mp)->m_next; 1638104477Ssam } 1639104477Ssam#ifdef notyet 1640104477Ssam /* XXX 1641104477Ssam * Moving these out of udp_input() made them even more broken 1642104477Ssam * than they already were. 1643104477Ssam */ 1644104477Ssam /* options were tossed already */ 1645104477Ssam if (inp->inp_flags & INP_RECVOPTS) { 1646104477Ssam *mp = sbcreatecontrol((caddr_t) opts_deleted_above, 1647104477Ssam sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); 1648104477Ssam if (*mp) 1649104477Ssam mp = &(*mp)->m_next; 1650104477Ssam } 1651104477Ssam /* ip_srcroute doesn't do what we want here, need to fix */ 1652104477Ssam if (inp->inp_flags & INP_RECVRETOPTS) { 1653104477Ssam *mp = sbcreatecontrol((caddr_t) ip_srcroute(m), 1654104477Ssam sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); 1655104477Ssam if (*mp) 1656104477Ssam mp = &(*mp)->m_next; 1657104477Ssam } 1658104477Ssam#endif 1659104477Ssam if (inp->inp_flags & INP_RECVIF) { 1660104477Ssam struct ifnet *ifp; 1661104477Ssam struct sdlbuf { 1662104477Ssam struct sockaddr_dl sdl; 1663104477Ssam u_char pad[32]; 1664104477Ssam } sdlbuf; 1665104477Ssam struct sockaddr_dl *sdp; 1666104477Ssam struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 1667104477Ssam 1668104477Ssam if (((ifp = m->m_pkthdr.rcvif)) 1669104477Ssam && ( ifp->if_index && (ifp->if_index <= V_if_index))) { 1670104477Ssam sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; 1671104477Ssam /* 1672104477Ssam * Change our mind and don't try copy. 
1673104477Ssam */ 1674104477Ssam if ((sdp->sdl_family != AF_LINK) 1675104477Ssam || (sdp->sdl_len > sizeof(sdlbuf))) { 1676104477Ssam goto makedummy; 1677104477Ssam } 1678104477Ssam bcopy(sdp, sdl2, sdp->sdl_len); 1679104477Ssam } else { 1680104477Ssammakedummy: 1681104477Ssam sdl2->sdl_len 1682104477Ssam = offsetof(struct sockaddr_dl, sdl_data[0]); 1683104477Ssam sdl2->sdl_family = AF_LINK; 1684104477Ssam sdl2->sdl_index = 0; 1685104477Ssam sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 1686104477Ssam } 1687104477Ssam *mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len, 1688104477Ssam IP_RECVIF, IPPROTO_IP); 1689104477Ssam if (*mp) 1690104477Ssam mp = &(*mp)->m_next; 1691104477Ssam } 1692104477Ssam} 1693104477Ssam 1694104477Ssam/* 1695104477Ssam * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the 1696104477Ssam * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on 1697104477Ssam * locking. This code remains in ip_input.c as ip_mroute.c is optionally 1698104477Ssam * compiled. 1699104477Ssam */ 1700104477Ssamstatic VNET_DEFINE(int, ip_rsvp_on); 1701104477SsamVNET_DEFINE(struct socket *, ip_rsvpd); 1702104477Ssam 1703104477Ssam#define V_ip_rsvp_on VNET(ip_rsvp_on) 1704115748Ssam 1705104477Ssamint 1706104477Ssamip_rsvp_init(struct socket *so) 1707104477Ssam{ 1708104477Ssam 1709104477Ssam if (so->so_type != SOCK_RAW || 1710104477Ssam so->so_proto->pr_protocol != IPPROTO_RSVP) 1711104477Ssam return EOPNOTSUPP; 1712104477Ssam 1713104477Ssam if (V_ip_rsvpd != NULL) 1714104477Ssam return EADDRINUSE; 1715115748Ssam 1716104477Ssam V_ip_rsvpd = so; 1717104477Ssam /* 1718104477Ssam * This may seem silly, but we need to be sure we don't over-increment 1719104477Ssam * the RSVP counter, in case something slips up. 
1720104477Ssam */ 1721115748Ssam if (!V_ip_rsvp_on) { 1722104477Ssam V_ip_rsvp_on = 1; 1723104477Ssam V_rsvp_on++; 1724104477Ssam } 1725104477Ssam 1726104477Ssam return 0; 1727104477Ssam} 1728104477Ssam 1729104477Ssamint 1730104477Ssamip_rsvp_done(void) 1731104477Ssam{ 1732104477Ssam 1733104477Ssam V_ip_rsvpd = NULL; 1734104477Ssam /* 1735104477Ssam * This may seem silly, but we need to be sure we don't over-decrement 1736104477Ssam * the RSVP counter, in case something slips up. 1737104477Ssam */ 1738104477Ssam if (V_ip_rsvp_on) { 1739104477Ssam V_ip_rsvp_on = 0; 1740104477Ssam V_rsvp_on--; 1741104477Ssam } 1742104477Ssam return 0; 1743104477Ssam} 1744104477Ssam 1745104477Ssamvoid 1746104477Ssamrsvp_input(struct mbuf *m, int off) /* XXX must fixup manually */ 1747104477Ssam{ 1748104477Ssam 1749104477Ssam if (rsvp_input_p) { /* call the real one if loaded */ 1750104477Ssam rsvp_input_p(m, off); 1751104477Ssam return; 1752104477Ssam } 1753104477Ssam 1754104477Ssam /* Can still get packets with rsvp_on = 0 if there is a local member 1755104477Ssam * of the group to which the RSVP packet is addressed. But in this 1756104477Ssam * case we want to throw the packet away. 1757104477Ssam */ 1758104477Ssam 1759104477Ssam if (!V_rsvp_on) { 1760104477Ssam m_freem(m); 1761104477Ssam return; 1762104477Ssam } 1763104477Ssam 1764104477Ssam if (V_ip_rsvpd != NULL) { 1765104477Ssam rip_input(m, off); 1766104477Ssam return; 1767104477Ssam } 1768104477Ssam /* Drop the packet */ 1769104477Ssam m_freem(m); 1770111119Simp} 1771111119Simp