ip_fastfwd.c revision 133480
1296853Sdes/* 2124208Sdes * Copyright (c) 2003 Andre Oppermann, Internet Business Solutions AG 3124208Sdes * All rights reserved. 4255670Sdes * 5124208Sdes * Redistribution and use in source and binary forms, with or without 6255670Sdes * modification, are permitted provided that the following conditions 7124208Sdes * are met: 8255670Sdes * 1. Redistributions of source code must retain the above copyright 9295367Sdes * notice, this list of conditions and the following disclaimer. 10124208Sdes * 2. Redistributions in binary form must reproduce the above copyright 11262566Sdes * notice, this list of conditions and the following disclaimer in the 12262566Sdes * documentation and/or other materials provided with the distribution. 13262566Sdes * 3. The name of the author may not be used to endorse or promote 14262566Sdes * products derived from this software without specific prior written 15295367Sdes * permission. 16295367Sdes * 17295367Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18295367Sdes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19295367Sdes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20295367Sdes * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21295367Sdes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22255670Sdes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23295367Sdes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24295367Sdes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25124208Sdes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26262566Sdes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27124208Sdes * SUCH DAMAGE. 
 *
 * $FreeBSD: head/sys/netinet/ip_fastfwd.c 133480 2004-08-11 10:42:59Z andre $
 */

/*
 * ip_fastforward gets its speed from processing the forwarded packet to
 * completion (if_output on the other side) without any queues or netisr's.
 * The receiving interface DMAs the packet into memory, the upper half of
 * the driver calls ip_fastforward, we do our routing table lookup and
 * directly send it off to the outgoing interface which DMAs the packet to
 * the network card. The only part of the packet we touch with the CPU is
 * the IP header (unless there are complex firewall rules touching other
 * parts of the packet, but that is up to you). We are essentially limited
 * by bus bandwidth and how fast the network card/driver can set up receives
 * and transmits.
 *
 * We handle basic errors, ip header errors, checksum errors,
 * destination unreachable, fragmentation and fragmentation needed and
 * report them via icmp to the sender.
 *
 * Else if something is not pure IPv4 unicast forwarding we fall back to
 * the normal ip_input processing path. We should only be called from
 * interfaces connected to the outside world.
 *
 * Firewalling is fully supported including divert, ipfw fwd and ipfilter
 * ipnat and address rewrite.
 *
 * IPSEC is not supported if this host is a tunnel broker. IPSEC is
 * supported for connections to/from local host.
 *
 * We try to do the least expensive (in CPU ops) checks and operations
 * first to catch junk with as little overhead as possible.
60295367Sdes * 61262566Sdes * We take full advantage of hardware support for ip checksum and 62262566Sdes * fragmentation offloading. 63262566Sdes * 64262566Sdes * We don't do ICMP redirect in the fast forwarding path. I have had my own 65262566Sdes * cases where two core routers with Zebra routing suite would send millions 66262566Sdes * ICMP redirects to connected hosts if the router to dest was not the default 67295367Sdes * gateway. In one case it was filling the routing table of a host with close 68262566Sdes * 300'000 cloned redirect entries until it ran out of kernel memory. However 69262566Sdes * the networking code proved very robust and it didn't crash or went ill 70255670Sdes * otherwise. 71255670Sdes */ 72255670Sdes 73262566Sdes/* 74262566Sdes * Many thanks to Matt Thomas of NetBSD for basic structure of ip_flow.c which 75255670Sdes * is being followed here. 76255670Sdes */ 77255670Sdes 78262566Sdes#include "opt_ipfw.h" 79255670Sdes#include "opt_ipdn.h" 80255670Sdes#include "opt_ipdivert.h" 81255670Sdes#include "opt_ipfilter.h" 82255670Sdes#include "opt_ipstealth.h" 83255670Sdes#include "opt_pfil_hooks.h" 84255670Sdes 85255670Sdes#include <sys/param.h> 86255670Sdes#include <sys/systm.h> 87255670Sdes#include <sys/kernel.h> 88255670Sdes#include <sys/malloc.h> 89255670Sdes#include <sys/mbuf.h> 90255670Sdes#include <sys/protosw.h> 91255670Sdes#include <sys/socket.h> 92255670Sdes#include <sys/sysctl.h> 93255670Sdes 94255670Sdes#include <net/pfil.h> 95255670Sdes#include <net/if.h> 96255670Sdes#include <net/if_types.h> 97255670Sdes#include <net/if_var.h> 98255670Sdes#include <net/if_dl.h> 99255670Sdes#include <net/route.h> 100255670Sdes 101255670Sdes#include <netinet/in.h> 102255670Sdes#include <netinet/in_systm.h> 103295367Sdes#include <netinet/in_var.h> 104295367Sdes#include <netinet/ip.h> 105295367Sdes#include <netinet/ip_var.h> 106295367Sdes#include <netinet/ip_icmp.h> 107295367Sdes 108295367Sdes#include <machine/in_cksum.h> 109295367Sdes 
110295367Sdes#include <netinet/ip_fw.h> 111295367Sdes#include <netinet/ip_divert.h> 112295367Sdes#include <netinet/ip_dummynet.h> 113295367Sdes 114295367Sdesstatic int ipfastforward_active = 0; 115295367SdesSYSCTL_INT(_net_inet_ip, OID_AUTO, fastforwarding, CTLFLAG_RW, 116295367Sdes &ipfastforward_active, 0, "Enable fast IP forwarding"); 117295367Sdes 118295367Sdesstatic struct sockaddr_in * 119295367Sdesip_findroute(struct route *ro, in_addr_t dest, struct mbuf *m) 120295367Sdes{ 121295367Sdes struct sockaddr_in *dst; 122255670Sdes struct rtentry *rt; 123255670Sdes 124295367Sdes /* 125295367Sdes * Find route to destination. 126255670Sdes */ 127255670Sdes bzero(ro, sizeof(*ro)); 128255670Sdes dst = (struct sockaddr_in *)&ro->ro_dst; 129255670Sdes dst->sin_family = AF_INET; 130255670Sdes dst->sin_len = sizeof(*dst); 131255670Sdes dst->sin_addr.s_addr = dest; 132255670Sdes rtalloc_ign(ro, RTF_CLONING); 133255670Sdes 134255670Sdes /* 135255670Sdes * Route there and interface still up? 136255670Sdes */ 137255670Sdes rt = ro->ro_rt; 138255670Sdes if (rt && (rt->rt_flags & RTF_UP) && 139255670Sdes (rt->rt_ifp->if_flags & IFF_UP) && 140296853Sdes (rt->rt_ifp->if_flags & IFF_RUNNING)) { 141255670Sdes if (rt->rt_flags & RTF_GATEWAY) 142255670Sdes dst = (struct sockaddr_in *)rt->rt_gateway; 143255670Sdes } else { 144255670Sdes ipstat.ips_noroute++; 145255670Sdes ipstat.ips_cantforward++; 146255670Sdes if (rt) 147296853Sdes RTFREE(rt); 148296853Sdes icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, NULL); 149255670Sdes return NULL; 150255670Sdes } 151255670Sdes return dst; 152255670Sdes} 153255670Sdes 154255670Sdes/* 155255670Sdes * Try to forward a packet based on the destination address. 156255670Sdes * This is a fast path optimized for the plain forwarding case. 157255670Sdes * If the packet is handled (and consumed) here then we return 1; 158255670Sdes * otherwise 0 is returned and the packet should be delivered 159255670Sdes * to ip_input for full processing. 
160255670Sdes */ 161255670Sdesint 162255670Sdesip_fastforward(struct mbuf *m) 163255670Sdes{ 164262566Sdes struct ip *ip; 165255670Sdes struct mbuf *m0 = NULL; 166255670Sdes#ifdef IPDIVERT 167262566Sdes struct ip *tip; 168255670Sdes struct mbuf *clone = NULL; 169255670Sdes#endif 170255670Sdes struct route ro; 171255670Sdes struct sockaddr_in *dst = NULL; 172255670Sdes struct in_ifaddr *ia = NULL; 173 struct ifaddr *ifa = NULL; 174 struct ifnet *ifp; 175 struct ip_fw_args args; 176 in_addr_t odest, dest; 177 u_short sum, ip_len; 178 int error = 0; 179 int hlen, ipfw, mtu; 180 181 /* 182 * Are we active and forwarding packets? 183 */ 184 if (!ipfastforward_active || !ipforwarding) 185 return 0; 186 187 M_ASSERTVALID(m); 188 M_ASSERTPKTHDR(m); 189 190 ro.ro_rt = NULL; 191 192 /* 193 * Step 1: check for packet drop conditions (and sanity checks) 194 */ 195 196 /* 197 * Is entire packet big enough? 198 */ 199 if (m->m_pkthdr.len < sizeof(struct ip)) { 200 ipstat.ips_tooshort++; 201 goto drop; 202 } 203 204 /* 205 * Is first mbuf large enough for ip header and is header present? 206 */ 207 if (m->m_len < sizeof (struct ip) && 208 (m = m_pullup(m, sizeof (struct ip))) == 0) { 209 ipstat.ips_toosmall++; 210 goto drop; 211 } 212 213 ip = mtod(m, struct ip *); 214 215 /* 216 * Is it IPv4? 217 */ 218 if (ip->ip_v != IPVERSION) { 219 ipstat.ips_badvers++; 220 goto drop; 221 } 222 223 /* 224 * Is IP header length correct and is it in first mbuf? 225 */ 226 hlen = ip->ip_hl << 2; 227 if (hlen < sizeof(struct ip)) { /* minimum header length */ 228 ipstat.ips_badlen++; 229 goto drop; 230 } 231 if (hlen > m->m_len) { 232 if ((m = m_pullup(m, hlen)) == 0) { 233 ipstat.ips_badhlen++; 234 goto drop; 235 } 236 ip = mtod(m, struct ip *); 237 } 238 239 /* 240 * Checksum correct? 
241 */ 242 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) 243 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 244 else { 245 if (hlen == sizeof(struct ip)) 246 sum = in_cksum_hdr(ip); 247 else 248 sum = in_cksum(m, hlen); 249 } 250 if (sum) { 251 ipstat.ips_badsum++; 252 goto drop; 253 } 254 m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); 255 256 ip_len = ntohs(ip->ip_len); 257 258 /* 259 * Is IP length longer than packet we have got? 260 */ 261 if (m->m_pkthdr.len < ip_len) { 262 ipstat.ips_tooshort++; 263 goto drop; 264 } 265 266 /* 267 * Is packet longer than IP header tells us? If yes, truncate packet. 268 */ 269 if (m->m_pkthdr.len > ip_len) { 270 if (m->m_len == m->m_pkthdr.len) { 271 m->m_len = ip_len; 272 m->m_pkthdr.len = ip_len; 273 } else 274 m_adj(m, ip_len - m->m_pkthdr.len); 275 } 276 277 /* 278 * Is packet from or to 127/8? 279 */ 280 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 281 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 282 ipstat.ips_badaddr++; 283 goto drop; 284 } 285 286#ifdef ALTQ 287 /* 288 * Is packet dropped by traffic conditioner? 289 */ 290 if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 291 return 1; 292#endif 293 294 /* 295 * Step 2: fallback conditions to normal ip_input path processing 296 */ 297 298 /* 299 * Only IP packets without options 300 */ 301 if (ip->ip_hl != (sizeof(struct ip) >> 2)) { 302 if (ip_doopts == 1) 303 return 0; 304 else if (ip_doopts == 2) { 305 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_FILTER_PROHIB, 306 0, NULL); 307 return 1; 308 } 309 /* else ignore IP options and continue */ 310 } 311 312 /* 313 * Only unicast IP, not from loopback, no L2 or IP broadcast, 314 * no multicast, no INADDR_ANY 315 * 316 * XXX: Probably some of these checks could be direct drop 317 * conditions. However it is not clear whether there are some 318 * hacks or obscure behaviours which make it neccessary to 319 * let ip_input handle it. 
We play safe here and let ip_input 320 * deal with it until it is proven that we can directly drop it. 321 */ 322 if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) || 323 ntohl(ip->ip_src.s_addr) == (u_long)INADDR_BROADCAST || 324 ntohl(ip->ip_dst.s_addr) == (u_long)INADDR_BROADCAST || 325 IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || 326 IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 327 ip->ip_dst.s_addr == INADDR_ANY ) 328 return 0; 329 330 /* 331 * Is it for a local address on this host? 332 */ 333 LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 334 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr) 335 return 0; 336 } 337 338 /* 339 * Or is it for a local IP broadcast address on this host? 340 */ 341 if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { 342 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) { 343 if (ifa->ifa_addr->sa_family != AF_INET) 344 continue; 345 ia = ifatoia(ifa); 346 if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr) 347 return 0; 348 if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 349 ip->ip_dst.s_addr) 350 return 0; 351 } 352 } 353 ipstat.ips_total++; 354 355 /* 356 * Step 3: incoming packet firewall processing 357 */ 358 359 /* 360 * Convert to host representation 361 */ 362 ip->ip_len = ntohs(ip->ip_len); 363 ip->ip_off = ntohs(ip->ip_off); 364 365 odest = dest = ip->ip_dst.s_addr; 366#ifdef PFIL_HOOKS 367 /* 368 * Run through list of ipfilter hooks for input packets 369 */ 370 if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN) || 371 m == NULL) 372 return 1; 373 374 M_ASSERTVALID(m); 375 M_ASSERTPKTHDR(m); 376 377 ip = mtod(m, struct ip *); /* m may have changed by pfil hook */ 378 dest = ip->ip_dst.s_addr; 379#endif 380 381 /* 382 * Run through ipfw for input packets 383 */ 384 if (fw_enable && IPFW_LOADED) { 385 bzero(&args, sizeof(args)); 386 args.m = m; 387 388 ipfw = ip_fw_chk_ptr(&args); 389 m = args.m; 390 391 M_ASSERTVALID(m); 392 M_ASSERTPKTHDR(m); 393 394 /* 395 * Packet denied, drop 
it 396 */ 397 if ((ipfw & IP_FW_PORT_DENY_FLAG) || m == NULL) 398 goto drop; 399 /* 400 * Send packet to the appropriate pipe 401 */ 402 if (DUMMYNET_LOADED && (ipfw & IP_FW_PORT_DYNT_FLAG) != 0) { 403 ip_dn_io_ptr(m, ipfw & 0xffff, DN_TO_IP_IN, &args); 404 return 1; 405 } 406#ifdef IPDIVERT 407 /* 408 * Divert packet 409 */ 410 if (ipfw != 0 && (ipfw & IP_FW_PORT_DYNT_FLAG) == 0) { 411 /* 412 * See if this is a fragment 413 */ 414 if (ip->ip_off & (IP_MF | IP_OFFMASK)) 415 goto droptoours; 416 /* 417 * Tee packet 418 */ 419 if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0) 420 clone = divert_clone(m); 421 else 422 clone = m; 423 if (clone == NULL) 424 goto passin; 425 426 /* 427 * Delayed checksums are not compatible 428 */ 429 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 430 in_delayed_cksum(m); 431 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 432 } 433 /* 434 * Restore packet header fields to original values 435 */ 436 tip = mtod(m, struct ip *); 437 tip->ip_len = htons(tip->ip_len); 438 tip->ip_off = htons(tip->ip_off); 439 /* 440 * Deliver packet to divert input routine 441 */ 442 divert_packet(m, 0); 443 /* 444 * If this was not tee, we are done 445 */ 446 m = clone; 447 if ((ipfw & IP_FW_PORT_TEE_FLAG) == 0) 448 return 1; 449 /* Continue if it was tee */ 450 goto passin; 451 } 452#endif 453 if (ipfw == 0 && args.next_hop != NULL) { 454 dest = args.next_hop->sin_addr.s_addr; 455 goto passin; 456 } 457 /* 458 * Let through or not? 459 */ 460 if (ipfw != 0) 461 goto drop; 462 } 463passin: 464 ip = mtod(m, struct ip *); /* if m changed during fw processing */ 465 466 /* 467 * Destination address changed? 468 */ 469 if (odest != dest) { 470 /* 471 * Is it now for a local address on this host? 
472 */ 473 LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 474 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr) 475 goto forwardlocal; 476 } 477 /* 478 * Go on with new destination address 479 */ 480 } 481 482 /* 483 * Step 4: decrement TTL and look up route 484 */ 485 486 /* 487 * Check TTL 488 */ 489#ifdef IPSTEALTH 490 if (!ipstealth) { 491#endif 492 if (ip->ip_ttl <= IPTTLDEC) { 493 icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, NULL); 494 return 1; 495 } 496 497 /* 498 * Decrement the TTL and incrementally change the checksum. 499 * Don't bother doing this with hw checksum offloading. 500 */ 501 ip->ip_ttl -= IPTTLDEC; 502 if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8)) 503 ip->ip_sum -= ~htons(IPTTLDEC << 8); 504 else 505 ip->ip_sum += htons(IPTTLDEC << 8); 506#ifdef IPSTEALTH 507 } 508#endif 509 510 /* 511 * Find route to destination. 512 */ 513 if ((dst = ip_findroute(&ro, dest, m)) == NULL) 514 return 1; /* icmp unreach already sent */ 515 ifp = ro.ro_rt->rt_ifp; 516 517 /* 518 * Step 5: outgoing firewall packet processing 519 */ 520 521#ifdef PFIL_HOOKS 522 /* 523 * Run through list of hooks for output packets. 524 */ 525 if (pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT) || m == NULL) { 526 goto consumed; 527 } 528 529 M_ASSERTVALID(m); 530 M_ASSERTPKTHDR(m); 531 532 ip = mtod(m, struct ip *); 533 dest = ip->ip_dst.s_addr; 534#endif 535 if (fw_enable && IPFW_LOADED && !args.next_hop) { 536 bzero(&args, sizeof(args)); 537 args.m = m; 538 args.oif = ifp; 539 540 ipfw = ip_fw_chk_ptr(&args); 541 m = args.m; 542 543 M_ASSERTVALID(m); 544 M_ASSERTPKTHDR(m); 545 546 if ((ipfw & IP_FW_PORT_DENY_FLAG) || m == NULL) 547 goto drop; 548 549 if (DUMMYNET_LOADED && (ipfw & IP_FW_PORT_DYNT_FLAG) != 0) { 550 /* 551 * XXX note: if the ifp or rt entry are deleted 552 * while a pkt is in dummynet, we are in trouble! 
553 */ 554 args.ro = &ro; /* dummynet does not save it */ 555 args.dst = dst; 556 557 ip_dn_io_ptr(m, ipfw & 0xffff, DN_TO_IP_OUT, &args); 558 goto consumed; 559 } 560#ifdef IPDIVERT 561 if (ipfw != 0 && (ipfw & IP_FW_PORT_DYNT_FLAG) == 0) { 562 /* 563 * See if this is a fragment 564 */ 565 if (ip->ip_off & (IP_MF | IP_OFFMASK)) 566 goto droptoours; 567 /* 568 * Tee packet 569 */ 570 if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0) 571 clone = divert_clone(m); 572 else 573 clone = m; 574 if (clone == NULL) 575 goto passout; 576 577 /* 578 * Delayed checksums are not compatible with divert 579 */ 580 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 581 in_delayed_cksum(m); 582 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 583 } 584 /* 585 * Restore packet header fields to original values 586 */ 587 tip = mtod(m, struct ip *); 588 tip->ip_len = htons(tip->ip_len); 589 tip->ip_off = htons(tip->ip_off); 590 /* 591 * Deliver packet to divert input routine 592 */ 593 divert_packet(m, 0); 594 /* 595 * If this was not tee, we are done 596 */ 597 m = clone; 598 if ((ipfw & IP_FW_PORT_TEE_FLAG) == 0) { 599 goto consumed; 600 } 601 /* Continue if it was tee */ 602 goto passout; 603 } 604#endif 605 if (ipfw == 0 && args.next_hop != NULL) { 606 dest = args.next_hop->sin_addr.s_addr; 607 goto passout; 608 } 609 /* 610 * Let through or not? 611 */ 612 if (ipfw != 0) 613 goto drop; 614 } 615passout: 616 ip = mtod(m, struct ip *); 617 618 /* 619 * Destination address changed? 620 */ 621 if (odest != dest) { 622 /* 623 * Is it now for a local address on this host? 
624 */ 625 LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 626 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr) { 627forwardlocal: 628 if (args.next_hop) { 629 struct m_tag *mtag = m_tag_get( 630 PACKET_TAG_IPFORWARD, 631 sizeof(struct sockaddr_in *), 632 M_NOWAIT); 633 if (mtag == NULL) { 634 goto drop; 635 } 636 *(struct sockaddr_in **)(mtag+1) = 637 args.next_hop; 638 m_tag_prepend(m, mtag); 639 } 640#ifdef IPDIVERT 641droptoours: /* Used for DIVERT */ 642#endif 643 /* for ip_input */ 644 m->m_flags |= M_FASTFWD_OURS; 645 646 /* ip still points to the real packet */ 647 ip->ip_len = htons(ip->ip_len); 648 ip->ip_off = htons(ip->ip_off); 649 650 /* 651 * Return packet for processing by ip_input 652 */ 653 if (ro.ro_rt) 654 RTFREE(ro.ro_rt); 655 return 0; 656 } 657 } 658 /* 659 * Redo route lookup with new destination address 660 */ 661 RTFREE(ro.ro_rt); 662 if ((dst = ip_findroute(&ro, dest, m)) == NULL) 663 return 1; /* icmp unreach already sent */ 664 ifp = ro.ro_rt->rt_ifp; 665 } 666 667 /* 668 * Step 6: send off the packet 669 */ 670 671 /* 672 * Check if route is dampned (when ARP is unable to resolve) 673 */ 674 if ((ro.ro_rt->rt_flags & RTF_REJECT) && 675 ro.ro_rt->rt_rmx.rmx_expire >= time_second) { 676 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, NULL); 677 goto consumed; 678 } 679 680#ifndef ALTQ 681 /* 682 * Check if there is enough space in the interface queue 683 */ 684 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= 685 ifp->if_snd.ifq_maxlen) { 686 ipstat.ips_odropped++; 687 /* would send source quench here but that is depreciated */ 688 goto drop; 689 } 690#endif 691 692 /* 693 * Check if media link state of interface is not down 694 */ 695 if (ifp->if_link_state == LINK_STATE_DOWN) { 696 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, NULL); 697 goto consumed; 698 } 699 700 /* 701 * Check if packet fits MTU or if hardware will fragement for us 702 */ 703 if (ro.ro_rt->rt_rmx.rmx_mtu) 704 mtu = 
min(ro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu); 705 else 706 mtu = ifp->if_mtu; 707 708 if (ip->ip_len <= mtu || 709 (ifp->if_hwassist & CSUM_FRAGMENT && (ip->ip_off & IP_DF) == 0)) { 710 /* 711 * Restore packet header fields to original values 712 */ 713 ip->ip_len = htons(ip->ip_len); 714 ip->ip_off = htons(ip->ip_off); 715 /* 716 * Send off the packet via outgoing interface 717 */ 718 error = (*ifp->if_output)(ifp, m, 719 (struct sockaddr *)dst, ro.ro_rt); 720 } else { 721 /* 722 * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery 723 */ 724 if (ip->ip_off & IP_DF) { 725 ipstat.ips_cantfrag++; 726 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 727 0, ifp); 728 goto consumed; 729 } else { 730 /* 731 * We have to fragement the packet 732 */ 733 m->m_pkthdr.csum_flags |= CSUM_IP; 734 /* 735 * ip_fragment expects ip_len and ip_off in host byte 736 * order but returns all packets in network byte order 737 */ 738 if (ip_fragment(ip, &m, mtu, ifp->if_hwassist, 739 (~ifp->if_hwassist & CSUM_DELAY_IP))) { 740 goto drop; 741 } 742 KASSERT(m != NULL, ("null mbuf and no error")); 743 /* 744 * Send off the fragments via outgoing interface 745 */ 746 error = 0; 747 do { 748 m0 = m->m_nextpkt; 749 m->m_nextpkt = NULL; 750 751 error = (*ifp->if_output)(ifp, m, 752 (struct sockaddr *)dst, ro.ro_rt); 753 if (error) 754 break; 755 } while ((m = m0) != NULL); 756 if (error) { 757 /* Reclaim remaining fragments */ 758 for (; m; m = m0) { 759 m0 = m->m_nextpkt; 760 m->m_nextpkt = NULL; 761 m_freem(m); 762 } 763 } else 764 ipstat.ips_fragmented++; 765 } 766 } 767 768 if (error != 0) 769 ipstat.ips_odropped++; 770 else { 771 ro.ro_rt->rt_rmx.rmx_pksent++; 772 ipstat.ips_forward++; 773 ipstat.ips_fastforward++; 774 } 775consumed: 776 RTFREE(ro.ro_rt); 777 return 1; 778drop: 779 if (m) 780 m_freem(m); 781 if (ro.ro_rt) 782 RTFREE(ro.ro_rt); 783 return 1; 784} 785