tcp_lro.c revision 304836
1235474Sbz/*- 2235474Sbz * Copyright (c) 2007, Myricom Inc. 3235474Sbz * Copyright (c) 2008, Intel Corporation. 4235944Sbz * Copyright (c) 2012 The FreeBSD Foundation 5235474Sbz * All rights reserved. 6235474Sbz * 7235944Sbz * Portions of this software were developed by Bjoern Zeeb 8235944Sbz * under sponsorship from the FreeBSD Foundation. 9235944Sbz * 10235474Sbz * Redistribution and use in source and binary forms, with or without 11235474Sbz * modification, are permitted provided that the following conditions 12235474Sbz * are met: 13235474Sbz * 1. Redistributions of source code must retain the above copyright 14235474Sbz * notice, this list of conditions and the following disclaimer. 15235474Sbz * 2. Redistributions in binary form must reproduce the above copyright 16235474Sbz * notice, this list of conditions and the following disclaimer in the 17235474Sbz * documentation and/or other materials provided with the distribution. 18235474Sbz * 19235474Sbz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20235474Sbz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21235474Sbz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22235474Sbz * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23235474Sbz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24235474Sbz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25235474Sbz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26235474Sbz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27235474Sbz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28235474Sbz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29235474Sbz * SUCH DAMAGE. 30235474Sbz */ 31179737Sjfv 32235944Sbz#include <sys/cdefs.h> 33235944Sbz__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_lro.c 304836 2016-08-26 06:19:12Z sephe $"); 34235944Sbz 35235944Sbz#include "opt_inet.h" 36235944Sbz#include "opt_inet6.h" 37235944Sbz 38179737Sjfv#include <sys/param.h> 39179737Sjfv#include <sys/systm.h> 40179737Sjfv#include <sys/mbuf.h> 41179737Sjfv#include <sys/kernel.h> 42179737Sjfv#include <sys/socket.h> 43179737Sjfv 44179737Sjfv#include <net/if.h> 45235944Sbz#include <net/if_var.h> 46179737Sjfv#include <net/ethernet.h> 47236394Sbz#include <net/vnet.h> 48179737Sjfv 49179737Sjfv#include <netinet/in_systm.h> 50179737Sjfv#include <netinet/in.h> 51235944Sbz#include <netinet/ip6.h> 52179737Sjfv#include <netinet/ip.h> 53235981Sbz#include <netinet/ip_var.h> 54179737Sjfv#include <netinet/tcp.h> 55179737Sjfv#include <netinet/tcp_lro.h> 56179737Sjfv 57235981Sbz#include <netinet6/ip6_var.h> 58235981Sbz 59179737Sjfv#include <machine/in_cksum.h> 60179737Sjfv 61235944Sbz#ifndef LRO_ENTRIES 62235944Sbz#define LRO_ENTRIES 8 /* # of LRO entries per RX queue. */ 63235944Sbz#endif 64179737Sjfv 65235944Sbz#define TCP_LRO_UPDATE_CSUM 1 66235944Sbz#ifndef TCP_LRO_UPDATE_CSUM 67235944Sbz#define TCP_LRO_INVALID_CSUM 0x0000 68235944Sbz#endif 69179737Sjfv 70179737Sjfvint 71235944Sbztcp_lro_init(struct lro_ctrl *lc) 72179737Sjfv{ 73235944Sbz struct lro_entry *le; 74235944Sbz int error, i; 75179737Sjfv 76235944Sbz lc->lro_bad_csum = 0; 77235944Sbz lc->lro_queued = 0; 78235944Sbz lc->lro_flushed = 0; 79235944Sbz lc->lro_cnt = 0; 80235944Sbz SLIST_INIT(&lc->lro_free); 81235944Sbz SLIST_INIT(&lc->lro_active); 82179737Sjfv 83235944Sbz error = 0; 84179737Sjfv for (i = 0; i < LRO_ENTRIES; i++) { 85235944Sbz le = (struct lro_entry *)malloc(sizeof(*le), M_DEVBUF, 86235944Sbz M_NOWAIT | M_ZERO); 87235944Sbz if (le == NULL) { 88179737Sjfv if (i == 0) 89179737Sjfv error = ENOMEM; 90179737Sjfv break; 91179737Sjfv } 92235944Sbz lc->lro_cnt = i + 1; 93235944Sbz SLIST_INSERT_HEAD(&lc->lro_free, le, next); 94179737Sjfv } 95179737Sjfv 96179737Sjfv return (error); 97179737Sjfv} 98179737Sjfv 99179737Sjfvvoid 100235944Sbztcp_lro_free(struct lro_ctrl *lc) 101179737Sjfv{ 102235944Sbz struct lro_entry *le; 103179737Sjfv 104235944Sbz while (!SLIST_EMPTY(&lc->lro_free)) { 105235944Sbz le = SLIST_FIRST(&lc->lro_free); 106235944Sbz SLIST_REMOVE_HEAD(&lc->lro_free, next); 107235944Sbz free(le, M_DEVBUF); 108179737Sjfv } 109179737Sjfv} 110179737Sjfv 111235944Sbz#ifdef TCP_LRO_UPDATE_CSUM 112235944Sbzstatic uint16_t 113235944Sbztcp_lro_csum_th(struct tcphdr *th) 114235944Sbz{ 115235944Sbz uint32_t ch; 116235944Sbz uint16_t *p, l; 117235944Sbz 118235944Sbz ch = th->th_sum = 0x0000; 119235944Sbz l = th->th_off; 120235944Sbz p = (uint16_t *)th; 121235944Sbz while (l > 0) { 122235944Sbz ch += *p; 123235944Sbz p++; 124235944Sbz ch += *p; 125235944Sbz p++; 126235944Sbz l--; 127235944Sbz } 128235944Sbz while (ch > 0xffff) 129235944Sbz ch = (ch >> 16) + (ch & 0xffff); 130235944Sbz 131235944Sbz return (ch & 0xffff); 132235944Sbz} 133235944Sbz 134235944Sbzstatic uint16_t 135235944Sbztcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th, 136235944Sbz uint16_t tcp_data_len, uint16_t csum) 137235944Sbz{ 138235944Sbz uint32_t c; 139235944Sbz uint16_t cs; 140235944Sbz 141235944Sbz c = csum; 142235944Sbz 143235944Sbz /* Remove length from checksum. */ 144235944Sbz switch (le->eh_type) { 145235944Sbz#ifdef INET6 146235944Sbz case ETHERTYPE_IPV6: 147235944Sbz { 148235944Sbz struct ip6_hdr *ip6; 149235944Sbz 150235944Sbz ip6 = (struct ip6_hdr *)l3hdr; 151235944Sbz if (le->append_cnt == 0) 152235944Sbz cs = ip6->ip6_plen; 153235944Sbz else { 154235944Sbz uint32_t cx; 155235944Sbz 156235944Sbz cx = ntohs(ip6->ip6_plen); 157235944Sbz cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0); 158235944Sbz } 159235944Sbz break; 160235944Sbz } 161235944Sbz#endif 162235944Sbz#ifdef INET 163235944Sbz case ETHERTYPE_IP: 164235944Sbz { 165235944Sbz struct ip *ip4; 166235944Sbz 167235944Sbz ip4 = (struct ip *)l3hdr; 168235944Sbz if (le->append_cnt == 0) 169235944Sbz cs = ip4->ip_len; 170235944Sbz else { 171235944Sbz cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4), 172235944Sbz IPPROTO_TCP); 173235944Sbz cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr, 174235944Sbz htons(cs)); 175235944Sbz } 176235944Sbz break; 177235944Sbz } 178235944Sbz#endif 179235944Sbz default: 180235944Sbz cs = 0; /* Keep compiler happy. */ 181235944Sbz } 182235944Sbz 183235944Sbz cs = ~cs; 184235944Sbz c += cs; 185235944Sbz 186235944Sbz /* Remove TCP header csum. */ 187235944Sbz cs = ~tcp_lro_csum_th(th); 188235944Sbz c += cs; 189235944Sbz while (c > 0xffff) 190235944Sbz c = (c >> 16) + (c & 0xffff); 191235944Sbz 192235944Sbz return (c & 0xffff); 193235944Sbz} 194235944Sbz#endif 195235944Sbz 196179737Sjfvvoid 197255010Snptcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout) 198255010Snp{ 199255010Snp struct lro_entry *le, *le_tmp; 200255010Snp struct timeval tv; 201255010Snp 202255010Snp if (SLIST_EMPTY(&lc->lro_active)) 203255010Snp return; 204255010Snp 205255010Snp getmicrotime(&tv); 206255010Snp timevalsub(&tv, timeout); 207255010Snp SLIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) { 208255010Snp if (timevalcmp(&tv, &le->mtime, >=)) { 209255010Snp SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); 210255010Snp tcp_lro_flush(lc, le); 211255010Snp } 212255010Snp } 213255010Snp} 214255010Snp 215255010Snpvoid 216235944Sbztcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le) 217179737Sjfv{ 218179737Sjfv 219235944Sbz if (le->append_cnt > 0) { 220235944Sbz struct tcphdr *th; 221235944Sbz uint16_t p_len; 222179737Sjfv 223235944Sbz p_len = htons(le->p_len); 224235944Sbz switch (le->eh_type) { 225235944Sbz#ifdef INET6 226235944Sbz case ETHERTYPE_IPV6: 227235944Sbz { 228235944Sbz struct ip6_hdr *ip6; 229179737Sjfv 230235944Sbz ip6 = le->le_ip6; 231235944Sbz ip6->ip6_plen = p_len; 232235944Sbz th = (struct tcphdr *)(ip6 + 1); 233235944Sbz le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | 234235944Sbz CSUM_PSEUDO_HDR; 235235944Sbz le->p_len += ETHER_HDR_LEN + sizeof(*ip6); 236235944Sbz break; 237235944Sbz } 238235944Sbz#endif 239235944Sbz#ifdef INET 240235944Sbz case ETHERTYPE_IP: 241235944Sbz { 242235944Sbz struct ip *ip4; 243235944Sbz#ifdef TCP_LRO_UPDATE_CSUM 244235944Sbz uint32_t cl; 245235944Sbz uint16_t c; 246235944Sbz#endif 247179737Sjfv 248235944Sbz ip4 = le->le_ip4; 249235944Sbz#ifdef TCP_LRO_UPDATE_CSUM 250235944Sbz /* Fix IP header checksum for new length. */ 251235944Sbz c = ~ip4->ip_sum; 252235944Sbz cl = c; 253235944Sbz c = ~ip4->ip_len; 254235944Sbz cl += c + p_len; 255235944Sbz while (cl > 0xffff) 256235944Sbz cl = (cl >> 16) + (cl & 0xffff); 257235944Sbz c = cl; 258235944Sbz ip4->ip_sum = ~c; 259235944Sbz#else 260235944Sbz ip4->ip_sum = TCP_LRO_INVALID_CSUM; 261235944Sbz#endif 262235944Sbz ip4->ip_len = p_len; 263235944Sbz th = (struct tcphdr *)(ip4 + 1); 264235944Sbz le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | 265235944Sbz CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID; 266235944Sbz le->p_len += ETHER_HDR_LEN; 267235944Sbz break; 268179737Sjfv } 269235944Sbz#endif 270235944Sbz default: 271235944Sbz th = NULL; /* Keep compiler happy. */ 272235944Sbz } 273235944Sbz le->m_head->m_pkthdr.csum_data = 0xffff; 274235944Sbz le->m_head->m_pkthdr.len = le->p_len; 275235944Sbz 276235944Sbz /* Incorporate the latest ACK into the TCP header. */ 277235944Sbz th->th_ack = le->ack_seq; 278235944Sbz th->th_win = le->window; 279235944Sbz /* Incorporate latest timestamp into the TCP header. */ 280235944Sbz if (le->timestamp != 0) { 281235944Sbz uint32_t *ts_ptr; 282235944Sbz 283235944Sbz ts_ptr = (uint32_t *)(th + 1); 284235944Sbz ts_ptr[1] = htonl(le->tsval); 285235944Sbz ts_ptr[2] = le->tsecr; 286235944Sbz } 287235944Sbz#ifdef TCP_LRO_UPDATE_CSUM 288235944Sbz /* Update the TCP header checksum. */ 289235944Sbz le->ulp_csum += p_len; 290235944Sbz le->ulp_csum += tcp_lro_csum_th(th); 291235944Sbz while (le->ulp_csum > 0xffff) 292235944Sbz le->ulp_csum = (le->ulp_csum >> 16) + 293235944Sbz (le->ulp_csum & 0xffff); 294235944Sbz th->th_sum = (le->ulp_csum & 0xffff); 295235944Sbz th->th_sum = ~th->th_sum; 296235944Sbz#else 297235944Sbz th->th_sum = TCP_LRO_INVALID_CSUM; 298235944Sbz#endif 299179737Sjfv } 300235944Sbz 301235944Sbz (*lc->ifp->if_input)(lc->ifp, le->m_head); 302235944Sbz lc->lro_queued += le->append_cnt + 1; 303235944Sbz lc->lro_flushed++; 304235944Sbz bzero(le, sizeof(*le)); 305235944Sbz SLIST_INSERT_HEAD(&lc->lro_free, le, next); 306179737Sjfv} 307179737Sjfv 308235944Sbz#ifdef INET6 309235944Sbzstatic int 310235944Sbztcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6, 311235944Sbz struct tcphdr **th) 312179737Sjfv{ 313179737Sjfv 314235944Sbz /* XXX-BZ we should check the flow-label. */ 315179737Sjfv 316235944Sbz /* XXX-BZ We do not yet support ext. hdrs. */ 317235944Sbz if (ip6->ip6_nxt != IPPROTO_TCP) 318235944Sbz return (TCP_LRO_NOT_SUPPORTED); 319179737Sjfv 320235944Sbz /* Find the TCP header. */ 321235944Sbz *th = (struct tcphdr *)(ip6 + 1); 322179737Sjfv 323235944Sbz return (0); 324235944Sbz} 325235944Sbz#endif 326235944Sbz 327235944Sbz#ifdef INET 328235944Sbzstatic int 329235944Sbztcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4, 330235944Sbz struct tcphdr **th) 331235944Sbz{ 332235944Sbz int csum_flags; 333235944Sbz uint16_t csum; 334235944Sbz 335235944Sbz if (ip4->ip_p != IPPROTO_TCP) 336235944Sbz return (TCP_LRO_NOT_SUPPORTED); 337235944Sbz 338235944Sbz /* Ensure there are no options. */ 339235944Sbz if ((ip4->ip_hl << 2) != sizeof (*ip4)) 340235944Sbz return (TCP_LRO_CANNOT); 341235944Sbz 342235944Sbz /* .. and the packet is not fragmented. */ 343235944Sbz if (ip4->ip_off & htons(IP_MF|IP_OFFMASK)) 344235944Sbz return (TCP_LRO_CANNOT); 345235944Sbz 346235944Sbz /* Legacy IP has a header checksum that needs to be correct. */ 347235944Sbz csum_flags = m->m_pkthdr.csum_flags; 348182089Skmacy if (csum_flags & CSUM_IP_CHECKED) { 349182089Skmacy if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) { 350235944Sbz lc->lro_bad_csum++; 351235944Sbz return (TCP_LRO_CANNOT); 352182089Skmacy } 353182089Skmacy } else { 354235944Sbz csum = in_cksum_hdr(ip4); 355247104Sgallatin if (__predict_false((csum) != 0)) { 356235944Sbz lc->lro_bad_csum++; 357235944Sbz return (TCP_LRO_CANNOT); 358182089Skmacy } 359179737Sjfv } 360179737Sjfv 361235944Sbz /* Find the TCP header (we assured there are no IP options). */ 362235944Sbz *th = (struct tcphdr *)(ip4 + 1); 363179737Sjfv 364235944Sbz return (0); 365235944Sbz} 366235944Sbz#endif 367179737Sjfv 368235944Sbzint 369235944Sbztcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) 370235944Sbz{ 371235944Sbz struct lro_entry *le; 372235944Sbz struct ether_header *eh; 373235944Sbz#ifdef INET6 374235944Sbz struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */ 375235944Sbz#endif 376235944Sbz#ifdef INET 377235944Sbz struct ip *ip4 = NULL; /* Keep compiler happy. */ 378235944Sbz#endif 379235944Sbz struct tcphdr *th; 380235944Sbz void *l3hdr = NULL; /* Keep compiler happy. */ 381235944Sbz uint32_t *ts_ptr; 382235944Sbz tcp_seq seq; 383235944Sbz int error, ip_len, l; 384235944Sbz uint16_t eh_type, tcp_data_len; 385304836Ssephe int force_flush = 0; 386179737Sjfv 387235944Sbz /* We expect a contiguous header [eh, ip, tcp]. */ 388235944Sbz 389235944Sbz eh = mtod(m, struct ether_header *); 390235944Sbz eh_type = ntohs(eh->ether_type); 391235944Sbz switch (eh_type) { 392235944Sbz#ifdef INET6 393235944Sbz case ETHERTYPE_IPV6: 394236394Sbz { 395236394Sbz CURVNET_SET(lc->ifp->if_vnet); 396235981Sbz if (V_ip6_forwarding != 0) { 397235981Sbz /* XXX-BZ stats but changing lro_ctrl is a problem. */ 398236394Sbz CURVNET_RESTORE(); 399235981Sbz return (TCP_LRO_CANNOT); 400235981Sbz } 401236394Sbz CURVNET_RESTORE(); 402235944Sbz l3hdr = ip6 = (struct ip6_hdr *)(eh + 1); 403235944Sbz error = tcp_lro_rx_ipv6(lc, m, ip6, &th); 404235944Sbz if (error != 0) 405235944Sbz return (error); 406235944Sbz tcp_data_len = ntohs(ip6->ip6_plen); 407235944Sbz ip_len = sizeof(*ip6) + tcp_data_len; 408235944Sbz break; 409236394Sbz } 410235944Sbz#endif 411235944Sbz#ifdef INET 412235944Sbz case ETHERTYPE_IP: 413236394Sbz { 414236394Sbz CURVNET_SET(lc->ifp->if_vnet); 415235981Sbz if (V_ipforwarding != 0) { 416235981Sbz /* XXX-BZ stats but changing lro_ctrl is a problem. */ 417236394Sbz CURVNET_RESTORE(); 418235981Sbz return (TCP_LRO_CANNOT); 419235981Sbz } 420236394Sbz CURVNET_RESTORE(); 421235944Sbz l3hdr = ip4 = (struct ip *)(eh + 1); 422235944Sbz error = tcp_lro_rx_ipv4(lc, m, ip4, &th); 423235944Sbz if (error != 0) 424235944Sbz return (error); 425235944Sbz ip_len = ntohs(ip4->ip_len); 426235944Sbz tcp_data_len = ip_len - sizeof(*ip4); 427235944Sbz break; 428236394Sbz } 429235944Sbz#endif 430235944Sbz /* XXX-BZ what happens in case of VLAN(s)? */ 431235944Sbz default: 432235944Sbz return (TCP_LRO_NOT_SUPPORTED); 433179737Sjfv } 434179737Sjfv 435235944Sbz /* 436235944Sbz * If the frame is padded beyond the end of the IP packet, then we must 437235944Sbz * trim the extra bytes off. 438235944Sbz */ 439235944Sbz l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len); 440235944Sbz if (l != 0) { 441235944Sbz if (l < 0) 442235944Sbz /* Truncated packet. */ 443235944Sbz return (TCP_LRO_CANNOT); 444179737Sjfv 445235944Sbz m_adj(m, -l); 446235944Sbz } 447235944Sbz 448235944Sbz /* 449235944Sbz * Check TCP header constraints. 450179737Sjfv */ 451235944Sbz /* Ensure no bits set besides ACK or PSH. */ 452304836Ssephe if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) { 453304836Ssephe if (th->th_flags & TH_SYN) 454304836Ssephe return (TCP_LRO_CANNOT); 455304836Ssephe /* 456304836Ssephe * Make sure that previously seen segements/ACKs are delivered 457304836Ssephe * before this segement, e.g. FIN. 458304836Ssephe */ 459304836Ssephe force_flush = 1; 460304836Ssephe } 461235944Sbz 462302051Ssephe /* XXX-BZ We lose a ACK|PUSH flag concatenating multiple segments. */ 463235944Sbz /* XXX-BZ Ideally we'd flush on PUSH? */ 464235944Sbz 465235944Sbz /* 466235944Sbz * Check for timestamps. 467235944Sbz * Since the only option we handle are timestamps, we only have to 468235944Sbz * handle the simple case of aligned timestamps. 469235944Sbz */ 470235944Sbz l = (th->th_off << 2); 471235944Sbz tcp_data_len -= l; 472235944Sbz l -= sizeof(*th); 473235944Sbz ts_ptr = (uint32_t *)(th + 1); 474235944Sbz if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) || 475235944Sbz (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16| 476304836Ssephe TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) { 477304836Ssephe /* 478304836Ssephe * Make sure that previously seen segements/ACKs are delivered 479304836Ssephe * before this segement. 480304836Ssephe */ 481304836Ssephe force_flush = 1; 482304836Ssephe } 483235944Sbz 484235944Sbz /* If the driver did not pass in the checksum, set it now. */ 485235944Sbz if (csum == 0x0000) 486235944Sbz csum = th->th_sum; 487235944Sbz 488235944Sbz seq = ntohl(th->th_seq); 489235944Sbz 490235944Sbz /* Try to find a matching previous segment. */ 491235944Sbz SLIST_FOREACH(le, &lc->lro_active, next) { 492235944Sbz if (le->eh_type != eh_type) 493235944Sbz continue; 494235944Sbz if (le->source_port != th->th_sport || 495235944Sbz le->dest_port != th->th_dport) 496235944Sbz continue; 497235944Sbz switch (eh_type) { 498235944Sbz#ifdef INET6 499235944Sbz case ETHERTYPE_IPV6: 500235944Sbz if (bcmp(&le->source_ip6, &ip6->ip6_src, 501235944Sbz sizeof(struct in6_addr)) != 0 || 502235944Sbz bcmp(&le->dest_ip6, &ip6->ip6_dst, 503235944Sbz sizeof(struct in6_addr)) != 0) 504235944Sbz continue; 505235944Sbz break; 506235944Sbz#endif 507235944Sbz#ifdef INET 508235944Sbz case ETHERTYPE_IP: 509235944Sbz if (le->source_ip4 != ip4->ip_src.s_addr || 510235944Sbz le->dest_ip4 != ip4->ip_dst.s_addr) 511235944Sbz continue; 512235944Sbz break; 513235944Sbz#endif 514179737Sjfv } 515179737Sjfv 516304836Ssephe if (force_flush) { 517304836Ssephe /* Timestamps mismatch; this is a FIN, etc */ 518304836Ssephe SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); 519304836Ssephe tcp_lro_flush(lc, le); 520304836Ssephe return (TCP_LRO_CANNOT); 521304836Ssephe } 522304836Ssephe 523235944Sbz /* Flush now if appending will result in overflow. */ 524235944Sbz if (le->p_len > (65535 - tcp_data_len)) { 525235944Sbz SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); 526235944Sbz tcp_lro_flush(lc, le); 527235944Sbz break; 528235944Sbz } 529179737Sjfv 530235944Sbz /* Try to append the new segment. */ 531235944Sbz if (__predict_false(seq != le->next_seq || 532235944Sbz (tcp_data_len == 0 && le->ack_seq == th->th_ack))) { 533235944Sbz /* Out of order packet or duplicate ACK. */ 534235944Sbz SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); 535235944Sbz tcp_lro_flush(lc, le); 536235944Sbz return (TCP_LRO_CANNOT); 537235944Sbz } 538179737Sjfv 539235944Sbz if (l != 0) { 540235944Sbz uint32_t tsval = ntohl(*(ts_ptr + 1)); 541235944Sbz /* Make sure timestamp values are increasing. */ 542235944Sbz /* XXX-BZ flip and use TSTMP_GEQ macro for this? */ 543235944Sbz if (__predict_false(le->tsval > tsval || 544235944Sbz *(ts_ptr + 2) == 0)) 545235944Sbz return (TCP_LRO_CANNOT); 546235944Sbz le->tsval = tsval; 547235944Sbz le->tsecr = *(ts_ptr + 2); 548235944Sbz } 549223797Scperciva 550235944Sbz le->next_seq += tcp_data_len; 551235944Sbz le->ack_seq = th->th_ack; 552235944Sbz le->window = th->th_win; 553235944Sbz le->append_cnt++; 554179737Sjfv 555235944Sbz#ifdef TCP_LRO_UPDATE_CSUM 556235944Sbz le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th, 557235944Sbz tcp_data_len, ~csum); 558235944Sbz#endif 559179737Sjfv 560235944Sbz if (tcp_data_len == 0) { 561235944Sbz m_freem(m); 562235944Sbz return (0); 563235944Sbz } 564179737Sjfv 565235944Sbz le->p_len += tcp_data_len; 566179737Sjfv 567235944Sbz /* 568235944Sbz * Adjust the mbuf so that m_data points to the first byte of 569235944Sbz * the ULP payload. Adjust the mbuf to avoid complications and 570235944Sbz * append new segment to existing mbuf chain. 571235944Sbz */ 572235944Sbz m_adj(m, m->m_pkthdr.len - tcp_data_len); 573235944Sbz m->m_flags &= ~M_PKTHDR; 574179737Sjfv 575235944Sbz le->m_tail->m_next = m; 576235944Sbz le->m_tail = m_last(m); 577235944Sbz 578235944Sbz /* 579235944Sbz * If a possible next full length packet would cause an 580235944Sbz * overflow, pro-actively flush now. 581235944Sbz */ 582235944Sbz if (le->p_len > (65535 - lc->ifp->if_mtu)) { 583235944Sbz SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); 584235944Sbz tcp_lro_flush(lc, le); 585255010Snp } else 586255010Snp getmicrotime(&le->mtime); 587235944Sbz 588235944Sbz return (0); 589179737Sjfv } 590179737Sjfv 591304836Ssephe if (force_flush) { 592304836Ssephe /* 593304836Ssephe * Nothing to flush, but this segment can not be further 594304836Ssephe * aggregated/delayed. 595304836Ssephe */ 596304836Ssephe return (TCP_LRO_CANNOT); 597304836Ssephe } 598304836Ssephe 599235944Sbz /* Try to find an empty slot. */ 600235944Sbz if (SLIST_EMPTY(&lc->lro_free)) 601301949Ssephe return (TCP_LRO_NO_ENTRIES); 602179737Sjfv 603235944Sbz /* Start a new segment chain. */ 604235944Sbz le = SLIST_FIRST(&lc->lro_free); 605235944Sbz SLIST_REMOVE_HEAD(&lc->lro_free, next); 606235944Sbz SLIST_INSERT_HEAD(&lc->lro_active, le, next); 607255010Snp getmicrotime(&le->mtime); 608179737Sjfv 609235944Sbz /* Start filling in details. */ 610235944Sbz switch (eh_type) { 611235944Sbz#ifdef INET6 612235944Sbz case ETHERTYPE_IPV6: 613235944Sbz le->le_ip6 = ip6; 614235944Sbz le->source_ip6 = ip6->ip6_src; 615235944Sbz le->dest_ip6 = ip6->ip6_dst; 616235944Sbz le->eh_type = eh_type; 617235944Sbz le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6); 618235944Sbz break; 619235944Sbz#endif 620235944Sbz#ifdef INET 621235944Sbz case ETHERTYPE_IP: 622235944Sbz le->le_ip4 = ip4; 623235944Sbz le->source_ip4 = ip4->ip_src.s_addr; 624235944Sbz le->dest_ip4 = ip4->ip_dst.s_addr; 625235944Sbz le->eh_type = eh_type; 626235944Sbz le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN; 627235944Sbz break; 628235944Sbz#endif 629235944Sbz } 630235944Sbz le->source_port = th->th_sport; 631235944Sbz le->dest_port = th->th_dport; 632235944Sbz 633235944Sbz le->next_seq = seq + tcp_data_len; 634235944Sbz le->ack_seq = th->th_ack; 635235944Sbz le->window = th->th_win; 636235944Sbz if (l != 0) { 637235944Sbz le->timestamp = 1; 638235944Sbz le->tsval = ntohl(*(ts_ptr + 1)); 639235944Sbz le->tsecr = *(ts_ptr + 2); 640235944Sbz } 641235944Sbz 642235944Sbz#ifdef TCP_LRO_UPDATE_CSUM 643235944Sbz /* 644235944Sbz * Do not touch the csum of the first packet. However save the 645235944Sbz * "adjusted" checksum of just the source and destination addresses, 646235944Sbz * the next header and the TCP payload. The length and TCP header 647235944Sbz * parts may change, so we remove those from the saved checksum and 648235944Sbz * re-add with final values on tcp_lro_flush() if needed. 649179737Sjfv */ 650235944Sbz KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n", 651235944Sbz __func__, le, le->ulp_csum)); 652235944Sbz 653235944Sbz le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len, 654235944Sbz ~csum); 655235944Sbz th->th_sum = csum; /* Restore checksum on first packet. */ 656235944Sbz#endif 657235944Sbz 658235944Sbz le->m_head = m; 659235944Sbz le->m_tail = m_last(m); 660235944Sbz 661235944Sbz return (0); 662179737Sjfv} 663235944Sbz 664235944Sbz/* end */ 665